├── .editorconfig
├── .gitignore
├── data
├── cube.obj
├── sponza.obj
├── suzanne.obj
├── teapot.obj
└── triangle.obj
├── projects
├── VisualStudio
│ ├── TrimeshTracer.sln
│ ├── TrimeshTracer.vcxproj
│ ├── TrimeshTracer.vcxproj.filters
│ └── TrimeshTracer.vcxproj.user
└── Xcode
│ └── TrimeshTracer.xcodeproj
│ ├── project.pbxproj
│ └── xcshareddata
│ └── xcschemes
│ └── TrimeshTracer.xcscheme
├── readme.md
├── result1Triangle.png
├── result2Cube.png
├── result3Suzanne.png
├── result4Teapot.png
├── result5Sponza.png
├── runCases.cmd
├── runCases.sh
└── source
├── external
├── enkits
│ ├── Atomics.h
│ ├── LockLessMultiReadPipe.h
│ ├── TaskScheduler.cpp
│ ├── TaskScheduler.h
│ ├── TaskScheduler_c.cpp
│ ├── TaskScheduler_c.h
│ └── Threads.h
├── nanort.h
├── objparser-license.md
├── objparser.cpp
├── objparser.h
├── sokol_time.h
└── stb_image_write.h
├── main.cpp
├── maths.cpp
├── maths.h
├── scene.cpp
└── scene.h
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | end_of_line = lf
5 | charset = utf-8
6 | trim_trailing_whitespace = true
7 | insert_final_newline = true
8 |
9 | [*.{c,h,cpp,hpp,m,mm,cc,cs,hlsl,metal,ispc}]
10 | indent_style = space
11 | indent_size = 4
12 |
13 | [{Makefile,makefile}]
14 | indent_style = tab
15 |
16 | [*.{md,markdown}]
17 | trim_trailing_whitespace = false
18 |
19 | [*.{vcxproj,vcxproj.filters,csproj,props,targets}]
20 | indent_style = space
21 | indent_size = 2
22 | end_of_line = crlf
23 | charset = utf-8-bom
24 | trim_trailing_whitespace = true
25 | insert_final_newline = false
26 | [*.{sln,sln.template}]
27 | indent_style = tab
28 | indent_size = 4
29 | end_of_line = crlf
30 | trim_trailing_whitespace = true
31 | insert_final_newline = false
32 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build/*
2 | output*.png
3 | .vs
4 | .idea
5 | *.exe
6 | *.pdb
7 | project.xcworkspace
8 | xcschememanagement.plist
9 | xcuserdata
10 |
--------------------------------------------------------------------------------
/data/cube.obj:
--------------------------------------------------------------------------------
1 | # Unit-volume cube with the same texture coordinates on each face.
2 | #
3 | # Created by Morgan McGuire and released into the Public Domain on
4 | # July 16, 2011.
5 | #
6 | # http://graphics.cs.williams.edu/data
7 |
8 | mtllib default.mtl
9 |
10 | v -0.5 0.5 -0.5
11 | v -0.5 0.5 0.5
12 | v 0.5 0.5 0.5
13 | v 0.5 0.5 -0.5
14 | v -0.5 -0.5 -0.5
15 | v -0.5 -0.5 0.5
16 | v 0.5 -0.5 0.5
17 | v 0.5 -0.5 -0.5
18 |
19 | vt 0 1
20 | vt 0 0
21 | vt 1 0
22 | vt 1 1
23 |
24 | vn 0 1 0
25 | vn -1 0 0
26 | vn 1 0 0
27 | vn 0 0 -1
28 | vn 0 0 1
29 | vn 0 -1 0
30 |
31 | g cube
32 | usemtl default
33 |
34 | f -8/-4/-6 -7/-3/-6 -6/-2/-6
35 | f -8/-4/-6 -6/-2/-6 -5/-1/-6
36 | f -8/-4/-5 -4/-3/-5 -3/-2/-5
37 | f -8/-4/-5 -3/-2/-5 -7/-1/-5
38 | f -6/-4/-4 -2/-3/-4 -1/-2/-4
39 | f -6/-4/-4 -1/-2/-4 -5/-1/-4
40 | f -5/-4/-3 -1/-3/-3 -4/-2/-3
41 | f -5/-4/-3 -4/-2/-3 -8/-1/-3
42 | f -7/-4/-2 -3/-3/-2 -2/-2/-2
43 | f -7/-4/-2 -2/-2/-2 -6/-1/-2
44 | f -3/-4/-1 -4/-3/-1 -1/-2/-1
45 | f -3/-4/-1 -1/-2/-1 -2/-1/-1
46 |
--------------------------------------------------------------------------------
/data/triangle.obj:
--------------------------------------------------------------------------------
1 | v -0.5 0 -0.5
2 | v 0.5 0 0.5
3 | v 0 1 0
4 |
5 | vn 0 1 0
6 |
7 | f 1//1 2//1 3//1
8 |
--------------------------------------------------------------------------------
/projects/VisualStudio/TrimeshTracer.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 15
4 | VisualStudioVersion = 15.0.28307.168
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "TrimeshTracer", "TrimeshTracer.vcxproj", "{6121F087-18BF-4606-8D98-86026EE99198}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|x64 = Debug|x64
11 | Release|x64 = Release|x64
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {6121F087-18BF-4606-8D98-86026EE99198}.Debug|x64.ActiveCfg = Debug|x64
15 | {6121F087-18BF-4606-8D98-86026EE99198}.Debug|x64.Build.0 = Debug|x64
16 | {6121F087-18BF-4606-8D98-86026EE99198}.Release|x64.ActiveCfg = Release|x64
17 | {6121F087-18BF-4606-8D98-86026EE99198}.Release|x64.Build.0 = Release|x64
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ExtensibilityGlobals) = postSolution
23 | SolutionGuid = {8343A163-F549-4D47-9DE2-49DAEBEAACEC}
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/projects/VisualStudio/TrimeshTracer.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | x64
7 |
8 |
9 | Release
10 | x64
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 | 15.0
35 | {6121F087-18BF-4606-8D98-86026EE99198}
36 | Win32Proj
37 | TrimeshTracer
38 | 8.1
39 |
40 |
41 |
42 | Application
43 | true
44 | v141
45 | Unicode
46 |
47 |
48 | Application
49 | false
50 | true
51 | Unicode
52 | v141
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 | true
68 | $(SolutionDir)..\..\build\$(Platform)-$(Configuration)\
69 | $(SolutionDir)..\..\build\$(Platform)-$(Configuration)\
70 |
71 |
72 | false
73 | $(SolutionDir)..\..\build\$(Platform)-$(Configuration)\
74 | $(SolutionDir)..\..\build\$(Platform)-$(Configuration)\
75 |
76 |
77 |
78 | NotUsing
79 | Level4
80 | Disabled
81 | _DEBUG;_CONSOLE;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions)
82 | true
83 |
84 |
85 | false
86 | MultiThreadedDebug
87 | false
88 | false
89 | Fast
90 |
91 |
92 | Console
93 | true
94 |
95 |
96 |
97 |
98 | NotUsing
99 | Level4
100 | MaxSpeed
101 | true
102 | true
103 | NDEBUG;_CONSOLE;_HAS_EXCEPTIONS=0;%(PreprocessorDefinitions)
104 | true
105 |
106 |
107 | MultiThreaded
108 | false
109 | false
110 | Fast
111 |
112 |
113 | Console
114 | true
115 | true
116 | true
117 |
118 |
119 |
120 |
121 |
122 |
--------------------------------------------------------------------------------
/projects/VisualStudio/TrimeshTracer.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | external
9 |
10 |
11 | external\enkiTS
12 |
13 |
14 | external\enkiTS
15 |
16 |
17 |
18 |
19 | {d9924442-c84f-46f0-ba57-70ac8a1e6fc9}
20 |
21 |
22 | {d7a73072-b709-4163-8dcc-3f20912ce4f0}
23 |
24 |
25 |
26 |
27 |
28 |
29 | external
30 |
31 |
32 | external
33 |
34 |
35 | external
36 |
37 |
38 | external\enkiTS
39 |
40 |
41 | external\enkiTS
42 |
43 |
44 | external\enkiTS
45 |
46 |
47 | external\enkiTS
48 |
49 |
50 | external\enkiTS
51 |
52 |
53 |
--------------------------------------------------------------------------------
/projects/VisualStudio/TrimeshTracer.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | $(ProjectDir)..\..
5 | WindowsLocalDebugger
6 | 640 360 4 data/cube.obj
7 |
8 |
9 | $(ProjectDir)..\..
10 | WindowsLocalDebugger
11 | 640 360 4 data/cube.obj
12 |
13 |
--------------------------------------------------------------------------------
/projects/Xcode/TrimeshTracer.xcodeproj/project.pbxproj:
--------------------------------------------------------------------------------
1 | // !$*UTF8*$!
2 | {
3 | archiveVersion = 1;
4 | classes = {
5 | };
6 | objectVersion = 50;
7 | objects = {
8 |
9 | /* Begin PBXBuildFile section */
10 | 2BBF7DCA22492D4500FCABB2 /* scene.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF7DC522492D4500FCABB2 /* scene.cpp */; };
11 | 2BBF7DCB22492D4500FCABB2 /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF7DC822492D4500FCABB2 /* main.cpp */; };
12 | 2BBF7DCC22492D4500FCABB2 /* maths.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF7DC922492D4500FCABB2 /* maths.cpp */; };
13 | 2BBF7DE5224A6E8D00FCABB2 /* objparser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF7DE2224A6E8D00FCABB2 /* objparser.cpp */; };
14 | 2BBF7DEF224BB32000FCABB2 /* TaskScheduler_c.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF7DE8224BB32000FCABB2 /* TaskScheduler_c.cpp */; };
15 | 2BBF7DF0224BB32000FCABB2 /* TaskScheduler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BBF7DEE224BB32000FCABB2 /* TaskScheduler.cpp */; };
16 | /* End PBXBuildFile section */
17 |
18 | /* Begin PBXCopyFilesBuildPhase section */
19 | 2BBF7DB022492CB600FCABB2 /* CopyFiles */ = {
20 | isa = PBXCopyFilesBuildPhase;
21 | buildActionMask = 2147483647;
22 | dstPath = /usr/share/man/man1/;
23 | dstSubfolderSpec = 0;
24 | files = (
25 | );
26 | runOnlyForDeploymentPostprocessing = 1;
27 | };
28 | /* End PBXCopyFilesBuildPhase section */
29 |
30 | /* Begin PBXFileReference section */
31 | 2BBF7DB222492CB600FCABB2 /* TrimeshTracer */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = TrimeshTracer; sourceTree = BUILT_PRODUCTS_DIR; };
32 | 2BBF7DC522492D4500FCABB2 /* scene.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = scene.cpp; sourceTree = ""; };
33 | 2BBF7DC622492D4500FCABB2 /* maths.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = maths.h; sourceTree = ""; };
34 | 2BBF7DC722492D4500FCABB2 /* scene.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = scene.h; sourceTree = ""; };
35 | 2BBF7DC822492D4500FCABB2 /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = main.cpp; sourceTree = ""; };
36 | 2BBF7DC922492D4500FCABB2 /* maths.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = maths.cpp; sourceTree = ""; };
37 | 2BBF7DE1224A6E8D00FCABB2 /* objparser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = objparser.h; sourceTree = ""; };
38 | 2BBF7DE2224A6E8D00FCABB2 /* objparser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = objparser.cpp; sourceTree = ""; };
39 | 2BBF7DE3224A6E8D00FCABB2 /* sokol_time.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sokol_time.h; sourceTree = ""; };
40 | 2BBF7DE4224A6E8D00FCABB2 /* stb_image_write.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stb_image_write.h; sourceTree = ""; };
41 | 2BBF7DE8224BB32000FCABB2 /* TaskScheduler_c.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TaskScheduler_c.cpp; sourceTree = ""; };
42 | 2BBF7DE9224BB32000FCABB2 /* LockLessMultiReadPipe.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LockLessMultiReadPipe.h; sourceTree = ""; };
43 | 2BBF7DEA224BB32000FCABB2 /* TaskScheduler_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TaskScheduler_c.h; sourceTree = ""; };
44 | 2BBF7DEB224BB32000FCABB2 /* TaskScheduler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TaskScheduler.h; sourceTree = ""; };
45 | 2BBF7DEC224BB32000FCABB2 /* Threads.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Threads.h; sourceTree = ""; };
46 | 2BBF7DED224BB32000FCABB2 /* Atomics.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Atomics.h; sourceTree = ""; };
47 | 2BBF7DEE224BB32000FCABB2 /* TaskScheduler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TaskScheduler.cpp; sourceTree = ""; };
48 | 2BBF7DF3224CA0FE00FCABB2 /* nanort.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = nanort.h; sourceTree = ""; };
49 | /* End PBXFileReference section */
50 |
51 | /* Begin PBXFrameworksBuildPhase section */
52 | 2BBF7DAF22492CB600FCABB2 /* Frameworks */ = {
53 | isa = PBXFrameworksBuildPhase;
54 | buildActionMask = 2147483647;
55 | files = (
56 | );
57 | runOnlyForDeploymentPostprocessing = 0;
58 | };
59 | /* End PBXFrameworksBuildPhase section */
60 |
61 | /* Begin PBXGroup section */
62 | 2BBF7DA922492CB600FCABB2 = {
63 | isa = PBXGroup;
64 | children = (
65 | 2BBF7DB422492CB600FCABB2 /* TrimeshTracer */,
66 | 2BBF7DB322492CB600FCABB2 /* Products */,
67 | );
68 | sourceTree = "";
69 | };
70 | 2BBF7DB322492CB600FCABB2 /* Products */ = {
71 | isa = PBXGroup;
72 | children = (
73 | 2BBF7DB222492CB600FCABB2 /* TrimeshTracer */,
74 | );
75 | name = Products;
76 | sourceTree = "";
77 | };
78 | 2BBF7DB422492CB600FCABB2 /* TrimeshTracer */ = {
79 | isa = PBXGroup;
80 | children = (
81 | 2BBF7DCD22492D5700FCABB2 /* external */,
82 | 2BBF7DC822492D4500FCABB2 /* main.cpp */,
83 | 2BBF7DC922492D4500FCABB2 /* maths.cpp */,
84 | 2BBF7DC622492D4500FCABB2 /* maths.h */,
85 | 2BBF7DC522492D4500FCABB2 /* scene.cpp */,
86 | 2BBF7DC722492D4500FCABB2 /* scene.h */,
87 | );
88 | name = TrimeshTracer;
89 | path = ../../source;
90 | sourceTree = SOURCE_ROOT;
91 | };
92 | 2BBF7DCD22492D5700FCABB2 /* external */ = {
93 | isa = PBXGroup;
94 | children = (
95 | 2BBF7DF3224CA0FE00FCABB2 /* nanort.h */,
96 | 2BBF7DE7224BB32000FCABB2 /* enkits */,
97 | 2BBF7DE2224A6E8D00FCABB2 /* objparser.cpp */,
98 | 2BBF7DE1224A6E8D00FCABB2 /* objparser.h */,
99 | 2BBF7DE3224A6E8D00FCABB2 /* sokol_time.h */,
100 | 2BBF7DE4224A6E8D00FCABB2 /* stb_image_write.h */,
101 | );
102 | path = external;
103 | sourceTree = "";
104 | };
105 | 2BBF7DE7224BB32000FCABB2 /* enkits */ = {
106 | isa = PBXGroup;
107 | children = (
108 | 2BBF7DE8224BB32000FCABB2 /* TaskScheduler_c.cpp */,
109 | 2BBF7DE9224BB32000FCABB2 /* LockLessMultiReadPipe.h */,
110 | 2BBF7DEA224BB32000FCABB2 /* TaskScheduler_c.h */,
111 | 2BBF7DEB224BB32000FCABB2 /* TaskScheduler.h */,
112 | 2BBF7DEC224BB32000FCABB2 /* Threads.h */,
113 | 2BBF7DED224BB32000FCABB2 /* Atomics.h */,
114 | 2BBF7DEE224BB32000FCABB2 /* TaskScheduler.cpp */,
115 | );
116 | path = enkits;
117 | sourceTree = "";
118 | };
119 | /* End PBXGroup section */
120 |
121 | /* Begin PBXNativeTarget section */
122 | 2BBF7DB122492CB600FCABB2 /* TrimeshTracer */ = {
123 | isa = PBXNativeTarget;
124 | buildConfigurationList = 2BBF7DB922492CB600FCABB2 /* Build configuration list for PBXNativeTarget "TrimeshTracer" */;
125 | buildPhases = (
126 | 2BBF7DAE22492CB600FCABB2 /* Sources */,
127 | 2BBF7DAF22492CB600FCABB2 /* Frameworks */,
128 | 2BBF7DB022492CB600FCABB2 /* CopyFiles */,
129 | );
130 | buildRules = (
131 | );
132 | dependencies = (
133 | );
134 | name = TrimeshTracer;
135 | productName = TrimeshTracer;
136 | productReference = 2BBF7DB222492CB600FCABB2 /* TrimeshTracer */;
137 | productType = "com.apple.product-type.tool";
138 | };
139 | /* End PBXNativeTarget section */
140 |
141 | /* Begin PBXProject section */
142 | 2BBF7DAA22492CB600FCABB2 /* Project object */ = {
143 | isa = PBXProject;
144 | attributes = {
145 | LastUpgradeCheck = 1010;
146 | ORGANIZATIONNAME = "Unity Technologies";
147 | TargetAttributes = {
148 | 2BBF7DB122492CB600FCABB2 = {
149 | CreatedOnToolsVersion = 10.1;
150 | };
151 | };
152 | };
153 | buildConfigurationList = 2BBF7DAD22492CB600FCABB2 /* Build configuration list for PBXProject "TrimeshTracer" */;
154 | compatibilityVersion = "Xcode 9.3";
155 | developmentRegion = en;
156 | hasScannedForEncodings = 0;
157 | knownRegions = (
158 | en,
159 | );
160 | mainGroup = 2BBF7DA922492CB600FCABB2;
161 | productRefGroup = 2BBF7DB322492CB600FCABB2 /* Products */;
162 | projectDirPath = "";
163 | projectRoot = "";
164 | targets = (
165 | 2BBF7DB122492CB600FCABB2 /* TrimeshTracer */,
166 | );
167 | };
168 | /* End PBXProject section */
169 |
170 | /* Begin PBXSourcesBuildPhase section */
171 | 2BBF7DAE22492CB600FCABB2 /* Sources */ = {
172 | isa = PBXSourcesBuildPhase;
173 | buildActionMask = 2147483647;
174 | files = (
175 | 2BBF7DE5224A6E8D00FCABB2 /* objparser.cpp in Sources */,
176 | 2BBF7DEF224BB32000FCABB2 /* TaskScheduler_c.cpp in Sources */,
177 | 2BBF7DF0224BB32000FCABB2 /* TaskScheduler.cpp in Sources */,
178 | 2BBF7DCC22492D4500FCABB2 /* maths.cpp in Sources */,
179 | 2BBF7DCB22492D4500FCABB2 /* main.cpp in Sources */,
180 | 2BBF7DCA22492D4500FCABB2 /* scene.cpp in Sources */,
181 | );
182 | runOnlyForDeploymentPostprocessing = 0;
183 | };
184 | /* End PBXSourcesBuildPhase section */
185 |
186 | /* Begin XCBuildConfiguration section */
187 | 2BBF7DB722492CB600FCABB2 /* Debug */ = {
188 | isa = XCBuildConfiguration;
189 | buildSettings = {
190 | ALWAYS_SEARCH_USER_PATHS = NO;
191 | CLANG_ANALYZER_NONNULL = YES;
192 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
193 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
194 | CLANG_CXX_LIBRARY = "libc++";
195 | CLANG_ENABLE_MODULES = YES;
196 | CLANG_ENABLE_OBJC_ARC = YES;
197 | CLANG_ENABLE_OBJC_WEAK = YES;
198 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
199 | CLANG_WARN_BOOL_CONVERSION = YES;
200 | CLANG_WARN_COMMA = YES;
201 | CLANG_WARN_CONSTANT_CONVERSION = YES;
202 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
203 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
204 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
205 | CLANG_WARN_EMPTY_BODY = YES;
206 | CLANG_WARN_ENUM_CONVERSION = YES;
207 | CLANG_WARN_INFINITE_RECURSION = YES;
208 | CLANG_WARN_INT_CONVERSION = YES;
209 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
210 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
211 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
212 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
213 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
214 | CLANG_WARN_STRICT_PROTOTYPES = YES;
215 | CLANG_WARN_SUSPICIOUS_MOVE = YES;
216 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
217 | CLANG_WARN_UNREACHABLE_CODE = YES;
218 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
219 | CODE_SIGN_IDENTITY = "-";
220 | COPY_PHASE_STRIP = NO;
221 | DEBUG_INFORMATION_FORMAT = dwarf;
222 | ENABLE_STRICT_OBJC_MSGSEND = YES;
223 | ENABLE_TESTABILITY = YES;
224 | GCC_C_LANGUAGE_STANDARD = gnu11;
225 | GCC_DYNAMIC_NO_PIC = NO;
226 | GCC_NO_COMMON_BLOCKS = YES;
227 | GCC_OPTIMIZATION_LEVEL = 0;
228 | GCC_PREPROCESSOR_DEFINITIONS = (
229 | "DEBUG=1",
230 | "$(inherited)",
231 | );
232 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
233 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
234 | GCC_WARN_UNDECLARED_SELECTOR = YES;
235 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
236 | GCC_WARN_UNUSED_FUNCTION = YES;
237 | GCC_WARN_UNUSED_VARIABLE = YES;
238 | MACOSX_DEPLOYMENT_TARGET = 10.12;
239 | MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
240 | MTL_FAST_MATH = YES;
241 | ONLY_ACTIVE_ARCH = YES;
242 | SDKROOT = macosx;
243 | };
244 | name = Debug;
245 | };
246 | 2BBF7DB822492CB600FCABB2 /* Release */ = {
247 | isa = XCBuildConfiguration;
248 | buildSettings = {
249 | ALWAYS_SEARCH_USER_PATHS = NO;
250 | CLANG_ANALYZER_NONNULL = YES;
251 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
252 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
253 | CLANG_CXX_LIBRARY = "libc++";
254 | CLANG_ENABLE_MODULES = YES;
255 | CLANG_ENABLE_OBJC_ARC = YES;
256 | CLANG_ENABLE_OBJC_WEAK = YES;
257 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
258 | CLANG_WARN_BOOL_CONVERSION = YES;
259 | CLANG_WARN_COMMA = YES;
260 | CLANG_WARN_CONSTANT_CONVERSION = YES;
261 | CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
262 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
263 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
264 | CLANG_WARN_EMPTY_BODY = YES;
265 | CLANG_WARN_ENUM_CONVERSION = YES;
266 | CLANG_WARN_INFINITE_RECURSION = YES;
267 | CLANG_WARN_INT_CONVERSION = YES;
268 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
269 | CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
270 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
271 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
272 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
273 | CLANG_WARN_STRICT_PROTOTYPES = YES;
274 | CLANG_WARN_SUSPICIOUS_MOVE = YES;
275 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
276 | CLANG_WARN_UNREACHABLE_CODE = YES;
277 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
278 | CODE_SIGN_IDENTITY = "-";
279 | COPY_PHASE_STRIP = NO;
280 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
281 | ENABLE_NS_ASSERTIONS = NO;
282 | ENABLE_STRICT_OBJC_MSGSEND = YES;
283 | GCC_C_LANGUAGE_STANDARD = gnu11;
284 | GCC_NO_COMMON_BLOCKS = YES;
285 | GCC_PREPROCESSOR_DEFINITIONS = "NDEBUG=1";
286 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
287 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
288 | GCC_WARN_UNDECLARED_SELECTOR = YES;
289 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
290 | GCC_WARN_UNUSED_FUNCTION = YES;
291 | GCC_WARN_UNUSED_VARIABLE = YES;
292 | MACOSX_DEPLOYMENT_TARGET = 10.12;
293 | MTL_ENABLE_DEBUG_INFO = NO;
294 | MTL_FAST_MATH = YES;
295 | SDKROOT = macosx;
296 | };
297 | name = Release;
298 | };
299 | 2BBF7DBA22492CB600FCABB2 /* Debug */ = {
300 | isa = XCBuildConfiguration;
301 | buildSettings = {
302 | CODE_SIGN_STYLE = Automatic;
303 | PRODUCT_NAME = "$(TARGET_NAME)";
304 | };
305 | name = Debug;
306 | };
307 | 2BBF7DBB22492CB600FCABB2 /* Release */ = {
308 | isa = XCBuildConfiguration;
309 | buildSettings = {
310 | CODE_SIGN_STYLE = Automatic;
311 | PRODUCT_NAME = "$(TARGET_NAME)";
312 | };
313 | name = Release;
314 | };
315 | /* End XCBuildConfiguration section */
316 |
317 | /* Begin XCConfigurationList section */
318 | 2BBF7DAD22492CB600FCABB2 /* Build configuration list for PBXProject "TrimeshTracer" */ = {
319 | isa = XCConfigurationList;
320 | buildConfigurations = (
321 | 2BBF7DB722492CB600FCABB2 /* Debug */,
322 | 2BBF7DB822492CB600FCABB2 /* Release */,
323 | );
324 | defaultConfigurationIsVisible = 0;
325 | defaultConfigurationName = Release;
326 | };
327 | 2BBF7DB922492CB600FCABB2 /* Build configuration list for PBXNativeTarget "TrimeshTracer" */ = {
328 | isa = XCConfigurationList;
329 | buildConfigurations = (
330 | 2BBF7DBA22492CB600FCABB2 /* Debug */,
331 | 2BBF7DBB22492CB600FCABB2 /* Release */,
332 | );
333 | defaultConfigurationIsVisible = 0;
334 | defaultConfigurationName = Release;
335 | };
336 | /* End XCConfigurationList section */
337 | };
338 | rootObject = 2BBF7DAA22492CB600FCABB2 /* Project object */;
339 | }
340 |
--------------------------------------------------------------------------------
/projects/Xcode/TrimeshTracer.xcodeproj/xcshareddata/xcschemes/TrimeshTracer.xcscheme:
--------------------------------------------------------------------------------
1 |
2 |
5 |
8 |
9 |
15 |
21 |
22 |
23 |
24 |
25 |
30 |
31 |
32 |
33 |
39 |
40 |
41 |
42 |
43 |
44 |
55 |
57 |
63 |
64 |
65 |
66 |
69 |
70 |
73 |
74 |
77 |
78 |
81 |
82 |
85 |
86 |
87 |
88 |
89 |
90 |
96 |
98 |
104 |
105 |
106 |
107 |
109 |
110 |
113 |
114 |
115 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Toy Mesh Path Tracer for a job interview task
2 |
3 | I used the task below for some job interviews I did in 2019 Q2. The initial version was a super simple brute-force
4 | triangle mesh path tracer (no acceleration structures, no threading, etc.), and the task was to make it faster.
5 | Of course I wanted to know how much faster it *can* get, so I did some simple speedups myself in parallel.
6 |
7 | The original non-optimized-at-all version is at
8 | [`01-initial-job-task`](https://github.com/aras-p/ToyMeshPathTracer/tree/01-initial-job-task) tag.
9 | I made other tags for later snapshots; here they are with performance numbers *(measured on 2018 MacBookPro i9)*,
10 | on the `teapot.obj` (15706 triangles) and `sponza.obj` (66452 triangles) scenes respectively:
11 |
12 | * `01-initial-job-task`, initial: 3.5 and LOLNOPE Krays/s
13 | * `02-multi-threaded`, multi-threading: 18.9 and LOLNOPE Krays/s
14 | * `03-bvh`, simple bounding volume hierarchy: 6978.2 and 1350.3 Krays/s
15 | * `04-simd`, simplistic naïve SIMD: 8919.0 and 1853.1 Krays/s
16 | * `05-change-ray-tri-algo`, better ray-triangle intersection test: 10251.1 and 1952.5 Krays/s
17 | * `06-shadow-rays`, faster code path for shadow rays: 10888.0 and 2568.3 Krays/s
18 | * `07-compare-with-embree`, compare with [Intel Embree](https://embree.github.io/) 3.5.2: 75965.0 and 40800.8 Krays/s *(yup, Embree is fast!)*
19 | * `08-compare-with-nanort`, compare with [NanoRT](https://github.com/lighttransport/nanort): 20638.2 and 4197.9 Krays/s
20 |
21 | And yeah, I know -- most obvious next steps would be to use better BVH building heuristics (like SAH), and
22 | doing SIMD properly instead of "let's make our vector/ray/color class use SIMD". I did not get to that (yet?),
23 | but in any case -- the code is already over 3000 *times* faster than the initial version. With the BVH being the major
24 | win, as one would expect.
25 |
26 | *Original description of the interview task is below:*
27 |
28 | # Interview task/assignment: speed up a simple path tracer
29 |
30 | This project contains a simple triangle mesh path tracer implemented in C++.
31 | It's a command line application that takes screen size & input .OBJ data file parameters,
32 | renders it using "path tracing" algorithm and produces `output.png` result file.
33 |
34 | Here are some images that the program can produce with the data files present under `data/` directory:
35 |
36 | ![Suzanne](result3Suzanne.png "Suzanne")
37 | ![Sponza](result5Sponza.png "Sponza")
38 |
39 | ## The Task
40 |
41 | Current program is slow. *Really slow*. Rendering that monkey head model ("Suzanne") at a lowly 640x360 resolution,
42 | 4 samples per pixel, takes **one minute** on my PC! Rendering the other image ("Sponza") at the same resolution takes **five hours**.
43 |
44 | Your task then is, of course, to make the program faster :)
45 |
46 | I know for sure that it is possible to make it faster by *more than a hundred times* -- e.g. I got Suzanne from a minute down
47 | to 0.2 seconds, and Sponza from five hours down to 8 seconds. It might be possible to make it even faster, but I
48 | did not quite go there.
49 |
50 | Now, **your task is to make it run as fast as you can**. I'm not asking for a "hundred times", but something like
51 | "**at least ten times faster**" is what you should aim for.
52 |
53 | It's entirely your choice how you will do it. Better algorithms? More efficient math? Better data layout? Multi-threading? SIMD?
54 | Rewrite in assembly? Rewrite as a compute shader / CUDA / OpenCL? Rewrite for NVIDIA RTX? All of these? You pick :) Some of what I listed
55 | here is "certainly overkill" and not needed; achieving a 10x faster performance is entirely possible using relatively simple means.
56 |
57 | Go!
58 |
59 | #### What I will be looking at
60 |
61 | * Overall I would recommend making a clone of this project on github and using "actual version control" workflow to make your changes.
62 | If you don't like git or version control, that's fine; I can accept submissions in zip or dropbox or google drive (or whatever) form.
63 | As long as I can see the code and try it out.
64 | * Your optimized program should produce the same-looking images as the original one, just *do it faster*.
65 | * I'll be looking at "everything" that is important in a programming job: whether the code works correctly, is understandable,
66 | how it is structured, how it behaves performance-wise (computing usage, memory usage, etc.).
67 | * It is *very* likely that your first submission will not be quite good, so do not delay it until the last day! Usually it
68 | takes 2-4 iterations to arrive at a good solution.
69 |
70 |
71 |
72 | ## About the code
73 |
74 | I made it work on Windows (Visual Studio 2017) and Mac (Xcode 10); the project files for them are respectively in
75 | `projects/VisualStudio/TrimeshTracer.sln` and `projects/Xcode/TrimeshTracer.xcodeproj`. In the Visual Studio project, you might need to update settings to whatever Windows SDK version you have; I picked the oldest I had on my machine. If you have any trouble building or running it,
76 | ask me!
77 |
78 | The application accepts four command line arguments as input: `<width> <height> <spp> <datafile>`:
79 |
80 | * `width` is image width in pixels,
81 | * `height` is image height in pixels,
82 | * `spp` is "samples per pixel"; kind of like "anti-aliasing level",
83 | * `datafile` is path to a mesh to render; in Wavefront .OBJ format.
84 |
85 | I added some example meshes under `data/` folder; initially I suggest starting with e.g. `data/cube.obj` which is just a simple
86 | cube. I do *not* recommend trying to run the non-optimized version of the program on for example the Sponza model - it contains 66k
87 | triangles and will run *very very slow*.
88 |
89 | The code itself is fairly simple C++ code, and I tried to comment it extensively. No previous experience with ray-tracers
90 | or path-tracers should be required.
91 |
92 | If you *do* want to read up on what this "path tracing" thing is *(and maybe even get some ideas how to make it faster? who knows)*,
93 | I can recommend "Ray Tracing in One Weekend", "Ray Tracing: The Next Week" and "Ray Tracing: The Rest of Your Life" minibook series,
94 | which have been recently [made free here](http://www.realtimerendering.com/raytracing/). The internet is full of other information about ray/path tracing as well.
95 |
96 | For reference, here are the performance numbers I get on my laptop (2019 MacBookPro i9 2.9GHz), rendering at 640x360, 4 samples per pixel, on this un-optimized implementation:
97 |
98 | * cube.obj (14 triangles): 3818.3 KRays/s (0.6 sec)
99 | * suzanne.obj (970 triangles): 48.3 KRays/s (53.0 sec)
100 | * teapot.obj (15706 triangles): 3.5 KRays/s (683.0 sec)
101 | * sponza.obj (66452 triangles): LOL nope, ain't nobody got time for that
102 |
--------------------------------------------------------------------------------
/result1Triangle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aras-p/ToyMeshPathTracer/e74b743256ad11c5eb4be92a9dfe2badd6cb3d7d/result1Triangle.png
--------------------------------------------------------------------------------
/result2Cube.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aras-p/ToyMeshPathTracer/e74b743256ad11c5eb4be92a9dfe2badd6cb3d7d/result2Cube.png
--------------------------------------------------------------------------------
/result3Suzanne.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aras-p/ToyMeshPathTracer/e74b743256ad11c5eb4be92a9dfe2badd6cb3d7d/result3Suzanne.png
--------------------------------------------------------------------------------
/result4Teapot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aras-p/ToyMeshPathTracer/e74b743256ad11c5eb4be92a9dfe2badd6cb3d7d/result4Teapot.png
--------------------------------------------------------------------------------
/result5Sponza.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aras-p/ToyMeshPathTracer/e74b743256ad11c5eb4be92a9dfe2badd6cb3d7d/result5Sponza.png
--------------------------------------------------------------------------------
/runCases.cmd:
--------------------------------------------------------------------------------
1 | @echo off
2 | rem (build\x64-Release\TrimeshTracer.exe 640 360 4 data/triangle.obj) && (move /Y output.png output1Triangle.png)
3 | (build\x64-Release\TrimeshTracer.exe 640 360 4 data/cube.obj) && (move /Y output.png output2Cube.png)
4 | (build\x64-Release\TrimeshTracer.exe 640 360 4 data/suzanne.obj) && (move /Y output.png output3Suzanne.png)
5 | (build\x64-Release\TrimeshTracer.exe 640 360 4 data/teapot.obj) && (move /Y output.png output4Teapot.png)
6 | (build\x64-Release\TrimeshTracer.exe 640 360 4 data/sponza.obj) && (move /Y output.png output5Sponza.png)
7 | pause
8 |
--------------------------------------------------------------------------------
/runCases.sh:
--------------------------------------------------------------------------------
1 | #(build/TrimeshTracer/Build/Products/Release/TrimeshTracer 640 360 4 data/triangle.obj) && (mv output.png output1Triangle.png)
2 | (build/TrimeshTracer/Build/Products/Release/TrimeshTracer 640 360 4 data/cube.obj) && (mv output.png output2Cube.png) # each case: run the tracer on one mesh and, only if it succeeds (&&), rename output.png to a per-case name
3 | (build/TrimeshTracer/Build/Products/Release/TrimeshTracer 640 360 4 data/suzanne.obj) && (mv output.png output3Suzanne.png) # NOTE(review): "640 360 4" is presumably width, height and samples per pixel - confirm against main.cpp
4 | (build/TrimeshTracer/Build/Products/Release/TrimeshTracer 640 360 4 data/teapot.obj) && (mv output.png output4Teapot.png)
5 | (build/TrimeshTracer/Build/Products/Release/TrimeshTracer 640 360 4 data/sponza.obj) && (mv output.png output5Sponza.png)
6 |
--------------------------------------------------------------------------------
/source/external/enkits/Atomics.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013 Doug Binks
2 | //
3 | // This software is provided 'as-is', without any express or implied
4 | // warranty. In no event will the authors be held liable for any damages
5 | // arising from the use of this software.
6 | //
7 | // Permission is granted to anyone to use this software for any purpose,
8 | // including commercial applications, and to alter it and redistribute it
9 | // freely, subject to the following restrictions:
10 | //
11 | // 1. The origin of this software must not be misrepresented; you must not
12 | // claim that you wrote the original software. If you use this software
13 | // in a product, an acknowledgement in the product documentation would be
14 | // appreciated but is not required.
15 | // 2. Altered source versions must be plainly marked as such, and must not be
16 | // misrepresented as being the original software.
17 | // 3. This notice may not be removed or altered from any source distribution.
18 |
19 | #pragma once
20 |
21 | #include
22 |
23 | #ifdef _WIN32 // Windows build: MSVC intrinsics and __declspec syntax
24 | #define WIN32_LEAN_AND_MEAN
25 | #include
26 | #undef GetObject
27 | #include
28 | // NOTE(review): the header names on the two #include lines above appear to have been lost in this copy of the file - restore them from the upstream sources
29 | extern "C" void _ReadWriteBarrier();
30 | #pragma intrinsic(_ReadWriteBarrier)
31 | #pragma intrinsic(_InterlockedCompareExchange)
32 | #pragma intrinsic(_InterlockedExchangeAdd)
33 |
34 | // Memory barriers: both variants expand to compiler-only barriers (no CPU fence instruction is emitted); they stop the compiler re-ordering memory accesses across them. NOTE(review): hardware ordering is left to the target's memory model - confirm before relying on these on weakly-ordered CPUs
35 | #define BASE_MEMORYBARRIER_ACQUIRE() _ReadWriteBarrier()
36 | #define BASE_MEMORYBARRIER_RELEASE() _ReadWriteBarrier()
37 | #define BASE_ALIGN(x) __declspec( align( x ) )
38 |
39 | #else // every other platform: GCC-style inline asm and __attribute__ syntax
40 | #define BASE_MEMORYBARRIER_ACQUIRE() __asm__ __volatile__("": : :"memory")
41 | #define BASE_MEMORYBARRIER_RELEASE() __asm__ __volatile__("": : :"memory")
42 | #define BASE_ALIGN(x) __attribute__ ((aligned( x )))
43 | #endif
44 |
45 | namespace enki
46 | { // thin portable wrappers over the platform's atomic read-modify-write primitives
47 | // Atomically performs: if( *pDest == compareWith ) { *pDest = swapTo; }
48 | // returns old *pDest (so if successful, returns compareWith)
49 | inline uint32_t AtomicCompareAndSwap( volatile uint32_t* pDest, uint32_t swapTo, uint32_t compareWith )
50 | {
51 | #ifdef _WIN32
52 | // assumes two's complement - unsigned / signed conversion leads to same bit pattern
53 | return _InterlockedCompareExchange( (volatile long*)pDest,swapTo, compareWith );
54 | #else
55 | return __sync_val_compare_and_swap( pDest, compareWith, swapTo ); // note the builtin takes (ptr, expected, desired) - the reverse of this wrapper's last two parameters
56 | #endif
57 | }
58 | // 64-bit overload of the compare-and-swap above; same contract, returns the previous *pDest
59 | inline uint64_t AtomicCompareAndSwap( volatile uint64_t* pDest, uint64_t swapTo, uint64_t compareWith )
60 | {
61 | #ifdef _WIN32
62 | // assumes two's complement - unsigned / signed conversion leads to same bit pattern
63 | return _InterlockedCompareExchange64( (__int64 volatile*)pDest, swapTo, compareWith );
64 | #else
65 | return __sync_val_compare_and_swap( pDest, compareWith, swapTo ); // (ptr, expected, desired)
66 | #endif
67 | }
68 |
69 | // Atomically performs: tmp = *pDest; *pDest += value; return tmp; (i.e. returns the value held BEFORE the add)
70 | inline int32_t AtomicAdd( volatile int32_t* pDest, int32_t value )
71 | {
72 | #ifdef _WIN32
73 | return _InterlockedExchangeAdd( (long*)pDest, value );
74 | #else
75 | return __sync_fetch_and_add( pDest, value );
76 | #endif
77 | }
78 |
79 | }
--------------------------------------------------------------------------------
/source/external/enkits/LockLessMultiReadPipe.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013 Doug Binks
2 | //
3 | // This software is provided 'as-is', without any express or implied
4 | // warranty. In no event will the authors be held liable for any damages
5 | // arising from the use of this software.
6 | //
7 | // Permission is granted to anyone to use this software for any purpose,
8 | // including commercial applications, and to alter it and redistribute it
9 | // freely, subject to the following restrictions:
10 | //
11 | // 1. The origin of this software must not be misrepresented; you must not
12 | // claim that you wrote the original software. If you use this software
13 | // in a product, an acknowledgement in the product documentation would be
14 | // appreciated but is not required.
15 | // 2. Altered source versions must be plainly marked as such, and must not be
16 | // misrepresented as being the original software.
17 | // 3. This notice may not be removed or altered from any source distribution.
18 |
19 | #pragma once
20 |
21 | #include
22 | #include
23 |
24 | #include "Atomics.h"
25 | #include
26 |
27 |
28 | namespace enki
29 | {
30 | // LockLessMultiReadPipe - Single writer, multiple reader thread safe pipe using (semi) lockless programming
31 | // Readers can only read from the back of the pipe
32 | // The single writer can write to the front of the pipe, and read from both ends (a writer can be a reader)
33 | // for many of the principles used here, see http://msdn.microsoft.com/en-us/library/windows/desktop/ee418650(v=vs.85).aspx
34 | // Note: the capacity is a power of two (1 << cSizeLog2), so indices wrap with a bit mask instead of a clamp / modulus
35 | // T is the contained type
36 | // Note: this is not truly lockless, as the per-slot flags act as a form of lock state. NOTE(review): the template parameter list (T, cSizeLog2) seems to have been lost from the next line in this copy
37 | template class LockLessMultiReadPipe
38 | {
39 | public:
40 | LockLessMultiReadPipe();
41 | ~LockLessMultiReadPipe() {}
42 |
43 | // ReaderTryReadBack returns false if we were unable to read
44 | // This is thread safe for both multiple readers and the writer
45 | bool ReaderTryReadBack( T* pOut );
46 |
47 | // WriterTryReadFront returns false if we were unable to read
48 | // This is thread safe for the single writer, but should not be called by readers
49 | bool WriterTryReadFront( T* pOut );
50 |
51 | // WriterTryWriteFront returns false if we were unable to write
52 | // This is thread safe for the single writer, but should not be called by readers
53 | bool WriterTryWriteFront( const T& in );
54 |
55 | // IsPipeEmpty() is a utility function, not intended for general use
56 | // Should only be used very prudently: it compares two counters that other threads may be changing, so the answer is only a snapshot.
57 | bool IsPipeEmpty() const
58 | {
59 | return 0 == m_WriteIndex - m_ReadCount;
60 | }
61 | // Clear() resets all counters and marks every slot writable; it uses plain stores, so presumably it must not race with readers/writers - confirm with callers
62 | void Clear()
63 | {
64 | m_WriteIndex = 0;
65 | m_ReadIndex = 0;
66 | m_ReadCount = 0;
67 | memset( (void*)m_Flags, 0, sizeof( m_Flags ) );
68 | }
69 |
70 | private:
71 | const static uint32_t ms_cSize = ( 1 << cSizeLog2 ); // number of slots
72 | const static uint32_t ms_cIndexMask = ms_cSize - 1; // index & mask == index % ms_cSize
73 | const static uint32_t FLAG_INVALID = 0xFFFFFFFF; // 32bit for CAS - slot has been claimed by a reader that is still copying it out
74 | const static uint32_t FLAG_CAN_WRITE = 0x00000000; // 32bit for CAS - slot is free for the writer
75 | const static uint32_t FLAG_CAN_READ = 0x11111111; // 32bit for CAS - slot holds an unread item
76 |
77 | T m_Buffer[ ms_cSize ];
78 |
79 | // read and write indexes allow fast access to the pipe, but actual access
80 | // controlled by the access flags.
81 | volatile uint32_t BASE_ALIGN(4) m_WriteIndex; // advanced by WriterTryWriteFront, decremented by WriterTryReadFront
82 | volatile uint32_t BASE_ALIGN(4) m_ReadCount; // incremented atomically by ReaderTryReadBack for every item it removes
83 | volatile uint32_t m_Flags[ ms_cSize ]; // one FLAG_* state per slot of m_Buffer
84 | volatile uint32_t BASE_ALIGN(4) m_ReadIndex; // set by WriterTryReadFront when it finds the pipe empty; readers restart their scan from it
85 | };
86 | // Constructs an empty pipe: all counters are zero and every slot is flagged FLAG_CAN_WRITE.
87 | template inline
88 | LockLessMultiReadPipe::LockLessMultiReadPipe()
89 | : m_WriteIndex(0)
90 | , m_ReadCount(0) // initializers are listed in member declaration order (m_WriteIndex, m_ReadCount, m_Flags, m_ReadIndex),
91 | , m_ReadIndex(0) // which is the order they run in regardless of how they are written; this also avoids -Wreorder warnings
92 | {
93 | assert( cSizeLog2 < 32 ); // ms_cSize is ( 1 << cSizeLog2 ) held in a uint32_t, so the shift must stay below 32
94 | memset( (void*)m_Flags, 0, sizeof( m_Flags ) ); // FLAG_CAN_WRITE is 0x00000000, so a zero fill marks every slot writable
95 | }
96 | // Tries to remove one item from the pipe into *pOut. Returns false when the pipe holds no items, true once an item has been copied out.
97 | template inline
98 | bool LockLessMultiReadPipe::ReaderTryReadBack( T* pOut )
99 | {
100 |
101 | uint32_t actualReadIndex;
102 |
103 | uint32_t readCount = m_ReadCount;
104 |
105 | // We take a snapshot of the read count for consistency,
106 | // and do the first pass starting at that position
107 | uint32_t readIndexToUse = readCount;
108 |
109 | // scan forward until we manage to claim a slot that is flagged FLAG_CAN_READ
110 | while(true)
111 | {
112 |
113 | uint32_t writeIndex = m_WriteIndex;
114 | // power of two sizes ensures we can use a simple calc without modulus
115 | uint32_t numInPipe = writeIndex - readCount;
116 | if( 0 == numInPipe )
117 | {
118 | return false; // nothing left to read
119 | }
120 | if( readIndexToUse >= writeIndex )
121 | {
122 | // ran past the newest item: move back to the start position last published in m_ReadIndex
123 | readIndexToUse = m_ReadIndex;
124 | }
125 |
126 |
127 | // power of two sizes ensures we can perform AND for a modulus
128 | actualReadIndex = readIndexToUse & ms_cIndexMask;
129 |
130 | // Multiple potential readers mean we should check if the data is valid,
131 | // using an atomic compare exchange: CAN_READ -> INVALID claims the slot for this thread only
132 | uint32_t previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ );
133 | if( FLAG_CAN_READ == previous )
134 | {
135 | break; // the slot was readable and is now ours
136 | }
137 | ++readIndexToUse; // slot was empty or claimed by someone else - try the next one
138 |
139 | //update known readcount
140 | readCount = m_ReadCount;
141 | }
142 |
143 | // we update the read count using an atomic add, as we've only read one piece of data.
144 | // this ensures consistency of the read count, and the above loop ensures readers
145 | // only read from unread data
146 | AtomicAdd( (volatile int32_t*)&m_ReadCount, 1 );
147 |
148 | BASE_MEMORYBARRIER_ACQUIRE();
149 | // now read data, ensuring we do so after above reads & CAS
150 | *pOut = m_Buffer[ actualReadIndex ];
151 | // hand the slot back to the writer only after the copy above
152 | m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
153 |
154 | return true;
155 | }
156 | // Writer-only: tries to take back the most recently written item that is still unread. Returns false if none could be claimed.
157 | template inline
158 | bool LockLessMultiReadPipe::WriterTryReadFront( T* pOut )
159 | {
160 | uint32_t writeIndex = m_WriteIndex;
161 | uint32_t frontReadIndex = writeIndex; // one past the newest item; stepped backwards in the loop below
162 |
163 | // Multiple potential readers mean we should check if the data is valid,
164 | // using an atomic compare exchange - which acts as a form of lock (so not quite lockless really).
165 | uint32_t previous = FLAG_INVALID;
166 | uint32_t actualReadIndex = 0;
167 | while( true )
168 | {
169 | // power of two sizes ensures we can use a simple calc without modulus
170 | uint32_t readCount = m_ReadCount;
171 | uint32_t numInPipe = writeIndex - readCount;
172 | if( 0 == numInPipe || 0 == frontReadIndex )
173 | {
174 | // frontReadIndex can get to 0 here if that item was just being read by another thread.
175 | m_ReadIndex = readCount; // publish where readers should restart their scan
176 | return false;
177 | }
178 | --frontReadIndex;
179 | actualReadIndex = frontReadIndex & ms_cIndexMask;
180 | previous = AtomicCompareAndSwap( &m_Flags[ actualReadIndex ], FLAG_INVALID, FLAG_CAN_READ ); // CAN_READ -> INVALID claims the slot
181 | if( FLAG_CAN_READ == previous )
182 | {
183 | break;
184 | }
185 | else if( m_ReadIndex >= frontReadIndex )
186 | {
187 | return false; // walked back to the readers' start position without claiming anything
188 | }
189 | }
190 |
191 | // now read data, ensuring we do so after above reads & CAS
192 | *pOut = m_Buffer[ actualReadIndex ];
193 |
194 | m_Flags[ actualReadIndex ] = FLAG_CAN_WRITE;
195 |
196 | BASE_MEMORYBARRIER_RELEASE();
197 |
198 | // 32-bit aligned stores are atomic, and writer owns the write index
199 | // we only move one back as this is as many as we have read, not where we have read from.
200 | --m_WriteIndex;
201 | return true;
202 | }
203 |
204 | // Writer-only: tries to append a copy of 'in' at the front of the pipe. Returns false when the target slot is not yet free (pipe full).
205 | template inline
206 | bool LockLessMultiReadPipe::WriterTryWriteFront( const T& in )
207 | {
208 | // The writer 'owns' the write index, and readers can only reduce
209 | // the amount of data in the pipe.
210 | // We take a local copy of the write index for consistency and to reduce false sharing
211 | // impacting more than one access
212 | uint32_t writeIndex = m_WriteIndex;
213 |
214 |
215 | // power of two sizes ensures we can perform AND for a modulus
216 | uint32_t actualWriteIndex = writeIndex & ms_cIndexMask;
217 |
218 | // a reader may still be reading this item, as there are multiple readers
219 | if( m_Flags[ actualWriteIndex ] != FLAG_CAN_WRITE )
220 | {
221 | return false; // slot is unread or still being read, so we have caught up with the tail.
222 | }
223 |
224 |
225 | // as we are the only writer we can update the data without atomics
226 | // whilst the write index has not been updated
227 | m_Buffer[ actualWriteIndex ] = in;
228 | m_Flags[ actualWriteIndex ] = FLAG_CAN_READ;
229 |
230 | // We need to ensure the above writes occur prior to updating the write index,
231 | // otherwise another thread might read before it's finished
232 | BASE_MEMORYBARRIER_RELEASE();
233 |
234 | // 32-bit aligned stores are atomic, and the writer controls the write index
235 | ++writeIndex;
236 | m_WriteIndex = writeIndex;
237 | return true;
238 | }
239 |
240 | }
241 |
--------------------------------------------------------------------------------
/source/external/enkits/TaskScheduler.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013 Doug Binks
2 | //
3 | // This software is provided 'as-is', without any express or implied
4 | // warranty. In no event will the authors be held liable for any damages
5 | // arising from the use of this software.
6 | //
7 | // Permission is granted to anyone to use this software for any purpose,
8 | // including commercial applications, and to alter it and redistribute it
9 | // freely, subject to the following restrictions:
10 | //
11 | // 1. The origin of this software must not be misrepresented; you must not
12 | // claim that you wrote the original software. If you use this software
13 | // in a product, an acknowledgement in the product documentation would be
14 | // appreciated but is not required.
15 | // 2. Altered source versions must be plainly marked as such, and must not be
16 | // misrepresented as being the original software.
17 | // 3. This notice may not be removed or altered from any source distribution.
18 |
19 | #include
20 |
21 | #include "TaskScheduler.h"
22 | #include "LockLessMultiReadPipe.h"
23 |
24 |
25 |
26 | using namespace enki;
27 |
28 |
29 | static const uint32_t PIPESIZE_LOG2 = 8; // NOTE(review): presumably the log2 capacity of each TaskPipe (its template arguments are missing in this copy) - confirm
30 | static const uint32_t SPIN_COUNT = 100; // failed TryRunTask attempts a worker makes before it blocks in WaitForTasks
31 | static const uint32_t SPIN_BACKOFF_MULTIPLIER = 10; // a worker calls Pause() spinCount * this many times between attempts
32 | static const uint32_t MAX_NUM_INITIAL_PARTITIONS = 8; // upper bound applied to m_NumInitialPartitions in StartThreads
33 |
34 | // each software thread gets its own copy of gtl_threadNum, so this is safe to use as a static variable
35 | static THREAD_LOCAL uint32_t gtl_threadNum = 0;
36 |
37 | namespace enki
38 | {
39 | struct SubTaskSet // one queued unit of work: a task set plus the slice of its range still to be processed
40 | {
41 | ITaskSet* pTask;
42 | TaskSetPartition partition;
43 | };
44 |
45 | // we derive class TaskPipe rather than typedef to get forward declaration working easily. NOTE(review): the template arguments of the base class appear to have been lost in this copy
46 | class TaskPipe : public LockLessMultiReadPipe {};
47 |
48 | struct ThreadArgs // start-up arguments handed to TaskingThreadFunction
49 | {
50 | uint32_t threadNum;
51 | TaskScheduler* pTaskScheduler;
52 | };
53 | }
54 |
55 | namespace
56 | { // file-local helpers
57 | SubTaskSet SplitTask( SubTaskSet& subTask_, uint32_t rangeToSplit_ )
58 | { // Cuts up to rangeToSplit_ items off the start of subTask_ and returns them as a new sub task; subTask_ keeps the remainder.
59 | SubTaskSet splitTask = subTask_;
60 | uint32_t rangeLeft = subTask_.partition.end - subTask_.partition.start;
61 | // never hand out more than is left
62 | if( rangeToSplit_ > rangeLeft )
63 | {
64 | rangeToSplit_ = rangeLeft;
65 | }
66 | splitTask.partition.end = subTask_.partition.start + rangeToSplit_;
67 | subTask_.partition.start = splitTask.partition.end; // remainder starts where the split-off piece ends
68 | return splitTask;
69 | }
70 | // Pause(): spin-wait hint used by the worker back-off loop. Every target must get exactly one definition.
71 | #if defined _WIN32 && ( defined _M_IX86 || defined _M_X64 )
72 | // MSVC on x86 / x64: use the pause intrinsic
73 | #pragma intrinsic(_mm_pause)
74 | inline void Pause() { _mm_pause(); }
75 | // other _WIN32 targets (e.g. ARM, or GCC-style compilers) previously got no Pause() at all; they now use the branches below
76 | #elif defined __i386__ || defined __x86_64__
77 | inline void Pause() { __asm__ __volatile__("pause;"); }
78 | #else
79 | inline void Pause() { ;} // may have NOP or yield equiv
80 | #endif
81 | }
82 |
83 | // Invokes an optional profiler hook with the given thread number; does nothing when no hook is installed.
84 | static void SafeCallback(ProfilerCallbackFunc func_, uint32_t threadnum_)
85 | {
86 | // a null pointer means the hook has not been set
87 | if( !func_ ) { return; }
88 |
89 | func_(threadnum_);
90 | }
91 | // Returns a pointer to the scheduler's own callback table so that callers can fill in the hooks in place.
92 | ProfilerCallbacks* TaskScheduler::GetProfilerCallbacks()
93 | {
94 | return &m_ProfilerCallbacks;
95 | }
96 | // Worker thread entry point. pArgs points at this thread's ThreadArgs; the loop runs tasks until the scheduler clears m_bRunning.
97 | THREADFUNC_DECL TaskScheduler::TaskingThreadFunction( void* pArgs )
98 | {
99 | ThreadArgs args = *(ThreadArgs*)pArgs;
100 | uint32_t threadNum = args.threadNum;
101 | TaskScheduler* pTS = args.pTaskScheduler;
102 | gtl_threadNum = threadNum; // remember this thread's number in thread-local storage
103 |
104 | SafeCallback( pTS->m_ProfilerCallbacks.threadStart, threadNum );
105 |
106 | uint32_t spinCount = 0;
107 | uint32_t hintPipeToCheck_io = threadNum + 1; // does not need to be clamped (TryRunTask applies % m_NumThreads).
108 | while( pTS->m_bRunning )
109 | {
110 | if(!pTS->TryRunTask( threadNum, hintPipeToCheck_io ) )
111 | {
112 | // no tasks, will spin then wait
113 | ++spinCount;
114 | if( spinCount > SPIN_COUNT )
115 | {
116 | pTS->WaitForTasks( threadNum ); // spun long enough: block until new work is signalled
117 | spinCount = 0;
118 | }
119 | else
120 | {
121 | uint32_t spinBackoffCount = spinCount * SPIN_BACKOFF_MULTIPLIER; // back-off grows linearly with consecutive misses
122 | while( spinBackoffCount )
123 | {
124 | Pause();
125 | --spinBackoffCount;
126 | }
127 | }
128 | }
129 | else
130 | {
131 | spinCount = 0; // ran a task, so restart the back-off
132 | }
133 | }
134 | // tell StopThreads (which polls m_NumThreadsRunning) that this worker has left its loop
135 | AtomicAdd( &pTS->m_NumThreadsRunning, -1 );
136 | SafeCallback( pTS->m_ProfilerCallbacks.threadStop, threadNum );
137 |
138 | return 0;
139 | }
140 |
141 | // Creates the semaphore and the m_NumThreads - 1 worker threads, then derives the partition counts. Does nothing if threads already exist.
142 | void TaskScheduler::StartThreads()
143 | {
144 | if( m_bHaveThreads )
145 | {
146 | return;
147 | }
148 | m_bRunning = true;
149 |
150 | SemaphoreCreate( m_NewTaskSemaphore );
151 |
152 | // we create one less thread than m_NumThreads as the main thread counts as one
153 | m_pThreadNumStore = new ThreadArgs[m_NumThreads];
154 | m_pThreadIDs = new threadid_t[m_NumThreads];
155 | m_pThreadNumStore[0].threadNum = 0;
156 | m_pThreadNumStore[0].pTaskScheduler = this;
157 | m_pThreadIDs[0] = 0;
158 | m_NumThreadsWaiting = 0;
159 | m_NumThreadsRunning = 1;// account for main thread
160 | for( uint32_t thread = 1; thread < m_NumThreads; ++thread )
161 | {
162 | m_pThreadNumStore[thread].threadNum = thread;
163 | m_pThreadNumStore[thread].pTaskScheduler = this;
164 | ThreadCreate( &m_pThreadIDs[thread], TaskingThreadFunction, &m_pThreadNumStore[thread] );
165 | ++m_NumThreadsRunning;
166 | }
167 |
168 | // ensure we have sufficient tasks to equally fill either all threads including main
169 | // or just the threads we've launched, this is outside the firstinit as we want to be able
170 | // to runtime change it
171 | if( 1 == m_NumThreads )
172 | {
173 | m_NumPartitions = 1;
174 | m_NumInitialPartitions = 1;
175 | }
176 | else
177 | {
178 | m_NumPartitions = m_NumThreads * (m_NumThreads - 1);
179 | m_NumInitialPartitions = m_NumThreads - 1;
180 | if( m_NumInitialPartitions > MAX_NUM_INITIAL_PARTITIONS )
181 | {
182 | m_NumInitialPartitions = MAX_NUM_INITIAL_PARTITIONS;
183 | }
184 | }
185 |
186 | m_bHaveThreads = true;
187 | }
188 | // Stops the worker threads and releases the per-thread bookkeeping. With bWait_ true it first spins until every worker has left its loop.
189 | void TaskScheduler::StopThreads( bool bWait_ )
190 | {
191 | if( m_bHaveThreads )
192 | {
193 | // wait for the threads to quit before deleting data
194 | m_bRunning = false;
195 | while( bWait_ && m_NumThreadsRunning > 1 )
196 | {
197 | // keep firing event to ensure all threads pick up state of m_bRunning
198 | SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsRunning );
199 | }
200 | // NOTE(review): ThreadTerminate is defined outside this file; when bWait_ is false the workers may still be running here - confirm this is intended
201 | for( uint32_t thread = 1; thread < m_NumThreads; ++thread )
202 | {
203 | ThreadTerminate( m_pThreadIDs[thread] );
204 | }
205 |
206 | m_NumThreads = 0;
207 | delete[] m_pThreadNumStore;
208 | delete[] m_pThreadIDs;
209 | m_pThreadNumStore = 0;
210 | m_pThreadIDs = 0;
211 | SemaphoreClose( m_NewTaskSemaphore );
212 |
213 | m_bHaveThreads = false;
214 | m_NumThreadsWaiting = 0;
215 | m_NumThreadsRunning = 0;
216 | }
217 | }
218 | // Tries to fetch and execute one sub task: first from this thread's own pipe, then from the other threads' pipes. Returns true if a task was run.
219 | bool TaskScheduler::TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ )
220 | {
221 | // check for tasks in our own pipe first (we are its writer, so we read from the front)
222 | SubTaskSet subTask;
223 | bool bHaveTask = m_pPipesPerThread[ threadNum ].WriterTryReadFront( &subTask );
224 |
225 | uint32_t threadToCheck = hintPipeToCheck_io_;
226 | uint32_t checkCount = 0;
227 | while( !bHaveTask && checkCount < m_NumThreads )
228 | {
229 | threadToCheck = ( hintPipeToCheck_io_ + checkCount ) % m_NumThreads; // start at the hint and wrap around all pipes
230 | if( threadToCheck != threadNum )
231 | {
232 | bHaveTask = m_pPipesPerThread[ threadToCheck ].ReaderTryReadBack( &subTask );
233 | }
234 | ++checkCount;
235 | }
236 |
237 | if( bHaveTask )
238 | {
239 | // update hint, will preserve value unless actually got task from another thread.
240 | hintPipeToCheck_io_ = threadToCheck;
241 |
242 | uint32_t partitionSize = subTask.partition.end - subTask.partition.start;
243 | if( subTask.pTask->m_RangeToRun < partitionSize )
244 | {
245 | SubTaskSet taskToRun = SplitTask( subTask, subTask.pTask->m_RangeToRun ); // keep one m_RangeToRun sized piece for ourselves
246 | SplitAndAddTask( gtl_threadNum, subTask, subTask.pTask->m_RangeToRun, 0 ); // re-queue the remainder in pieces of the same size
247 | taskToRun.pTask->ExecuteRange( taskToRun.partition, threadNum );
248 | AtomicAdd( &taskToRun.pTask->m_RunningCount, -1 ); // the fetched sub task is now finished
249 | }
250 | else
251 | {
252 |
253 | // the task has already been divided up by AddTaskSetToPipe, so just run it
254 | subTask.pTask->ExecuteRange( subTask.partition, threadNum );
255 | AtomicAdd( &subTask.pTask->m_RunningCount, -1 );
256 | }
257 | }
258 |
259 | return bHaveTask;
260 |
261 | }
262 | // Blocks the calling worker on the new-task semaphore, unless some pipe still holds work. threadNum is only passed to the profiler hooks.
263 | void TaskScheduler::WaitForTasks( uint32_t threadNum )
264 | {
265 | // We increment the number of threads waiting here in order
266 | // to ensure that the check for tasks occurs after the increment
267 | // to prevent a task being added after a check, then the thread waiting.
268 | // This will occasionally result in threads being mistakenly awoken,
269 | // but they will then go back to sleep.
270 | AtomicAdd( &m_NumThreadsWaiting, 1 );
271 |
272 | bool bHaveTasks = false;
273 | for( uint32_t thread = 0; thread < m_NumThreads; ++thread )
274 | {
275 | if( !m_pPipesPerThread[ thread ].IsPipeEmpty() )
276 | {
277 | bHaveTasks = true;
278 | break;
279 | }
280 | }
281 | if( !bHaveTasks )
282 | {
283 | SafeCallback( m_ProfilerCallbacks.waitStart, threadNum );
284 | SemaphoreWait( m_NewTaskSemaphore );
285 | SafeCallback( m_ProfilerCallbacks.waitStop, threadNum );
286 | }
287 |
288 | int32_t prev = AtomicAdd( &m_NumThreadsWaiting, -1 );
289 | (void)prev; // silences the unused-variable warning when assert is compiled out
290 | assert( prev != 0 );
291 | }
292 | // Signals the new-task semaphore once for every thread currently counted in m_NumThreadsWaiting.
293 | void TaskScheduler::WakeThreads()
294 | {
295 | SemaphoreSignal( m_NewTaskSemaphore, m_NumThreadsWaiting );
296 | }
297 | // Cuts subTask_ into pieces of at most rangeToSplit_ items and queues them; pieces that do not fit in the pipe are executed inline on this thread.
298 | void TaskScheduler::SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_,
299 | uint32_t rangeToSplit_, int32_t runningCountOffset_ )
300 | {
301 | int32_t numAdded = 0;
302 | while( subTask_.partition.start != subTask_.partition.end )
303 | {
304 | SubTaskSet taskToAdd = SplitTask( subTask_, rangeToSplit_ );
305 |
306 | // add the partition to the pipe. NOTE(review): the pipe is chosen via gtl_threadNum, not threadNum_; both visible callers pass gtl_threadNum, so they agree today
307 | ++numAdded;
308 | if( !m_pPipesPerThread[ gtl_threadNum ].WriterTryWriteFront( taskToAdd ) )
309 | {
310 | if( numAdded > 1 )
311 | {
312 | WakeThreads(); // pipe is full: make sure sleeping workers start draining what was queued so far
313 | }
314 | // alter range to run the appropriate fraction
315 | if( taskToAdd.pTask->m_RangeToRun < rangeToSplit_ )
316 | {
317 | taskToAdd.partition.end = taskToAdd.partition.start + taskToAdd.pTask->m_RangeToRun;
318 | subTask_.partition.start = taskToAdd.partition.end; // hand the unexecuted tail back to the loop
319 | }
320 | taskToAdd.pTask->ExecuteRange( taskToAdd.partition, threadNum_ );
321 | --numAdded; // executed inline, so it was never queued
322 | }
323 | }
324 |
325 | // increment running count by number added (plus the caller supplied offset)
326 | AtomicAdd( &subTask_.pTask->m_RunningCount, numAdded + runningCountOffset_ );
327 |
328 | WakeThreads();
329 | }
330 | // Queues pTaskSet for execution: computes its per-run and initial split sizes, then splits the whole range [0, m_SetSize) into the calling thread's pipe.
331 | void TaskScheduler::AddTaskSetToPipe( ITaskSet* pTaskSet )
332 | {
333 | // set running count to -1 to guarantee it won't be found complete until all subtasks added
334 | pTaskSet->m_RunningCount = -1;
335 |
336 | // divide task up and add to pipe
337 | pTaskSet->m_RangeToRun = pTaskSet->m_SetSize / m_NumPartitions;
338 | if( pTaskSet->m_RangeToRun < pTaskSet->m_MinRange ) { pTaskSet->m_RangeToRun = pTaskSet->m_MinRange; }
339 | if( 0 == pTaskSet->m_RangeToRun ) { pTaskSet->m_RangeToRun = 1; } // a zero range (possible when m_MinRange == 0) never advances in SplitTask, so splitting would loop forever
340 | uint32_t rangeToSplit = pTaskSet->m_SetSize / m_NumInitialPartitions;
341 | if( rangeToSplit < pTaskSet->m_MinRange ) { rangeToSplit = pTaskSet->m_MinRange; }
342 | if( 0 == rangeToSplit ) { rangeToSplit = 1; } // same guard for the initial split; an empty set is still fine because SplitTask clamps to the range left
343 | SubTaskSet subTask;
344 | subTask.pTask = pTaskSet;
345 | subTask.partition.start = 0;
346 | subTask.partition.end = pTaskSet->m_SetSize;
347 | SplitAndAddTask( gtl_threadNum, subTask, rangeToSplit, 1 ); // the offset of 1 cancels the -1 set above once everything is queued
348 | }
349 | // Helps run queued work until pTaskSet has finished; with a null pTaskSet it makes a single attempt to run one task.
350 | void TaskScheduler::WaitforTaskSet( const ITaskSet* pTaskSet )
351 | {
352 | // start looking at the neighbouring pipe; the value does not need to be clamped.
353 | uint32_t pipeHint = gtl_threadNum + 1;
354 |
355 | if( !pTaskSet )
356 | {
357 | // nothing specific to wait for: try once, then return
358 | TryRunTask( gtl_threadNum, pipeHint );
359 | return;
360 | }
361 |
362 | // keep this thread busy with queued tasks until the set's running count drops to zero
363 | while( pTaskSet->m_RunningCount ) { TryRunTask( gtl_threadNum, pipeHint ); }
364 | // should add a spin then wait for task completion event.
365 | }
366 | // Runs tasks on the calling thread until every pipe is empty and all other running threads are counted as waiting.
367 | void TaskScheduler::WaitforAll()
368 | {
369 | bool bHaveTasks = true;
370 | uint32_t hintPipeToCheck_io = gtl_threadNum + 1; // does not need to be clamped.
371 | int32_t threadsRunning = m_NumThreadsRunning - 1; // every running thread except the caller
372 | while( bHaveTasks || m_NumThreadsWaiting < threadsRunning )
373 | {
374 | TryRunTask( gtl_threadNum, hintPipeToCheck_io );
375 | bHaveTasks = false; // re-evaluated from the pipes below on every pass
376 | for( uint32_t thread = 0; thread < m_NumThreads; ++thread )
377 | {
378 | if( !m_pPipesPerThread[ thread ].IsPipeEmpty() )
379 | {
380 | bHaveTasks = true;
381 | break;
382 | }
383 | }
384 | }
385 | }
386 | // Drains all outstanding work, stops the worker threads (waiting for them) and frees the per-thread pipes.
387 | void TaskScheduler::WaitforAllAndShutdown()
388 | {
389 | WaitforAll();
390 | StopThreads(true);
391 | delete[] m_pPipesPerThread;
392 | m_pPipesPerThread = 0; // keeps the later delete[] in the destructor harmless
393 | }
394 | // Returns m_NumThreads: the thread count given to Initialize (it includes the calling thread); StopThreads resets it to 0.
395 | uint32_t TaskScheduler::GetNumTaskThreads() const
396 | {
397 | return m_NumThreads;
398 | }
399 | // Constructs an idle scheduler with no threads, no pipes and no profiler hooks; call Initialize before adding tasks.
400 | TaskScheduler::TaskScheduler()
401 | : m_pPipesPerThread(NULL)
402 | , m_NumThreads(0)
403 | , m_pThreadNumStore(NULL)
404 | , m_pThreadIDs(NULL)
405 | , m_bRunning(false)
406 | , m_NumThreadsRunning(0)
407 | , m_NumThreadsWaiting(0)
408 | , m_NumPartitions(0), m_NumInitialPartitions(0) // m_NumInitialPartitions was previously left indeterminate until StartThreads ran
409 | , m_bHaveThreads(false)
410 | {
411 | memset(&m_ProfilerCallbacks, 0, sizeof(m_ProfilerCallbacks)); // all hooks start out null, which SafeCallback treats as "not installed"
412 | }
413 | // Destructor: stops the worker threads and frees the pipes. It does not wait for queued tasks to finish first.
414 | TaskScheduler::~TaskScheduler()
415 | {
416 | StopThreads( true ); // Stops threads, waiting for them.
417 |
418 | delete[] m_pPipesPerThread;
419 | m_pPipesPerThread = 0;
420 | }
421 | // (Re)initializes the scheduler for numThreads_ threads in total (must be > 0): stops any existing workers, rebuilds the pipes, starts new workers.
422 | void TaskScheduler::Initialize( uint32_t numThreads_ )
423 | {
424 | assert( numThreads_ );
425 | StopThreads( true ); // Stops threads, waiting for them.
426 | delete[] m_pPipesPerThread;
427 | m_pPipesPerThread = 0; // do not leave a dangling pointer behind if the allocation below throws (the destructor would delete it again)
428 | m_NumThreads = numThreads_;
429 |
430 | m_pPipesPerThread = new TaskPipe[ m_NumThreads ]; // one pipe per thread, including the calling thread
431 |
432 | StartThreads();
433 | }
434 | // Convenience overload: initializes with the value returned by GetNumHardwareThreads().
435 | void TaskScheduler::Initialize()
436 | {
437 | Initialize( GetNumHardwareThreads() );
438 | }
--------------------------------------------------------------------------------
/source/external/enkits/TaskScheduler.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013 Doug Binks
2 | //
3 | // This software is provided 'as-is', without any express or implied
4 | // warranty. In no event will the authors be held liable for any damages
5 | // arising from the use of this software.
6 | //
7 | // Permission is granted to anyone to use this software for any purpose,
8 | // including commercial applications, and to alter it and redistribute it
9 | // freely, subject to the following restrictions:
10 | //
11 | // 1. The origin of this software must not be misrepresented; you must not
12 | // claim that you wrote the original software. If you use this software
13 | // in a product, an acknowledgement in the product documentation would be
14 | // appreciated but is not required.
15 | // 2. Altered source versions must be plainly marked as such, and must not be
16 | // misrepresented as being the original software.
17 | // 3. This notice may not be removed or altered from any source distribution.
18 |
19 | #pragma once
20 |
21 | #include
22 | #include "Threads.h"
23 |
24 | namespace enki
25 | {
26 | // Half-open index range [start, end) of a task set; the scheduler computes its length as end - start.
27 | struct TaskSetPartition
28 | {
29 | uint32_t start; // first index in the range
30 | uint32_t end; // one past the last index in the range
31 | };
32 |
33 | class TaskScheduler;
34 | class TaskPipe;
35 | struct ThreadArgs;
36 | struct SubTaskSet;
37 |
38 | // Subclass ITaskSet to create tasks.
39 | // TaskSets can be re-used, but check GetIsComplete() first so that a set is never re-added while it is still running.
40 | class ITaskSet
41 | {
42 | public:
43 | ITaskSet()
44 | : m_SetSize(1)
45 | , m_MinRange(1)
46 | , m_RunningCount(0)
47 | , m_RangeToRun(1)
48 | {}
49 |
50 | ITaskSet( uint32_t setSize_ )
51 | : m_SetSize( setSize_ )
52 | , m_MinRange(1)
53 | , m_RunningCount(0)
54 | , m_RangeToRun(1)
55 | {}
56 |
57 | ITaskSet( uint32_t setSize_, uint32_t minRange_ )
58 | : m_SetSize( setSize_ )
59 | , m_MinRange( minRange_ )
60 | , m_RunningCount(0)
61 | , m_RangeToRun(minRange_)
62 | {}
63 | virtual ~ITaskSet() {} // virtual so that deleting a derived task set through an ITaskSet pointer is well defined
64 | // Execute range should be overloaded to process tasks. It will be called with a
65 | // range_ where range.start >= 0; range.start < range.end; and range.end <= m_SetSize;
66 | // The range values should be mapped so that linearly processing them in order is cache friendly
67 | // i.e. neighbouring values should be close together.
68 | // threadnum should not be used for changing processing of data, it's intended purpose
69 | // is to allow per-thread data buckets for output.
70 | virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum ) = 0;
71 |
72 | // Size of set - usually the number of data items to be processed, see ExecuteRange. Defaults to 1
73 | uint32_t m_SetSize;
74 |
75 | // Minimum size of of TaskSetPartition range when splitting a task set into partitions.
76 | // This should be set to a value which results in computation effort of at least 10k
77 | // clock cycles to minimize task scheduler overhead.
78 | // NOTE: The last partition will be smaller than m_MinRange if m_SetSize is not a multiple
79 | // of m_MinRange.
80 | // Also known as grain size in literature.
81 | uint32_t m_MinRange;
82 | // Returns true once no sub tasks of this set are queued or running (the running count is zero).
83 | bool GetIsComplete() const
84 | {
85 | return 0 == m_RunningCount;
86 | }
87 | private:
88 | friend class TaskScheduler;
89 | volatile int32_t m_RunningCount; // sub tasks queued or executing; maintained by TaskScheduler
90 | uint32_t m_RangeToRun; // number of items a thread executes per fetched sub task; set by TaskScheduler
91 | };
92 |
93 | // TaskScheduler implements several callbacks intended for profilers; each receives the number of the thread it is called on
94 | typedef void (*ProfilerCallbackFunc)( uint32_t threadnum_ );
95 | struct ProfilerCallbacks
96 | {
97 | ProfilerCallbackFunc threadStart; // called by a worker thread before it enters its task loop
98 | ProfilerCallbackFunc threadStop; // called by a worker thread after it has left its task loop
99 | ProfilerCallbackFunc waitStart; // called just before a worker blocks on the new-task semaphore
100 | ProfilerCallbackFunc waitStop; // called right after that wait returns
101 | };
102 | // TaskScheduler - owns the worker threads and the per-thread task pipes, and distributes ITaskSet ranges across them.
103 | class TaskScheduler
104 | {
105 | public:
106 | TaskScheduler();
107 | ~TaskScheduler();
108 |
109 | // Call either Initialize() or Initialize( numThreads_ ) before adding tasks.
110 |
111 | // Initialize() will create GetNumHardwareThreads()-1 threads, which is
112 | // sufficient to fill the system when including the main thread.
113 | // Initialize can be called multiple times - it stops the existing threads
114 | // (waiting for them to exit) before re-initializing.
115 | void Initialize();
116 |
117 | // Initialize( numThreads_ ) - numThreads_ (must be > 0)
118 | // will create numThreads_-1 threads, as thread 0 is
119 | // the thread on which the initialize was called.
120 | void Initialize( uint32_t numThreads_ );
121 |
122 |
123 | // Splits the TaskSet into partitions and adds them to the calling thread's pipe.
124 | // Any partition that does not fit because the pipe is full is run immediately on the calling thread.
125 | // should only be called from main thread, or within a task
126 | void AddTaskSetToPipe( ITaskSet* pTaskSet );
127 |
128 | // Runs the TaskSets in pipe until true == pTaskSet->GetIsComplete();
129 | // should only be called from thread which created the taskscheduler , or within a task
130 | // if called with 0 it will try to run tasks, and return if none available.
131 | void WaitforTaskSet( const ITaskSet* pTaskSet );
132 |
133 | // Waits for all task sets to complete - not guaranteed to work unless we know we
134 | // are in a situation where tasks aren't being continuously added.
135 | void WaitforAll();
136 |
137 | // Waits for all task sets to complete and shutdown threads - not guaranteed to work unless we know we
138 | // are in a situation where tasks aren't being continuously added.
139 | void WaitforAllAndShutdown();
140 |
141 | // Returns the number of threads created for running tasks + 1
142 | // to account for the main thread.
143 | uint32_t GetNumTaskThreads() const;
144 |
145 | // Returns the ProfilerCallbacks structure so that it can be modified to
146 | // set the callbacks.
147 | ProfilerCallbacks* GetProfilerCallbacks();
148 |
149 | private:
150 | static THREADFUNC_DECL TaskingThreadFunction( void* pArgs ); // worker thread entry point
151 | void WaitForTasks( uint32_t threadNum ); // blocks a worker until new tasks are signalled
152 | bool TryRunTask( uint32_t threadNum, uint32_t& hintPipeToCheck_io_ ); // runs one sub task if any pipe has one
153 | void StartThreads();
154 | void StopThreads( bool bWait_ );
155 | void SplitAndAddTask( uint32_t threadNum_, SubTaskSet subTask_,
156 | uint32_t rangeToSplit_, int32_t runningCountOffset_ );
157 | void WakeThreads();
158 |
159 | TaskPipe* m_pPipesPerThread; // array of m_NumThreads pipes, indexed by thread number
160 |
161 | uint32_t m_NumThreads; // total thread count including the thread that called Initialize
162 | ThreadArgs* m_pThreadNumStore; // per-thread start-up arguments
163 | threadid_t* m_pThreadIDs;
164 | volatile bool m_bRunning; // workers keep looping while this is true
165 | volatile int32_t m_NumThreadsRunning;
166 | volatile int32_t m_NumThreadsWaiting; // threads currently inside WaitForTasks
167 | uint32_t m_NumPartitions; // divisor used to derive a task set's per-run range
168 | uint32_t m_NumInitialPartitions; // divisor used for the first split when a task set is added
169 | semaphoreid_t m_NewTaskSemaphore;
170 | bool m_bHaveThreads;
171 | ProfilerCallbacks m_ProfilerCallbacks;
172 | // copying is disabled: declared private and not implemented in this file
173 | TaskScheduler( const TaskScheduler& nocopy );
174 | TaskScheduler& operator=( const TaskScheduler& nocopy );
175 | };
176 |
177 | }
--------------------------------------------------------------------------------
/source/external/enkits/TaskScheduler_c.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013 Doug Binks
2 | //
3 | // This software is provided 'as-is', without any express or implied
4 | // warranty. In no event will the authors be held liable for any damages
5 | // arising from the use of this software.
6 | //
7 | // Permission is granted to anyone to use this software for any purpose,
8 | // including commercial applications, and to alter it and redistribute it
9 | // freely, subject to the following restrictions:
10 | //
11 | // 1. The origin of this software must not be misrepresented; you must not
12 | // claim that you wrote the original software. If you use this software
13 | // in a product, an acknowledgement in the product documentation would be
14 | // appreciated but is not required.
15 | // 2. Altered source versions must be plainly marked as such, and must not be
16 | // misrepresented as being the original software.
17 | // 3. This notice may not be removed or altered from any source distribution.
18 |
19 | #include "TaskScheduler_c.h"
20 | #include "TaskScheduler.h"
21 |
22 | #include <assert.h>
23 |
24 | using namespace enki;
25 |
26 | struct enkiTaskScheduler : TaskScheduler
27 | {
28 | };
29 |
30 | struct enkiTaskSet : ITaskSet
31 | {
32 | enkiTaskSet( enkiTaskExecuteRange taskFun_ ) : taskFun(taskFun_), pArgs(NULL) {}
33 |
34 | virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum )
35 | {
36 | taskFun( range.start, range.end, threadnum, pArgs );
37 | }
38 |
39 | enkiTaskExecuteRange taskFun;
40 | void* pArgs;
41 | };
42 |
43 | enkiTaskScheduler* enkiNewTaskScheduler()
44 | {
45 | enkiTaskScheduler* pETS = new enkiTaskScheduler();
46 | return pETS;
47 | }
48 |
49 | void enkiInitTaskScheduler( enkiTaskScheduler* pETS_ )
50 | {
51 | pETS_->Initialize();
52 | }
53 |
54 | void enkiInitTaskSchedulerNumThreads( enkiTaskScheduler* pETS_, uint32_t numThreads_ )
55 | {
56 | pETS_->Initialize( numThreads_ );
57 | }
58 |
59 | void enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ )
60 | {
61 | delete pETS_;
62 | }
63 |
64 | enkiTaskSet* enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_ )
65 | {
66 | (void)pETS_;
67 | return new enkiTaskSet( taskFunc_ );
68 | }
69 |
70 | void enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ )
71 | {
72 | delete pTaskSet_;
73 | }
74 |
75 | void enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_, void* pArgs_, uint32_t setSize_ )
76 | {
77 | assert( pTaskSet_ );
78 | assert( pTaskSet_->taskFun );
79 |
80 | pTaskSet_->m_SetSize = setSize_;
81 | pTaskSet_->pArgs = pArgs_;
82 | pETS_->AddTaskSetToPipe( pTaskSet_ );
83 | }
84 |
85 | void enkiAddTaskSetToPipeMinRange(enkiTaskScheduler * pETS_, enkiTaskSet * pTaskSet_, void * pArgs_, uint32_t setSize_, uint32_t minRange_)
86 | {
87 | assert( pTaskSet_ );
88 | assert( pTaskSet_->taskFun );
89 |
90 | pTaskSet_->m_SetSize = setSize_;
91 | pTaskSet_->m_MinRange = minRange_;
92 | pTaskSet_->pArgs = pArgs_;
93 | pETS_->AddTaskSetToPipe( pTaskSet_ );
94 | }
95 |
96 | int enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ )
97 | {
98 | (void)pETS_;
99 | assert( pTaskSet_ );
100 | return ( pTaskSet_->GetIsComplete() ) ? 1 : 0;
101 | }
102 |
103 | void enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ )
104 | {
105 | pETS_->WaitforTaskSet( pTaskSet_ );
106 | }
107 |
108 | void enkiWaitForAll( enkiTaskScheduler* pETS_ )
109 | {
110 | pETS_->WaitforAll();
111 | }
112 |
113 |
114 | uint32_t enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ )
115 | {
116 | return pETS_->GetNumTaskThreads();
117 | }
118 |
119 | enkiProfilerCallbacks* enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ )
120 | {
121 | assert( sizeof(enkiProfilerCallbacks) == sizeof(enki::ProfilerCallbacks) );
122 | return (enkiProfilerCallbacks*)pETS_->GetProfilerCallbacks();
123 | }
124 |
125 |
--------------------------------------------------------------------------------
/source/external/enkits/TaskScheduler_c.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013 Doug Binks
2 | //
3 | // This software is provided 'as-is', without any express or implied
4 | // warranty. In no event will the authors be held liable for any damages
5 | // arising from the use of this software.
6 | //
7 | // Permission is granted to anyone to use this software for any purpose,
8 | // including commercial applications, and to alter it and redistribute it
9 | // freely, subject to the following restrictions:
10 | //
11 | // 1. The origin of this software must not be misrepresented; you must not
12 | // claim that you wrote the original software. If you use this software
13 | // in a product, an acknowledgement in the product documentation would be
14 | // appreciated but is not required.
15 | // 2. Altered source versions must be plainly marked as such, and must not be
16 | // misrepresented as being the original software.
17 | // 3. This notice may not be removed or altered from any source distribution.
18 |
19 | #pragma once
20 |
21 | #ifdef __cplusplus
22 | extern "C" {
23 | #endif
24 |
25 | #include <stdint.h>
26 |
27 | typedef struct enkiTaskScheduler enkiTaskScheduler;
28 | typedef struct enkiTaskSet enkiTaskSet;
29 |
30 | typedef void (* enkiTaskExecuteRange)( uint32_t start_, uint32_t end, uint32_t threadnum_, void* pArgs_ );
31 |
32 |
33 | // Create a new task scheduler
34 | enkiTaskScheduler* enkiNewTaskScheduler();
35 |
36 | // Initialize task scheduler - will create GetNumHardwareThreads()-1 threads, which is
37 | // sufficient to fill the system when including the main thread.
38 | // Initialize can be called multiple times - it will wait for completion
39 | // before re-initializing.
40 | void enkiInitTaskScheduler( enkiTaskScheduler* pETS_ );
41 |
42 | // Initialize a task scheduler with numThreads_ (must be > 0)
43 | // will create numThreads_-1 threads, as thread 0 is
44 | // the thread on which the initialize was called.
45 | void enkiInitTaskSchedulerNumThreads( enkiTaskScheduler* pETS_, uint32_t numThreads_ );
46 |
47 |
48 | // Delete a task scheduler
49 | void enkiDeleteTaskScheduler( enkiTaskScheduler* pETS_ );
50 |
51 | // Create a task set.
52 | enkiTaskSet* enkiCreateTaskSet( enkiTaskScheduler* pETS_, enkiTaskExecuteRange taskFunc_ );
53 |
54 | // Delete a task set.
55 | void enkiDeleteTaskSet( enkiTaskSet* pTaskSet_ );
56 |
57 | // Schedule the task
58 | void enkiAddTaskSetToPipe( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
59 | void* pArgs_, uint32_t setSize_ );
60 |
61 | // Schedule the task with a minimum range.
62 | // minRange_ should be set to a value which results in computation effort of at least 10k
63 | // clock cycles to minimize task scheduler overhead.
64 | // NOTE: The last partition will be smaller than minRange_ if setSize_ is not a multiple
65 | // of minRange_.
66 | // Also known as grain size in literature.
67 | void enkiAddTaskSetToPipeMinRange( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_,
68 | void* pArgs_, uint32_t setSize_, uint32_t minRange_ );
69 |
70 |
71 | // Check if TaskSet is complete. Doesn't wait. Returns 1 if complete, 0 if not.
72 | int enkiIsTaskSetComplete( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );
73 |
74 |
75 | // Wait for a given task.
76 | // should only be called from thread which created the taskscheduler , or within a task
77 | // if called with 0 it will try to run tasks, and return if none available.
78 | void enkiWaitForTaskSet( enkiTaskScheduler* pETS_, enkiTaskSet* pTaskSet_ );
79 |
80 |
81 | // Waits for all task sets to complete - not guaranteed to work unless we know we
82 | // are in a situation where tasks aren't being continuously added.
83 | void enkiWaitForAll( enkiTaskScheduler* pETS_ );
84 |
85 |
86 | // get number of threads
87 | uint32_t enkiGetNumTaskThreads( enkiTaskScheduler* pETS_ );
88 |
89 | // TaskScheduler implements several callbacks intended for profilers
90 | typedef void (*enkiProfilerCallbackFunc)( uint32_t threadnum_ );
91 | struct enkiProfilerCallbacks
92 | {
93 | enkiProfilerCallbackFunc threadStart;
94 | enkiProfilerCallbackFunc threadStop;
95 | enkiProfilerCallbackFunc waitStart;
96 | enkiProfilerCallbackFunc waitStop;
97 | };
98 |
99 | // Get the callback structure so it can be set
100 | struct enkiProfilerCallbacks* enkiGetProfilerCallbacks( enkiTaskScheduler* pETS_ );
101 |
102 | #ifdef __cplusplus
103 | }
104 | #endif
--------------------------------------------------------------------------------
/source/external/enkits/Threads.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2013 Doug Binks
2 | //
3 | // This software is provided 'as-is', without any express or implied
4 | // warranty. In no event will the authors be held liable for any damages
5 | // arising from the use of this software.
6 | //
7 | // Permission is granted to anyone to use this software for any purpose,
8 | // including commercial applications, and to alter it and redistribute it
9 | // freely, subject to the following restrictions:
10 | //
11 | // 1. The origin of this software must not be misrepresented; you must not
12 | // claim that you wrote the original software. If you use this software
13 | // in a product, an acknowledgement in the product documentation would be
14 | // appreciated but is not required.
15 | // 2. Altered source versions must be plainly marked as such, and must not be
16 | // misrepresented as being the original software.
17 | // 3. This notice may not be removed or altered from any source distribution.
18 |
19 | #pragma once
20 |
21 | #include <stdint.h>
22 | #include <assert.h>
23 |
24 | #ifdef _WIN32
25 |
26 | #include "Atomics.h"
27 |
28 | #define WIN32_LEAN_AND_MEAN
29 | #include <Windows.h>
30 |
31 | #define THREADFUNC_DECL DWORD WINAPI
32 | #define THREAD_LOCAL __declspec( thread )
33 |
34 | namespace enki
35 | {
36 | typedef HANDLE threadid_t;
37 |
38 | // declare the thread start function as:
39 | // THREADFUNC_DECL MyThreadStart( void* pArg );
40 | inline bool ThreadCreate( threadid_t* returnid, DWORD ( WINAPI *StartFunc) (void* ), void* pArg )
41 | {
42 | // posix equiv pthread_create
43 | DWORD threadid;
44 | *returnid = CreateThread( 0, 0, StartFunc, pArg, 0, &threadid );
45 | return *returnid != NULL;
46 | }
47 |
48 | inline bool ThreadTerminate( threadid_t threadid )
49 | {
50 | // posix equiv pthread_cancel
51 | return CloseHandle( threadid ) == 0;
52 | }
53 |
54 | inline uint32_t GetNumHardwareThreads()
55 | {
56 | SYSTEM_INFO sysInfo;
57 | GetSystemInfo(&sysInfo);
58 | return sysInfo.dwNumberOfProcessors;
59 | }
60 | }
61 |
62 | #else // posix
63 |
64 | #include <pthread.h>
65 | #include <unistd.h>
66 | #define THREADFUNC_DECL void*
67 | #define THREAD_LOCAL __thread
68 |
namespace enki
{
    // On POSIX a thread is identified by its pthread_t.
    typedef pthread_t threadid_t;

    // Starts a new thread running StartFunc( pArg ) and stores its id in
    // *returnid. Returns true when pthread_create succeeded.
    // Declare the thread start function as:
    //   THREADFUNC_DECL MyThreadStart( void* pArg );
    inline bool ThreadCreate( threadid_t* returnid, void* ( *StartFunc) (void* ), void* pArg )
    {
        // posix equiv pthread_create
        int32_t retval = pthread_create( returnid, NULL, StartFunc, pArg );

        return retval == 0;
    }

    // Requests cancellation of the thread. Returns true when the request
    // was accepted by pthread_cancel.
    inline bool ThreadTerminate( threadid_t threadid )
    {
        // posix equiv pthread_cancel
        return pthread_cancel( threadid ) == 0;
    }

    // Number of processors currently online, never less than 1.
    // sysconf returns -1 when the value cannot be determined; casting that
    // straight to uint32_t would report ~4 billion hardware threads.
    inline uint32_t GetNumHardwareThreads()
    {
        const long numOnline = sysconf( _SC_NPROCESSORS_ONLN );
        if( numOnline < 1 )
        {
            return 1;
        }
        return (uint32_t)numOnline;
    }
}
94 |
95 | #endif // posix
96 |
97 |
98 | // Semaphore implementation
99 | #ifdef _WIN32
100 |
101 | namespace enki
102 | {
103 | struct semaphoreid_t
104 | {
105 | HANDLE sem;
106 | };
107 |
108 | inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
109 | {
110 | semaphoreid.sem = CreateSemaphore(NULL, 0, MAXLONG, NULL );
111 | }
112 |
113 | inline void SemaphoreClose( semaphoreid_t& semaphoreid )
114 | {
115 | CloseHandle( semaphoreid.sem );
116 | }
117 |
118 | inline void SemaphoreWait( semaphoreid_t& semaphoreid )
119 | {
120 | DWORD retval = WaitForSingleObject( semaphoreid.sem, INFINITE );
121 | (void)retval;
122 |
123 | assert( retval != WAIT_FAILED );
124 | }
125 |
126 | inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
127 | {
128 | if( countWaiting )
129 | {
130 | ReleaseSemaphore( semaphoreid.sem, countWaiting, NULL );
131 | }
132 | }
133 | }
134 | #elif defined(__MACH__)
135 |
136 | // OS X does not have POSIX semaphores
137 | // see https://developer.apple.com/library/content/documentation/Darwin/Conceptual/KernelProgramming/synchronization/synchronization.html
138 | #include <mach/mach.h>
139 |
140 | namespace enki
141 | {
142 |
143 | struct semaphoreid_t
144 | {
145 | semaphore_t sem;
146 | };
147 |
148 | inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
149 | {
150 | semaphore_create( mach_task_self(), &semaphoreid.sem, SYNC_POLICY_FIFO, 0 );
151 | }
152 |
153 | inline void SemaphoreClose( semaphoreid_t& semaphoreid )
154 | {
155 | semaphore_destroy( mach_task_self(), semaphoreid.sem );
156 | }
157 |
158 | inline void SemaphoreWait( semaphoreid_t& semaphoreid )
159 | {
160 | semaphore_wait( semaphoreid.sem );
161 | }
162 |
163 | inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
164 | {
165 | while( countWaiting-- > 0 )
166 | {
167 | semaphore_signal( semaphoreid.sem );
168 | }
169 | }
170 | }
171 |
172 | #else // POSIX
173 |
174 | #include <semaphore.h>
175 |
// POSIX semaphore wrappers.
namespace enki
{

    struct semaphoreid_t
    {
        sem_t   sem;
    };

    // Initializes an unnamed, process-private semaphore with value 0.
    inline void SemaphoreCreate( semaphoreid_t& semaphoreid )
    {
        int err = sem_init( &semaphoreid.sem, 0, 0 );
        (void)err;  // keeps release builds (assert compiled out) warning-free

        assert( err == 0 );
    }

    // Destroys the semaphore.
    inline void SemaphoreClose( semaphoreid_t& semaphoreid )
    {
        sem_destroy( &semaphoreid.sem );
    }

    // Blocks until the semaphore is signalled.
    // NOTE(review): sem_wait can also fail with EINTR when a signal handler
    // interrupts it; that case is only caught by the assert - confirm whether
    // callers tolerate an early return.
    inline void SemaphoreWait( semaphoreid_t& semaphoreid  )
    {
        int err = sem_wait( &semaphoreid.sem );
        (void)err;  // keeps release builds (assert compiled out) warning-free

        assert( err == 0 );
    }

    // Posts the semaphore countWaiting times; counts <= 0 do nothing.
    inline void SemaphoreSignal( semaphoreid_t& semaphoreid, int32_t countWaiting )
    {
        while( countWaiting-- > 0 )
        {
            sem_post( &semaphoreid.sem );
        }
    }
}
209 | #endif
210 |
211 |
212 |
--------------------------------------------------------------------------------
/source/external/objparser-license.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016-2019 Arseny Kapoulkine
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/source/external/objparser.cpp:
--------------------------------------------------------------------------------
1 | #ifndef _CRT_SECURE_NO_WARNINGS
2 | #define _CRT_SECURE_NO_WARNINGS
3 | #endif
4 |
5 | #include "objparser.h"
6 |
7 | #include <cassert>
8 | #include <cmath>
9 | #include <cstdio>
10 | #include <cstdlib>
11 | #include <cstring>
12 |
// Grows a heap array in place: an empty array becomes 32 elements, any other
// array grows by half its current capacity. Existing elements are carried
// over with memcpy (so T must be trivially copyable), the old storage is
// released with delete[], and `data` / `capacity` are updated to the new
// storage.
template <typename T>
static void growArray(T*& data, size_t& capacity)
{
    size_t newcapacity = capacity == 0 ? 32 : capacity + capacity / 2;
    T* newdata = new T[newcapacity];

    if (data)
    {
        memcpy(newdata, data, capacity * sizeof(T));
        delete[] data;
    }

    data = newdata;
    capacity = newcapacity;
}
28 |
// Converts an OBJ-style index into a zero-based array index.
// Non-negative indices are 1-based, so 0 (not specified) maps to -1.
// Negative indices count back from `size`, the number of elements so far.
static int fixupIndex(int index, size_t size)
{
    if (index < 0)
        return int(size) + index;

    return index - 1;
}
33 |
// Parses an optionally signed decimal integer after skipping leading spaces
// and tabs. *end receives the position of the first character that was not
// consumed; `end` may point at the caller's own `s` variable. Yields 0 when
// no digits are present.
static int parseInt(const char* s, const char** end)
{
    const char* cursor = s;

    while (*cursor == ' ' || *cursor == '\t')
        ++cursor;

    bool negative = false;

    if (*cursor == '-')
    {
        negative = true;
        ++cursor;
    }
    else if (*cursor == '+')
    {
        ++cursor;
    }

    // accumulate in unsigned arithmetic, exactly one digit per iteration
    unsigned int magnitude = 0;

    while (*cursor >= '0' && *cursor <= '9')
    {
        magnitude = magnitude * 10 + unsigned(*cursor - '0');
        ++cursor;
    }

    *end = cursor;

    if (negative)
        return -int(magnitude);

    return int(magnitude);
}
61 |
// Parses a decimal floating-point number of the form
//   [ws][+|-]digits[.digits][(e|E)[+|-]digits]
// *end receives the position of the first character that was not consumed;
// `end` may point at the caller's own `s` variable. All digits are gathered
// into one double mantissa which is then scaled by a power of ten.
static float parseFloat(const char* s, const char** end)
{
    static const double kPow10[] = {1e0, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, 1e+21, 1e+22};
    const size_t kPow10Count = sizeof(kPow10) / sizeof(kPow10[0]);

    const char* cursor = s;

    while (*cursor == ' ' || *cursor == '\t')
        ++cursor;

    double sign = 1;

    if (*cursor == '-')
    {
        sign = -1;
        ++cursor;
    }
    else if (*cursor == '+')
    {
        ++cursor;
    }

    double mantissa = 0;
    int exponent10 = 0;

    // integer digits
    for (; *cursor >= '0' && *cursor <= '9'; ++cursor)
        mantissa = mantissa * 10 + double(*cursor - '0');

    // fractional digits: each one shifts the decimal exponent down by one
    if (*cursor == '.')
    {
        ++cursor;

        for (; *cursor >= '0' && *cursor <= '9'; ++cursor)
        {
            mantissa = mantissa * 10 + double(*cursor - '0');
            --exponent10;
        }
    }

    // optional exponent
    if (*cursor == 'e' || *cursor == 'E')
    {
        ++cursor;

        int exponentSign = 1;

        if (*cursor == '-')
        {
            exponentSign = -1;
            ++cursor;
        }
        else if (*cursor == '+')
        {
            ++cursor;
        }

        int exponentDigits = 0;

        for (; *cursor >= '0' && *cursor <= '9'; ++cursor)
            exponentDigits = exponentDigits * 10 + (*cursor - '0');

        exponent10 += exponentSign * exponentDigits;
    }

    *end = cursor;

    // note: this is precise if the mantissa < 9e15
    // for longer inputs we lose a bit of precision here
    if (unsigned(-exponent10) < kPow10Count)
        return float(sign * mantissa / kPow10[-exponent10]);

    if (unsigned(exponent10) < kPow10Count)
        return float(sign * mantissa * kPow10[exponent10]);

    return float(sign * mantissa * pow(10.0, exponent10));
}
132 |
133 | static const char* parseFace(const char* s, int& vi, int& vti, int& vni)
134 | {
135 | while (*s == ' ' || *s == '\t')
136 | s++;
137 |
138 | vi = parseInt(s, &s);
139 |
140 | if (*s != '/')
141 | return s;
142 | s++;
143 |
144 | // handle vi//vni indices
145 | if (*s != '/')
146 | vti = parseInt(s, &s);
147 |
148 | if (*s != '/')
149 | return s;
150 | s++;
151 |
152 | vni = parseInt(s, &s);
153 |
154 | return s;
155 | }
156 |
157 | ObjFile::ObjFile()
158 | : v(0)
159 | , v_size(0)
160 | , v_cap(0)
161 | , vt(0)
162 | , vt_size(0)
163 | , vt_cap(0)
164 | , vn(0)
165 | , vn_size(0)
166 | , vn_cap(0)
167 | , f(0)
168 | , f_size(0)
169 | , f_cap(0)
170 | , g(0)
171 | , g_size(0)
172 | , g_cap(0)
173 | {
174 | }
175 |
176 | ObjFile::~ObjFile()
177 | {
178 | delete[] v;
179 | delete[] vt;
180 | delete[] vn;
181 | delete[] f;
182 | delete[] g;
183 | }
184 |
185 | void objParseLine(ObjFile& result, const char* line)
186 | {
187 | if (line[0] == 'v' && line[1] == ' ')
188 | {
189 | const char* s = line + 2;
190 |
191 | float x = parseFloat(s, &s);
192 | float y = parseFloat(s, &s);
193 | float z = parseFloat(s, &s);
194 |
195 | if (result.v_size + 3 > result.v_cap)
196 | growArray(result.v, result.v_cap);
197 |
198 | result.v[result.v_size++] = x;
199 | result.v[result.v_size++] = y;
200 | result.v[result.v_size++] = z;
201 | }
202 | else if (line[0] == 'v' && line[1] == 't' && line[2] == ' ')
203 | {
204 | const char* s = line + 3;
205 |
206 | float u = parseFloat(s, &s);
207 | float v = parseFloat(s, &s);
208 | float w = parseFloat(s, &s);
209 |
210 | if (result.vt_size + 3 > result.vt_cap)
211 | growArray(result.vt, result.vt_cap);
212 |
213 | result.vt[result.vt_size++] = u;
214 | result.vt[result.vt_size++] = v;
215 | result.vt[result.vt_size++] = w;
216 | }
217 | else if (line[0] == 'v' && line[1] == 'n' && line[2] == ' ')
218 | {
219 | const char* s = line + 3;
220 |
221 | float x = parseFloat(s, &s);
222 | float y = parseFloat(s, &s);
223 | float z = parseFloat(s, &s);
224 |
225 | if (result.vn_size + 3 > result.vn_cap)
226 | growArray(result.vn, result.vn_cap);
227 |
228 | result.vn[result.vn_size++] = x;
229 | result.vn[result.vn_size++] = y;
230 | result.vn[result.vn_size++] = z;
231 | }
232 | else if (line[0] == 'f' && line[1] == ' ')
233 | {
234 | const char* s = line + 2;
235 |
236 | if (!result.g)
237 | {
238 | growArray(result.g, result.g_cap);
239 |
240 | ObjGroup g = {};
241 | result.g[result.g_size++] = g;
242 | }
243 |
244 | size_t v = result.v_size / 3;
245 | size_t vt = result.vt_size / 3;
246 | size_t vn = result.vn_size / 3;
247 |
248 | int fv = 0;
249 | int f[3][3] = {};
250 |
251 | while (*s)
252 | {
253 | int vi = 0, vti = 0, vni = 0;
254 | s = parseFace(s, vi, vti, vni);
255 |
256 | if (vi == 0)
257 | break;
258 |
259 | f[fv][0] = fixupIndex(vi, v);
260 | f[fv][1] = fixupIndex(vti, vt);
261 | f[fv][2] = fixupIndex(vni, vn);
262 |
263 | if (fv == 2)
264 | {
265 | if (result.f_size + 9 > result.f_cap)
266 | growArray(result.f, result.f_cap);
267 |
268 | memcpy(&result.f[result.f_size], f, 9 * sizeof(int));
269 | result.f_size += 9;
270 |
271 | result.g[result.g_size - 1].index_count += 3;
272 |
273 | f[1][0] = f[2][0];
274 | f[1][1] = f[2][1];
275 | f[1][2] = f[2][2];
276 | }
277 | else
278 | {
279 | fv++;
280 | }
281 | }
282 | }
283 | else if (strncmp(line, "usemtl", 6) == 0)
284 | {
285 | const char* s = line + 6;
286 |
287 | // skip whitespace
288 | while (*s == ' ' || *s == '\t')
289 | s++;
290 |
291 | if (result.g_size + 1 > result.g_cap)
292 | growArray(result.g, result.g_cap);
293 |
294 | ObjGroup g = {};
295 | g.index_offset = result.f_size / 3;
296 |
297 | strncpy(g.material, s, sizeof(g.material));
298 | g.material[sizeof(g.material) - 1] = 0;
299 |
300 | result.g[result.g_size++] = g;
301 | }
302 | }
303 |
304 | bool objParseFile(ObjFile& result, const char* path)
305 | {
306 | FILE* file = fopen(path, "rb");
307 | if (!file)
308 | return false;
309 |
310 | char buffer[65536];
311 | size_t size = 0;
312 |
313 | while (!feof(file))
314 | {
315 | size += fread(buffer + size, 1, sizeof(buffer) - size, file);
316 |
317 | size_t line = 0;
318 |
319 | while (line < size)
320 | {
321 | // find the end of current line
322 | void* eol = memchr(buffer + line, '\n', size - line);
323 | if (!eol)
324 | break;
325 |
326 | // zero-terminate for objParseLine
327 | size_t next = static_cast(eol) - buffer;
328 |
329 | buffer[next] = 0;
330 |
331 | // process next line
332 | objParseLine(result, buffer + line);
333 |
334 | line = next + 1;
335 | }
336 |
337 | // move prefix of the last line in the buffer to the beginning of the buffer for next iteration
338 | assert(line <= size);
339 |
340 | memmove(buffer, buffer + line, size - line);
341 | size -= line;
342 | }
343 |
344 | if (size)
345 | {
346 | // process last line
347 | assert(size < sizeof(buffer));
348 | buffer[size] = 0;
349 |
350 | objParseLine(result, buffer);
351 | }
352 |
353 | fclose(file);
354 | return true;
355 | }
356 |
357 | bool objValidate(const ObjFile& result)
358 | {
359 | size_t v = result.v_size / 3;
360 | size_t vt = result.vt_size / 3;
361 | size_t vn = result.vn_size / 3;
362 |
363 | for (size_t i = 0; i < result.f_size; i += 3)
364 | {
365 | int vi = result.f[i + 0];
366 | int vti = result.f[i + 1];
367 | int vni = result.f[i + 2];
368 |
369 | if (vi < 0)
370 | return false;
371 |
372 | if (vi >= 0 && size_t(vi) >= v)
373 | return false;
374 |
375 | if (vti >= 0 && size_t(vti) >= vt)
376 | return false;
377 |
378 | if (vni >= 0 && size_t(vni) >= vn)
379 | return false;
380 | }
381 |
382 | return true;
383 | }
384 |
--------------------------------------------------------------------------------
/source/external/objparser.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include <stddef.h>
4 |
// A run of faces that share one material (started by a "usemtl" line).
struct ObjGroup
{
    // material name, zero-terminated and truncated to fit
    char material[256];

    // index_offset is set to f_size / 3 when the group is created;
    // index_count grows by 3 for every triangle added to the group
    size_t index_offset;
    size_t index_count;
};
12 |
13 | class ObjFile
14 | {
15 | public:
16 | float* v; // positions; stride 3 (xyz)
17 | size_t v_size, v_cap;
18 |
19 | float* vt; // texture coordinates; stride 3 (uvw)
20 | size_t vt_size, vt_cap;
21 |
22 | float* vn; // vertex normals; stride 3 (xyz)
23 | size_t vn_size, vn_cap;
24 |
25 | int* f; // face elements; stride 9 (3 groups of indices into v/vt/vn)
26 | size_t f_size, f_cap;
27 |
28 | ObjGroup* g;
29 | size_t g_size, g_cap;
30 |
31 | ObjFile();
32 | ~ObjFile();
33 |
34 | private:
35 | ObjFile(const ObjFile&);
36 | ObjFile& operator=(const ObjFile&);
37 | };
38 |
39 | void objParseLine(ObjFile& result, const char* line);
40 | bool objParseFile(ObjFile& result, const char* path);
41 |
42 | bool objValidate(const ObjFile& result);
43 |
--------------------------------------------------------------------------------
/source/external/sokol_time.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | /*
3 | sokol_time.h -- simple cross-platform time measurement
4 |
5 | Do this:
6 | #define SOKOL_IMPL
7 | before you include this file in *one* C or C++ file to create the
8 | implementation.
9 |
10 | Optionally provide the following defines with your own implementations:
11 | SOKOL_ASSERT(c) - your own assert macro (default: assert(c))
12 | SOKOL_API_DECL - public function declaration prefix (default: extern)
13 | SOKOL_API_IMPL - public function implementation prefix (default: -)
14 |
15 | void stm_setup();
16 | Call once before any other functions to initialize sokol_time
17 | (this calls for instance QueryPerformanceFrequency on Windows)
18 |
19 | uint64_t stm_now();
20 | Get current point in time in unspecified 'ticks'. The value that
21 | is returned has no relation to the 'wall-clock' time and is
22 | not in a specific time unit, it is only useful to compute
23 | time differences.
24 |
25 | uint64_t stm_diff(uint64_t new, uint64_t old);
26 | Computes the time difference between new and old. This will always
27 | return a positive, non-zero value.
28 |
29 | uint64_t stm_since(uint64_t start);
30 | Takes the current time, and returns the elapsed time since start
31 | (this is a shortcut for "stm_diff(stm_now(), start)")
32 |
33 | uint64_t stm_laptime(uint64_t* last_time);
34 | This is useful for measuring frame time and other recurring
35 | events. It takes the current time, returns the time difference
36 | to the value in last_time, and stores the current time in
37 | last_time for the next call. If the value in last_time is 0,
38 | the return value will be zero (this usually happens on the
39 | very first call).
40 |
41 | Use the following functions to convert a duration in ticks into
42 | useful time units:
43 |
44 | double stm_sec(uint64_t ticks);
45 | double stm_ms(uint64_t ticks);
46 | double stm_us(uint64_t ticks);
47 | double stm_ns(uint64_t ticks);
48 | Converts a tick value into seconds, milliseconds, microseconds
49 | or nanoseconds. Note that not all platforms will have nanosecond
50 | or even microsecond precision.
51 |
52 | Uses the following time measurement functions under the hood:
53 |
54 | Windows: QueryPerformanceFrequency() / QueryPerformanceCounter()
55 | MacOS/iOS: mach_absolute_time()
56 | emscripten: performance.now()
57 | Linux+others: clock_gettime(CLOCK_MONOTONIC)
58 |
59 | zlib/libpng license
60 |
61 | Copyright (c) 2018 Andre Weissflog
62 |
63 | This software is provided 'as-is', without any express or implied warranty.
64 | In no event will the authors be held liable for any damages arising from the
65 | use of this software.
66 |
67 | Permission is granted to anyone to use this software for any purpose,
68 | including commercial applications, and to alter it and redistribute it
69 | freely, subject to the following restrictions:
70 |
71 | 1. The origin of this software must not be misrepresented; you must not
72 | claim that you wrote the original software. If you use this software in a
73 | product, an acknowledgment in the product documentation would be
74 | appreciated but is not required.
75 |
76 | 2. Altered source versions must be plainly marked as such, and must not
77 | be misrepresented as being the original software.
78 |
79 | 3. This notice may not be removed or altered from any source
80 | distribution.
81 | */
82 | #define SOKOL_TIME_INCLUDED (1)
83 | #include <stdint.h>
84 |
85 | #ifndef SOKOL_API_DECL
86 | #define SOKOL_API_DECL extern
87 | #endif
88 |
89 | #ifdef __cplusplus
90 | extern "C" {
91 | #endif
92 |
93 | SOKOL_API_DECL void stm_setup(void);
94 | SOKOL_API_DECL uint64_t stm_now(void);
95 | SOKOL_API_DECL uint64_t stm_diff(uint64_t new_ticks, uint64_t old_ticks);
96 | SOKOL_API_DECL uint64_t stm_since(uint64_t start_ticks);
97 | SOKOL_API_DECL uint64_t stm_laptime(uint64_t* last_time);
98 | SOKOL_API_DECL double stm_sec(uint64_t ticks);
99 | SOKOL_API_DECL double stm_ms(uint64_t ticks);
100 | SOKOL_API_DECL double stm_us(uint64_t ticks);
101 | SOKOL_API_DECL double stm_ns(uint64_t ticks);
102 |
103 | #ifdef __cplusplus
104 | } /* extern "C" */
105 | #endif
106 |
107 | /*-- IMPLEMENTATION ----------------------------------------------------------*/
108 | #ifdef SOKOL_IMPL
109 | #define SOKOL_TIME_IMPL_INCLUDED (1)
110 | #include <string.h> /* memset */
111 |
112 | #ifndef SOKOL_API_IMPL
113 | #define SOKOL_API_IMPL
114 | #endif
115 | #ifndef SOKOL_ASSERT
116 | #include <assert.h>
117 | #define SOKOL_ASSERT(c) assert(c)
118 | #endif
119 | #ifndef _SOKOL_PRIVATE
120 | #if defined(__GNUC__)
121 | #define _SOKOL_PRIVATE __attribute__((unused)) static
122 | #else
123 | #define _SOKOL_PRIVATE static
124 | #endif
125 | #endif
126 |
127 | #if defined(_WIN32)
128 | #ifndef WIN32_LEAN_AND_MEAN
129 | #define WIN32_LEAN_AND_MEAN
130 | #endif
131 | #include <windows.h>
132 | typedef struct { /* timer state, Windows variant */
133 | uint32_t initialized; /* set to 0xABCDABCD by stm_setup() and asserted by stm_now() */
134 | LARGE_INTEGER freq; /* filled by QueryPerformanceFrequency() in stm_setup() */
135 | LARGE_INTEGER start; /* QueryPerformanceCounter() value captured in stm_setup() */
136 | } _stm_state_t;
137 | #elif defined(__APPLE__) && defined(__MACH__)
138 | #include <mach/mach_time.h>
139 | typedef struct { /* timer state, Apple/Mach variant */
140 | uint32_t initialized; /* set to 0xABCDABCD by stm_setup() and asserted by stm_now() */
141 | mach_timebase_info_data_t timebase; /* filled by mach_timebase_info() in stm_setup(); numer/denom scale ticks in stm_now() */
142 | uint64_t start; /* mach_absolute_time() value captured in stm_setup() */
143 | } _stm_state_t;
144 | #elif defined(__EMSCRIPTEN__)
145 | #include <emscripten/emscripten.h>
146 | typedef struct { /* timer state, Emscripten variant */
147 | uint32_t initialized; /* set to 0xABCDABCD by stm_setup() and asserted by stm_now() */
148 | double start; /* _stm_js_perfnow() value captured in stm_setup() */
149 | } _stm_state_t;
150 | #else /* anything else, this will need more care for non-Linux platforms */
151 | #include <time.h>
152 | typedef struct { /* timer state, fallback variant built on clock_gettime() */
153 | uint32_t initialized; /* set to 0xABCDABCD by stm_setup() and asserted by stm_now() */
154 | uint64_t start; /* CLOCK_MONOTONIC reading in nanoseconds captured in stm_setup() */
155 | } _stm_state_t;
156 | #endif
157 | static _stm_state_t _stm;
158 |
159 | /* prevent 64-bit overflow when computing relative timestamp
160 | see https://gist.github.com/jspohr/3dc4f00033d79ec5bdaf67bc46c813e3
161 | */
162 | #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__))
163 | _SOKOL_PRIVATE int64_t int64_muldiv(int64_t value, int64_t numer, int64_t denom) { /* returns value*numer/denom, split so that the full product value*numer is never formed */
164 | int64_t q = value / denom; /* whole multiples of denom contained in value */
165 | int64_t r = value % denom; /* what is left of value after removing those multiples */
166 | return q * numer + r * numer / denom; /* only the remainder is multiplied before the division */
167 | }
168 | #endif
169 |
170 | #if defined(__EMSCRIPTEN__)
171 | EM_JS(double, _stm_js_perfnow, (void), { /* Emscripten only: JavaScript helper that hands performance.now() back to C as a double */
172 | return performance.now();
173 | });
174 | #endif
175 |
176 | SOKOL_API_IMPL void stm_setup(void) { /* clears the global timer state and records the current platform clock value as the start reference */
177 | memset(&_stm, 0, sizeof(_stm));
178 | _stm.initialized = 0xABCDABCD; /* marker that stm_now() asserts on */
179 | #if defined(_WIN32)
180 | QueryPerformanceFrequency(&_stm.freq); /* kept for the tick scaling done in stm_now() */
181 | QueryPerformanceCounter(&_stm.start);
182 | #elif defined(__APPLE__) && defined(__MACH__)
183 | mach_timebase_info(&_stm.timebase); /* numer/denom are used for the tick scaling done in stm_now() */
184 | _stm.start = mach_absolute_time();
185 | #elif defined(__EMSCRIPTEN__)
186 | _stm.start = _stm_js_perfnow();
187 | #else
188 | struct timespec ts;
189 | clock_gettime(CLOCK_MONOTONIC, &ts);
190 | _stm.start = (uint64_t)ts.tv_sec*1000000000 + (uint64_t)ts.tv_nsec; /* seconds and nanoseconds flattened into one nanosecond count */
191 | #endif
192 | }
193 |
194 | SOKOL_API_IMPL uint64_t stm_now(void) { /* returns the time elapsed since stm_setup() as a tick count; stm_setup() must have run first */
195 | SOKOL_ASSERT(_stm.initialized == 0xABCDABCD); /* marker written by stm_setup() */
196 | uint64_t now;
197 | #if defined(_WIN32)
198 | LARGE_INTEGER qpc_t;
199 | QueryPerformanceCounter(&qpc_t);
200 | now = int64_muldiv(qpc_t.QuadPart - _stm.start.QuadPart, 1000000000, _stm.freq.QuadPart); /* counter delta * 1e9 / counter frequency */
201 | #elif defined(__APPLE__) && defined(__MACH__)
202 | const uint64_t mach_now = mach_absolute_time() - _stm.start;
203 | now = int64_muldiv(mach_now, _stm.timebase.numer, _stm.timebase.denom); /* delta scaled by the timebase fraction numer/denom */
204 | #elif defined(__EMSCRIPTEN__)
205 | double js_now = _stm_js_perfnow() - _stm.start;
206 | SOKOL_ASSERT(js_now >= 0.0);
207 | now = (uint64_t) (js_now * 1000000.0); /* NOTE(review): presumably milliseconds -> nanoseconds; confirm the unit of performance.now() */
208 | #else
209 | struct timespec ts;
210 | clock_gettime(CLOCK_MONOTONIC, &ts);
211 | now = ((uint64_t)ts.tv_sec*1000000000 + (uint64_t)ts.tv_nsec) - _stm.start; /* nanoseconds since the start reference */
212 | #endif
213 | return now;
214 | }
215 |
216 | SOKOL_API_IMPL uint64_t stm_diff(uint64_t new_ticks, uint64_t old_ticks) { /* returns new_ticks - old_ticks, or 1 when new_ticks is not larger than old_ticks */
217 | if (new_ticks > old_ticks) {
218 | return new_ticks - old_ticks;
219 | }
220 | else {
221 | return 1; /* never returns 0 and never lets the unsigned subtraction wrap */
222 | }
223 | }
224 |
225 | SOKOL_API_IMPL uint64_t stm_since(uint64_t start_ticks) { /* returns the ticks elapsed between start_ticks and the current stm_now() value */
226 | return stm_diff(stm_now(), start_ticks); /* stm_diff() yields 1 if start_ticks is not in the past */
227 | }
228 |
229 | SOKOL_API_IMPL uint64_t stm_laptime(uint64_t* last_time) { /* returns the ticks elapsed since *last_time and stores the current time back into *last_time */
230 | SOKOL_ASSERT(last_time);
231 | uint64_t dt = 0;
232 | uint64_t now = stm_now();
233 | if (0 != *last_time) { /* a zero *last_time is treated as "no previous lap": dt stays 0 */
234 | dt = stm_diff(now, *last_time);
235 | }
236 | *last_time = now;
237 | return dt;
238 | }
239 |
240 | SOKOL_API_IMPL double stm_sec(uint64_t ticks) { /* converts a tick count to seconds */
241 | return (double)ticks / 1000000000.0; /* 1e9 ticks per second, i.e. ticks are treated as nanoseconds */
242 | }
243 |
244 | SOKOL_API_IMPL double stm_ms(uint64_t ticks) { /* converts a tick count to milliseconds */
245 | return (double)ticks / 1000000.0; /* 1e6 ticks per millisecond */
246 | }
247 |
248 | SOKOL_API_IMPL double stm_us(uint64_t ticks) { /* converts a tick count to microseconds */
249 | return (double)ticks / 1000.0; /* 1e3 ticks per microsecond */
250 | }
251 |
252 | SOKOL_API_IMPL double stm_ns(uint64_t ticks) { /* converts a tick count to nanoseconds */
253 | return (double)ticks; /* no scaling: one tick is one nanosecond */
254 | }
255 | #endif /* SOKOL_IMPL */
256 |
257 |
--------------------------------------------------------------------------------
/source/external/stb_image_write.h:
--------------------------------------------------------------------------------
1 | /* stb_image_write - v1.13 - public domain - http://nothings.org/stb/stb_image_write.h
2 | writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015
3 | no warranty implied; use at your own risk
4 |
5 | Before #including,
6 |
7 | #define STB_IMAGE_WRITE_IMPLEMENTATION
8 |
9 | in the file that you want to have the implementation.
10 |
11 | Will probably not work correctly with strict-aliasing optimizations.
12 |
13 | If using a modern Microsoft Compiler, non-safe versions of CRT calls may cause
14 | compilation warnings or even errors. To avoid this, also before #including,
15 |
16 | #define STBI_MSC_SECURE_CRT
17 |
18 | ABOUT:
19 |
20 | This header file is a library for writing images to C stdio or a callback.
21 |
22 | The PNG output is not optimal; it is 20-50% larger than the file
23 | written by a decent optimizing implementation; though providing a custom
24 | zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that.
25 | This library is designed for source code compactness and simplicity,
26 | not optimal image file size or run-time performance.
27 |
28 | BUILDING:
29 |
30 | You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h.
31 | You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace
32 | malloc,realloc,free.
33 | You can #define STBIW_MEMMOVE() to replace memmove()
34 | You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function
35 | for PNG compression (instead of the builtin one), it must have the following signature:
36 | unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality);
37 | The returned data will be freed with STBIW_FREE() (free() by default),
38 | so it must be heap allocated with STBIW_MALLOC() (malloc() by default),
39 |
40 | UNICODE:
41 |
42 | If compiling for Windows and you wish to use Unicode filenames, compile
43 | with
44 | #define STBI_WINDOWS_UTF8
45 | and pass utf8-encoded filenames. Call stbiw_convert_wchar_to_utf8 to convert
46 | Windows wchar_t filenames to utf8.
47 |
48 | USAGE:
49 |
50 | There are five functions, one for each image file format:
51 |
52 | int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes);
53 | int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data);
54 | int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data);
55 | int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality);
56 | int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
57 |
58 | void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically
59 |
60 | There are also five equivalent functions that use an arbitrary write function. You are
61 | expected to open/close your file-equivalent before and after calling these:
62 |
63 | int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes);
64 | int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data);
65 | int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data);
66 | int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data);
67 | int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality);
68 |
69 | where the callback is:
70 | void stbi_write_func(void *context, void *data, int size);
71 |
72 | You can configure it with these global variables:
73 | int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE
74 | int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression
75 | int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode
76 |
77 |
78 | You can define STBI_WRITE_NO_STDIO to disable the file variant of these
79 | functions, so the library will not use stdio.h at all. However, this will
80 | also disable HDR writing, because it requires stdio for formatted output.
81 |
82 | Each function returns 0 on failure and non-0 on success.
83 |
84 | The functions create an image file defined by the parameters. The image
85 | is a rectangle of pixels stored from left-to-right, top-to-bottom.
86 | Each pixel contains 'comp' channels of data stored interleaved with 8-bits
87 | per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is
88 | monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall.
89 | The *data pointer points to the first byte of the top-left-most pixel.
90 | For PNG, "stride_in_bytes" is the distance in bytes from the first byte of
91 | a row of pixels to the first byte of the next row of pixels.
92 |
93 | PNG creates output files with the same number of components as the input.
94 | The BMP format expands Y to RGB in the file format and does not
95 | output alpha.
96 |
97 | PNG supports writing rectangles of data even when the bytes storing rows of
98 | data are not consecutive in memory (e.g. sub-rectangles of a larger image),
99 | by supplying the stride between the beginning of adjacent rows. The other
100 | formats do not. (Thus you cannot write a native-format BMP through the BMP
101 | writer, both because it is in BGR order and because it may have padding
102 | at the end of the line.)
103 |
104 | PNG allows you to set the deflate compression level by setting the global
105 | variable 'stbi_write_png_compression_level' (it defaults to 8).
106 |
107 | HDR expects linear float data. Since the format is always 32-bit rgb(e)
108 | data, alpha (if provided) is discarded, and for monochrome data it is
109 | replicated across all three channels.
110 |
111 | TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed
112 | data, set the global variable 'stbi_write_tga_with_rle' to 0.
113 |
114 | JPEG does ignore alpha channels in input data; quality is between 1 and 100.
115 | Higher quality looks better but results in a bigger image.
116 | JPEG baseline (no JPEG progressive).
117 |
118 | CREDITS:
119 |
120 |
121 | Sean Barrett - PNG/BMP/TGA
122 | Baldur Karlsson - HDR
123 | Jean-Sebastien Guay - TGA monochrome
124 | Tim Kelsey - misc enhancements
125 | Alan Hickman - TGA RLE
126 | Emmanuel Julien - initial file IO callback implementation
127 | Jon Olick - original jo_jpeg.cpp code
128 | Daniel Gibson - integrate JPEG, allow external zlib
129 | Aarni Koskela - allow choosing PNG filter
130 |
131 | bugfixes:
132 | github:Chribba
133 | Guillaume Chereau
134 | github:jry2
135 | github:romigrou
136 | Sergio Gonzalez
137 | Jonas Karlsson
138 | Filip Wasil
139 | Thatcher Ulrich
140 | github:poppolopoppo
141 | Patrick Boettcher
142 | github:xeekworx
143 | Cap Petschulat
144 | Simon Rodriguez
145 | Ivan Tikhonov
146 | github:ignotion
147 | Adam Schackart
148 |
149 | LICENSE
150 |
151 | See end of file for license information.
152 |
153 | */
154 |
155 | #ifndef INCLUDE_STB_IMAGE_WRITE_H
156 | #define INCLUDE_STB_IMAGE_WRITE_H
157 |
158 | #include <stdlib.h>
159 |
160 | // if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline'
161 | #ifndef STBIWDEF
162 | #ifdef STB_IMAGE_WRITE_STATIC
163 | #define STBIWDEF static
164 | #else
165 | #ifdef __cplusplus
166 | #define STBIWDEF extern "C"
167 | #else
168 | #define STBIWDEF extern
169 | #endif
170 | #endif
171 | #endif
172 |
173 | #ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations
174 | extern int stbi_write_tga_with_rle;
175 | extern int stbi_write_png_compression_level;
176 | extern int stbi_write_force_png_filter;
177 | #endif
178 |
179 | #ifndef STBI_WRITE_NO_STDIO
180 | STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes);
181 | STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data);
182 | STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data);
183 | STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data);
184 | STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality);
185 |
186 | #ifdef STBI_WINDOWS_UTF8
187 | STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
188 | #endif
189 | #endif
190 |
191 | typedef void stbi_write_func(void *context, void *data, int size);
192 |
193 | STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes);
194 | STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data);
195 | STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data);
196 | STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data);
197 | STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality);
198 |
199 | STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean);
200 |
201 | #endif//INCLUDE_STB_IMAGE_WRITE_H
202 |
203 | #ifdef STB_IMAGE_WRITE_IMPLEMENTATION
204 |
205 | #ifdef _WIN32
206 | #ifndef _CRT_SECURE_NO_WARNINGS
207 | #define _CRT_SECURE_NO_WARNINGS
208 | #endif
209 | #ifndef _CRT_NONSTDC_NO_DEPRECATE
210 | #define _CRT_NONSTDC_NO_DEPRECATE
211 | #endif
212 | #endif
213 |
214 | #ifndef STBI_WRITE_NO_STDIO
215 | #include <stdio.h>
216 | #endif // STBI_WRITE_NO_STDIO
217 |
218 | #include <stdarg.h>
219 | #include <stdlib.h>
220 | #include <string.h>
221 | #include <math.h>
222 |
223 | #if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED))
224 | // ok
225 | #elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED)
226 | // ok
227 | #else
228 | #error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)."
229 | #endif
230 |
231 | #ifndef STBIW_MALLOC
232 | #define STBIW_MALLOC(sz) malloc(sz)
233 | #define STBIW_REALLOC(p,newsz) realloc(p,newsz)
234 | #define STBIW_FREE(p) free(p)
235 | #endif
236 |
237 | #ifndef STBIW_REALLOC_SIZED
238 | #define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz)
239 | #endif
240 |
241 |
242 | #ifndef STBIW_MEMMOVE
243 | #define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz)
244 | #endif
245 |
246 |
247 | #ifndef STBIW_ASSERT
248 | #include <assert.h>
249 | #define STBIW_ASSERT(x) assert(x)
250 | #endif
251 |
252 | #define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff)
253 |
254 | #ifdef STB_IMAGE_WRITE_STATIC
255 | static int stbi__flip_vertically_on_write=0; // set through stbi_flip_vertically_on_write()
256 | static int stbi_write_png_compression_level = 8;
257 | static int stbi_write_tga_with_rle = 1; // nonzero selects the RLE path in stbi_write_tga_core()
258 | static int stbi_write_force_png_filter = -1;
259 | #else
260 | int stbi_write_png_compression_level = 8; // non-static build: these are the definitions behind the extern declarations in the header part
261 | int stbi__flip_vertically_on_write=0; // set through stbi_flip_vertically_on_write()
262 | int stbi_write_tga_with_rle = 1; // nonzero selects the RLE path in stbi_write_tga_core()
263 | int stbi_write_force_png_filter = -1;
264 | #endif
265 |
266 | STBIWDEF void stbi_flip_vertically_on_write(int flag) // stores flag in the global stbi__flip_vertically_on_write
267 | {
268 | stbi__flip_vertically_on_write = flag; // consulted by stbiw__write_pixels, stbi_write_tga_core and stbi_write_hdr_core when choosing row order
269 | }
270 |
271 | typedef struct // output sink shared by all writers: a callback plus its user pointer
272 | {
273 | stbi_write_func *func; // called as func(context, data, size) for every chunk of output
274 | void *context; // passed through to func unchanged; a FILE* when writing to a file
275 | } stbi__write_context;
276 |
277 | // initialize a callback-based context: store the write callback c and its user pointer in *s
278 | static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context)
279 | {
280 | s->func = c;
281 | s->context = context;
282 | }
283 |
284 | #ifndef STBI_WRITE_NO_STDIO
285 |
286 | static void stbi__stdio_write(void *context, void *data, int size) // stbi_write_func implementation that writes size bytes to the FILE* carried in context
287 | {
288 | fwrite(data,1,size,(FILE*) context); // the fwrite result is not checked
289 | }
290 |
291 | #if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
292 | #ifdef __cplusplus
293 | #define STBIW_EXTERN extern "C"
294 | #else
295 | #define STBIW_EXTERN extern
296 | #endif
297 | STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
298 | STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
299 |
300 | STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) // converts the NUL-terminated wide string input to UTF-8 in buffer; returns the WideCharToMultiByte result
301 | {
302 | return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); // -1 tells the API that input is NUL-terminated
303 | }
304 | #endif
305 |
306 | static FILE *stbiw__fopen(char const *filename, char const *mode) // opens filename with an fopen-style mode string; returns 0 when conversion or opening fails
307 | {
308 | FILE *f;
309 | #if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
310 | wchar_t wMode[64];
311 | wchar_t wFilename[1024];
312 | if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, (int) (sizeof(wFilename)/sizeof(*wFilename)))) // capacity is counted in wchar_t elements, not bytes
313 | return 0;
314 |
315 | if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, (int) (sizeof(wMode)/sizeof(*wMode)))) // capacity is counted in wchar_t elements, not bytes
316 | return 0;
317 |
318 | #if _MSC_VER >= 1400
319 | if (0 != _wfopen_s(&f, wFilename, wMode)) // secure CRT variant reports failure through its return code
320 | f = 0;
321 | #else
322 | f = _wfopen(wFilename, wMode);
323 | #endif
324 |
325 | #elif defined(_MSC_VER) && _MSC_VER >= 1400
326 | if (0 != fopen_s(&f, filename, mode)) // secure CRT variant reports failure through its return code
327 | f=0;
328 | #else
329 | f = fopen(filename, mode);
330 | #endif
331 | return f;
332 | }
333 |
334 | static int stbi__start_write_file(stbi__write_context *s, const char *filename) // opens filename for binary writing and points *s at it; returns nonzero on success
335 | {
336 | FILE *f = stbiw__fopen(filename, "wb");
337 | stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); // done even when f is NULL; callers must check the return value before writing
338 | return f != NULL;
339 | }
340 |
341 | static void stbi__end_write_file(stbi__write_context *s) // closes the FILE* stored in s->context by stbi__start_write_file()
342 | {
343 | fclose((FILE *)s->context);
344 | }
345 |
346 | #endif // !STBI_WRITE_NO_STDIO
347 |
348 | typedef unsigned int stbiw_uint32; // 32-bit unsigned type used by the writers
349 | typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 1 : -1]; // compile-time check: a negative array size breaks the build unless stbiw_uint32 is 4 bytes
350 |
351 | static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) // writes the int arguments in v as bytes, one field per character of fmt
352 | {
353 | while (*fmt) {
354 | switch (*fmt++) {
355 | case ' ': break; // spaces in fmt are separators only and consume no argument
356 | case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); // '1': one byte
357 | s->func(s->context,&x,1);
358 | break; }
359 | case '2': { int x = va_arg(v,int); // '2': two bytes, low byte first
360 | unsigned char b[2];
361 | b[0] = STBIW_UCHAR(x);
362 | b[1] = STBIW_UCHAR(x>>8);
363 | s->func(s->context,b,2);
364 | break; }
365 | case '4': { stbiw_uint32 x = va_arg(v,int); // '4': four bytes, low byte first
366 | unsigned char b[4];
367 | b[0]=STBIW_UCHAR(x);
368 | b[1]=STBIW_UCHAR(x>>8);
369 | b[2]=STBIW_UCHAR(x>>16);
370 | b[3]=STBIW_UCHAR(x>>24);
371 | s->func(s->context,b,4);
372 | break; }
373 | default: // any other format character is a programming error: assert and stop writing
374 | STBIW_ASSERT(0);
375 | return;
376 | }
377 | }
378 | }
379 |
380 | static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) // variadic front end for stbiw__writefv(); see that function for the fmt characters
381 | {
382 | va_list v;
383 | va_start(v, fmt);
384 | stbiw__writefv(s, fmt, v);
385 | va_end(v);
386 | }
387 |
388 | static void stbiw__putc(stbi__write_context *s, unsigned char c) // writes the single byte c through the context callback
389 | {
390 | s->func(s->context, &c, 1);
391 | }
392 |
393 | static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) // writes the bytes a, b, c in that order with a single callback call
394 | {
395 | unsigned char arr[3];
396 | arr[0] = a; arr[1] = b; arr[2] = c;
397 | s->func(s->context, arr, 3);
398 | }
399 |
400 | static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) // writes one pixel of comp channels starting at d
401 | {
402 | unsigned char bg[3] = { 255, 0, 255}, px[3];
403 | int k;
404 |
405 | if (write_alpha < 0) // negative write_alpha: the alpha byte goes before the color bytes
406 | s->func(s->context, &d[comp - 1], 1);
407 |
408 | switch (comp) {
409 | case 2: // 2 channels = mono + alpha, alpha is written separately, so same as 1-channel case
410 | case 1:
411 | if (expand_mono)
412 | stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp
413 | else
414 | s->func(s->context, d, 1); // monochrome TGA
415 | break;
416 | case 4:
417 | if (!write_alpha) {
418 | // composite against pink background
419 | for (k = 0; k < 3; ++k)
420 | px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255;
421 | stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]);
422 | break;
423 | }
424 | /* FALLTHROUGH */
425 | case 3:
426 | stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); // rgb_dir = 1 keeps channel order 0,1,2; rgb_dir = -1 reverses it to 2,1,0
427 | break;
428 | }
429 | if (write_alpha > 0) // positive write_alpha: the alpha byte goes after the color bytes
430 | s->func(s->context, &d[comp - 1], 1);
431 | }
432 |
433 | static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) // writes an x-by-y image row by row, adding scanline_pad zero bytes after each row
434 | {
435 | stbiw_uint32 zero = 0;
436 | int i,j, j_end;
437 |
438 | if (y <= 0)
439 | return;
440 |
441 | if (stbi__flip_vertically_on_write) // the global flip flag reverses the requested row direction
442 | vdir *= -1;
443 |
444 | if (vdir < 0) { // negative vdir: start at the last row and walk back to row 0
445 | j_end = -1; j = y-1;
446 | } else {
447 | j_end = y; j = 0;
448 | }
449 |
450 | for (; j != j_end; j += vdir) {
451 | for (i=0; i < x; ++i) {
452 | unsigned char *d = (unsigned char *) data + (j*x+i)*comp; // rows are assumed tightly packed: x*comp bytes each
453 | stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d);
454 | }
455 | s->func(s->context, &zero, scanline_pad); // zero holds 4 bytes; the callers in this file pass a pad of 0..3
456 | }
457 | }
458 |
459 | static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) // writes a header described by fmt and the variadic arguments, then the pixel data; returns 1, or 0 for negative dimensions
460 | {
461 | if (y < 0 || x < 0) {
462 | return 0; // nothing is written in this case
463 | } else {
464 | va_list v;
465 | va_start(v, fmt);
466 | stbiw__writefv(s, fmt, v); // header fields, encoded per the fmt characters
467 | va_end(v);
468 | stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono);
469 | return 1;
470 | }
471 | }
472 |
473 | static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) // writes an x-by-y image as a 24-bit BMP; returns the stbiw__outfile() result
474 | {
475 | int pad = (-x*3) & 3; // bytes needed to round each 3*x-byte row up to a multiple of 4
476 | return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, // rgb_dir -1 (reversed channel order), vdir -1 (last row first), mono expanded to 3 bytes, alpha not written
477 | "11 4 22 4" "4 44 22 444444",
478 | 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header
479 | 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header
480 | }
481 |
482 | STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) // writes a BMP through the caller's callback; returns the stbi_write_bmp_core() result
483 | {
484 | stbi__write_context s;
485 | stbi__start_write_callbacks(&s, func, context);
486 | return stbi_write_bmp_core(&s, x, y, comp, data);
487 | }
488 |
489 | #ifndef STBI_WRITE_NO_STDIO
490 | STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) // writes a BMP to filename; returns 0 if the file cannot be opened, else the stbi_write_bmp_core() result
491 | {
492 | stbi__write_context s;
493 | if (stbi__start_write_file(&s,filename)) {
494 | int r = stbi_write_bmp_core(&s, x, y, comp, data);
495 | stbi__end_write_file(&s); // the file is closed whether or not the write succeeded
496 | return r;
497 | } else
498 | return 0;
499 | }
500 | #endif //!STBI_WRITE_NO_STDIO
501 |
502 | static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) // writes an x-by-y image as TGA, RLE-compressed or raw per stbi_write_tga_with_rle; returns 0 for negative dimensions
503 | {
504 | int has_alpha = (comp == 2 || comp == 4);
505 | int colorbytes = has_alpha ? comp-1 : comp;
506 | int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3
507 |
508 | if (y < 0 || x < 0)
509 | return 0;
510 |
511 | if (!stbi_write_tga_with_rle) { // raw path: header and uncompressed pixels via stbiw__outfile()
512 | return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0,
513 | "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8);
514 | } else {
515 | int i,j,k;
516 | int jend, jdir;
517 |
518 | stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); // same header as the raw path except the image type is format+8
519 |
520 | if (stbi__flip_vertically_on_write) { // flipped: rows are visited first to last
521 | j = 0;
522 | jend = y;
523 | jdir = 1;
524 | } else { // default: rows are visited last to first
525 | j = y-1;
526 | jend = -1;
527 | jdir = -1;
528 | }
529 | for (; j != jend; j += jdir) {
530 | unsigned char *row = (unsigned char *) data + j * x * comp;
531 | int len;
532 |
533 | for (i = 0; i < x; i += len) { // each pass emits one packet covering len pixels
534 | unsigned char *begin = row + i * comp;
535 | int diff = 1; // nonzero selects a raw packet, zero a run packet
536 | len = 1;
537 |
538 | if (i < x - 1) {
539 | ++len;
540 | diff = memcmp(begin, row + (i + 1) * comp, comp); // the first two pixels decide the packet type
541 | if (diff) { // raw packet: grow until the comparison below matches or 128 pixels are reached
542 | const unsigned char *prev = begin;
543 | for (k = i + 2; k < x && len < 128; ++k) {
544 | if (memcmp(prev, row + k * comp, comp)) {
545 | prev += comp;
546 | ++len;
547 | } else {
548 | --len; // a match ends the raw packet one pixel short
549 | break;
550 | }
551 | }
552 | } else { // run packet: grow while pixels equal the first one, up to 128 pixels
553 | for (k = i + 2; k < x && len < 128; ++k) {
554 | if (!memcmp(begin, row + k * comp, comp)) {
555 | ++len;
556 | } else {
557 | break;
558 | }
559 | }
560 | }
561 | }
562 |
563 | if (diff) {
564 | unsigned char header = STBIW_UCHAR(len - 1); // raw packet header: pixel count minus one
565 | s->func(s->context, &header, 1);
566 | for (k = 0; k < len; ++k) {
567 | stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp);
568 | }
569 | } else {
570 | unsigned char header = STBIW_UCHAR(len - 129); // run packet header: as a byte this equals (len - 1) + 128, i.e. count minus one with the top bit set
571 | s->func(s->context, &header, 1);
572 | stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); // the repeated pixel is stored once
573 | }
574 | }
575 | }
576 | }
577 | return 1;
578 | }
579 |
580 | STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) // writes a TGA through the caller's callback; returns the stbi_write_tga_core() result
581 | {
582 | stbi__write_context s;
583 | stbi__start_write_callbacks(&s, func, context);
584 | return stbi_write_tga_core(&s, x, y, comp, (void *) data);
585 | }
586 |
587 | #ifndef STBI_WRITE_NO_STDIO
588 | STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data) // writes a TGA to filename; returns 0 if the file cannot be opened, else the stbi_write_tga_core() result
589 | {
590 | stbi__write_context s;
591 | if (stbi__start_write_file(&s,filename)) {
592 | int r = stbi_write_tga_core(&s, x, y, comp, (void *) data);
593 | stbi__end_write_file(&s); // the file is closed whether or not the write succeeded
594 | return r;
595 | } else
596 | return 0;
597 | }
598 | #endif
599 |
600 | // *************************************************************************************************
601 | // Radiance RGBE HDR writer
602 | // by Baldur Karlsson
603 |
604 | #define stbiw__max(a, b) ((a) > (b) ? (a) : (b)) // larger of a and b; being a macro, an argument may be evaluated twice
605 |
606 | static void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) // packs the three floats in linear into 4 bytes: three mantissas plus a shared exponent in rgbe[3]
607 | {
608 | int exponent;
609 | float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); // the largest channel determines the shared exponent
610 |
611 | if (maxcomp < 1e-32f) { // too small (or negative): all four bytes are written as 0
612 | rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0;
613 | } else {
614 | float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; // factor that maps maxcomp to its frexp mantissa times 256
615 |
616 | rgbe[0] = (unsigned char)(linear[0] * normalize);
617 | rgbe[1] = (unsigned char)(linear[1] * normalize);
618 | rgbe[2] = (unsigned char)(linear[2] * normalize);
619 | rgbe[3] = (unsigned char)(exponent + 128); // exponent is stored with a bias of 128
620 | }
621 | }
622 |
623 | static void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) // emits a run packet: one count byte (length+128) followed by the repeated byte
624 | {
625 | unsigned char lengthbyte = STBIW_UCHAR(length+128);
626 | STBIW_ASSERT(length+128 <= 255); // so length must not exceed 127
627 | s->func(s->context, &lengthbyte, 1);
628 | s->func(s->context, &databyte, 1);
629 | }
630 |
631 | static void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) // emits a literal packet: one count byte (length) followed by length bytes from data
632 | {
633 | unsigned char lengthbyte = STBIW_UCHAR(length);
634 | STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code
635 | s->func(s->context, &lengthbyte, 1);
636 | s->func(s->context, data, length);
637 | }
638 |
639 | static void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) // encodes one row of width float pixels as RGBE, run-length encoded when the width allows; scratch must hold width*4 bytes
640 | {
641 | unsigned char scanlineheader[4] = { 2, 2, 0, 0 };
642 | unsigned char rgbe[4];
643 | float linear[3];
644 | int x;
645 |
646 | scanlineheader[2] = (width&0xff00)>>8; // width is stored high byte first in the scanline header
647 | scanlineheader[3] = (width&0x00ff);
648 |
649 | /* skip RLE for images too small or large */
650 | if (width < 8 || width >= 32768) {
651 | for (x=0; x < width; x++) { // flat path: 4 RGBE bytes per pixel, no scanline header, scratch unused
652 | switch (ncomp) {
653 | case 4: /* fallthrough */
654 | case 3: linear[2] = scanline[x*ncomp + 2]; // 3 or 4 channels: take the first three, a 4th is ignored
655 | linear[1] = scanline[x*ncomp + 1];
656 | linear[0] = scanline[x*ncomp + 0];
657 | break;
658 | default:
659 | linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; // other channel counts: channel 0 is replicated to all three
660 | break;
661 | }
662 | stbiw__linear_to_rgbe(rgbe, linear);
663 | s->func(s->context, rgbe, 4);
664 | }
665 | } else {
666 | int c,r;
667 | /* encode into scratch buffer */
668 | for (x=0; x < width; x++) {
669 | switch(ncomp) {
670 | case 4: /* fallthrough */
671 | case 3: linear[2] = scanline[x*ncomp + 2];
672 | linear[1] = scanline[x*ncomp + 1];
673 | linear[0] = scanline[x*ncomp + 0];
674 | break;
675 | default:
676 | linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0];
677 | break;
678 | }
679 | stbiw__linear_to_rgbe(rgbe, linear);
680 | scratch[x + width*0] = rgbe[0]; // scratch is planar: width bytes per component, one plane after another
681 | scratch[x + width*1] = rgbe[1];
682 | scratch[x + width*2] = rgbe[2];
683 | scratch[x + width*3] = rgbe[3];
684 | }
685 |
686 | s->func(s->context, scanlineheader, 4);
687 |
688 | /* RLE each component separately */
689 | for (c=0; c < 4; c++) {
690 | unsigned char *comp = &scratch[width*c];
691 |
692 | x = 0;
693 | while (x < width) {
694 | // find first run
695 | r = x;
696 | while (r+2 < width) {
697 | if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) // a run needs at least three equal bytes
698 | break;
699 | ++r;
700 | }
701 | if (r+2 >= width) // no run found: everything up to the end of the row is dumped literally
702 | r = width;
703 | // dump up to first run
704 | while (x < r) {
705 | int len = r-x;
706 | if (len > 128) len = 128; // literal packets carry at most 128 bytes
707 | stbiw__write_dump_data(s, len, &comp[x]);
708 | x += len;
709 | }
710 | // if there's a run, output it
711 | if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd
712 | // find next byte after run
713 | while (r < width && comp[r] == comp[x])
714 | ++r;
715 | // output run up to r
716 | while (x < r) {
717 | int len = r-x;
718 | if (len > 127) len = 127; // run packets carry at most 127 repeats
719 | stbiw__write_run_data(s, len, comp[x]);
720 | x += len;
721 | }
722 | }
723 | }
724 | }
725 | }
726 | }
727 |
728 | static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) // writes an x-by-y float image as Radiance HDR; returns 0 for non-positive dimensions or NULL data, else 1
729 | {
730 | if (y <= 0 || x <= 0 || data == NULL)
731 | return 0;
732 | else {
733 | // Each component is stored separately. Allocate scratch space for full output scanline.
734 | unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); // NOTE(review): the allocation result is not checked before it is handed to stbiw__write_hdr_scanline
735 | int i, len;
736 | char buffer[128];
737 | char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n";
738 | s->func(s->context, header, sizeof(header)-1); // -1 leaves out the terminating NUL
739 |
740 | #ifdef __STDC_WANT_SECURE_LIB__
741 | len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x);
742 | #else
743 | len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x);
744 | #endif
745 | s->func(s->context, buffer, len);
746 |
747 | for(i=0; i < y; i++)
748 | stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)); // with the flip flag set, rows are taken from the bottom up
749 | STBIW_FREE(scratch);
750 | return 1;
751 | }
752 | }
753 |
754 | STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) // writes an HDR image through the caller's callback; returns the stbi_write_hdr_core() result
755 | {
756 | stbi__write_context s;
757 | stbi__start_write_callbacks(&s, func, context);
758 | return stbi_write_hdr_core(&s, x, y, comp, (float *) data);
759 | }
760 |
761 | #ifndef STBI_WRITE_NO_STDIO
762 | STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) // writes an HDR image to filename; returns 0 if the file cannot be opened, else the stbi_write_hdr_core() result
763 | {
764 | stbi__write_context s;
765 | if (stbi__start_write_file(&s,filename)) {
766 | int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data);
767 | stbi__end_write_file(&s); // the file is closed whether or not the write succeeded
768 | return r;
769 | } else
770 | return 0;
771 | }
772 | #endif // STBI_WRITE_NO_STDIO
773 |
774 |
775 | //////////////////////////////////////////////////////////////////////////////
776 | //
777 | // PNG writer
778 | //
779 |
780 | #ifndef STBIW_ZLIB_COMPRESS
781 | // stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size()
782 | #define stbiw__sbraw(a) ((int *) (a) - 2) // start of the two-int header stored in front of the items
783 | #define stbiw__sbm(a) stbiw__sbraw(a)[0] // header slot 0: capacity in items
784 | #define stbiw__sbn(a) stbiw__sbraw(a)[1] // header slot 1: number of items in use
785 |
786 | #define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) // true for a null buffer or when n more items would reach the capacity
787 | #define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0)
788 | #define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) // may reallocate, so a is updated in place
789 |
790 | #define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) // appends v, growing first when needed
791 | #define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0) // a null buffer counts as empty
792 | #define stbiw__sbfree(a) ((a) ? STBIW_FREE(stbiw__sbraw(a)),0 : 0) // frees from the header start, not from the item pointer
793 |
// Allocates or enlarges the stretchy buffer referenced by *arr so that it can
// take `increment` more elements of `itemsize` bytes each.
// A fresh buffer gets capacity increment+1; an existing one gets
// 2*old_capacity+increment. The allocation includes the two-int header that
// precedes the elements.
// On success *arr is updated to the (possibly moved) element pointer and the
// capacity field is rewritten; a fresh buffer also gets its count set to 0.
// On allocation failure (after STBIW_ASSERT) *arr is left untouched.
// Returns the current value of *arr.
static void *stbiw__sbgrowf(void **arr, int increment, int itemsize)
{
   int was_empty = (*arr == 0);
   int new_capacity;
   void *block;

   if (was_empty) {
      new_capacity = increment+1;
      block = STBIW_REALLOC_SIZED(0, 0, itemsize * new_capacity + sizeof(int)*2);
   } else {
      new_capacity = 2*stbiw__sbm(*arr)+increment;
      block = STBIW_REALLOC_SIZED(stbiw__sbraw(*arr), stbiw__sbm(*arr)*itemsize + sizeof(int)*2, itemsize * new_capacity + sizeof(int)*2);
   }

   STBIW_ASSERT(block);
   if (!block)
      return *arr;

   if (was_empty)
      ((int *) block)[1] = 0;              // new buffer starts with zero elements
   *arr = (void *) ((int *) block + 2);     // skip the header
   stbiw__sbm(*arr) = new_capacity;
   return *arr;
}
806 |
// Moves every complete byte out of the bit accumulator into the stretchy
// output buffer `data`, lowest-order byte first.
// *bitbuffer is shifted down and *bitcount reduced by 8 per byte written, so
// fewer than 8 bits remain pending on return.
// Returns the output buffer pointer, which may have changed if it grew.
static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount)
{
   for (; *bitcount >= 8; *bitcount -= 8) {
      stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer));
      *bitbuffer >>= 8;
   }
   return data;
}
816 |
// Returns the low `codebits` bits of `code` in reversed bit order
// (bit 0 becomes bit codebits-1, and so on). Zero bits requested yields 0.
static int stbiw__zlib_bitrev(int code, int codebits)
{
   int reversed = 0;
   for (; codebits != 0; --codebits) {
      reversed = (reversed << 1) | (code & 1);   // append current lowest bit
      code >>= 1;
   }
   return reversed;
}
826 |
// Counts how many leading bytes of `a` and `b` are equal.
// The count never exceeds `limit` nor 258; a non-positive limit yields 0.
static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit)
{
   int cap = (limit < 258) ? limit : 258;
   int matched = 0;
   while (matched < cap && a[matched] == b[matched])
      ++matched;
   return matched;
}
834 |
835 | static unsigned int stbiw__zhash(unsigned char *data)
836 | {
837 | stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16);
838 | hash ^= hash << 3;
839 | hash += hash >> 5;
840 | hash ^= hash << 4;
841 | hash += hash >> 17;
842 | hash ^= hash << 25;
843 | hash += hash >> 6;
844 | return hash;
845 | }
846 |
// Bit-writer helpers for stbi_zlib_compress(). They operate on the caller's
// local variables `out`, `bitbuf` and `bitcount`, which must be in scope.

// Write out all complete bytes currently held in the accumulator.
#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount))

// Append the low `nbits` bits of `bits` above the pending bits, then flush.
#define stbiw__zlib_add(bits,nbits) \
      (bitbuf |= (bits) << bitcount, bitcount += (nbits), stbiw__zlib_flush())

// Append a Huffman code of `len` bits; the code is bit-reversed before it is
// written.
#define stbiw__zlib_huffa(sym,len)  stbiw__zlib_add(stbiw__zlib_bitrev(sym,len),len)

// default huffman tables: one macro per symbol range
//    0..143   -> 8-bit codes starting at 0x30
//    144..255 -> 9-bit codes starting at 0x190
//    256..279 -> 7-bit codes starting at 0
//    280..    -> 8-bit codes starting at 0xc0
#define stbiw__zlib_huff1(sym)  stbiw__zlib_huffa(0x30 + (sym), 8)
#define stbiw__zlib_huff2(sym)  stbiw__zlib_huffa(0x190 + (sym)-144, 9)
#define stbiw__zlib_huff3(sym)  stbiw__zlib_huffa(0 + (sym)-256,7)
#define stbiw__zlib_huff4(sym)  stbiw__zlib_huffa(0xc0 + (sym)-280,8)

// Emit any symbol, picking the range by value.
#define stbiw__zlib_huff(sym)  ((sym) <= 143 ? stbiw__zlib_huff1(sym) : (sym) <= 255 ? stbiw__zlib_huff2(sym) : (sym) <= 279 ? stbiw__zlib_huff3(sym) : stbiw__zlib_huff4(sym))
// Emit a symbol known to be a byte value (0..255).
#define stbiw__zlib_huffb(sym) ((sym) <= 143 ? stbiw__zlib_huff1(sym) : stbiw__zlib_huff2(sym))

// Number of hash buckets; a power of two because hashes are masked with stbiw__ZHASH-1.
#define stbiw__ZHASH   16384
860 |
861 | #endif // STBIW_ZLIB_COMPRESS
862 |
// Compresses data[0..data_len) into a zlib stream and returns it in a newly
// allocated buffer; the stream length is stored in *out_len.
//   quality : values below 5 are raised to 5. Each hash bucket keeps up to
//             2*quality candidate positions before its older half is dropped.
// If STBIW_ZLIB_COMPRESS is defined, the call is forwarded to it unchanged.
// The builtin encoder writes a single final block using the fixed Huffman
// tables, with hash-chain match search and one-step lazy matching, followed
// by the Adler-32 checksum of the input.
// Returns NULL if the hash table cannot be allocated. The returned pointer is
// the start of the underlying allocation, so the caller can free it directly.
STBIWDEF unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality)
{
#ifdef STBIW_ZLIB_COMPRESS
   // user provided a zlib compress implementation, use that
   return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality);
#else // use builtin
   // base values / extra-bit counts for match lengths and match distances;
   // each base table carries one extra trailing entry as a search sentinel
   static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 };
   static unsigned char  lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 };
   static unsigned short distc[]   = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 };
   static unsigned char  disteb[]  = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 };
   // NOTE: the names out, bitbuf and bitcount are used by the stbiw__zlib_* macros
   unsigned int bitbuf=0;
   int pos, k, bitcount=0;
   unsigned char *out = NULL;
   unsigned char ***buckets = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(char**));
   if (buckets == NULL)
      return NULL;
   if (quality < 5) quality = 5;

   for (k=0; k < stbiw__ZHASH; ++k)
      buckets[k] = NULL;

   stbiw__sbpush(out, 0x78);   // DEFLATE 32K window
   stbiw__sbpush(out, 0x5e);   // FLEVEL = 1
   stbiw__zlib_add(1,1);  // BFINAL = 1
   stbiw__zlib_add(1,2);  // BTYPE = 1 -- fixed huffman

   for (pos=0; pos < data_len-3; ) {
      // hash next 3 bytes of data to be compressed
      unsigned char *cur = data+pos;
      int slot = stbiw__zhash(cur)&(stbiw__ZHASH-1);
      int match_len = 3;
      unsigned char *match_at = 0;
      unsigned char **chain = buckets[slot];
      int chain_len = stbiw__sbcount(chain);

      // longest match among earlier positions with the same hash
      for (k=0; k < chain_len; ++k) {
         if (chain[k]-data > pos-32768) { // if entry lies within window
            int d = stbiw__zlib_countm(chain[k], cur, data_len-pos);
            if (d >= match_len) { match_len=d; match_at=chain[k]; }
         }
      }

      // when hash table entry is too long, delete half the entries
      if (buckets[slot] && stbiw__sbn(buckets[slot]) == 2*quality) {
         STBIW_MEMMOVE(buckets[slot], buckets[slot]+quality, sizeof(buckets[slot][0])*quality);
         stbiw__sbn(buckets[slot]) = quality;
      }
      stbiw__sbpush(buckets[slot],cur);

      if (match_at) {
         // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal
         slot = stbiw__zhash(cur+1)&(stbiw__ZHASH-1);
         chain = buckets[slot];
         chain_len = stbiw__sbcount(chain);
         for (k=0; k < chain_len; ++k) {
            if (chain[k]-data > pos-32767) {
               int e = stbiw__zlib_countm(chain[k], cur+1, data_len-pos-1);
               if (e > match_len) { // if next match is better, bail on current match
                  match_at = NULL;
                  break;
               }
            }
         }
      }

      if (!match_at) {
         // no usable match: emit the current byte as a literal
         stbiw__zlib_huffb(data[pos]);
         ++pos;
         continue;
      }

      {
         int d = (int) (cur - match_at); // distance back
         STBIW_ASSERT(d <= 32767 && match_len <= 258);

         // length symbol (257 + table index) plus its extra bits
         k = 0;
         while (match_len >= lengthc[k+1]) ++k;
         stbiw__zlib_huff(k+257);
         if (lengtheb[k]) stbiw__zlib_add(match_len - lengthc[k], lengtheb[k]);

         // 5-bit distance code plus its extra bits
         k = 0;
         while (d >= distc[k+1]) ++k;
         stbiw__zlib_add(stbiw__zlib_bitrev(k,5),5);
         if (disteb[k]) stbiw__zlib_add(d - distc[k], disteb[k]);

         pos += match_len;
      }
   }
   // write out final bytes
   for (; pos < data_len; ++pos)
      stbiw__zlib_huffb(data[pos]);
   stbiw__zlib_huff(256); // end of block
   // pad with 0 bits to byte boundary
   while (bitcount)
      stbiw__zlib_add(0,1);

   for (k=0; k < stbiw__ZHASH; ++k)
      (void) stbiw__sbfree(buckets[k]);
   STBIW_FREE(buckets);

   {
      // compute adler32 on input; the sums are reduced mod 65521 after each
      // chunk of at most 5552 bytes (the first chunk takes the remainder)
      unsigned int s1=1, s2=0;
      int chunk = (int) (data_len % 5552);
      for (pos=0; pos < data_len; pos += chunk, chunk = 5552) {
         for (k=0; k < chunk; ++k) { s1 += data[pos+k]; s2 += s1; }
         s1 %= 65521; s2 %= 65521;
      }
      stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8));
      stbiw__sbpush(out, STBIW_UCHAR(s2));
      stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8));
      stbiw__sbpush(out, STBIW_UCHAR(s1));
   }
   *out_len = stbiw__sbn(out);
   // make returned pointer freeable: slide the payload down over the header
   STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len);
   return (unsigned char *) stbiw__sbraw(out);
#endif // STBIW_ZLIB_COMPRESS
}
974 |
// Computes a 32-bit CRC over buffer[0..len).
// If STBIW_CRC32 is defined the call is forwarded to it; otherwise a
// byte-at-a-time lookup table is used, starting from all-ones and inverting
// the final value. A length of zero (or less) yields 0.
static unsigned int stbiw__crc32(unsigned char *buffer, int len)
{
#ifdef STBIW_CRC32
    return STBIW_CRC32(buffer, len);
#else
   static unsigned int crc_table[256] =
   {
      0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
      0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
      0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
      0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
      0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
      0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
      0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
      0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
      0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
      0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
      0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
      0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
      0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
      0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
      0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
      0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
      0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
      0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
      0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
      0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
      0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
      0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
      0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
      0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
      0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
      0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
      0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
      0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
      0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
      0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
      0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
      0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
   };

   unsigned int crc = ~0u;
   int k = 0;
   while (k < len) {
      // table index = next input byte xor low byte of the running crc
      unsigned int idx = (buffer[k] ^ crc) & 0xff;
      crc = crc_table[idx] ^ (crc >> 8);
      ++k;
   }
   return ~crc;
#endif
}
1023 |
// Byte-writing helpers for the PNG encoder. Each one stores four bytes at the
// cursor `o`/`data` and then advances that cursor by 4, so the cursor argument
// must be a modifiable lvalue and is evaluated more than once.
//   stbiw__wpng4 : four individual byte values
//   stbiw__wp32  : a 32-bit value, most significant byte first
//   stbiw__wptag : the first four characters of `s` (a chunk tag)
// None of the macros ends in a semicolon: every call site supplies its own, so
// they behave like ordinary expressions (safe inside unbraced if/else).
#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4)
#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v))
#define stbiw__wptag(data,s) stbiw__wpng4(data, (s)[0],(s)[1],(s)[2],(s)[3])
1027 |
// Appends a CRC at the output cursor *data and advances the cursor by 4.
// The CRC covers the len+4 bytes that end at the cursor, i.e. the `len` bytes
// just written plus the 4 bytes immediately before them.
static void stbiw__wpcrc(unsigned char **data, int len)
{
   unsigned char *covered = *data - len - 4;
   unsigned int crc = stbiw__crc32(covered, len+4);
   stbiw__wp32(*data, crc);
}
1033 |
// Paeth predictor: of the three neighbours a, b and c, returns the one closest
// to the estimate a + b - c. Ties are resolved in the order a, then b, then c.
static unsigned char stbiw__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate-a);
   int dist_b = abs(estimate-b);
   int dist_c = abs(estimate-c);
   int chosen;

   if (dist_a <= dist_b && dist_a <= dist_c)
      chosen = a;
   else if (dist_b <= dist_c)
      chosen = b;
   else
      chosen = c;
   return STBIW_UCHAR(chosen);
}
1041 |
// Filters one image row for PNG output and stores the width*n filtered bytes
// in line_buffer.
//   pixels, stride_bytes : image data and the byte distance between rows
//   width, height        : image size in pixels
//   y                    : output row index; when stbi__flip_vertically_on_write
//                          is set, source row height-1-y is read instead
//   n                    : bytes per pixel
//   filter_type          : requested filter, 0..4
// Filters, by internal type:
//   0 copy            1 minus left pixel      2 minus previous output row
//   3 minus average of left and previous row  4 minus Paeth prediction
//   5, 6 variants of 3 and 4 that treat the previous row as zero
// On the first row (y == 0) there is no previous row, so the request is
// remapped: 2 -> 0, 3 -> 5, 4 -> 6.
// In every filter the first pixel (first n bytes) is handled with the left
// neighbour taken as zero. Differences are truncated to signed char.
// @OPTIMIZE: provide an option that always forces left-predict or paeth predict
static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer)
{
   static int mapping[]  = { 0,1,2,3,4 };
   static int firstmap[] = { 0,1,0,5,6 };
   int row_bytes = width*n;
   int src_row = stbi__flip_vertically_on_write ? height-1-y : y;
   // z[i-signed_stride] addresses the same byte in the previously emitted row
   int signed_stride = stbi__flip_vertically_on_write ? -stride_bytes : stride_bytes;
   unsigned char *z = pixels + stride_bytes * src_row;
   int type = (y != 0) ? mapping[filter_type] : firstmap[filter_type];
   int i;

   if (type==0) {
      memcpy(line_buffer, z, row_bytes);
      return;
   }

   // each case: first pixel (no left neighbour), then the rest of the row
   switch (type) {
      case 1:
         for (i = 0; i < n; ++i) line_buffer[i] = z[i];
         for (i = n; i < row_bytes; ++i) line_buffer[i] = z[i] - z[i-n];
         break;
      case 2:
         for (i = 0; i < n; ++i) line_buffer[i] = z[i] - z[i-signed_stride];
         for (i = n; i < row_bytes; ++i) line_buffer[i] = z[i] - z[i-signed_stride];
         break;
      case 3:
         for (i = 0; i < n; ++i) line_buffer[i] = z[i] - (z[i-signed_stride]>>1);
         for (i = n; i < row_bytes; ++i) line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1);
         break;
      case 4:
         for (i = 0; i < n; ++i) line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0));
         for (i = n; i < row_bytes; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]);
         break;
      case 5:
         for (i = 0; i < n; ++i) line_buffer[i] = z[i];
         for (i = n; i < row_bytes; ++i) line_buffer[i] = z[i] - (z[i-n]>>1);
         break;
      case 6:
         for (i = 0; i < n; ++i) line_buffer[i] = z[i];
         for (i = n; i < row_bytes; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0);
         break;
   }
}
1078 |
// Encodes an 8-bit-per-channel image as a complete PNG file in memory.
//   pixels       : source image, n bytes per pixel
//   stride_bytes : byte distance between rows; 0 means tightly packed (x*n)
//   x, y         : image width and height in pixels
//   n            : channels per pixel, 1..4 (mapped to PNG colour types
//                  0, 4, 2, 6 respectively)
//   out_len      : receives the size in bytes of the returned buffer
// Each row is filtered either with stbi_write_force_png_filter (when it is in
// 0..4) or with whichever of the five filters gives the smallest sum of
// absolute filtered values. The filtered rows are compressed with
// stbi_zlib_compress() at stbi_write_png_compression_level.
// Returns a buffer allocated with STBIW_MALLOC that the caller must release
// with STBIW_FREE, or 0 if n is out of range or an allocation/compression
// step fails (no memory is leaked on any failure path).
STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len)
{
   int force_filter = stbi_write_force_png_filter;
   int ctype[5] = { -1, 0, 4, 2, 6 };
   unsigned char sig[8] = { 137,80,78,71,13,10,26,10 };
   unsigned char *out,*o, *filt, *zlib;
   signed char *line_buffer;
   int j,zlen;

   // ctype[] is only defined for 1..4 channels; anything else would index
   // outside the table and produce an invalid header.
   if (n < 1 || n > 4)
      return 0;

   if (stride_bytes == 0)
      stride_bytes = x * n;

   if (force_filter >= 5) {
      force_filter = -1;
   }

   // filt holds, per row, one filter-type byte followed by x*n filtered bytes
   filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0;
   line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; }
   for (j=0; j < y; ++j) {
      int filter_type;
      if (force_filter > -1) {
         filter_type = force_filter;
         stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer);
      } else { // Estimate the best filter by running through all of them:
         int best_filter = 0, best_filter_val = 0x7fffffff, est, i;
         for (filter_type = 0; filter_type < 5; filter_type++) {
            stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer);

            // Estimate the entropy of the line using this filter; the less, the better.
            est = 0;
            for (i = 0; i < x*n; ++i) {
               est += abs((signed char) line_buffer[i]);
            }
            if (est < best_filter_val) {
               best_filter_val = est;
               best_filter = filter_type;
            }
         }
         // line_buffer currently holds the output of filter 4, the last one
         // tried; only re-encode when a different filter won.
         if (best_filter != 4) {
            stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer);
         }
         filter_type = best_filter;
      }
      // when we get here, filter_type contains the filter type, and line_buffer contains the data
      filt[j*(x*n+1)] = (unsigned char) filter_type;
      STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n);
   }
   STBIW_FREE(line_buffer);
   zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level);
   STBIW_FREE(filt);
   if (!zlib) return 0;

   // each tag requires 12 bytes of overhead
   out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12);
   if (!out) {
      STBIW_FREE(zlib);   // do not leak the compressed stream on failure
      return 0;
   }
   *out_len = 8 + 12+13 + 12+zlen + 12;

   o=out;
   STBIW_MEMMOVE(o,sig,8); o+= 8;
   stbiw__wp32(o, 13); // header length
   stbiw__wptag(o, "IHDR");
   stbiw__wp32(o, x);
   stbiw__wp32(o, y);
   *o++ = 8;                       // bit depth
   *o++ = STBIW_UCHAR(ctype[n]);   // colour type
   *o++ = 0;                       // compression method
   *o++ = 0;                       // filter method
   *o++ = 0;                       // interlace method
   stbiw__wpcrc(&o,13);

   stbiw__wp32(o, zlen);
   stbiw__wptag(o, "IDAT");
   STBIW_MEMMOVE(o, zlib, zlen);
   o += zlen;
   STBIW_FREE(zlib);
   stbiw__wpcrc(&o, zlen);

   stbiw__wp32(o,0);
   stbiw__wptag(o, "IEND");
   stbiw__wpcrc(&o,0);

   STBIW_ASSERT(o == out + *out_len);

   return out;
}
1164 |
#ifndef STBI_WRITE_NO_STDIO
// Encodes the image as PNG (see stbi_write_png_to_mem) and writes it to the
// file `filename`, replacing any existing content.
//   x, y         : image width and height in pixels
//   comp         : channels per pixel
//   data         : pixel data
//   stride_bytes : byte distance between rows; 0 means tightly packed
// Returns 1 only if the whole file was written and closed successfully;
// returns 0 if encoding fails, the file cannot be opened, the write is short,
// or closing the file reports an error.
STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes)
{
   FILE *f;
   int len;
   int ok;
   unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len);
   if (png == NULL) return 0;

   f = stbiw__fopen(filename, "wb");
   if (!f) { STBIW_FREE(png); return 0; }
   // a short write (disk full, I/O error) must not be reported as success
   ok = (fwrite(png, 1, len, f) == (size_t) len);
   // fclose flushes buffered output, so its failure also means lost data
   if (fclose(f) != 0)
      ok = 0;
   STBIW_FREE(png);
   return ok;
}
#endif
1181 |
// Encodes the image as PNG in memory and hands the finished file to `func`
// in a single call, passing `context` through unchanged.
// Returns 1 after the callback has been invoked, or 0 if encoding failed
// (in which case the callback is not called).
STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes)
{
   int encoded_len;
   unsigned char *encoded = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &encoded_len);

   if (encoded != NULL) {
      func(context, encoded, encoded_len);
      STBIW_FREE(encoded);
      return 1;
   }
   return 0;
}
1191 |
1192 |
1193 | /* ***************************************************************************
1194 | *
1195 | * JPEG writer
1196 | *
1197 | * This is based on Jon Olick's jo_jpeg.cpp:
1198 | * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html
1199 | */
1200 |
1201 | static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18,
1202 | 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 };
1203 |
1204 | static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) {
1205 | int bitBuf = *bitBufP, bitCnt = *bitCntP;
1206 | bitCnt += bs[1];
1207 | bitBuf |= bs[0] << (24 - bitCnt);
1208 | while(bitCnt >= 8) {
1209 | unsigned char c = (bitBuf >> 16) & 255;
1210 | stbiw__putc(s, c);
1211 | if(c == 255) {
1212 | stbiw__putc(s, 0);
1213 | }
1214 | bitBuf <<= 8;
1215 | bitCnt -= 8;
1216 | }
1217 | *bitBufP = bitBuf;
1218 | *bitCntP = bitCnt;
1219 | }
1220 |
// In-place 8-point forward DCT on the eight values addressed by d0p..d7p
// (one row or one column of a block, depending on the pointers passed).
// All inputs are read before any output is written. The results are left
// scaled: a constant input of 1 produces 8 in *d0p and 0 elsewhere.
static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) {
   const float in0 = *d0p, in1 = *d1p, in2 = *d2p, in3 = *d3p;
   const float in4 = *d4p, in5 = *d5p, in6 = *d6p, in7 = *d7p;

   // first butterfly stage: sums and differences of mirrored pairs
   const float sum07 = in0 + in7, diff07 = in0 - in7;
   const float sum16 = in1 + in6, diff16 = in1 - in6;
   const float sum25 = in2 + in5, diff25 = in2 - in5;
   const float sum34 = in3 + in4, diff34 = in3 - in4;

   // Even part
   const float even_a = sum07 + sum34;   // phase 2
   const float even_d = sum07 - sum34;
   const float even_b = sum16 + sum25;
   const float even_c = sum16 - sum25;

   const float out0 = even_a + even_b;   // phase 3
   const float out4 = even_a - even_b;

   const float rot = (even_c + even_d) * 0.707106781f; // c4
   const float out2 = even_d + rot;      // phase 5
   const float out6 = even_d - rot;

   // Odd part
   const float odd_a = diff34 + diff25;  // phase 2
   const float odd_b = diff25 + diff16;
   const float odd_c = diff16 + diff07;

   // The rotator is modified from fig 4-8 to avoid extra negations.
   const float z5 = (odd_a - odd_c) * 0.382683433f; // c6
   const float z2 = odd_a * 0.541196100f + z5; // c2-c6
   const float z4 = odd_c * 1.306562965f + z5; // c2+c6
   const float z3 = odd_b * 0.707106781f; // c4

   const float z11 = diff07 + z3;        // phase 5
   const float z13 = diff07 - z3;

   *d5p = z13 + z2;         // phase 6
   *d3p = z13 - z2;
   *d1p = z11 + z4;
   *d7p = z11 - z4;

   *d0p = out0;
   *d2p = out2;
   *d4p = out4;
   *d6p = out6;
}
1268 |
1269 | static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) {
1270 | int tmp1 = val < 0 ? -val : val;
1271 | val = val < 0 ? val-1 : val;
1272 | bits[1] = 1;
1273 | while(tmp1 >>= 1) {
1274 | ++bits[1];
1275 | }
1276 | bits[0] = val & ((1<0)&&(DU[end0pos]==0); --end0pos) {
1314 | }
1315 | // end0pos = first element in reverse order !=0
1316 | if(end0pos == 0) {
1317 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
1318 | return DU[0];
1319 | }
1320 | for(i = 1; i <= end0pos; ++i) {
1321 | int startpos = i;
1322 | int nrzeroes;
1323 | unsigned short bits[2];
1324 | for (; DU[i]==0 && i<=end0pos; ++i) {
1325 | }
1326 | nrzeroes = i-startpos;
1327 | if ( nrzeroes >= 16 ) {
1328 | int lng = nrzeroes>>4;
1329 | int nrmarker;
1330 | for (nrmarker=1; nrmarker <= lng; ++nrmarker)
1331 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes);
1332 | nrzeroes &= 15;
1333 | }
1334 | stbiw__jpg_calcBits(DU[i], bits);
1335 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]);
1336 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits);
1337 | }
1338 | if(end0pos != 63) {
1339 | stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB);
1340 | }
1341 | return DU[0];
1342 | }
1343 |
1344 | static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) {
1345 | // Constants that don't pollute global namespace
1346 | static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0};
1347 | static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
1348 | static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d};
1349 | static const unsigned char std_ac_luminance_values[] = {
1350 | 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,
1351 | 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,
1352 | 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,
1353 | 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
1354 | 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,
1355 | 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,
1356 | 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa
1357 | };
1358 | static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0};
1359 | static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11};
1360 | static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77};
1361 | static const unsigned char std_ac_chrominance_values[] = {
1362 | 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,
1363 | 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,
1364 | 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,
1365 | 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
1366 | 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,
1367 | 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,
1368 | 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa
1369 | };
1370 | // Huffman tables
1371 | static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}};
1372 | static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}};
1373 | static const unsigned short YAC_HT[256][2] = {
1374 | {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1375 | {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1376 | {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1377 | {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1378 | {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1379 | {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1380 | {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1381 | {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1382 | {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1383 | {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1384 | {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1385 | {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1386 | {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1387 | {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1388 | {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0},
1389 | {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0}
1390 | };
1391 | static const unsigned short UVAC_HT[256][2] = {
1392 | {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1393 | {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1394 | {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1395 | {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1396 | {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1397 | {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1398 | {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1399 | {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1400 | {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1401 | {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1402 | {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1403 | {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1404 | {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1405 | {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},
1406 | {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0},
1407 | {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0}
1408 | };
1409 | static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22,
1410 | 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99};
1411 | static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99,
1412 | 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99};
1413 | static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f,
1414 | 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f };
1415 |
1416 | int row, col, i, k;
1417 | float fdtbl_Y[64], fdtbl_UV[64];
1418 | unsigned char YTable[64], UVTable[64];
1419 |
1420 | if(!data || !width || !height || comp > 4 || comp < 1) {
1421 | return 0;
1422 | }
1423 |
1424 | quality = quality ? quality : 90;
1425 | quality = quality < 1 ? 1 : quality > 100 ? 100 : quality;
1426 | quality = quality < 50 ? 5000 / quality : 200 - quality * 2;
1427 |
1428 | for(i = 0; i < 64; ++i) {
1429 | int uvti, yti = (YQT[i]*quality+50)/100;
1430 | YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti);
1431 | uvti = (UVQT[i]*quality+50)/100;
1432 | UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti);
1433 | }
1434 |
1435 | for(row = 0, k = 0; row < 8; ++row) {
1436 | for(col = 0; col < 8; ++col, ++k) {
1437 | fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);
1438 | fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]);
1439 | }
1440 | }
1441 |
1442 | // Write Headers
1443 | {
1444 | static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 };
1445 | static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 };
1446 | const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width),
1447 | 3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 };
1448 | s->func(s->context, (void*)head0, sizeof(head0));
1449 | s->func(s->context, (void*)YTable, sizeof(YTable));
1450 | stbiw__putc(s, 1);
1451 | s->func(s->context, UVTable, sizeof(UVTable));
1452 | s->func(s->context, (void*)head1, sizeof(head1));
1453 | s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1);
1454 | s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values));
1455 | stbiw__putc(s, 0x10); // HTYACinfo
1456 | s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1);
1457 | s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values));
1458 | stbiw__putc(s, 1); // HTUDCinfo
1459 | s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1);
1460 | s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values));
1461 | stbiw__putc(s, 0x11); // HTUACinfo
1462 | s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1);
1463 | s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values));
1464 | s->func(s->context, (void*)head2, sizeof(head2));
1465 | }
1466 |
1467 | // Encode 8x8 macroblocks
1468 | {
1469 | static const unsigned short fillBits[] = {0x7F, 7};
1470 | const unsigned char *imageData = (const unsigned char *)data;
1471 | int DCY=0, DCU=0, DCV=0;
1472 | int bitBuf=0, bitCnt=0;
1473 | // comp == 2 is grey+alpha (alpha is ignored)
1474 | int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0;
1475 | int x, y, pos;
1476 | for(y = 0; y < height; y += 8) {
1477 | for(x = 0; x < width; x += 8) {
1478 | float YDU[64], UDU[64], VDU[64];
1479 | for(row = y, pos = 0; row < y+8; ++row) {
1480 | // row >= height => use last input row
1481 | int clamped_row = (row < height) ? row : height - 1;
1482 | int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp;
1483 | for(col = x; col < x+8; ++col, ++pos) {
1484 | float r, g, b;
1485 | // if col >= width => use pixel from last input column
1486 | int p = base_p + ((col < width) ? col : (width-1))*comp;
1487 |
1488 | r = imageData[p+0];
1489 | g = imageData[p+ofsG];
1490 | b = imageData[p+ofsB];
1491 | YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128;
1492 | UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b;
1493 | VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b;
1494 | }
1495 | }
1496 |
1497 | DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT);
1498 | DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT);
1499 | DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT);
1500 | }
1501 | }
1502 |
1503 | // Do the bit alignment of the EOI marker
1504 | stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits);
1505 | }
1506 |
1507 | // EOI
1508 | stbiw__putc(s, 0xFF);
1509 | stbiw__putc(s, 0xD9);
1510 |
1511 | return 1;
1512 | }
1513 |
1514 | STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality)
1515 | {
1516 | stbi__write_context s;
1517 | stbi__start_write_callbacks(&s, func, context);
1518 | return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality);
1519 | }
1520 |
1521 |
1522 | #ifndef STBI_WRITE_NO_STDIO
1523 | STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality)
1524 | {
1525 | stbi__write_context s;
1526 | if (stbi__start_write_file(&s,filename)) {
1527 | int r = stbi_write_jpg_core(&s, x, y, comp, data, quality);
1528 | stbi__end_write_file(&s);
1529 | return r;
1530 | } else
1531 | return 0;
1532 | }
1533 | #endif
1534 |
1535 | #endif // STB_IMAGE_WRITE_IMPLEMENTATION
1536 |
1537 | /* Revision history
1538 | 1.10 (2019-02-07)
1539 | support utf8 filenames in Windows; fix warnings and platform ifdefs
1540 | 1.09 (2018-02-11)
1541 | fix typo in zlib quality API, improve STB_I_W_STATIC in C++
1542 | 1.08 (2018-01-29)
1543 | add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter
1544 | 1.07 (2017-07-24)
1545 | doc fix
1546 | 1.06 (2017-07-23)
1547 | writing JPEG (using Jon Olick's code)
1548 | 1.05 ???
1549 | 1.04 (2017-03-03)
1550 | monochrome BMP expansion
1551 | 1.03 ???
1552 | 1.02 (2016-04-02)
1553 | avoid allocating large structures on the stack
1554 | 1.01 (2016-01-16)
1555 | STBIW_REALLOC_SIZED: support allocators with no realloc support
1556 | avoid race-condition in crc initialization
1557 | minor compile issues
1558 | 1.00 (2015-09-14)
1559 | installable file IO function
1560 | 0.99 (2015-09-13)
1561 | warning fixes; TGA rle support
1562 | 0.98 (2015-04-08)
1563 | added STBIW_MALLOC, STBIW_ASSERT etc
1564 | 0.97 (2015-01-18)
1565 | fixed HDR asserts, rewrote HDR rle logic
1566 | 0.96 (2015-01-17)
1567 | add HDR output
1568 | fix monochrome BMP
1569 | 0.95 (2014-08-17)
1570 | add monochrome TGA output
1571 | 0.94 (2014-05-31)
1572 | rename private functions to avoid conflicts with stb_image.h
1573 | 0.93 (2014-05-27)
1574 | warning fixes
1575 | 0.92 (2010-08-01)
1576 | casts to unsigned char to fix warnings
1577 | 0.91 (2010-07-17)
1578 | first public release
1579 | 0.90 first internal release
1580 | */
1581 |
1582 | /*
1583 | ------------------------------------------------------------------------------
1584 | This software is available under 2 licenses -- choose whichever you prefer.
1585 | ------------------------------------------------------------------------------
1586 | ALTERNATIVE A - MIT License
1587 | Copyright (c) 2017 Sean Barrett
1588 | Permission is hereby granted, free of charge, to any person obtaining a copy of
1589 | this software and associated documentation files (the "Software"), to deal in
1590 | the Software without restriction, including without limitation the rights to
1591 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
1592 | of the Software, and to permit persons to whom the Software is furnished to do
1593 | so, subject to the following conditions:
1594 | The above copyright notice and this permission notice shall be included in all
1595 | copies or substantial portions of the Software.
1596 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1597 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1598 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1599 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1600 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1601 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1602 | SOFTWARE.
1603 | ------------------------------------------------------------------------------
1604 | ALTERNATIVE B - Public Domain (www.unlicense.org)
1605 | This is free and unencumbered software released into the public domain.
1606 | Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
1607 | software, either in source code form or as a compiled binary, for any purpose,
1608 | commercial or non-commercial, and by any means.
1609 | In jurisdictions that recognize copyright laws, the author or authors of this
1610 | software dedicate any and all copyright interest in the software to the public
1611 | domain. We make this dedication for the benefit of the public at large and to
1612 | the detriment of our heirs and successors. We intend this dedication to be an
1613 | overt act of relinquishment in perpetuity of all present and future rights to
1614 | this software under copyright law.
1615 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1616 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1617 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1618 | AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
1619 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
1620 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1621 | ------------------------------------------------------------------------------
1622 | */
1623 |
--------------------------------------------------------------------------------
/source/main.cpp:
--------------------------------------------------------------------------------
1 | // main program entry point and the actual raytracing bits
2 |
#include "maths.h"
#include "scene.h"
#include <atomic>
6 |
7 | // Include external libraries:
8 | // - PNG writing
9 | #define STBI_MSC_SECURE_CRT
10 | #define STB_IMAGE_WRITE_IMPLEMENTATION
11 | #include "external/stb_image_write.h"
12 | // - time measurement
13 | #define SOKOL_IMPL
14 | #include "external/sokol_time.h"
15 | // - OBJ file loading
16 | #include "external/objparser.h"
17 | // - Multi-threaded job system
18 | #include "external/enkits/TaskScheduler_c.h"
19 |
20 |
21 | // --------------------------------------------------------------------------
22 | // "ray/path tracing" bits
23 |
24 | // general minimum/maximum distances for rays (from "very close to surface but not exacttly on it"
25 | // to "ten million units")
26 | const float kMinT = 0.001f;
27 | const float kMaxT = 1.0e7f;
28 | // maximum raytracing recursion depth, i.e. number of light bounces
29 | const int kMaxDepth = 10;
30 |
31 | // we have one hardcoded directional light, with this direction and color
32 | static const float3 kLightDir = normalize(float3(-0.7f,1.0f,0.5f));
33 | static const float3 kLightColor = float3(0.7f,0.6f,0.5f);
34 |
35 |
36 | // when a ray "r" has just hit a surface at point "hit", decide what to do about it:
37 | // in our very simple case, we assume the surface is perfectly diffuse, so we'll return:
38 | // - surface albedo ("color") in "attenuation"
39 | // - new random ray for the next light bounce in "scattered"
40 | // - illumination from the directional light in "outLightE"
41 | static bool Scatter(const Ray& r, const Hit& hit, float3& attenuation, Ray& scattered, float3& outLightE, uint32_t& rngState, int& inoutRayCount)
42 | {
43 | outLightE = float3(0,0,0);
44 |
45 | // model a perfectly diffuse material:
46 |
47 | // random point on unit sphere that is tangent to the hit point
48 | float3 target = hit.pos + hit.normal + RandomUnitVector(rngState);
49 | scattered = Ray(hit.pos, normalize(target - hit.pos));
50 |
51 | // make color slightly based on surface normals
52 | float3 albedo = hit.normal * 0.0f + float3(0.7f,0.7f,0.7f);
53 | attenuation = albedo;
54 |
55 | // explicit directional light by shooting a shadow ray
56 | ++inoutRayCount;
57 | if (!HitSceneShadow(Ray(hit.pos, kLightDir), kMinT, kMaxT))
58 | {
59 | // ray towards the light did not hit anything in the scene, so
60 | // that means we are not in shadow: compute illumination from it
61 | float3 rdir = r.dir;
62 | AssertUnit(rdir);
63 | float3 nl = dot(hit.normal, rdir) < 0 ? hit.normal : -hit.normal;
64 | outLightE += albedo * kLightColor * (fmax(0.0f, dot(kLightDir, nl)));
65 | }
66 |
67 | return true;
68 | }
69 |
70 |
71 | // trace a ray into the scene, and return the final color for it
72 | static float3 Trace(const Ray& r, int depth, uint32_t& rngState, int& inoutRayCount)
73 | {
74 | ++inoutRayCount;
75 | Hit hit;
76 | int id = HitScene(r, kMinT, kMaxT, hit);
77 | if (id != -1)
78 | {
79 | // ray hits something in the scene
80 | Ray scattered;
81 | float3 attenuation;
82 | float3 lightE;
83 | if (depth < kMaxDepth && Scatter(r, hit, attenuation, scattered, lightE, rngState, inoutRayCount))
84 | {
85 | // we got a new ray bounced from the surface; recursively trace it
86 | return lightE + attenuation * Trace(scattered, depth+1, rngState, inoutRayCount);
87 | }
88 | else
89 | {
90 | // reached recursion limit, or surface fully absorbed the ray: return black
91 | return float3(0,0,0);
92 | }
93 | }
94 | else
95 | {
96 | // ray does not hit anything: return illumination from the sky (just a simple gradient really)
97 | float3 unitDir = r.dir;
98 | float t = 0.5f*(unitDir.getY() + 1.0f);
99 | return ((1.0f - t)*float3(1.0f, 1.0f, 1.0f) + t * float3(0.5f, 0.7f, 1.0f)) * 0.5f;
100 | }
101 | }
102 |
103 |
104 | // load scene from an .OBJ file
105 | static bool LoadScene(const char* dataFile, float3& outBoundsMin, float3& outBoundsMax)
106 | {
107 | ObjFile objFile;
108 | if (!objParseFile(objFile, dataFile))
109 | {
110 | printf("ERROR: failed to load .obj file\n");
111 | return false;
112 | }
113 | outBoundsMin = float3(+1.0e6f, +1.0e6f, +1.0e6f);
114 | outBoundsMax = float3(-1.0e6f, -1.0e6f, -1.0e6f);
115 |
116 | int objTriCount = int(objFile.f_size / 9);
117 | Triangle* tris = new Triangle[objTriCount + 2]; // will add two triangles for the "floor"
118 | for (int i = 0; i < objTriCount; ++i)
119 | {
120 | int idx0 = objFile.f[i * 9 + 0] * 3;
121 | int idx1 = objFile.f[i * 9 + 3] * 3;
122 | int idx2 = objFile.f[i * 9 + 6] * 3;
123 | float3 v0 = float3(objFile.v[idx0 + 0], objFile.v[idx0 + 1], objFile.v[idx0 + 2]);
124 | float3 v1 = float3(objFile.v[idx1 + 0], objFile.v[idx1 + 1], objFile.v[idx1 + 2]);
125 | float3 v2 = float3(objFile.v[idx2 + 0], objFile.v[idx2 + 1], objFile.v[idx2 + 2]);
126 | tris[i].v0 = v0;
127 | tris[i].v1 = v1;
128 | tris[i].v2 = v2;
129 | outBoundsMin = min(outBoundsMin, v0); outBoundsMax = max(outBoundsMax, v0);
130 | outBoundsMin = min(outBoundsMin, v1); outBoundsMax = max(outBoundsMax, v1);
131 | outBoundsMin = min(outBoundsMin, v2); outBoundsMax = max(outBoundsMax, v2);
132 | }
133 |
134 | // add two triangles that are right "under the scene" and covering larger area than the scene
135 | // itself, to serve as a "floor"
136 | float3 size = outBoundsMax - outBoundsMin;
137 | float3 extra = size * 0.7f;
138 | tris[objTriCount+0].v0 = float3(outBoundsMin.getX()-extra.getX(), outBoundsMin.getY(), outBoundsMin.getZ()-extra.getZ());
139 | tris[objTriCount+0].v1 = float3(outBoundsMin.getX()-extra.getX(), outBoundsMin.getY(), outBoundsMax.getZ()+extra.getZ());
140 | tris[objTriCount+0].v2 = float3(outBoundsMax.getX()+extra.getX(), outBoundsMin.getY(), outBoundsMin.getZ()-extra.getZ());
141 | tris[objTriCount+1].v0 = float3(outBoundsMin.getX()-extra.getX(), outBoundsMin.getY(), outBoundsMax.getZ()+extra.getZ());
142 | tris[objTriCount+1].v1 = float3(outBoundsMax.getX()+extra.getX(), outBoundsMin.getY(), outBoundsMax.getZ()+extra.getZ());
143 | tris[objTriCount+1].v2 = float3(outBoundsMax.getX()+extra.getX(), outBoundsMin.getY(), outBoundsMin.getZ()-extra.getZ());
144 |
145 | uint64_t t0 = stm_now();
146 | InitializeScene(objTriCount + 2, tris);
147 | printf("Initialized scene '%s' (%i tris) in %.3fs\n", dataFile, objTriCount+2, stm_sec(stm_since(t0)));
148 |
149 | delete[] tris;
150 | return true;
151 | }
152 |
153 | struct TraceData
154 | {
155 | int screenWidth, screenHeight, samplesPerPixel;
156 | uint8_t* image;
157 | const Camera* camera;
158 | std::atomic rayCount;
159 | };
160 |
161 | static void TraceImageJob(uint32_t start, uint32_t end, uint32_t threadnum, void* data_)
162 | {
163 | (void)threadnum;
164 | TraceData& data = *(TraceData*)data_;
165 | uint8_t* image = data.image + start * data.screenWidth * 4;
166 |
167 | float invWidth = 1.0f / data.screenWidth;
168 | float invHeight = 1.0f / data.screenHeight;
169 |
170 | int rayCount = 0;
171 | // go over the image: each pixel row
172 | for (uint32_t y = start; y < end; ++y)
173 | {
174 | // go over the image: each pixel in the row
175 | uint32_t rngState = y * 9781 + 1;
176 | for (int x = 0; x < data.screenWidth; ++x)
177 | {
178 | float3 col(0, 0, 0);
179 | // we'll trace N slightly jittered rays for each pixel, to get anti-aliasing, loop over them here
180 | for (int s = 0; s < data.samplesPerPixel; s++)
181 | {
182 | // get a ray from camera, and trace it
183 | float u = float(x + RandomFloat01(rngState)) * invWidth;
184 | float v = float(y + RandomFloat01(rngState)) * invHeight;
185 | Ray r = data.camera->GetRay(u, v, rngState);
186 | col += Trace(r, 0, rngState, rayCount);
187 | }
188 | col *= 1.0f / float(data.samplesPerPixel);
189 |
190 | // simplistic "gamma correction" by just taking a square root of the final color
191 | col.setX(sqrtf(col.getX()));
192 | col.setY(sqrtf(col.getY()));
193 | col.setZ(sqrtf(col.getZ()));
194 |
195 | // our image is bytes in 0-255 range, turn our floats into them here and write into the image
196 | image[0] = uint8_t(saturate(col.getX()) * 255.0f);
197 | image[1] = uint8_t(saturate(col.getY()) * 255.0f);
198 | image[2] = uint8_t(saturate(col.getZ()) * 255.0f);
199 | image[3] = 255;
200 | image += 4;
201 | }
202 | }
203 | data.rayCount += rayCount;
204 | }
205 |
206 |
207 | int main(int argc, const char** argv)
208 | {
209 | // initialize timer
210 | stm_setup();
211 |
212 | // parse screen size command line arguments
213 | int screenWidth, screenHeight, samplesPerPixel;
214 | if (argc < 5)
215 | {
216 | printf("Usage: TrimeshTracer.exe [width] [height] [samplesPerPixel] [objFile]\n");
217 | return 1;
218 | }
219 | screenWidth = atoi(argv[1]);
220 | if (screenWidth < 1 || screenWidth > 10000)
221 | {
222 | printf("ERROR: invalid width argument '%s'\n", argv[1]);
223 | return 1;
224 | }
225 | screenHeight = atoi(argv[2]);
226 | if (screenHeight < 1 || screenHeight > 10000)
227 | {
228 | printf("ERROR: invalid height argument '%s'\n", argv[2]);
229 | return 1;
230 | }
231 | samplesPerPixel = atoi(argv[3]);
232 | if (samplesPerPixel < 1 || samplesPerPixel > 1024)
233 | {
234 | printf("ERROR: invalid samplesPerPixel argument '%s'\n", argv[3]);
235 | return 1;
236 | }
237 |
238 | // load model file and initialize the scene
239 | float3 sceneMin, sceneMax;
240 | if (!LoadScene(argv[4], sceneMin, sceneMax))
241 | return 1;
242 |
243 | // place a camera: put it a bit outside scene bounds, looking at the center of it
244 | float3 sceneSize = sceneMax - sceneMin;
245 | float3 sceneCenter = (sceneMin + sceneMax) * 0.5f;
246 | float3 lookfrom = sceneCenter + sceneSize * float3(0.3f,0.6f,1.2f);
247 | if (strstr(argv[4], "sponza.obj") != nullptr) // sponza looks bad when viewed from outside; hardcode camera position
248 | lookfrom = float3(-5.96f, 4.08f, -1.22f);
249 | float3 lookat = sceneCenter + sceneSize * float3(0,-0.1f,0);
250 | float distToFocus = length(lookfrom - lookat);
251 | float aperture = 0.03f;
252 | auto camera = Camera(lookfrom, lookat, float3(0, 1, 0), 60, float(screenWidth) / float(screenHeight), aperture, distToFocus);
253 |
254 | // create RGBA image for the result
255 | uint8_t* image = new uint8_t[screenWidth * screenHeight * 4];
256 |
257 | // initialize job system for threading
258 | enkiTaskScheduler* jobSystem = enkiNewTaskScheduler();
259 | enkiInitTaskScheduler(jobSystem);
260 | enkiTaskSet* job = enkiCreateTaskSet(jobSystem, TraceImageJob);
261 |
262 | // generate the image - spawn TraceImageJob jobs to cover the whole image, and wait for all of them to complete
263 | uint64_t t0 = stm_now();
264 |
265 | TraceData data;
266 | data.screenWidth = screenWidth;
267 | data.screenHeight = screenHeight;
268 | data.samplesPerPixel = samplesPerPixel;
269 | data.image = image;
270 | data.camera = &camera;
271 | data.rayCount = 0;
272 | // for debugging: set to false to effectively turn off threading (will create one job invocation
273 | // only, that covers the whole screen)
274 | bool threaded = true;
275 | enkiAddTaskSetToPipeMinRange(jobSystem, job, &data, screenHeight, threaded ? 1 : screenHeight);
276 | enkiWaitForTaskSet(jobSystem, job);
277 |
278 | double dt = stm_sec(stm_since(t0));
279 | printf("Rendered scene at %ix%i,%ispp in %.3f s\n", screenWidth, screenHeight, samplesPerPixel, dt);
280 | printf("- %.1f K Rays, %.1f K Rays/s\n", data.rayCount/1000.0, data.rayCount/1000.0/dt);
281 |
282 | // write resulting image as PNG
283 | stbi_flip_vertically_on_write(1);
284 | stbi_write_png("output.png", screenWidth, screenHeight, 4, image, screenWidth*4);
285 |
286 | // cleanup and exit
287 | delete[] image;
288 | enkiDeleteTaskSet(job);
289 | enkiDeleteTaskScheduler(jobSystem);
290 | CleanupScene();
291 | return 0;
292 | }
293 |
--------------------------------------------------------------------------------
/source/maths.cpp:
--------------------------------------------------------------------------------
1 | #include "maths.h"
2 | #include
3 | #include
4 |
// Advance a 32-bit xorshift generator (shift amounts 13, 17, 15) by one step:
// "state" is updated in place and its new value is returned.
// A state of zero stays zero.
static uint32_t XorShift32(uint32_t& state)
{
    state ^= state << 13;
    state ^= state >> 17;
    state ^= state << 15;
    return state;
}
14 |
15 | float RandomFloat01(uint32_t& state)
16 | {
17 | return (XorShift32(state) & 0xFFFFFF) / 16777216.0f;
18 | }
19 |
20 | float3 RandomInUnitDisk(uint32_t& state)
21 | {
22 | float3 p;
23 | do
24 | {
25 | p = 2.0 * float3(RandomFloat01(state),RandomFloat01(state),0) - float3(1,1,0);
26 | } while (dot(p,p) >= 1.0);
27 | return p;
28 | }
29 |
30 | float3 RandomUnitVector(uint32_t& state)
31 | {
32 | float z = RandomFloat01(state) * 2.0f - 1.0f;
33 | float a = RandomFloat01(state) * 2.0f * kPI;
34 | float r = sqrtf(1.0f - z * z);
35 | float x = r * cosf(a);
36 | float y = r * sinf(a);
37 | return float3(x, y, z);
38 | }
39 |
--------------------------------------------------------------------------------
/source/maths.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | // --------------------------------------------------------------------------
4 | // various math utilities
5 |
6 | #define NOMINMAX
7 | #include
8 | #include
9 | #include
10 |
11 | #define DO_FLOAT3_WITH_SIMD 1
12 |
13 | #define kPI 3.1415926f
14 |
15 | // --------------------------------------------------------------------------
16 | // simple 3D vector with x,y,z components - both SIMD (SSE) and simple scalar C paths
17 |
18 | #if DO_FLOAT3_WITH_SIMD
19 |
20 |
21 | // ---- SSE implementation, largely based on http://www.codersnotes.com/notes/maths-lib-2016/
22 |
23 | #include
24 | #include
25 | #include
26 |
27 | // SHUFFLE3(v, 0,1,2) leaves the vector unchanged (v.xyz).
28 | // SHUFFLE3(v, 0,0,0) splats the X (v.xxx).
29 | #define SHUFFLE3(V, X,Y,Z) float3(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(Z,Z,Y,X)))
30 |
31 | struct float3
32 | {
33 | inline float3() {}
34 | inline explicit float3(const float *p) { m = _mm_set_ps(p[2], p[2], p[1], p[0]); }
35 | inline explicit float3(float x, float y, float z) { m = _mm_set_ps(z, z, y, x); }
36 | inline explicit float3(float v) { m = _mm_set1_ps(v); }
37 | inline explicit float3(__m128 v) { m = v; }
38 |
39 | inline float getX() const { return _mm_cvtss_f32(m); }
40 | inline float getY() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(1, 1, 1, 1))); }
41 | inline float getZ() const { return _mm_cvtss_f32(_mm_shuffle_ps(m, m, _MM_SHUFFLE(2, 2, 2, 2))); }
42 |
43 | inline float3 yzx() const { return SHUFFLE3(*this, 1, 2, 0); }
44 | inline float3 zxy() const { return SHUFFLE3(*this, 2, 0, 1); }
45 |
46 | inline void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }
47 |
48 | void setX(float x)
49 | {
50 | m = _mm_move_ss(m, _mm_set_ss(x));
51 | }
52 | void setY(float y)
53 | {
54 | __m128 t = _mm_move_ss(m, _mm_set_ss(y));
55 | t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 2, 0, 0));
56 | m = _mm_move_ss(t, m);
57 | }
58 | void setZ(float z)
59 | {
60 | __m128 t = _mm_move_ss(m, _mm_set_ss(z));
61 | t = _mm_shuffle_ps(t, t, _MM_SHUFFLE(3, 0, 1, 0));
62 | m = _mm_move_ss(t, m);
63 | }
64 |
65 | __m128 m;
66 | };
67 |
68 | typedef float3 bool3;
69 |
70 | inline float3 operator+ (float3 a, float3 b) { a.m = _mm_add_ps(a.m, b.m); return a; }
71 | inline float3 operator- (float3 a, float3 b) { a.m = _mm_sub_ps(a.m, b.m); return a; }
72 | inline float3 operator* (float3 a, float3 b) { a.m = _mm_mul_ps(a.m, b.m); return a; }
73 | inline float3 operator/ (float3 a, float3 b) { a.m = _mm_div_ps(a.m, b.m); return a; }
74 | inline float3 operator* (float3 a, float b) { a.m = _mm_mul_ps(a.m, _mm_set1_ps(b)); return a; }
75 | inline float3 operator/ (float3 a, float b) { a.m = _mm_div_ps(a.m, _mm_set1_ps(b)); return a; }
76 | inline float3 operator* (float a, float3 b) { b.m = _mm_mul_ps(_mm_set1_ps(a), b.m); return b; }
77 | inline float3 operator/ (float a, float3 b) { b.m = _mm_div_ps(_mm_set1_ps(a), b.m); return b; }
78 | inline float3& operator+= (float3 &a, float3 b) { a = a + b; return a; }
79 | inline float3& operator-= (float3 &a, float3 b) { a = a - b; return a; }
80 | inline float3& operator*= (float3 &a, float3 b) { a = a * b; return a; }
81 | inline float3& operator/= (float3 &a, float3 b) { a = a / b; return a; }
82 | inline float3& operator*= (float3 &a, float b) { a = a * b; return a; }
83 | inline float3& operator/= (float3 &a, float b) { a = a / b; return a; }
84 | inline bool3 operator==(float3 a, float3 b) { a.m = _mm_cmpeq_ps(a.m, b.m); return a; }
85 | inline bool3 operator!=(float3 a, float3 b) { a.m = _mm_cmpneq_ps(a.m, b.m); return a; }
86 | inline bool3 operator< (float3 a, float3 b) { a.m = _mm_cmplt_ps(a.m, b.m); return a; }
87 | inline bool3 operator> (float3 a, float3 b) { a.m = _mm_cmpgt_ps(a.m, b.m); return a; }
88 | inline bool3 operator<=(float3 a, float3 b) { a.m = _mm_cmple_ps(a.m, b.m); return a; }
89 | inline bool3 operator>=(float3 a, float3 b) { a.m = _mm_cmpge_ps(a.m, b.m); return a; }
90 | inline float3 min(float3 a, float3 b) { a.m = _mm_min_ps(a.m, b.m); return a; }
91 | inline float3 max(float3 a, float3 b) { a.m = _mm_max_ps(a.m, b.m); return a; }
92 |
93 | inline float3 operator- (float3 a) { return float3(_mm_setzero_ps()) - a; }
94 |
95 | inline float hmin(float3 v)
96 | {
97 | v = min(v, SHUFFLE3(v, 1, 0, 2));
98 | return min(v, SHUFFLE3(v, 2, 0, 1)).getX();
99 | }
100 | inline float hmax(float3 v)
101 | {
102 | v = max(v, SHUFFLE3(v, 1, 0, 2));
103 | return max(v, SHUFFLE3(v, 2, 0, 1)).getX();
104 | }
105 |
106 | inline float3 cross(float3 a, float3 b)
107 | {
108 | // x <- a.y*b.z - a.z*b.y
109 | // y <- a.z*b.x - a.x*b.z
110 | // z <- a.x*b.y - a.y*b.x
111 | // We can save a shuffle by grouping it in this wacky order:
112 | return (a.zxy()*b - a*b.zxy()).zxy();
113 | }
114 |
115 | // Returns a 3-bit code where bit0..bit2 is X..Z
116 | inline unsigned mask(float3 v) { return _mm_movemask_ps(v.m) & 7; }
117 | // Once we have a comparison, we can branch based on its results:
118 | inline bool any(bool3 v) { return mask(v) != 0; }
119 | inline bool all(bool3 v) { return mask(v) == 7; }
120 |
121 | inline float3 clamp(float3 t, float3 a, float3 b) { return min(max(t, a), b); }
122 | inline float sum(float3 v) { return v.getX() + v.getY() + v.getZ(); }
123 | inline float dot(float3 a, float3 b) { return sum(a*b); }
124 |
125 |
126 | #else // #if DO_FLOAT3_WITH_SIMD
127 |
128 | // ---- Simple scalar C implementation
129 |
130 |
// 3D vector with plain x, y, z float members (scalar fallback used when
// DO_FLOAT3_WITH_SIMD is 0). Default-constructs to zero.
struct float3
{
    float3() : x(0), y(0), z(0) {}
    float3(float x_, float y_, float z_) : x(x_), y(y_), z(z_) {}

    float3 operator-() const { return float3(-x, -y, -z); }
    float3& operator+=(const float3& o) { x+=o.x; y+=o.y; z+=o.z; return *this; }
    float3& operator-=(const float3& o) { x-=o.x; y-=o.y; z-=o.z; return *this; }
    float3& operator*=(const float3& o) { x*=o.x; y*=o.y; z*=o.z; return *this; }
    float3& operator*=(float o) { x*=o; y*=o; z*=o; return *this; }

    inline float getX() const { return x; }
    inline float getY() const { return y; }
    inline float getZ() const { return z; }
    inline void setX(float x_) { x = x_; }
    inline void setY(float y_) { y = y_; }
    inline void setZ(float z_) { z = z_; }
    // write x, y, z to p[0..2]
    inline void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); }

    float x, y, z;
};

// component-wise arithmetic
inline float3 operator+(const float3& a, const float3& b) { return float3(a.x+b.x,a.y+b.y,a.z+b.z); }
inline float3 operator-(const float3& a, const float3& b) { return float3(a.x-b.x,a.y-b.y,a.z-b.z); }
inline float3 operator*(const float3& a, const float3& b) { return float3(a.x*b.x,a.y*b.y,a.z*b.z); }
inline float3 operator*(const float3& a, float b) { return float3(a.x*b,a.y*b,a.z*b); }
inline float3 operator*(float a, const float3& b) { return float3(a*b.x,a*b.y,a*b.z); }

inline float dot(const float3& a, const float3& b) { return a.x*b.x+a.y*b.y+a.z*b.z; }

inline float3 cross(const float3& a, const float3& b)
{
    return float3(a.y*b.z - a.z*b.y, -(a.x*b.z - a.z*b.x), a.x*b.y - a.y*b.x);
}

// component-wise minimum / maximum
inline float3 min(const float3& a, const float3& b)
{
    return float3(fmin(a.x,b.x), fmin(a.y,b.y), fmin(a.z,b.z));
}
inline float3 max(const float3& a, const float3& b)
{
    return float3(fmax(a.x,b.x), fmax(a.y,b.y), fmax(a.z,b.z));
}

// smallest / largest of the three components; the SIMD implementation offers
// these as well and the ray-box test relies on them, so the scalar
// implementation has to provide them too
inline float hmin(const float3& v) { return fmin(v.x, fmin(v.y, v.z)); }
inline float hmax(const float3& v) { return fmax(v.x, fmax(v.y, v.z)); }
174 | #endif // #else of #if DO_FLOAT3_WITH_SIMD
175 |
176 | inline float length(float3 v) { return sqrtf(dot(v, v)); }
177 | inline float sqLength(float3 v) { return dot(v, v); }
178 | inline float3 normalize(float3 v) { return v * (1.0f / length(v)); }
179 |
// Clamp v to the [0, 1] range.
inline float saturate(float v)
{
    return v < 0 ? 0.0f : (v > 1 ? 1.0f : v);
}
181 |
182 |
183 | inline void AssertUnit(float3 v)
184 | {
185 | (void)v;
186 | assert(fabsf(sqLength(v) - 1.0f) < 0.01f);
187 | }
188 |
189 |
190 | // --------------------------------------------------------------------------
191 | // ray: starting position (origin) and direction.
192 | // direction is assumed to be normalized
193 |
194 | struct Ray
195 | {
196 | Ray() {}
197 | Ray(float3 orig_, float3 dir_) : orig(orig_), dir(dir_) { AssertUnit(dir); }
198 |
199 | float3 pointAt(float t) const { return orig + dir * t; }
200 |
201 | float3 orig;
202 | float3 dir;
203 | };
204 |
205 |
206 | // --------------------------------------------------------------------------
207 | // ray hit point information: position where it hit something;
208 | // normal of the surface that was hit, and "t" position along the ray
209 |
210 | struct Hit
211 | {
212 | float3 pos;
213 | float3 normal;
214 | float t;
215 | };
216 |
217 |
218 | // --------------------------------------------------------------------------
219 | // random number generator utilities
220 |
221 | float RandomFloat01(uint32_t& state);
222 | float3 RandomInUnitDisk(uint32_t& state);
223 | float3 RandomUnitVector(uint32_t& state);
224 |
225 |
226 | // --------------------------------------------------------------------------
227 | // camera
228 |
229 | struct Camera
230 | {
231 | Camera() {}
232 |
233 | // vfov is top to bottom in degrees
234 | Camera(const float3& lookFrom, const float3& lookAt, const float3& vup, float vfov, float aspect, float aperture, float focusDist)
235 | {
236 | lensRadius = aperture / 2;
237 | float theta = vfov*kPI/180;
238 | float halfHeight = tanf(theta/2);
239 | float halfWidth = aspect * halfHeight;
240 | origin = lookFrom;
241 | w = normalize(lookFrom - lookAt);
242 | u = normalize(cross(vup, w));
243 | v = cross(w, u);
244 | lowerLeftCorner = origin - halfWidth*focusDist*u - halfHeight*focusDist*v - focusDist*w;
245 | horizontal = 2*halfWidth*focusDist*u;
246 | vertical = 2*halfHeight*focusDist*v;
247 | }
248 |
249 | Ray GetRay(float s, float t, uint32_t& state) const
250 | {
251 | float3 rd = lensRadius * RandomInUnitDisk(state);
252 | float3 offset = u * rd.getX() + v * rd.getY();
253 | return Ray(origin + offset, normalize(lowerLeftCorner + s*horizontal + t*vertical - origin - offset));
254 | }
255 |
256 | float3 origin;
257 | float3 lowerLeftCorner;
258 | float3 horizontal;
259 | float3 vertical;
260 | float3 u, v, w;
261 | float lensRadius;
262 | };
263 |
264 |
--------------------------------------------------------------------------------
/source/scene.cpp:
--------------------------------------------------------------------------------
1 | #include "scene.h"
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | // Use our own simple BVH implementation to speed up ray queries?
8 | #define USE_BVH 1
9 |
10 | // Use Intel Embree for BVH and all ray queries?
11 | #define USE_EMBREE 0
12 | #if USE_EMBREE
13 | #include "external/embree3/rtcore.h"
14 | #endif
15 |
16 | // Use NanoRT for BVH and all ray queries?
17 | #define USE_NANORT 0
18 | #if USE_NANORT
19 | #include "external/nanort.h"
20 | #endif
21 |
22 |
23 | #if USE_EMBREE || USE_NANORT
24 | #undef USE_BVH
25 | #endif
26 |
27 | // --------------------------------------------------------------------------
28 | // Axis-aligned bounding box and related functions
29 |
30 | #if USE_BVH
31 | struct AABB
32 | {
33 | float3 bmin;
34 | float3 bmax;
35 | };
36 |
37 | // from "A Ray-Box Intersection Algorithm and Efficient Dynamic Voxel Rendering"
38 | // http://jcgt.org/published/0007/03/04/
39 | // note: ray direction should be inverted, i.e 1.0/direction!
40 | static bool HitAABB(const Ray& r, const AABB& box, float tMin, float tMax)
41 | {
42 | float3 t0 = (box.bmin - r.orig) * r.dir;
43 | float3 t1 = (box.bmax - r.orig) * r.dir;
44 |
45 | float3 tsmaller = min(t0, t1);
46 | float3 tbigger = max(t0, t1);
47 |
48 | tMin = std::max(tMin, hmax(tsmaller));
49 | tMax = std::min(tMax, hmin(tbigger));
50 |
51 | return tMin <= tMax;
52 | }
53 |
54 | static AABB AABBUnion(const AABB& a, const AABB& b)
55 | {
56 | AABB res;
57 | res.bmin = min(a.bmin, b.bmin);
58 | res.bmax = max(a.bmax, b.bmax);
59 | return res;
60 | }
61 |
62 | static AABB AABBEnclose(const AABB& a, const float3& p)
63 | {
64 | AABB res;
65 | res.bmin = min(a.bmin, p);
66 | res.bmax = max(a.bmax, p);
67 | return res;
68 | }
69 |
70 | static AABB AABBOfTriangle(const Triangle& tri)
71 | {
72 | AABB res;
73 | res.bmin = tri.v0;
74 | res.bmax = tri.v0;
75 | res = AABBEnclose(res, tri.v1);
76 | res = AABBEnclose(res, tri.v2);
77 | return res;
78 | }
79 | #endif // #if USE_BVH
80 |
81 |
82 | // --------------------------------------------------------------------------
83 | // Checks if one triangle is hit by a ray segment.
84 | // based on "The Graphics Codex"
85 |
86 | #if !USE_EMBREE && !USE_NANORT
87 | static bool HitTriangle(const Ray& r, const Triangle& tri, float tMin, float tMax, Hit& outHit)
88 | {
89 | float3 e1 = tri.v1 - tri.v0;
90 | float3 e2 = tri.v2 - tri.v0;
91 | float3 p = cross(r.dir, e2);
92 | float a = dot(e1, p);
93 | if (fabs(a) < 1e-5f)
94 | return false; // parallel to the plane
95 |
96 | float f = 1.0f / a;
97 | float3 s = r.orig - tri.v0;
98 | float u = f * dot(s, p);
99 |
100 | if (u < 0.0f || u > 1.0f)
101 | return false; // but outside the triangle
102 |
103 | float3 q = cross(s, e1);
104 | float v = f * dot(r.dir, q);
105 |
106 | if (v < 0.0f || (u + v) > 1.0f)
107 | return false; // but outside the triangle
108 |
109 | float t = f * dot(e2, q);
110 |
111 | if (t > tMin && t < tMax)
112 | {
113 | outHit.t = t;
114 | outHit.pos = r.pointAt(t);
115 | outHit.normal = normalize(cross(e1, e2));
116 | return true;
117 | }
118 | return false;
119 | }
120 |
121 | static bool HitTriangleShadow(const Ray& r, const Triangle& tri, float tMin, float tMax)
122 | {
123 | float3 e1 = tri.v1 - tri.v0;
124 | float3 e2 = tri.v2 - tri.v0;
125 | float3 p = cross(r.dir, e2);
126 | float a = dot(e1, p);
127 | if (fabs(a) < 1e-5f)
128 | return false; // parallel to the plane
129 |
130 | float f = 1.0f / a;
131 | float3 s = r.orig - tri.v0;
132 | float u = f * dot(s, p);
133 |
134 | if (u < 0.0f || u > 1.0f)
135 | return false; // but outside the triangle
136 |
137 | float3 q = cross(s, e1);
138 | float v = f * dot(r.dir, q);
139 |
140 | if (v < 0.0f || (u + v) > 1.0f)
141 | return false; // but outside the triangle
142 |
143 | float t = f * dot(e2, q);
144 |
145 | if (t > tMin && t < tMax)
146 | return true;
147 | return false;
148 | }
149 | #endif // #if !USE_EMBREE && !USE_NANORT
150 |
151 |
152 | // --------------------------------------------------------------------------
153 | // bounding volume hierarchy
154 |
155 | #if USE_BVH
156 | struct BVHNode
157 | {
158 | AABB box;
159 | int data1; // node: left index; leaf: start triangle index
160 | int data2; // node: right index; leaf: triangle count
161 | bool leaf;
162 | };
163 | #endif // #if USE_BVH
164 |
165 | // Scene information: a copy of the input triangles
166 | static int s_TriangleCount;
167 | static Triangle* s_Triangles;
168 | static int* s_TriIndices;
169 | #if USE_BVH
170 | static std::vector s_BVH;
171 | #endif
172 |
173 | #if USE_EMBREE
174 | static RTCDevice s_Device;
175 | static RTCScene s_Scene;
176 | #endif
177 |
178 | #if USE_NANORT
179 | static unsigned int* s_Indices;
180 | static nanort::BVHAccel s_BVH;
181 | static nanort::TriangleMesh* s_Mesh;
182 | #endif
183 |
184 | #if USE_BVH
// Advances a 32-bit xorshift generator (shift triple 13, 17, 15) in place and
// returns the new state. A state of zero stays zero.
static uint32_t XorShift32(uint32_t& state)
{
    state ^= state << 13;
    state ^= state >> 17;
    state ^= state << 15;
    return state;
}
194 |
195 | static int CreateBVH(int triStart, int triCount, uint32_t& rngState)
196 | {
197 | // sort input triangles by a randomly chosen axis
198 | int axis = XorShift32(rngState) % 3;
199 | if (axis == 0)
200 | std::sort(s_TriIndices+triStart, s_TriIndices+triStart + triCount, [](int a, int b)
201 | {
202 | assert(a >= 0 && a < s_TriangleCount);
203 | assert(b >= 0 && b < s_TriangleCount);
204 | AABB boxa = AABBOfTriangle(s_Triangles[a]);
205 | AABB boxb = AABBOfTriangle(s_Triangles[b]);
206 | return boxa.bmin.getX() < boxb.bmin.getX();
207 | });
208 | else if (axis == 1)
209 | std::sort(s_TriIndices+triStart, s_TriIndices+triStart + triCount, [](int a, int b)
210 | {
211 | assert(a >= 0 && a < s_TriangleCount);
212 | assert(b >= 0 && b < s_TriangleCount);
213 | AABB boxa = AABBOfTriangle(s_Triangles[a]);
214 | AABB boxb = AABBOfTriangle(s_Triangles[b]);
215 | return boxa.bmin.getY() < boxb.bmin.getY();
216 | });
217 | else if (axis == 2)
218 | std::sort(s_TriIndices+triStart, s_TriIndices+triStart + triCount, [](int a, int b)
219 | {
220 | assert(a >= 0 && a < s_TriangleCount);
221 | assert(b >= 0 && b < s_TriangleCount);
222 | AABB boxa = AABBOfTriangle(s_Triangles[a]);
223 | AABB boxb = AABBOfTriangle(s_Triangles[b]);
224 | return boxa.bmin.getZ() < boxb.bmin.getZ();
225 | });
226 |
227 | // create the node
228 | BVHNode node;
229 | int nodeIndex = (int)s_BVH.size();
230 | s_BVH.push_back(node);
231 |
232 | // if we have less than N triangles, make this node a leaf that just has all of them
233 | if (triCount <= 4)
234 | {
235 | node.data1 = triStart;
236 | node.data2 = triCount;
237 | node.leaf = true;
238 | node.box = AABBOfTriangle(s_Triangles[s_TriIndices[triStart]]);
239 | for (int i = 1; i < triCount; ++i)
240 | {
241 | auto tribox = AABBOfTriangle(s_Triangles[s_TriIndices[triStart+i]]);
242 | node.box = AABBUnion(node.box, tribox);
243 | }
244 | }
245 | else
246 | {
247 | node.data1 = CreateBVH(triStart, triCount / 2, rngState);
248 | node.data2 = CreateBVH(triStart + triCount / 2, triCount - triCount / 2, rngState);
249 | node.leaf = false;
250 | assert(node.data1 >= 0 && node.data1 < s_BVH.size());
251 | assert(node.data2 >= 0 && node.data2 < s_BVH.size());
252 | node.box = AABBUnion(s_BVH[node.data1].box, s_BVH[node.data2].box);
253 | }
254 | s_BVH[nodeIndex] = node;
255 | return nodeIndex;
256 | }
257 | #endif // #if USE_BVH
258 |
259 | void InitializeScene(int triangleCount, const Triangle* triangles)
260 | {
261 | s_TriangleCount = triangleCount;
262 | s_Triangles = new Triangle[triangleCount];
263 | memcpy(s_Triangles, triangles, triangleCount * sizeof(triangles[0]));
264 |
265 | #if USE_EMBREE
266 | s_Device = rtcNewDevice("threads=1");
267 | s_Scene = rtcNewScene(s_Device);
268 |
269 | RTCGeometry mesh = rtcNewGeometry (s_Device, RTC_GEOMETRY_TYPE_TRIANGLE);
270 | float* dstVerts = (float*)rtcSetNewGeometryBuffer(mesh, RTC_BUFFER_TYPE_VERTEX, 0, RTC_FORMAT_FLOAT3, 12, triangleCount*3);
271 | int* indices = (int*)rtcSetNewGeometryBuffer(mesh, RTC_BUFFER_TYPE_INDEX, 0, RTC_FORMAT_UINT3, 12, triangleCount);
272 | for (int i = 0; i < triangleCount; ++i)
273 | {
274 | memcpy(dstVerts+i*9+0, &triangles[i].v0, 12);
275 | memcpy(dstVerts+i*9+3, &triangles[i].v1, 12);
276 | memcpy(dstVerts+i*9+6, &triangles[i].v2, 12);
277 | indices[i*3+0] = i*3+0;
278 | indices[i*3+1] = i*3+1;
279 | indices[i*3+2] = i*3+2;
280 | }
281 | rtcCommitGeometry(mesh);
282 | rtcAttachGeometry(s_Scene, mesh);
283 | rtcReleaseGeometry(mesh);
284 |
285 | rtcCommitScene(s_Scene);
286 |
287 | #elif USE_NANORT
288 |
289 | nanort::BVHBuildOptions buildOptions;
290 | buildOptions.cache_bbox = false;
291 |
292 | s_Indices = new unsigned int[triangleCount*3];
293 | for (int i = 0; i < triangleCount*3; ++i)
294 | s_Indices[i] = i;
295 | s_Mesh = new nanort::TriangleMesh((const float*)s_Triangles, s_Indices, sizeof(float3));
296 | nanort::TriangleSAHPred pred((const float*)s_Triangles, s_Indices, sizeof(float3));
297 | s_BVH.Build(triangleCount, *s_Mesh, pred, buildOptions);
298 |
299 | #elif USE_BVH
300 |
301 | // build BVH
302 | s_TriIndices = new int[triangleCount];
303 | for (int i = 0; i < triangleCount; ++i)
304 | s_TriIndices[i] = i;
305 | uint32_t rngState = 1;
306 | CreateBVH(0, triangleCount, rngState);
307 | #endif
308 | }
309 |
310 | void CleanupScene()
311 | {
312 | delete[] s_Triangles;
313 | #if USE_EMBREE
314 | rtcReleaseScene(s_Scene);
315 | rtcReleaseDevice(s_Device);
316 | #elif USE_NANORT
317 | delete s_Mesh;
318 | delete[] s_Indices;
319 | #elif USE_BVH
320 | s_BVH.clear();
321 | delete[] s_TriIndices;
322 | #endif
323 | }
324 |
325 | #if USE_BVH
326 | static int HitBVH(int index, const Ray& r, const Ray& invR, float tMin, float tMax, Hit& outHit)
327 | {
328 | // check if ray hits us at all
329 | const BVHNode& node = s_BVH[index];
330 | if (!HitAABB(invR, node.box, tMin, tMax))
331 | return -1;
332 |
333 | // if leaf node, check against triangles
334 | if (node.leaf)
335 | {
336 | int hitID = -1;
337 | for (int i = 0; i < node.data2; ++i)
338 | {
339 | int triIndex = s_TriIndices[node.data1 + i];
340 | assert(triIndex >= 0 && triIndex < s_TriangleCount);
341 | if (HitTriangle(r, s_Triangles[triIndex], tMin, tMax, outHit))
342 | {
343 | hitID = triIndex;
344 | tMax = outHit.t;
345 | }
346 | }
347 | return hitID;
348 | }
349 |
350 | // not a leaf node, go into child nodes
351 | int leftId = HitBVH(node.data1, r, invR, tMin, tMax, outHit);
352 | if (leftId != -1)
353 | {
354 | // left was hit: only check right hit up until left hit distance
355 | int rightId = HitBVH(node.data2, r, invR, tMin, outHit.t, outHit);
356 | if (rightId != -1)
357 | return rightId;
358 | return leftId;
359 | }
360 | // left was not hit: check right
361 | int rightId = HitBVH(node.data2, r, invR, tMin, tMax, outHit);
362 | return rightId;
363 | }
364 |
365 | static bool HitShadowBVH(int index, const Ray& r, const Ray& invR, float tMin, float tMax)
366 | {
367 | // check if ray hits us at all
368 | const BVHNode& node = s_BVH[index];
369 | if (!HitAABB(invR, node.box, tMin, tMax))
370 | return false;
371 |
372 | // if leaf node, check against triangles
373 | if (node.leaf)
374 | {
375 | for (int i = 0; i < node.data2; ++i)
376 | {
377 | int triIndex = s_TriIndices[node.data1 + i];
378 | assert(triIndex >= 0 && triIndex < s_TriangleCount);
379 | if (HitTriangleShadow(r, s_Triangles[triIndex], tMin, tMax))
380 | return true;
381 | }
382 | return false;
383 | }
384 |
385 | if (HitShadowBVH(node.data1, r, invR, tMin, tMax))
386 | return true;
387 | if (HitShadowBVH(node.data2, r, invR, tMin, tMax))
388 | return true;
389 | return false;
390 | }
391 | #endif // #if USE_BVH
392 |
393 |
394 | // Check all the triangles in the scene for a hit, and return the closest one.
395 | int HitScene(const Ray& r, float tMin, float tMax, Hit& outHit)
396 | {
397 | #if USE_EMBREE
398 | RTCIntersectContext ctx;
399 | rtcInitIntersectContext(&ctx);
400 |
401 | RTCRayHit rh;
402 | r.orig.store(&rh.ray.org_x);
403 | rh.ray.tnear = tMin;
404 | r.dir.store(&rh.ray.dir_x);
405 | rh.ray.time = 0;
406 | rh.ray.tfar = tMax;
407 | rh.ray.mask = 0;
408 | rh.ray.id = 0;
409 | rh.ray.flags = 0;
410 | rh.hit.geomID = RTC_INVALID_GEOMETRY_ID;
411 | rh.hit.primID = RTC_INVALID_GEOMETRY_ID;
412 |
413 | rtcIntersect1(s_Scene, &ctx, &rh);
414 | if (rh.hit.geomID == RTC_INVALID_GEOMETRY_ID)
415 | return -1;
416 | outHit.t = rh.ray.tfar;
417 | outHit.pos = r.pointAt(outHit.t);
418 | outHit.normal = normalize(float3(rh.hit.Ng_x, rh.hit.Ng_y, rh.hit.Ng_z));
419 | return rh.hit.primID;
420 |
421 | #elif USE_NANORT
422 | nanort::Ray ray;
423 | ray.min_t = tMin;
424 | ray.max_t = tMax;
425 | r.orig.store(ray.org);
426 | r.dir.store(ray.dir);
427 |
428 | nanort::TriangleIntersector<> intersector((const float*)s_Triangles, s_Indices, sizeof(float3));
429 | nanort::TriangleIntersection<> isect;
430 | bool hit = s_BVH.Traverse(ray, intersector, &isect);
431 | if (!hit)
432 | return -1;
433 |
434 | outHit.t = isect.t;
435 | outHit.pos = r.pointAt(isect.t);
436 | const Triangle& tri = s_Triangles[isect.prim_id];
437 |
438 | float3 e1 = tri.v1 - tri.v0;
439 | float3 e2 = tri.v2 - tri.v0;
440 | float3 n = normalize(cross(e1,e2));
441 | outHit.normal = n;
442 | return isect.prim_id;
443 |
444 | #elif USE_BVH
445 |
446 | if (s_BVH.empty())
447 | return -1;
448 |
449 | Ray invR = r;
450 | invR.dir = float3(1.0f) / r.dir;
451 | return HitBVH(0, r, invR, tMin, tMax, outHit);
452 |
453 | #else
454 |
455 | float hitMinT = tMax;
456 | int hitID = -1;
457 | for (int i = 0; i < s_TriangleCount; ++i)
458 | {
459 | Hit hit;
460 | if (HitTriangle(r, s_Triangles[i], tMin, tMax, hit))
461 | {
462 | if (hit.t < hitMinT)
463 | {
464 | hitMinT = hit.t;
465 | hitID = i;
466 | outHit = hit;
467 | }
468 | }
469 | }
470 |
471 | return hitID;
472 | #endif
473 | }
474 |
475 | bool HitSceneShadow(const Ray& r, float tMin, float tMax)
476 | {
477 | #if USE_EMBREE
478 | RTCIntersectContext ctx;
479 | rtcInitIntersectContext(&ctx);
480 |
481 | RTCRay rh;
482 | r.orig.store(&rh.org_x);
483 | rh.tnear = tMin;
484 | r.dir.store(&rh.dir_x);
485 | rh.time = 0;
486 | rh.tfar = tMax;
487 | rh.mask = 0;
488 | rh.id = 0;
489 | rh.flags = 0;
490 |
491 | rtcOccluded1(s_Scene, &ctx, &rh);
492 | return rh.tfar < 0;
493 |
494 | #elif USE_NANORT
495 | nanort::Ray ray;
496 | ray.min_t = tMin;
497 | ray.max_t = tMax;
498 | r.orig.store(ray.org);
499 | r.dir.store(ray.dir);
500 |
501 | nanort::TriangleIntersector<> intersector((const float*)s_Triangles, s_Indices, sizeof(float3));
502 | nanort::TriangleIntersection<> isect;
503 | return s_BVH.Traverse(ray, intersector, &isect);
504 |
505 | #elif USE_BVH
506 | if (s_BVH.empty())
507 | return false;
508 |
509 | Ray invR = r;
510 | invR.dir = float3(1.0f) / r.dir;
511 | return HitShadowBVH(0, r, invR, tMin, tMax);
512 |
513 | #else
514 | for (int i = 0; i < s_TriangleCount; ++i)
515 | {
516 | if (HitTriangleShadow(r, s_Triangles[i], tMin, tMax))
517 | return true;
518 | }
519 | return false;
520 |
521 | #endif
522 | }
523 |
--------------------------------------------------------------------------------
/source/scene.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | // Scene: this represents all the scene geometry that the ray tracer works on.
4 |
5 | #include "maths.h"
6 |
7 |
8 | // One triangle: just three vertex positions.
9 | struct Triangle
10 | {
11 | float3 v0, v1, v2;
12 | };
13 |
14 |
15 | // Our scene structure is very simple: just a bunch of triangles and nothing else
16 | // (no "objects", "instances" or "materials").
17 | void InitializeScene(int triangleCount, const Triangle* triangles);
18 |
19 | // Cleanup any data or memory that the scene might have allocated.
20 | void CleanupScene();
21 |
22 | // Checks if the ray segment hits a scene. If any triangle is hit by the ray, this
23 | // function should return information about the closest one.
24 | //
25 | // - r: the ray itself,
26 | // - tMin and tMax: segment of the ray that is checked,
27 | // - outHit: hit information, if any,
28 | //
29 | // Function returns the triangle index, or -1 if nothing is hit by the ray.
30 | int HitScene(const Ray& r, float tMin, float tMax, Hit& outHit);
31 |
32 | // Similar to HitScene, but only returns if the ray hit anything
33 | bool HitSceneShadow(const Ray& r, float tMin, float tMax);
34 |
--------------------------------------------------------------------------------