├── .github
└── workflows
│ └── release.yml
├── .gitignore
├── App.config
├── BandwidthRunner.cs
├── BenchmarkDll.c
├── BenchmarkDll.vcxproj
├── BenchmarkDll.vcxproj.filters
├── BenchmarkDllCommon.h
├── BenchmarkInteropFunctions.cs
├── BenchmarkSubmission.cs
├── BenchmarkSubmissionDialog.Designer.cs
├── BenchmarkSubmissionDialog.cs
├── BenchmarkSubmissionDialog.resx
├── GlobalTestSettings.cs
├── LatencyRunner.cs
├── MemoryBandwidthFunctions.asm
├── MemoryLatency.c
├── MemoryLatencyFunctions.asm
├── MicrobenchmarkForm.Designer.cs
├── MicrobenchmarkForm.cs
├── MicrobenchmarkForm.resx
├── MicrobenchmarkGui.csproj
├── MicrobenchmarkGui.sln
├── OpCode.cs
├── OpenCL
├── LICENSE
├── README.md
├── include
│ └── CL
│ │ ├── cl.h
│ │ ├── cl_d3d10.h
│ │ ├── cl_d3d11.h
│ │ ├── cl_dx9_media_sharing.h
│ │ ├── cl_dx9_media_sharing_intel.h
│ │ ├── cl_egl.h
│ │ ├── cl_ext.h
│ │ ├── cl_ext_intel.h
│ │ ├── cl_gl.h
│ │ ├── cl_gl_ext.h
│ │ ├── cl_half.h
│ │ ├── cl_icd.h
│ │ ├── cl_platform.h
│ │ ├── cl_va_api_media_sharing_intel.h
│ │ ├── cl_version.h
│ │ └── opencl.h
└── lib
│ └── OpenCL.lib
├── OpenCLTest.cs
├── OpenClFunctions.c
├── Program.cs
├── Properties
├── AssemblyInfo.cs
├── Resources.Designer.cs
├── Resources.resx
├── Settings.Designer.cs
└── Settings.settings
├── README.md
├── TestUtilities.cs
├── app.manifest
├── framework.h
├── img
├── guiscreenshot.png
├── guiscreenshot_latency.png
└── lockpages.png
├── latencykernel.cl
├── latencykernel_tex.cl
├── packages.config
├── pch.c
└── pch.h
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
# Builds the solution on every push to master and publishes the binaries as a
# GitHub release tagged with the short commit hash.
name: Build and Release

on:
  push:
    branches: [ master ]

# The "Create Release" step creates a tag and a release, which needs write
# access to repository contents. Without this the step fails on repositories
# whose default GITHUB_TOKEN permission is read-only.
permissions:
  contents: write

jobs:
  build:
    runs-on: windows-latest

    steps:
    - uses: actions/checkout@v4

    - name: Setup MSBuild
      uses: microsoft/setup-msbuild@v2

    # Scrapes the NASM home page for the newest release directory name and
    # exports it as NASM_VERSION for the following steps.
    - name: Get Latest NASM Version
      id: nasm-version
      run: |
        $nasmPage = Invoke-WebRequest -Uri "https://www.nasm.us"
        if ($nasmPage.Content -match 'releasebuilds/(\d+\.\d+\.\d+)/') {
          $version = $matches[1]
          echo "NASM_VERSION=$version" >> $env:GITHUB_ENV
          Write-Host "Latest NASM version: $version"
        } else {
          Write-Error "Could not determine latest NASM version"
          exit 1
        }

    # Downloads the win64 NASM archive and puts the extracted folder on PATH.
    - name: Setup NASM
      run: |
        $version = $env:NASM_VERSION
        $url = "https://www.nasm.us/pub/nasm/releasebuilds/$version/win64/nasm-$version-win64.zip"
        Write-Host "Downloading NASM from: $url"
        Invoke-WebRequest -Uri $url -OutFile nasm.zip
        Expand-Archive nasm.zip -DestinationPath .
        echo "$pwd\nasm-$version" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append

    - name: Build Solution
      run: |
        msbuild /p:Configuration=Release /p:Platform=x64 MicrobenchmarkGui.sln

    - name: Create Release ZIP
      run: |
        # Create temporary directory for ZIP contents
        New-Item -ItemType Directory -Path tmp

        # Copy only the required files (flat structure)
        Copy-Item "x64\Release\MicrobenchmarkGui.exe.config" tmp/
        Copy-Item "x64\Release\MicrobenchmarkGui.exe" tmp/
        Copy-Item "x64\Release\BenchmarkDll.dll" tmp/
        Copy-Item "x64\Release\Newtonsoft.Json.dll" tmp/

        # Create release name with short commit hash
        $commitHash = $env:GITHUB_SHA.Substring(0, 7)
        $releaseName = "MicrobenchmarkGui-$commitHash"

        # Create ZIP file
        Compress-Archive -Path "tmp\*" -DestinationPath "$releaseName.zip"

        # Store names for next step
        echo "RELEASE_NAME=$releaseName" >> $env:GITHUB_ENV
        echo "COMMIT_HASH=$commitHash" >> $env:GITHUB_ENV

    - name: Create Release
      id: create_release
      uses: softprops/action-gh-release@v2
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
        tag_name: ${{ env.COMMIT_HASH }}
        name: ${{ env.RELEASE_NAME }}
        body: |
          - ${{ env.COMMIT_HASH }} ${{ github.event.head_commit.message }}
          - Built with NASM ${{ env.NASM_VERSION }}
        draft: false
        prerelease: false
        files: ${{ env.RELEASE_NAME }}.zip
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 | *.swp
13 | *generatednasm*
14 |
15 | # User-specific files (MonoDevelop/Xamarin Studio)
16 | *.userprefs
17 |
18 | # Mono auto generated files
19 | mono_crash.*
20 |
21 | # Build results
22 | [Dd]ebug/
23 | [Dd]ebugPublic/
24 | [Rr]elease/
25 | [Rr]eleases/
26 | x64/
27 | x86/
28 | [Ww][Ii][Nn]32/
29 | [Aa][Rr][Mm]/
30 | [Aa][Rr][Mm]64/
31 | bld/
32 | [Bb]in/
33 | [Oo]bj/
34 | [Ll]og/
35 | [Ll]ogs/
36 | clammicrobench/*.asm
37 |
38 | # Visual Studio 2015/2017 cache/options directory
39 | .vs/
40 | # Uncomment if you have tasks that create the project's static files in wwwroot
41 | #wwwroot/
42 |
43 | # Visual Studio 2017 auto generated files
44 | Generated\ Files/
45 |
46 | # MSTest test Results
47 | [Tt]est[Rr]esult*/
48 | [Bb]uild[Ll]og.*
49 |
50 | # NUnit
51 | *.VisualState.xml
52 | TestResult.xml
53 | nunit-*.xml
54 |
55 | # Build Results of an ATL Project
56 | [Dd]ebugPS/
57 | [Rr]eleasePS/
58 | dlldata.c
59 |
60 | # Benchmark Results
61 | BenchmarkDotNet.Artifacts/
62 |
63 | # .NET Core
64 | project.lock.json
65 | project.fragment.lock.json
66 | artifacts/
67 |
68 | # ASP.NET Scaffolding
69 | ScaffoldingReadMe.txt
70 |
71 | # StyleCop
72 | StyleCopReport.xml
73 |
74 | # Files built by Visual Studio
75 | *_i.c
76 | *_p.c
77 | *_h.h
78 | *.ilk
79 | *.meta
80 | *.obj
81 | *.iobj
82 | *.pch
83 | *.pdb
84 | *.ipdb
85 | *.pgc
86 | *.pgd
87 | *.rsp
88 | *.sbr
89 | *.tlb
90 | *.tli
91 | *.tlh
92 | *.tmp
93 | *.tmp_proj
94 | *_wpftmp.csproj
95 | *.log
96 | *.tlog
97 | *.vspscc
98 | *.vssscc
99 | .builds
100 | *.pidb
101 | *.svclog
102 | *.scc
103 |
104 | # Chutzpah Test files
105 | _Chutzpah*
106 |
107 | # Visual C++ cache files
108 | ipch/
109 | *.aps
110 | *.ncb
111 | *.opendb
112 | *.opensdf
113 | *.sdf
114 | *.cachefile
115 | *.VC.db
116 | *.VC.VC.opendb
117 |
118 | # Visual Studio profiler
119 | *.psess
120 | *.vsp
121 | *.vspx
122 | *.sap
123 |
124 | # Visual Studio Trace Files
125 | *.e2e
126 |
127 | # TFS 2012 Local Workspace
128 | $tf/
129 |
130 | # Guidance Automation Toolkit
131 | *.gpState
132 |
133 | # ReSharper is a .NET coding add-in
134 | _ReSharper*/
135 | *.[Rr]e[Ss]harper
136 | *.DotSettings.user
137 |
138 | # TeamCity is a build add-in
139 | _TeamCity*
140 |
141 | # DotCover is a Code Coverage Tool
142 | *.dotCover
143 |
144 | # AxoCover is a Code Coverage Tool
145 | .axoCover/*
146 | !.axoCover/settings.json
147 |
148 | # Coverlet is a free, cross platform Code Coverage Tool
149 | coverage*.json
150 | coverage*.xml
151 | coverage*.info
152 |
153 | # Visual Studio code coverage results
154 | *.coverage
155 | *.coveragexml
156 |
157 | # NCrunch
158 | _NCrunch_*
159 | .*crunch*.local.xml
160 | nCrunchTemp_*
161 |
162 | # MightyMoose
163 | *.mm.*
164 | AutoTest.Net/
165 |
166 | # Web workbench (sass)
167 | .sass-cache/
168 |
169 | # Installshield output folder
170 | [Ee]xpress/
171 |
172 | # DocProject is a documentation generator add-in
173 | DocProject/buildhelp/
174 | DocProject/Help/*.HxT
175 | DocProject/Help/*.HxC
176 | DocProject/Help/*.hhc
177 | DocProject/Help/*.hhk
178 | DocProject/Help/*.hhp
179 | DocProject/Help/Html2
180 | DocProject/Help/html
181 |
182 | # Click-Once directory
183 | publish/
184 |
185 | # Publish Web Output
186 | *.[Pp]ublish.xml
187 | *.azurePubxml
188 | # Note: Comment the next line if you want to checkin your web deploy settings,
189 | # but database connection strings (with potential passwords) will be unencrypted
190 | *.pubxml
191 | *.publishproj
192 |
193 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
194 | # checkin your Azure Web App publish settings, but sensitive information contained
195 | # in these scripts will be unencrypted
196 | PublishScripts/
197 |
198 | # NuGet Packages
199 | *.nupkg
200 | # NuGet Symbol Packages
201 | *.snupkg
202 | # The packages folder can be ignored because of Package Restore
203 | **/[Pp]ackages/*
204 | # except build/, which is used as an MSBuild target.
205 | !**/[Pp]ackages/build/
206 | # Uncomment if necessary however generally it will be regenerated when needed
207 | #!**/[Pp]ackages/repositories.config
208 | # NuGet v3's project.json files produces more ignorable files
209 | *.nuget.props
210 | *.nuget.targets
211 |
212 | # Nuget personal access tokens and Credentials
213 | nuget.config
214 |
215 | # Microsoft Azure Build Output
216 | csx/
217 | *.build.csdef
218 |
219 | # Microsoft Azure Emulator
220 | ecf/
221 | rcf/
222 |
223 | # Windows Store app package directories and files
224 | AppPackages/
225 | BundleArtifacts/
226 | Package.StoreAssociation.xml
227 | _pkginfo.txt
228 | *.appx
229 | *.appxbundle
230 | *.appxupload
231 |
232 | # Visual Studio cache files
233 | # files ending in .cache can be ignored
234 | *.[Cc]ache
235 | # but keep track of directories ending in .cache
236 | !?*.[Cc]ache/
237 |
238 | # Others
239 | ClientBin/
240 | ~$*
241 | *~
242 | *.dbmdl
243 | *.dbproj.schemaview
244 | *.jfm
245 | *.pfx
246 | *.publishsettings
247 | orleans.codegen.cs
248 |
249 | # Including strong name files can present a security risk
250 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
251 | #*.snk
252 |
253 | # Since there are multiple workflows, uncomment next line to ignore bower_components
254 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
255 | #bower_components/
256 |
257 | # RIA/Silverlight projects
258 | Generated_Code/
259 |
260 | # Backup & report files from converting an old project file
261 | # to a newer Visual Studio version. Backup files are not needed,
262 | # because we have git ;-)
263 | _UpgradeReport_Files/
264 | Backup*/
265 | UpgradeLog*.XML
266 | UpgradeLog*.htm
267 | ServiceFabricBackup/
268 | *.rptproj.bak
269 |
270 | # SQL Server files
271 | *.mdf
272 | *.ldf
273 | *.ndf
274 |
275 | # Business Intelligence projects
276 | *.rdl.data
277 | *.bim.layout
278 | *.bim_*.settings
279 | *.rptproj.rsuser
280 | *- [Bb]ackup.rdl
281 | *- [Bb]ackup ([0-9]).rdl
282 | *- [Bb]ackup ([0-9][0-9]).rdl
283 |
284 | # Microsoft Fakes
285 | FakesAssemblies/
286 |
287 | # GhostDoc plugin setting file
288 | *.GhostDoc.xml
289 |
290 | # Node.js Tools for Visual Studio
291 | .ntvs_analysis.dat
292 | node_modules/
293 |
294 | # Visual Studio 6 build log
295 | *.plg
296 |
297 | # Visual Studio 6 workspace options file
298 | *.opt
299 |
300 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
301 | *.vbw
302 |
303 | # Visual Studio LightSwitch build output
304 | **/*.HTMLClient/GeneratedArtifacts
305 | **/*.DesktopClient/GeneratedArtifacts
306 | **/*.DesktopClient/ModelManifest.xml
307 | **/*.Server/GeneratedArtifacts
308 | **/*.Server/ModelManifest.xml
309 | _Pvt_Extensions
310 |
311 | # Paket dependency manager
312 | .paket/paket.exe
313 | paket-files/
314 |
315 | # FAKE - F# Make
316 | .fake/
317 |
318 | # CodeRush personal settings
319 | .cr/personal
320 |
321 | # Python Tools for Visual Studio (PTVS)
322 | __pycache__/
323 | *.pyc
324 |
325 | # Cake - Uncomment if you are using it
326 | # tools/**
327 | # !tools/packages.config
328 |
329 | # Tabs Studio
330 | *.tss
331 |
332 | # Telerik's JustMock configuration file
333 | *.jmconfig
334 |
335 | # BizTalk build output
336 | *.btp.cs
337 | *.btm.cs
338 | *.odx.cs
339 | *.xsd.cs
340 |
341 | # OpenCover UI analysis results
342 | OpenCover/
343 |
344 | # Azure Stream Analytics local run output
345 | ASALocalRun/
346 |
347 | # MSBuild Binary and Structured Log
348 | *.binlog
349 |
350 | # NVidia Nsight GPU debugger configuration file
351 | *.nvuser
352 |
353 | # MFractors (Xamarin productivity tool) working folder
354 | .mfractor/
355 |
356 | # Local History for Visual Studio
357 | .localhistory/
358 |
359 | # BeatPulse healthcheck temp database
360 | healthchecksdb
361 |
362 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
363 | MigrationBackup/
364 |
365 | # Ionide (cross platform F# VS Code tools) working folder
366 | .ionide/
367 |
368 | # Fody - auto-generated XML schema
369 | FodyWeavers.xsd
370 |
371 | # VS Code files for those working on multiple tools
372 | .vscode/*
373 | !.vscode/settings.json
374 | !.vscode/tasks.json
375 | !.vscode/launch.json
376 | !.vscode/extensions.json
377 | *.code-workspace
378 |
379 | # Local History for Visual Studio Code
380 | .history/
381 |
382 | # Windows Installer files from build outputs
383 | *.cab
384 | *.msi
385 | *.msix
386 | *.msm
387 | *.msp
388 |
389 | # JetBrains Rider
390 | .idea/
391 | *.sln.iml
392 |
--------------------------------------------------------------------------------
/App.config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/BandwidthRunner.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Diagnostics;
4 | using System.Text;
5 | using System.Threading;
6 | using System.Threading.Tasks;
7 | using System.Windows.Forms;
8 | using System.Windows.Forms.DataVisualization.Charting;
9 |
10 | namespace MicrobenchmarkGui
11 | {
12 | public class BandwidthRunner
13 | {
14 | public uint[] testSizes = { 2, 4, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 512, 600, 768, 1024, 1536, 2048,
15 | 3072, 4096, 5120, 6144, 8192, 10240, 12288, 16384, 24567, 32768, 65536, 98304,
16 | 131072, 262144, 393216, 524288, 1048576, 1572864, 2097152, 3145728 };
17 |
18 | public bool running = false;
19 |
20 | ///
21 | /// Test type to run, for automated test
22 | ///
23 | public BenchmarkInteropFunctions.TestType testType;
24 |
25 | // run results
26 | public Dictionary>> RunResults;
27 |
28 | // last run results
29 | public string[][] formattedResults;
30 |
31 | ///
32 | /// List of test results from last run
33 | ///
34 | public List testResultsList;
35 |
36 | ///
37 | /// List of tested points from last run
38 | ///
39 | public List floatTestPoints;
40 |
41 | private ListView resultListView;
42 | private Chart resultChart;
43 | private MicrobenchmarkForm.SafeSetResultListView setListViewDelegate;
44 | private MicrobenchmarkForm.SafeSetResultListViewColumns setListViewColumnsDelegate;
45 | private MicrobenchmarkForm.SafeSetResultsChart setChartDelegate;
46 | private MicrobenchmarkForm.SafeSetProgressLabel setProgressLabelDelegate;
47 | private Label progressLabel;
48 | private string[] bwCols = { "Data Size", "Bandwidth" };
49 |
50 | public BandwidthRunner(MicrobenchmarkForm.SafeSetResultListView setListViewDelegate,
51 | MicrobenchmarkForm.SafeSetResultListViewColumns setListViewColsDelegate,
52 | MicrobenchmarkForm.SafeSetResultsChart setChartDelegate,
53 | MicrobenchmarkForm.SafeSetProgressLabel setLabelDelegate,
54 | ListView resultListView,
55 | Chart resultChart,
56 | Label progressLabel)
57 | {
58 | this.setListViewColumnsDelegate = setListViewColsDelegate;
59 | this.setListViewDelegate = setListViewDelegate;
60 | this.setChartDelegate = setChartDelegate;
61 | this.setProgressLabelDelegate = setLabelDelegate;
62 | this.resultListView = resultListView;
63 | this.resultChart = resultChart;
64 | this.progressLabel = progressLabel;
65 |
66 | this.RunResults = new Dictionary>>();
67 | }
68 |
69 | private uint GetIterationCount(uint testSize, uint dataGb)
70 | {
71 | uint gbToTransfer = dataGb;
72 | if (testSize > 64) gbToTransfer = dataGb / 2;
73 | if (testSize > 512) gbToTransfer = dataGb / 4;
74 | if (testSize > 8192) gbToTransfer = dataGb / 8;
75 | uint iterations = gbToTransfer * 1024 * 1024 / testSize;
76 | if (iterations % 2 != 0) iterations += 1;
77 |
78 | if (iterations < 4) return 4; // Set a minimum to reduce noise
79 | else return iterations;
80 | }
81 |
82 | // Run through test sizes, meant to be run in a background thread
83 | public void StartFullTest(uint threads, bool shared, BenchmarkInteropFunctions.TestType testType, CancellationToken runCancel)
84 | {
85 | running = true;
86 | string testLabel = threads + "T " + testType.ToString();
87 | List> currentRunResults = new List>();
88 | testResultsList = new List();
89 | floatTestPoints = new List();
90 | resultListView.Invoke(setListViewColumnsDelegate, new object[] { bwCols });
91 | float[] testResults = new float[testSizes.Length];
92 | formattedResults = new string[testSizes.Length][];
93 |
94 | for (uint i = 0; i < testSizes.Length; i++)
95 | {
96 | testResults[i] = 0;
97 | formattedResults[i] = new string[2];
98 | formattedResults[i][0] = string.Format("{0} KB", testSizes[i]);
99 | formattedResults[i][1] = "Not Run";
100 | }
101 |
102 | resultListView.Invoke(setListViewDelegate, new object[] { formattedResults });
103 |
104 | float lastTimeMs = 0;
105 | for (uint testIdx = 0; testIdx < testSizes.Length; testIdx++)
106 | {
107 | if (runCancel.IsCancellationRequested)
108 | {
109 | break;
110 | }
111 |
112 | uint testSize = testSizes[testIdx];
113 | ulong currentIterations = GetIterationCount(testSize, 32);
114 | float targetTimeMs = 3000, minTimeMs = 1000, result;
115 |
116 | if (GlobalTestSettings.MinTestSizeKb != 0 && GlobalTestSettings.MinTestSizeKb > testSize) continue;
117 |
118 | Stopwatch debugStopwatch = new Stopwatch();
119 |
120 | do {
121 | float dataTransferredGb = (float)((currentIterations * testSize * 1024.0 * (shared ? threads : 1)) / 1e9);
122 | string progressMessage = string.Format("Testing bandwidth over {0} KB, {1}K iterations = {2:F2} GB, last run = {3:F2} ms", testSize, currentIterations / 1000, dataTransferredGb, lastTimeMs);
123 | progressLabel.Invoke(setProgressLabelDelegate, new object[] { progressMessage });
124 |
125 | debugStopwatch.Restart();
126 | result = BenchmarkInteropFunctions.MeasureBw(testSize, currentIterations, threads, shared ? 1 : 0, testType);
127 | debugStopwatch.Stop();
128 |
129 | lastTimeMs = 1000 * dataTransferredGb / result;
130 | currentIterations = TestUtilities.ScaleIterations(currentIterations, targetTimeMs, lastTimeMs);
131 | Console.WriteLine("Reported {0:F2} ms, sw {1} ms. Next Iteration Count: {2}", lastTimeMs, debugStopwatch.ElapsedMilliseconds, currentIterations);
132 | } while (lastTimeMs < minTimeMs);
133 |
134 | testResults[testIdx] = result;
135 | if (result != 0) formattedResults[testIdx][1] = string.Format("{0:F2} GB/s", result);
136 | else formattedResults[testIdx][1] = "N/A";
137 | resultListView.Invoke(setListViewDelegate, new object[] { formattedResults });
138 |
139 | if (result != 0)
140 | {
141 | floatTestPoints.Add(testSize);
142 | testResultsList.Add(result);
143 | currentRunResults.Add(new Tuple(testSize, result));
144 | resultChart.Invoke(setChartDelegate, new object[] { testLabel, floatTestPoints.ToArray(), testResultsList.ToArray(), MicrobenchmarkForm.ResultChartType.CpuMemoryBandwidth });
145 | }
146 | }
147 |
148 | progressLabel.Invoke(setProgressLabelDelegate, new object[] { "Run finished" });
149 | running = false;
150 | RunResults.Add(testLabel, currentRunResults);
151 | }
152 |
153 | // Run a single test size, meant to be run in a background thread
154 | public void RunSingleTest(uint sizeKb, uint threads, bool shared, BenchmarkInteropFunctions.TestType testType)
155 | {
156 | running = true;
157 | float result = BenchmarkInteropFunctions.MeasureBw(sizeKb, GetIterationCount(sizeKb, 512), threads, shared ? 1 : 0, testType);
158 | resultListView.Invoke(setListViewColumnsDelegate, new object[] { bwCols });
159 | string[][] formattedResults = new string[1][];
160 | formattedResults[0] = new string[2];
161 | formattedResults[0][0] = sizeKb + " KB";
162 | formattedResults[0][1] = result + " GB/s";
163 | resultListView.Invoke(setListViewDelegate, new object[] { formattedResults });
164 | }
165 |
166 | public string GetTestSizesAsString()
167 | {
168 | return string.Join(",", testSizes);
169 | }
170 |
171 | // Shouldn't be called when test is running, but UI will take care of that
172 | public void SetTestSizes(string input)
173 | {
174 | string[] inputArr = input.Split(new char[] { ',' } , StringSplitOptions.RemoveEmptyEntries);
175 | uint[] newTestSizes = new uint[inputArr.Length];
176 | for (uint i = 0;i < inputArr.Length; i++)
177 | {
178 | newTestSizes[i] = uint.Parse(inputArr[i]);
179 | }
180 |
181 | testSizes = newTestSizes;
182 | }
183 | }
184 | }
185 |
--------------------------------------------------------------------------------
/BenchmarkDll.c:
--------------------------------------------------------------------------------
#include "pch.h"
#include "BenchmarkDllCommon.h"
#include <intrin.h>
3 |
// Bandwidth kernels defined outside this file (presumably in MemoryBandwidthFunctions.asm - confirm).
// All share one signature so they can be called through bw_func: the buffer, its length as
// passed by MeasureBw (a count of 4-byte elements), and the number of passes to make.
extern float mmx_asm_read(void* arr, uint64_t arr_length, uint64_t iterations);
extern float mmx_asm_write(void* arr, uint64_t arr_length, uint64_t iterations);
extern float mmx_asm_ntwrite(void* arr, uint64_t arr_length, uint64_t iterations);
extern float sse_asm_read(void* arr, uint64_t arr_length, uint64_t iterations);
extern float sse_asm_write(void* arr, uint64_t arr_length, uint64_t iterations);
extern float sse_asm_ntwrite(void* arr, uint64_t arr_length, uint64_t iterations);
extern float sse_asm_ntread(void* arr, uint64_t arr_length, uint64_t iterations);
extern float sse_asm_copy(void* arr, uint64_t arr_length, uint64_t iterations);
extern float sse_asm_add(void* arr, uint64_t arr_length, uint64_t iterations);
extern float avx_asm_read(void* arr, uint64_t arr_length, uint64_t iterations);
extern float avx_asm_write(void* arr, uint64_t arr_length, uint64_t iterations);
extern float avx_asm_ntwrite(void* arr, uint64_t arr_length, uint64_t iterations);
extern float avx_asm_copy(void* arr, uint64_t arr_length, uint64_t iterations);
extern float avx_asm_cflip(void* arr, uint64_t arr_length, uint64_t iterations);
extern float avx_asm_add(void* arr, uint64_t arr_length, uint64_t iterations);
extern float avx512_asm_read(void* arr, uint64_t arr_length, uint64_t iterations);
extern float avx512_asm_write(void* arr, uint64_t arr_length, uint64_t iterations);
extern float avx512_asm_ntwrite(void* arr, uint64_t arr_length, uint64_t iterations);
extern float avx512_asm_add(void* arr, uint64_t arr_length, uint64_t iterations);
extern float repmovsb_copy(void* arr, uint64_t arr_length, uint64_t iterations);
extern float repstosb_write(void* arr, uint64_t arr_length, uint64_t iterations);
extern float repmovsd_copy(void* arr, uint64_t arr_length, uint64_t iterations);
extern float repstosd_write(void* arr, uint64_t arr_length, uint64_t iterations);

// Kernel used by the worker threads. MeasureBw overwrites it on every call, so this
// global is shared state between calls: two MeasureBw calls running at once would
// interfere with each other.
float (*bw_func)(void*, uint64_t, uint64_t) = sse_asm_read;

// Defined at the end of this file; calls the buffer as code instead of reading it as data
float __fastcall instr_read(void* arr, uint64_t arr_length, uint64_t iterations);
30 |
31 | BOOL APIENTRY DllMain( HMODULE hModule,
32 | DWORD ul_reason_for_call,
33 | LPVOID lpReserved
34 | )
35 | {
36 | switch (ul_reason_for_call)
37 | {
38 | case DLL_PROCESS_ATTACH:
39 | case DLL_THREAD_ATTACH:
40 | case DLL_THREAD_DETACH:
41 | case DLL_PROCESS_DETACH:
42 | break;
43 | }
44 | return TRUE;
45 | }
46 |
47 | // Does thing work?
48 | __declspec(dllexport) float __stdcall test(int size);
49 | float __stdcall test(int size)
50 | {
51 | return (float)size + 0.1f;
52 | }
53 |
54 | __declspec(dllexport) int __stdcall CheckAvxSupport();
55 | int __stdcall CheckAvxSupport()
56 | {
57 | int cpuid_data[4];
58 | __cpuidex(cpuid_data, 1, 0);
59 | if (cpuid_data[2] & (1UL << 28)) return 1;
60 | return 0;
61 | }
62 |
63 | __declspec(dllexport) int __stdcall CheckAvx512Support();
64 | int __stdcall CheckAvx512Support()
65 | {
66 | int cpuid_data[4];
67 | __cpuidex(cpuid_data, 7, 0);
68 | if (cpuid_data[1] & (1UL << 16)) return 1;
69 | return 0;
70 | }
71 |
// Selects what MeasureBw runs. The value arrives from the managed caller as an integer,
// so the numbers are part of the interop contract.
// NOTE(review): presumably mirrors BenchmarkInteropFunctions.TestType on the C# side - keep both in sync.
enum TestType {
    None = 0,           // MeasureBw falls back to sse_asm_read
    SseRead = 1,
    SseWrite = 2,
    SseCopy = 3,
    SseAdd = 4,
    AvxRead = 5,
    AvxWrite = 6,
    AvxCopy = 7,
    AvxCflip = 8,
    AvxAdd = 9,
    Avx512Read = 10,
    Avx512Write = 11,
    Avx512Add = 12,
    // Instruction-side tests: FillInstructionArray fills the buffer with machine code
    // and instr_read calls into it
    Instr4 = 13,        // 4-byte NOPs (0F 1F 40 00)
    Instr8 = 14,        // 8-byte NOPs
    K8Instr4 = 15,      // 4-byte NOPs in the 66 66 66 90 form
    Branch16 = 16,      // one short jump per 16 bytes
    MmxRead = 17,
    MmxWrite = 18,
    MmxNtWrite = 19,
    SseNtWrite = 20,
    AvxNtWrite = 21,
    Avx512NtWrite = 22,
    SseNtRead = 23,
    RepMovsb = 24,
    RepStosb = 25,
    RepMovsd = 26,
    RepStosd = 27
};
102 |
// Work description handed to one bandwidth test thread.
// iterations and arr_length are 64 bits wide to match the kernel signature
// (void*, uint64_t, uint64_t) and the 64-bit iteration count MeasureBw receives;
// narrower fields silently drop the upper bits of large iteration counts.
typedef struct BandwidthTestThreadData {
    uint64_t iterations;    // number of passes over the buffer
    uint64_t arr_length;    // buffer length, in 4-byte elements
    float* arr;             // buffer to test
    float bw; // written to by the thread
} BandwidthTestThreadData;
109 |
110 | DWORD WINAPI ReadBandwidthTestThread(LPVOID param) {
111 | BandwidthTestThreadData* bwTestData = (BandwidthTestThreadData*)param;
112 | float sum = bw_func(bwTestData->arr, bwTestData->arr_length, bwTestData->iterations);
113 | if (sum == 0) return 1;
114 | return 0;
115 | }
116 |
117 | void FillInstructionArray(uint64_t* arr, uint64_t sizeKb, enum TestType nopSize)
118 | {
119 | char nop8b[8] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
120 |
121 | // zen/piledriver optimization manual uses this pattern
122 | char nop4b[8] = { 0x0F, 0x1F, 0x40, 0x00, 0x0F, 0x1F, 0x40, 0x00 };
123 |
124 | // athlon64 (K8) optimization manual pattern
125 | char k8_nop4b[8] = { 0x66, 0x66, 0x66, 0x90, 0x66, 0x66, 0x66, 0x90 };
126 |
127 | uint64_t elements = (sizeKb * 1024 / 8) - 1; // leave room for ret
128 | unsigned char* functionEnd = (unsigned char*)(arr + elements);
129 |
130 | if (nopSize != Branch16) {
131 | uint64_t* nopPtr;
132 | if (nopSize == Instr8) nopPtr = (uint64_t*)(nop8b);
133 | else if (nopSize == Instr4) nopPtr = (uint64_t*)(nop4b);
134 | else if (nopSize == K8Instr4) nopPtr = (uint64_t*)(k8_nop4b);
135 | else {
136 | return;
137 | }
138 |
139 | for (uint64_t nopIdx = 0; nopIdx < elements; nopIdx++) {
140 | arr[nopIdx] = *nopPtr;
141 | }
142 |
143 | functionEnd[0] = 0xC3;
144 | }
145 | else {
146 | // jump forward 14 bytes
147 | char branch16b[8] = { 0xEB, 0x0E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
148 | char ret8b[8] = { 0xC3, 0, 0, 0, 0, 0, 0, 0 };
149 | uint64_t* branchPtr = (uint64_t*)(branch16b);
150 | uint64_t* nopPtr = (uint64_t*)(nop8b); // doesn't really matter, we should never hit this
151 |
152 | // last iteration must have nopIdx % 2 == 1, so the jump will go to the return statement
153 | // i.e. branchElements for loop must be even, so the last iteration is odd
154 | uint64_t branchElements = elements % 2 == 0 ? elements : elements - 1;
155 | uint64_t nopIdx;
156 | for (nopIdx = 0; nopIdx < branchElements; nopIdx++) {
157 | arr[nopIdx] = nopIdx % 2 == 0 ? *branchPtr : *nopPtr;
158 | }
159 |
160 | arr[nopIdx] = *(uint64_t*)ret8b;
161 | }
162 | }
163 | __declspec(dllexport) float __stdcall MeasureBw(uint32_t sizeKb, uint64_t iterations, uint32_t threads, int shared, enum TestType mode);
164 |
165 | float __stdcall MeasureBw(uint32_t sizeKb, uint64_t iterations, uint32_t threads, int shared, enum TestType mode) {
166 | struct timeb start, end;
167 | float bw = 0;
168 | uint32_t elements = sizeKb * 1024 / sizeof(float);
169 | uint32_t private_elements = (uint32_t)ceil((double)sizeKb / (double)threads) * 256;
170 | DWORD protection_flags = PAGE_EXECUTE_READWRITE;
171 | if (!shared) elements = private_elements;
172 | if (!shared && sizeKb < threads) {
173 | //fprintf(stderr, "Too many threads for this size, continuing\n");
174 | return 0;
175 | }
176 |
177 | if (mode == None)
178 | {
179 | // need to auto detect later
180 | bw_func = sse_asm_read; // guaranteed to work
181 | }
182 | else if (mode == AvxRead) { bw_func = avx_asm_read; }
183 | else if (mode == AvxWrite) { bw_func = avx_asm_write; }
184 | else if (mode == AvxAdd) { bw_func = avx_asm_add; }
185 | else if (mode == AvxCflip) { bw_func = avx_asm_cflip; }
186 | else if (mode == AvxCopy) { bw_func = avx_asm_copy; }
187 | else if (mode == SseRead) { bw_func = sse_asm_read; }
188 | else if (mode == SseWrite) { bw_func = sse_asm_write; }
189 | else if (mode == SseAdd) { bw_func = sse_asm_add; }
190 | else if (mode == SseCopy) { bw_func = sse_asm_copy; }
191 | else if (mode == Avx512Read) { bw_func = avx512_asm_read; }
192 | else if (mode == Avx512Write) { bw_func = avx512_asm_write; }
193 | else if (mode == Avx512Add) { bw_func = avx512_asm_add; }
194 | else if (mode == MmxRead) { bw_func = mmx_asm_read; }
195 | else if (mode == MmxWrite) { bw_func = mmx_asm_write; }
196 | else if (mode == MmxNtWrite) { bw_func = mmx_asm_ntwrite; }
197 | else if (mode == SseNtWrite) { bw_func = sse_asm_ntwrite; }
198 | else if (mode == AvxNtWrite) { bw_func = avx_asm_ntwrite; }
199 | else if (mode == Avx512NtWrite) { bw_func = avx512_asm_ntwrite; }
200 | else if (mode == SseNtRead) { bw_func = sse_asm_ntread; }
201 | else if (mode == Instr4 || mode == Instr8 || mode == K8Instr4 || mode == Branch16)
202 | {
203 | bw_func = instr_read;
204 | }
205 | else if (mode == RepMovsb) { bw_func = repmovsb_copy; }
206 | else if (mode == RepStosb) { bw_func = repstosb_write; }
207 | else if (mode == RepMovsd) { bw_func = repmovsd_copy; }
208 | else if (mode == RepStosd) { bw_func = repstosd_write; }
209 | else
210 | {
211 | return -3;
212 | }
213 |
214 | // make array and fill it with something
215 | float* testArr = NULL;
216 | if (shared) {
217 | testArr = (float*)VirtualAlloc(NULL, elements * sizeof(float), MEM_COMMIT | MEM_RESERVE, protection_flags);
218 | if (testArr == NULL) {
219 | return 15;
220 | }
221 |
222 | if (mode != None)
223 | {
224 | FillInstructionArray((uint64_t*)testArr, sizeKb, mode);
225 | }
226 | else {
227 | for (uint32_t i = 0; i < elements; i++) {
228 | testArr[i] = i + 0.5f;
229 | }
230 | }
231 | }
232 |
233 | HANDLE* testThreads = (HANDLE*)malloc(threads * sizeof(HANDLE));
234 | DWORD* tids = (DWORD*)malloc(threads * sizeof(DWORD));
235 | struct BandwidthTestThreadData* threadData = (struct BandwidthTestThreadData*)malloc(threads * sizeof(struct BandwidthTestThreadData));
236 |
237 | for (uint64_t i = 0; i < threads; i++) {
238 | if (shared) {
239 | threadData[i].arr = testArr;
240 | }
241 | else {
242 | threadData[i].arr = (float*)VirtualAlloc(NULL, elements * sizeof(float), MEM_COMMIT | MEM_RESERVE, protection_flags);
243 | if (threadData[i].arr == NULL) {
244 | return 0;
245 | }
246 |
247 | if (mode != None)
248 | {
249 | FillInstructionArray((uint64_t*)threadData[i].arr, (elements * 4) / 1024, mode);
250 | }
251 | else
252 | {
253 | for (uint64_t arr_idx = 0; arr_idx < elements; arr_idx++) {
254 | threadData[i].arr[arr_idx] = arr_idx + i + 0.5f;
255 | }
256 | }
257 | }
258 |
259 | threadData[i].arr_length = elements;
260 | threadData[i].bw = 0;
261 | threadData[i].iterations = iterations;
262 | testThreads[i] = CreateThread(NULL, 0, ReadBandwidthTestThread, threadData + i, CREATE_SUSPENDED, tids + i);
263 |
264 | // turns out setting affinity makes no difference, and it's easier to set affinity via start /affinity anyway
265 | //SetThreadAffinityMask(testThreads[i], 1UL << i);
266 | }
267 |
268 | ftime(&start);
269 | for (uint32_t i = 0; i < threads; i++) ResumeThread(testThreads[i]);
270 | WaitForMultipleObjects((DWORD)threads, testThreads, TRUE, INFINITE);
271 | ftime(&end);
272 |
273 | int64_t time_diff_ms = 1000 * (end.time - start.time) + (end.millitm - start.millitm);
274 | double gbTransferred = (uint64_t)iterations * sizeof(float) * elements * threads / (double)1e9;
275 | bw = (float)(1000 * gbTransferred / (double)time_diff_ms);
276 |
277 | free(testThreads);
278 | if (shared) VirtualFree(testArr, elements * sizeof(float), MEM_RELEASE);
279 | free(tids);
280 |
281 | if (!shared) {
282 | for (uint32_t i = 0; i < threads; i++) {
283 | VirtualFreeEx(GetCurrentProcess(), threadData[i].arr, 0, MEM_RELEASE);
284 | }
285 | }
286 |
287 | free(threadData);
288 | return bw;
289 | }
290 |
291 | ///
292 | /// Bandwidth measuring function for instruction-side BW. Simply jumps into the
293 | /// array its given. So that array better be filled with valid instructions, with a
294 | /// return at the end.
295 | ///
296 | /// Array containing instructions, terminated with a return
297 | /// Length of arr in bytes, not used as arr better be ret-terminated
298 | /// How many times to run the nop function (arr)
299 | /// Nothing useful lol
300 | float __fastcall instr_read(void* arr, uint64_t arr_length, uint64_t iterations)
301 | {
302 | void (*nopfunc)(uint64_t);
303 | nopfunc = (void(*)(uint64_t))arr;
304 | int iterIdx;
305 | for (iterIdx = 0; iterIdx < iterations; iterIdx++) nopfunc(iterations);
306 | return (float)iterIdx;
307 | }
--------------------------------------------------------------------------------
/BenchmarkDll.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | Win32
7 |
8 |
9 | Release
10 | Win32
11 |
12 |
13 | Debug
14 | x64
15 |
16 |
17 | Release
18 | x64
19 |
20 |
21 |
22 | 16.0
23 | Win32Proj
24 | {2fc1b46d-1c99-4f82-96f4-e99320552268}
25 | BenchmarkDll
26 | 10.0
27 |
28 |
29 |
30 | DynamicLibrary
31 | true
32 | v143
33 | Unicode
34 |
35 |
36 | DynamicLibrary
37 | false
38 | v143
39 | true
40 | Unicode
41 |
42 |
43 | DynamicLibrary
44 | true
45 | v143
46 | Unicode
47 |
48 |
49 | DynamicLibrary
50 | false
51 | v143
52 | true
53 | Unicode
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 | true
75 |
76 |
77 | false
78 |
79 |
80 | true
81 |
82 |
83 | false
84 |
85 |
86 |
87 | Level3
88 | true
89 | WIN32;_DEBUG;BENCHMARKDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)
90 | true
91 | Use
92 | pch.h
93 |
94 |
95 | Windows
96 | true
97 | false
98 |
99 |
100 |
101 |
102 | Level3
103 | true
104 | true
105 | true
106 | WIN32;NDEBUG;BENCHMARKDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)
107 | true
108 | Use
109 | pch.h
110 |
111 |
112 | Windows
113 | true
114 | true
115 | true
116 | false
117 |
118 |
119 |
120 |
121 | Level3
122 | true
123 | _CRT_SECURE_NO_WARNINGS;_DEBUG;BENCHMARKDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)
124 | true
125 | Use
126 | pch.h
127 | $(SolutionDir)\OpenCL\include;%(AdditionalIncludeDirectories)
128 |
129 |
130 | Windows
131 | true
132 | false
133 | $(SolutionDir)\OpenCL\lib;%(AdditionalLibraryDirectories)
134 | OpenCL.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
135 |
136 |
137 |
138 |
139 | Level3
140 | true
141 | true
142 | true
143 | _CRT_SECURE_NO_WARNINGS;NDEBUG;BENCHMARKDLL_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)
144 | true
145 | Use
146 | pch.h
147 | $(SolutionDir)\OpenCL\include;%(AdditionalIncludeDirectories)
148 |
149 |
150 | Windows
151 | true
152 | true
153 | true
154 | false
155 | $(SolutionDir)\OpenCL\lib;%(AdditionalLibraryDirectories)
156 | kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;OpenCL.lib;%(AdditionalDependencies)
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 | Create
170 | Create
171 | Create
172 | Create
173 |
174 |
175 |
176 |
177 | Document
178 | nasm -f win64 MemoryBandwidthFunctions.asm
179 | Running NASM
180 | MemoryBandwidthFunctions.obj
181 | nasm -f win64 MemoryBandwidthFunctions.asm
182 | Running NASM
183 | MemoryBandwidthFunctions.obj
184 |
185 |
186 |
187 |
188 | false
189 | Document
190 | nasm -f win64 MemoryLatencyFunctions.asm
191 | Building Memory Latency functions
192 | MemoryLatencyFunctions.obj
193 | nasm -f win64 MemoryLatencyFunctions.asm
194 | MemoryLatencyFunctions.obj
195 |
196 |
197 |
198 |
199 |
200 |
--------------------------------------------------------------------------------
/BenchmarkDll.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Header Files
20 |
21 |
22 | Header Files
23 |
24 |
25 | Header Files
26 |
27 |
28 |
29 |
30 | Source Files
31 |
32 |
33 | Source Files
34 |
35 |
36 | Source Files
37 |
38 |
39 | Source Files
40 |
41 |
42 |
43 |
44 | Source Files
45 |
46 |
47 | Source Files
48 |
49 |
50 |
--------------------------------------------------------------------------------
/BenchmarkDllCommon.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | void FillPatternArr(uint32_t* pattern_arr, uint32_t list_size, uint32_t byte_increment);
11 | void FillTlbTestPatternArr(uint32_t* pattern_arr, uint32_t list_size, uint32_t cacheline_size, uint32_t page_size);
--------------------------------------------------------------------------------
/BenchmarkInteropFunctions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Runtime.InteropServices;
3 |
namespace MicrobenchmarkGui
{
    /// <summary>
    /// P/Invoke declarations for the functions imported from BenchmarkDll.dll.
    /// </summary>
    public static class BenchmarkInteropFunctions
    {
        /// <summary>File name of the native library every import below binds to.</summary>
        private const string BenchmarkDllName = @"BenchmarkDll.dll";

        /// <summary>
        /// Native export named "test". Its purpose is not visible from this file.
        /// </summary>
        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern float test(int size);

        /// <summary>
        /// Test selector passed to <see cref="MeasureBw"/>.
        /// Must be kept in sync with the matching enum in the native DLL source.
        /// NOTE(review): the original comment named bandwidth.c, which is not in
        /// this project tree; presumably the enum now lives in BenchmarkDll.c - confirm.
        /// </summary>
        public enum TestType
        {
            None = 0,
            SseRead = 1,
            SseWrite = 2,
            SseCopy = 3,
            SseAdd = 4,
            AvxRead = 5,
            AvxWrite = 6,
            AvxCopy = 7,
            AvxCflip = 8,
            AvxAdd = 9,
            Avx512Read = 10,
            Avx512Write = 11,
            Avx512Add = 12,
            Instr4 = 13,
            Instr8 = 14,
            K8Instr4 = 15,
            Branch16 = 16,
            MmxRead = 17,
            MmxWrite = 18,
            MmxNtWrite = 19,
            SseNtWrite = 20,
            AvxNtWrite = 21,
            Avx512NtWrite = 22,
            SseNtRead = 23,
            RepMovsb = 24,
            RepStosb = 25,
            RepMovsd = 26,
            RepStosd = 27
        };

        /// <summary>
        /// Runs a bandwidth test in the native DLL.
        /// </summary>
        /// <param name="sizeKb">Test size in KB</param>
        /// <param name="iterations">Iteration count handed to the native test</param>
        /// <param name="threads">Number of threads to run</param>
        /// <param name="shared">Non-zero to have all threads use one array instead of one array each</param>
        /// <param name="testType">Which access pattern to run</param>
        /// <returns>Measured bandwidth as reported by the native function</returns>
        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern float MeasureBw(uint sizeKb, ulong iterations, uint threads, int shared, TestType testType);

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern int CheckAvxSupport();

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern int CheckAvx512Support();

        /// <summary>
        /// Sets large pages state for the memory latency tests.
        /// </summary>
        /// <param name="enable">If greater than 0, enable large pages and allocate a test array of this many bytes. If 0, disable large pages and free the array</param>
        /// <returns>0 on success, something else otherwise</returns>
        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern int SetLargePages(uint enable);

        /// <summary>
        /// Runs the C memory latency test.
        /// </summary>
        /// <param name="sizeKb">Test size in KB</param>
        /// <param name="iterations">Number of iterations to run</param>
        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern float RunLatencyTest(uint sizeKb, ulong iterations);

        /// <summary>
        /// Runs the assembly memory latency test.
        /// </summary>
        /// <param name="sizeKb">Test size in KB</param>
        /// <param name="iterations">Number of iterations to run</param>
        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern float RunAsmLatencyTest(uint sizeKb, ulong iterations);

        // OpenCL related functions
        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern int SetOpenCLContext(int platformIndex, int deviceIndex);

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern int GetPlatformCount();

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern int GetDeviceCount(int platformIndex);

        /// <summary>
        /// Gets an OpenCL device's name
        /// </summary>
        /// <param name="platformIndex">Platform index</param>
        /// <param name="deviceIndex">Device index</param>
        /// <param name="deviceNamePtr">Pointer to block of memory to put the device name into</param>
        /// <param name="maxDeviceNameLen">Max length of device name (size of the memory block above). Includes terminating null</param>
        /// <returns>0 on success, OpenCL error code on failure</returns>
        [DllImport(BenchmarkDllName, CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
        public static extern int GetDeviceName(int platformIndex, int deviceIndex, IntPtr deviceNamePtr, int maxDeviceNameLen);

        /// <summary>
        /// Gets an OpenCL platform's name
        /// </summary>
        /// <param name="platformIndex">Platform index</param>
        /// <param name="platformNamePtr">Pointer to block of memory to put the name into</param>
        /// <param name="maxPlatformNameLen">Max name length, includes terminating null</param>
        /// <returns>0 on success, error code on fail</returns>
        [DllImport(BenchmarkDllName, CharSet = CharSet.Ansi, CallingConvention = CallingConvention.StdCall)]
        public static extern int GetPlatformName(int platformIndex, IntPtr platformNamePtr, int maxPlatformNameLen);

        /// <summary>
        /// OpenCL test selector. Keep in sync with the matching enum in the native
        /// OpenCL source (OpenClFunctions.c in the project tree).
        /// </summary>
        public enum CLTestType
        {
            None = 0,
            GlobalScalar = 1,
            GlobalVector = 2,
            ConstantScalar = 3,
            Texture = 4,
            Local = 5,
            LinkBw = 6
        };

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern float RunCLLatencyTest(uint sizeKb, uint iterations, CLTestType testType, int tlb);

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern float RunCLLinkBwTest(uint sizeKb, uint iterations, int cpuToGpu);

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern int InitializeLatencyTest(CLTestType testType);

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern int DeinitializeLatencyTest();

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern ulong GetDeviceMaxConstantBufferSize();

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern ulong GetDeviceMaxBufferSize();

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern ulong GetDeviceMaxTextureSize();

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern void SetGpuPtrChasingStride(uint stride);

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern uint GetGpuPtrChasingStride();

        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern void SetGpuEstimatedPageSize(uint pageSize);

        // Declared void originally, which made the getter unable to return its value.
        // NOTE(review): uint mirrors SetGpuEstimatedPageSize(uint) and
        // GetGpuPtrChasingStride(); confirm against the native export's return type.
        [DllImport(BenchmarkDllName, CharSet = CharSet.Auto, CallingConvention = CallingConvention.StdCall)]
        public static extern uint GetGpuEstimatedPageSize();
    }
}
139 |
--------------------------------------------------------------------------------
/BenchmarkSubmission.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 |
namespace MicrobenchmarkGui
{
    /// <summary>
    /// Data for one benchmark submission: the test name, the system description
    /// fields, free-form notes and the measured results.
    /// </summary>
    public class BenchmarkSubmission
    {
        public string TestName { get; set; }

        public string CpuName { get; set; }

        public string MotherboardName { get; set; }

        public string MemoryConfig { get; set; }

        public string Notes { get; set; }

        /// <summary>
        /// Result rows. Starts out as an empty array so it is never null.
        /// </summary>
        public float[][] Results { get; set; } = new float[0][];
    }
}
--------------------------------------------------------------------------------
/BenchmarkSubmissionDialog.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Drawing;
4 | using System.Threading.Tasks;
5 | using System.Windows.Forms;
6 | using System.Text;
7 | using System.Linq;
8 | using System.Management;
9 | using Newtonsoft.Json;
10 | using System.Net.Http;
11 |
namespace MicrobenchmarkGui
{
    /// <summary>
    /// Dialog that summarizes a finished test run, pre-fills detected system
    /// information and posts the results as JSON to the submission server.
    /// </summary>
    public partial class BenchmarkSubmissionDialog : Form
    {
        private const string SERVER_URL = "https://memrank.reali.es";
        private const string UnknownCpu = "Unknown CPU";
        private const string UnknownMotherboard = "Unknown Motherboard";
        private const string UnknownMemoryConfig = "Unknown Memory Configuration";
        private const ulong BytesPerGb = 1024UL * 1024 * 1024;

        private readonly string testName;
        private readonly List<(float size, float result)> results;
        private readonly BenchmarkSubmission submission;

        /// <summary>
        /// Creates the dialog for one test run.
        /// </summary>
        /// <param name="testName">Name of the test the results belong to</param>
        /// <param name="results">Measured (size, result) points</param>
        public BenchmarkSubmissionDialog(string testName, List<(float size, float result)> results)
        {
            InitializeComponent();
            this.testName = testName;
            this.results = results;
            this.submission = new BenchmarkSubmission();

            // Set window properties
            this.MinimumSize = new Size(500, 400);
            this.StartPosition = FormStartPosition.CenterParent;

            // Initialize summary text
            UpdateSummary();

            // Pre-populate system information
            PopulateSystemInfo();
        }

        /// <summary>
        /// Fills the CPU, motherboard and memory text boxes and makes them read-only.
        /// Falls back to "Unknown ..." values and warns the user if detection fails.
        /// </summary>
        private void PopulateSystemInfo()
        {
            try
            {
                cpuNameTextBox.Text = OpCode.GetProcessorName() ?? UnknownCpu;
                motherboardTextBox.Text = DetectMotherboard();
                memoryConfigTextBox.Text = DetectMemoryConfig();

                // Make the fields read-only without edit option
                cpuNameTextBox.ReadOnly = true;
                motherboardTextBox.ReadOnly = true;
                memoryConfigTextBox.ReadOnly = true;
            }
            catch (Exception ex)
            {
                // Set default values if overall detection fails
                cpuNameTextBox.Text = UnknownCpu;
                motherboardTextBox.Text = UnknownMotherboard;
                memoryConfigTextBox.Text = UnknownMemoryConfig;

                MessageBox.Show($"Error retrieving system information: {ex.Message}\nDefault values have been set.",
                    "System Info Error", MessageBoxButtons.OK, MessageBoxIcon.Warning);
            }
        }

        /// <summary>
        /// Reads manufacturer and product of the first Win32_BaseBoard entry via WMI.
        /// </summary>
        /// <returns>"Manufacturer Product", or "Unknown Motherboard" if nothing usable was found</returns>
        private static string DetectMotherboard()
        {
            try
            {
                using (var searcher = new ManagementObjectSearcher("SELECT * FROM Win32_BaseBoard"))
                {
                    foreach (ManagementObject board in searcher.Get())
                    {
                        string manufacturer = board["Manufacturer"]?.ToString() ?? "";
                        string product = board["Product"]?.ToString() ?? "";
                        string description = $"{manufacturer} {product}".Trim();

                        // Only the first board is considered
                        return string.IsNullOrWhiteSpace(description) ? UnknownMotherboard : description;
                    }
                }
            }
            catch
            {
                // Detection is best effort; fall through to the default value
            }

            return UnknownMotherboard;
        }

        /// <summary>
        /// Reads ConfiguredMemoryClockSpeed from MSMemory_Performance in root\WMI.
        /// </summary>
        /// <returns>The first non-null speed found, or 0 if the query fails or yields nothing</returns>
        private static int QueryConfiguredMemorySpeed()
        {
            try
            {
                using (var searcher = new ManagementObjectSearcher(@"root\WMI", "SELECT * FROM MSMemory_Performance"))
                {
                    foreach (ManagementObject obj in searcher.Get())
                    {
                        if (obj["ConfiguredMemoryClockSpeed"] != null)
                        {
                            return Convert.ToInt32(obj["ConfiguredMemoryClockSpeed"]);
                        }
                    }
                }
            }
            catch
            {
                // If we can't get the actual speed, callers fall back to the per-module speed
            }

            return 0;
        }

        /// <summary>
        /// Builds a one-line description of the installed memory from Win32_PhysicalMemory.
        /// </summary>
        /// <returns>Total capacity, module count and per-module details, or "Unknown Memory Configuration"</returns>
        private string DetectMemoryConfig()
        {
            try
            {
                int currentSpeed = QueryConfiguredMemorySpeed();
                var memoryModules = new List<string>();
                ulong totalCapacity = 0;
                int moduleCount = 0;

                using (var searcher = new ManagementObjectSearcher("SELECT * FROM Win32_PhysicalMemory"))
                {
                    foreach (ManagementObject memory in searcher.Get())
                    {
                        moduleCount++;
                        ulong capacity = Convert.ToUInt64(memory["Capacity"]);
                        totalCapacity += capacity;

                        // Use actual speed if we got it, otherwise fall back to the module's own values
                        int speed = currentSpeed > 0 ? currentSpeed :
                            Convert.ToInt32(memory["ConfiguredClockSpeed"] ?? memory["Speed"] ?? 0);

                        string memoryType = GetMemoryType(memory);

                        memoryModules.Add($"{capacity / BytesPerGb}GB {memoryType}" +
                            (speed > 0 ? $" @ {speed}MHz" : ""));
                    }
                }

                if (totalCapacity == 0)
                {
                    return UnknownMemoryConfig;
                }

                string memoryConfig = $"{totalCapacity / BytesPerGb}GB Total ({moduleCount} modules)";
                if (memoryModules.Count > 0)
                {
                    memoryConfig += $" - {string.Join(", ", memoryModules)}";
                }

                return memoryConfig;
            }
            catch
            {
                // Keep default "Unknown Memory Configuration" value
                return UnknownMemoryConfig;
            }
        }

        /// <summary>
        /// Translates a memory module's WMI type codes into a DDR generation name.
        /// </summary>
        /// <param name="memory">A Win32_PhysicalMemory instance</param>
        /// <returns>A name such as "DDR4"; "DDR" when the type is not recognized or cannot be read</returns>
        private string GetMemoryType(ManagementObject memory)
        {
            try
            {
                // SMBIOSMemoryType uses the SMBIOS "Memory Device - Type" codes
                switch (Convert.ToInt32(memory["SMBIOSMemoryType"]))
                {
                    case 18: return "DDR";
                    case 19:
                    case 20: return "DDR2";
                    case 24: return "DDR3";
                    case 26: return "DDR4";
                    case 27: return "LPDDR";
                    case 28: return "LPDDR2";
                    case 29: return "LPDDR3";
                    case 30: return "LPDDR4";
                    case 34: return "DDR5";
                    case 35: return "LPDDR5";
                }

                // The legacy MemoryType property is a type code too, not a generation
                // number, so it has to be mapped instead of being appended to "DDR"
                // (which produced names like "DDR24" for DDR3).
                if (memory["MemoryType"] != null)
                {
                    switch (Convert.ToInt32(memory["MemoryType"]))
                    {
                        case 21:
                        case 22: return "DDR2";
                        case 24: return "DDR3";
                    }
                }

                return "DDR";
            }
            catch
            {
                return "DDR";
            }
        }

        /// <summary>
        /// Writes the test name, point count and size/result ranges into the summary box.
        /// </summary>
        private void UpdateSummary()
        {
            var summary = new StringBuilder();
            summary.AppendLine($"Test: {testName}");
            summary.AppendLine($"Number of data points: {results.Count}");

            if (results.Any())
            {
                summary.AppendLine($"Size range: {results.Min(r => r.size):F2} KB to {results.Max(r => r.size):F2} KB");
                summary.AppendLine($"Result range: {results.Min(r => r.result):F2} to {results.Max(r => r.result):F2}");
            }

            summaryTextBox.Text = summary.ToString();
        }

        private async void submitButton_Click(object sender, EventArgs e)
        {
            // Disable the clicked control while the request is in flight so a
            // second click cannot post the same results twice.
            var trigger = sender as Control;
            if (trigger != null)
            {
                trigger.Enabled = false;
            }

            try
            {
                if (await SubmitAsync())
                {
                    DialogResult = DialogResult.OK;
                }
            }
            finally
            {
                if (trigger != null && !trigger.IsDisposed)
                {
                    trigger.Enabled = true;
                }
            }
        }

        /// <summary>
        /// Validates the form, posts the submission as JSON to the server and, on
        /// success, opens the returned result page. Errors are reported with a message box.
        /// </summary>
        /// <returns>true if the server accepted the submission, false otherwise</returns>
        public async Task<bool> SubmitAsync()
        {
            // Get values from form controls
            submission.TestName = testName;
            submission.CpuName = cpuNameTextBox.Text;
            submission.MotherboardName = motherboardTextBox.Text;
            submission.MemoryConfig = memoryConfigTextBox.Text;
            submission.Notes = notesTextBox.Text;
            submission.Results = results.Select(r => new float[] { r.size, r.result }).ToArray();

            // Validate required fields
            if (string.IsNullOrWhiteSpace(submission.CpuName) ||
                string.IsNullOrWhiteSpace(submission.MotherboardName) ||
                string.IsNullOrWhiteSpace(submission.MemoryConfig))
            {
                MessageBox.Show("Please fill in all required fields.", "Validation Error",
                    MessageBoxButtons.OK, MessageBoxIcon.Warning);
                return false;
            }

            try
            {
                string jsonSubmission = JsonConvert.SerializeObject(submission, Formatting.Indented);

                using (var client = new HttpClient())
                using (var content = new StringContent(jsonSubmission, Encoding.UTF8, "application/json"))
                using (var response = await client.PostAsync($"{SERVER_URL}/submit", content))
                {
                    if (!response.IsSuccessStatusCode)
                    {
                        throw new Exception($"Server returned status code: {response.StatusCode}");
                    }

                    var responseJson = await response.Content.ReadAsStringAsync();
                    var result = JsonConvert.DeserializeAnonymousType(responseJson, new { success = false, url = "", id = 0 });

                    // The path is appended to SERVER_URL and handed to the shell, so it
                    // must start with '/': anything else (e.g. "@other.host/") would
                    // change which host the resulting URL points to.
                    if (result?.success == true &&
                        !string.IsNullOrEmpty(result.url) &&
                        result.url.StartsWith("/", StringComparison.Ordinal))
                    {
                        System.Diagnostics.Process.Start(new System.Diagnostics.ProcessStartInfo
                        {
                            FileName = $"{SERVER_URL}{result.url}",
                            UseShellExecute = true
                        });
                        return true;
                    }

                    throw new Exception("Invalid response from server");
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show($"Error submitting results: {ex.Message}",
                    "Submission Error",
                    MessageBoxButtons.OK, MessageBoxIcon.Error);
                return false;
            }
        }

        /// <summary>
        /// Reports Cancel when the form closes without a result having been set.
        /// </summary>
        protected override void OnFormClosing(FormClosingEventArgs e)
        {
            if (DialogResult == DialogResult.None)
            {
                DialogResult = DialogResult.Cancel;
            }
            base.OnFormClosing(e);
        }

        private void cancelButton_Click(object sender, EventArgs e)
        {
            DialogResult = DialogResult.Cancel;
        }
    }
}
274 |
--------------------------------------------------------------------------------
/BenchmarkSubmissionDialog.resx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 | text/microsoft-resx
110 |
111 |
112 | 2.0
113 |
114 |
115 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
116 |
117 |
118 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
119 |
120 |
--------------------------------------------------------------------------------
/GlobalTestSettings.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
namespace MicrobenchmarkGui
{
    /// <summary>
    /// Holds settings that are shared by all tests.
    /// </summary>
    public static class GlobalTestSettings
    {
        /// <summary>
        /// Smallest test size in KB for tests that step through several sizes.
        /// Starts at 0.
        /// </summary>
        public static uint MinTestSizeKb;
    }
}
20 |
--------------------------------------------------------------------------------
/LatencyRunner.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Diagnostics;
4 | using System.Threading;
5 | using System.Windows.Forms;
6 | using System.Windows.Forms.DataVisualization.Charting;
7 |
namespace MicrobenchmarkGui
{
    /// <summary>
    /// Runs the CPU memory latency test over a list of test sizes and pushes
    /// progress and results to the result list view, chart and progress label.
    /// </summary>
    public class LatencyRunner
    {
        /// <summary>
        /// Test sizes in KB, run in this order
        /// </summary>
        public uint[] testSizes = { 2, 4, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 512, 600, 768, 1024, 1536, 2048,
                                    3072, 4096, 5120, 6144, 8192, 10240, 12288, 16384, 24576, 32768, 65536, 98304,
                                    131072, 262144, 393216, 524288, 1048576 };

        public bool running = false;

        /// <summary>
        /// Results of every run so far, keyed by test label. Each entry is a list
        /// of (test size in KB, latency) points.
        /// </summary>
        public Dictionary<string, List<Tuple<float, float>>> RunResults;

        // last run results
        public string[][] formattedResults;

        /// <summary>
        /// List of test results from last run
        /// </summary>
        public List<float> testResultsList;

        /// <summary>
        /// List of tested points from last run
        /// </summary>
        public List<float> floatTestPoints;

        private ListView resultListView;
        private Chart resultChart;
        private MicrobenchmarkForm.SafeSetResultListView setListViewDelegate;
        private MicrobenchmarkForm.SafeSetResultListViewColumns setListViewColumnsDelegate;
        private MicrobenchmarkForm.SafeSetResultsChart setChartDelegate;
        private MicrobenchmarkForm.SafeSetProgressLabel setProgressLabelDelegate;
        private Label progressLabel;
        private string[] bwCols = { "Data Size", "Latency" };

        public LatencyRunner(MicrobenchmarkForm.SafeSetResultListView setListViewDelegate,
            MicrobenchmarkForm.SafeSetResultListViewColumns setListViewColsDelegate,
            MicrobenchmarkForm.SafeSetResultsChart setChartDelegate,
            MicrobenchmarkForm.SafeSetProgressLabel setLabelDelegate,
            ListView resultListView,
            Chart resultChart,
            Label progressLabel)
        {
            this.setListViewColumnsDelegate = setListViewColsDelegate;
            this.setListViewDelegate = setListViewDelegate;
            this.setChartDelegate = setChartDelegate;
            this.setProgressLabelDelegate = setLabelDelegate;
            this.resultListView = resultListView;
            this.resultChart = resultChart;
            this.progressLabel = progressLabel;

            this.RunResults = new Dictionary<string, List<Tuple<float, float>>>();
        }

        /// <summary>
        /// Runs through all test sizes. Meant to be run in a background thread;
        /// UI updates go through Invoke on the controls.
        /// </summary>
        /// <param name="asm">true to run the assembly latency test, false for the C one</param>
        /// <param name="largePages">true to run with a large page allocation</param>
        /// <param name="runCancel">Checked before each test size; the run stops once cancellation is requested</param>
        public void StartFullTest(bool asm, bool largePages, CancellationToken runCancel)
        {
            string testLabel = (asm ? "ASM" : "C") + ", " + (largePages ? "Large Pages" : "Default Pages");
            List<Tuple<float, float>> currentRunResults = new List<Tuple<float, float>>();
            testResultsList = new List<float>();
            floatTestPoints = new List<float>();
            resultListView.Invoke(setListViewColumnsDelegate, new object[] { bwCols });
            float[] testResults = new float[testSizes.Length];
            formattedResults = new string[testSizes.Length][];

            for (uint i = 0; i < testSizes.Length; i++)
            {
                testResults[i] = 0;
                formattedResults[i] = new string[2];
                formattedResults[i][0] = string.Format("{0} KB", testSizes[i]);
                formattedResults[i][1] = "Not Run";
            }

            resultListView.Invoke(setListViewDelegate, new object[] { formattedResults });

            // SetTestSizes accepts an empty list; without this guard the large
            // page path below would index past the end of testSizes.
            if (testSizes.Length == 0)
            {
                progressLabel.Invoke(setProgressLabelDelegate, new object[] { "No test sizes to run" });
                return;
            }

            if (!largePages)
            {
                BenchmarkInteropFunctions.SetLargePages(0);
            }
            else
            {
                // The large page array is sized for the last entry of testSizes
                uint maxTestSize = testSizes[testSizes.Length - 1];
                int rc = BenchmarkInteropFunctions.SetLargePages(maxTestSize * 1024);
                if (rc == -1)
                {
                    progressLabel.Invoke(setProgressLabelDelegate,
                        new object[] { "Failed to get SeLockMemoryPrivilege for large pages. See README.md" });
                    return;
                }
                else if (rc == -2)
                {
                    progressLabel.Invoke(setProgressLabelDelegate,
                        new object[] { "Could not allocate " + maxTestSize + " KB with large pages. If you have enough free memory, try rebooting" });
                    return;
                }
            }

            float targetTimeMs = 3500, minTimeMs = 1500, lastTimeMs = 0;
            Stopwatch testStopwatch = new Stopwatch();
            for (uint testIdx = 0; testIdx < testSizes.Length; testIdx++)
            {
                if (runCancel.IsCancellationRequested)
                {
                    break;
                }

                uint testSize = testSizes[testIdx];
                float result;
                ulong currentIterations = 2500000;

                if (GlobalTestSettings.MinTestSizeKb != 0 && GlobalTestSettings.MinTestSizeKb > testSize) continue;

                // Repeat with a rescaled iteration count until the run is long enough
                do
                {
                    progressLabel.Invoke(setProgressLabelDelegate, new object[] { $"Testing {testSize} KB with {currentIterations / 1000}K iterations. Last run = {lastTimeMs} ms" });

                    Console.WriteLine("Starting run with {0}K iterations", currentIterations / 1000);
                    testStopwatch.Restart();
                    if (asm) result = BenchmarkInteropFunctions.RunAsmLatencyTest(testSize, currentIterations);
                    else result = BenchmarkInteropFunctions.RunLatencyTest(testSize, currentIterations);
                    testStopwatch.Stop();

                    // result is treated as ns per iteration here, so this yields ms
                    lastTimeMs = (float)(result * currentIterations / 1e6);
                    Console.WriteLine("Calculated time: {0:F2}, stopwatch time: {1}", lastTimeMs, testStopwatch.ElapsedMilliseconds);
                    currentIterations = TestUtilities.ScaleIterations(currentIterations, targetTimeMs, lastTimeMs);
                } while (lastTimeMs < minTimeMs);

                testResults[testIdx] = result;

                if (result != 0) formattedResults[testIdx][1] = string.Format("{0:F2} ns", result);
                else formattedResults[testIdx][1] = "N/A";
                resultListView.Invoke(setListViewDelegate, new object[] { formattedResults });

                if (result != 0)
                {
                    floatTestPoints.Add(testSize);
                    testResultsList.Add(result);
                    currentRunResults.Add(new Tuple<float, float>(testSize, result));
                    resultChart.Invoke(setChartDelegate, new object[] { testLabel, floatTestPoints.ToArray(), testResultsList.ToArray(), MicrobenchmarkForm.ResultChartType.CpuMemoryLatency });
                }
            }

            progressLabel.Invoke(setProgressLabelDelegate, new object[] { "Run finished" });

            // Indexer instead of Add: running the same configuration a second
            // time reuses the label, and Add throws on an existing key.
            RunResults[testLabel] = currentRunResults;
        }

        /// <summary>
        /// Returns the test sizes as a comma separated list
        /// </summary>
        public string GetTestSizesAsString()
        {
            return string.Join(",", testSizes);
        }

        /// <summary>
        /// Replaces the test sizes with the values in a comma separated list.
        /// Shouldn't be called when a test is running, but the UI will take care of that.
        /// Entries that uint.Parse rejects make it throw, leaving the old sizes in place.
        /// </summary>
        /// <param name="input">Comma separated test sizes in KB; empty entries are ignored</param>
        public void SetTestSizes(string input)
        {
            string[] inputArr = input.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
            uint[] newTestSizes = new uint[inputArr.Length];
            for (uint i = 0; i < inputArr.Length; i++)
            {
                newTestSizes[i] = uint.Parse(inputArr[i]);
            }

            testSizes = newTestSizes;
        }
    }
}
173 |
--------------------------------------------------------------------------------
/MemoryLatency.c:
--------------------------------------------------------------------------------
1 | #include "pch.h"
2 | #include "BenchmarkDllCommon.h"
3 |
4 | // If set, memory latency tests will use this as the test array
5 | // If not set, test runs will use malloc()
6 | void* mem = NULL;
7 |
8 | // mem latency functions
9 | __declspec(dllexport) float __stdcall RunAsmLatencyTest(uint32_t size_kb, uint64_t iterations);
10 | __declspec(dllexport) float __stdcall RunLatencyTest(uint32_t size_kb, uint64_t iterations);
11 | __declspec(dllexport) int __stdcall SetLargePages(uint32_t enable);
12 |
13 | int GetPrivilege();
14 |
15 | ///
16 | /// Sets large pages state. Will allocate array if large pages are enabled
17 | ///
18 | /// If greater than 0, enable large pages, with array set to specified size in bytes. If 0, disable large pages and free any allocated arr
19 | /// 0 on success, something else otherwise
20 | int SetLargePages(uint32_t enable)
21 | {
22 | if (enable == 0)
23 | {
24 | if (mem != NULL)
25 | {
26 | VirtualFree(mem, 0, MEM_RELEASE);
27 | mem = NULL;
28 | }
29 |
30 | return 0;
31 | }
32 | else
33 | {
34 | if (mem != NULL)
35 | {
36 | VirtualFree(mem, 0, MEM_RELEASE);
37 | mem = NULL;
38 | }
39 |
40 | if (GetPrivilege() != 0)
41 | {
42 | return -1;
43 | }
44 |
45 | mem = VirtualAlloc(NULL, enable, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
46 | if (mem == NULL)
47 | {
48 | return -2;
49 | }
50 |
51 | return 0;
52 | }
53 | }
54 |
/// <summary>
/// Fills pattern array with 32-bit integers. Every used slot first holds its
/// own index; the slots are then shuffled by swapping each one, from the last
/// down to the second, with a randomly picked lower slot.
/// </summary>
/// <param name="pattern_arr">array to fill</param>
/// <param name="list_size">number of 32-bit elements</param>
/// <param name="byte_increment">how far apart elements should be spaced, in bytes. Nothing is written if this is less than one element</param>
void FillPatternArr(uint32_t* pattern_arr, uint32_t list_size, uint32_t byte_increment) {
    uint32_t increment = byte_increment / sizeof(uint32_t);
    if (increment == 0) {
        // a stride below one element would divide by zero below
        return;
    }

    uint32_t element_count = list_size / increment;
    for (uint32_t i = 0; i < element_count; i++) {
        pattern_arr[i * increment] = i * increment;
    }

    // Unsigned indices throughout: the counts are uint32_t, and a signed
    // index would turn negative for counts above INT_MAX.
    uint32_t iter = element_count;
    while (iter > 1) {
        iter -= 1;
        // NOTE(review): rand() yields at most RAND_MAX, so for arrays with more
        // slots than that the swap partner always comes from the low slots.
        uint32_t j = (iter - 1 == 0) ? 0 : (uint32_t)rand() % (iter - 1);
        uint32_t tmp = pattern_arr[iter * increment];
        pattern_arr[iter * increment] = pattern_arr[j * increment];
        pattern_arr[j * increment] = tmp;
    }
}
77 |
/// <summary>
/// Maps an entry index to an element offset that lies index pages into the
/// array, shifted within that page by index cachelines (wrapped with a mask of
/// elements-per-page minus one).
/// </summary>
/// <param name="index">entry index</param>
/// <param name="cacheline_size">cacheline size in bytes</param>
/// <param name="page_size">page size in bytes</param>
/// <returns>offset in 32-bit elements</returns>
uint32_t GetTlbShiftedOffset(uint32_t index, uint32_t cacheline_size, uint32_t page_size)
{
    const uint32_t elems_per_page = page_size / sizeof(uint32_t);
    const uint32_t elems_per_line = cacheline_size / sizeof(uint32_t);
    const uint32_t page_start = index * elems_per_page;
    const uint32_t shift_in_page = (index * elems_per_line) & (elems_per_page - 1);

    return page_start + shift_in_page;
}
85 |
/// <summary>
/// Fills pattern array with page_size as the pointer chasing stride, but with
/// each entry shifted within its page as computed by GetTlbShiftedOffset.
/// All other elements are set to 0xFFFFFFFF.
/// </summary>
/// <param name="pattern_arr">array to fill</param>
/// <param name="list_size">number of 32-bit elements in pattern_arr</param>
/// <param name="cacheline_size">cacheline size in bytes</param>
/// <param name="page_size">page size in bytes; must not be 0</param>
void FillTlbTestPatternArr(uint32_t* pattern_arr, uint32_t list_size, uint32_t cacheline_size, uint32_t page_size) {
    // one chain entry per page
    uint32_t element_count = list_size * sizeof(uint32_t) / page_size;

    // memset only uses the low byte of its value, so this sets every element
    // to 0xFFFFFFFF (same as the INT_MAX argument used previously)
    memset(pattern_arr, 0xFF, list_size * sizeof(uint32_t));
    if (element_count == 0) {
        return;
    }

    // build a dense shuffled chain first, then scatter it across the pages
    uint32_t* temp_arr = (uint32_t*)malloc(sizeof(uint32_t) * element_count);
    if (temp_arr == NULL) {
        // Out of memory: there is no way to report this to the caller, so
        // leave the array filled with 0xFFFFFFFF rather than writing through NULL
        return;
    }

    FillPatternArr(temp_arr, element_count, sizeof(uint32_t));
    for (uint32_t i = 0; i < element_count; i++)
    {
        uint32_t dst_index = GetTlbShiftedOffset(i, cacheline_size, page_size);
        uint32_t dst_value = GetTlbShiftedOffset(temp_arr[i], cacheline_size, page_size);
        pattern_arr[dst_index] = dst_value;
    }
    free(temp_arr);
}
109 |
/// <summary>
/// Fills pattern array with 64-bit integers that form a randomized
/// pointer-chasing pattern of element indices. Only slots at multiples of
/// (byte_increment / 8) are written.
/// </summary>
/// <param name="pattern_arr">array to fill</param>
/// <param name="list_size">number of 64-bit elements in array</param>
/// <param name="byte_increment">how far apart elements should be spaced, in bytes.
/// Values below sizeof(uint64_t) leave the array untouched.</param>
void FillPatternArr64(uint64_t* pattern_arr, uint64_t list_size, uint64_t byte_increment) {
    // Keep all sizes 64-bit wide so large arrays are not silently truncated.
    uint64_t increment = byte_increment / sizeof(uint64_t);
    if (increment == 0) {
        // A stride smaller than one element would make the division below trap.
        return;
    }

    // Start with every used slot pointing at itself.
    uint64_t element_count = list_size / increment;
    for (uint64_t i = 0; i < element_count; i++) {
        pattern_arr[i * increment] = i * increment;
    }

    // Walk from the last slot down, swapping each slot with a randomly chosen
    // slot that has a strictly lower index.
    uint64_t iter = element_count;
    while (iter > 1) {
        iter -= 1;
        uint64_t j = (iter - 1 == 0) ? 0 : (uint64_t)rand() % (iter - 1);
        uint64_t tmp = pattern_arr[iter * increment];
        pattern_arr[iter * increment] = pattern_arr[j * increment];
        pattern_arr[j * increment] = tmp;
    }
}
132 |
133 | float RunAsmLatencyTest(uint32_t size_kb, uint64_t iterations) {
134 | struct timeb start, end;
135 | uint32_t list_size = size_kb * 1024 / sizeof(void*);
136 |
137 | uint64_t* A;
138 | if (mem == NULL) {
139 | A = (uint64_t*)malloc(size_kb * 1024);
140 | }
141 | else {
142 | A = (uint64_t*)mem;
143 | }
144 |
145 | memset(A, 0, 1024 * size_kb);
146 | FillPatternArr64(A, size_kb * 1024 / sizeof(uint64_t), 64);
147 | preplatencyarr(A, size_kb * 1024 / sizeof(uint64_t));
148 |
149 | ftime(&start);
150 | uint64_t sum = latencytest(iterations, A);
151 | ftime(&end);
152 | int64_t time_diff_ms = 1000 * (end.time - start.time) + (end.millitm - start.millitm);
153 | float latency = 1e6 * (float)time_diff_ms / (float)iterations;
154 | if (mem == NULL) free(A);
155 | return latency;
156 | }
157 |
158 | float RunLatencyTest(uint32_t size_kb, uint64_t iterations) {
159 | struct timeb start, end;
160 | uint32_t list_size = size_kb * 1024 / 4;
161 | uint32_t current;
162 |
163 | // Fill list to create random access pattern
164 | int* A;
165 | if (mem == NULL) {
166 | A = (int*)malloc(sizeof(int) * list_size);
167 | }
168 | else {
169 | A = (int*)mem;
170 | }
171 |
172 | for (int i = 0; i < list_size; i++) {
173 | A[i] = i;
174 | }
175 |
176 | FillPatternArr(A, list_size, 64);
177 |
178 | // Run test
179 | ftime(&start);
180 | current = A[0];
181 | for (int i = 0; i < iterations; i++) {
182 | current = A[current];
183 | }
184 | ftime(&end);
185 | int64_t time_diff_ms = 1000 * (end.time - start.time) + (end.millitm - start.millitm);
186 | float latency = 1e6 * (float)time_diff_ms / (float)iterations;
187 |
188 | int tmp = A[current];
189 | if (mem == NULL) free(A);
190 | if (current == tmp) return 0;
191 | return latency;
192 | }
193 |
194 | int GetPrivilege()
195 | {
196 | HANDLE hToken;
197 | TOKEN_PRIVILEGES tp;
198 | BOOL status;
199 | DWORD error;
200 |
201 | // open process token
202 | if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
203 | {
204 | return -1;
205 | }
206 |
207 | // get the luid
208 | if (!LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid))
209 | {
210 | return -1;
211 | }
212 |
213 | // enable privilege
214 | tp.PrivilegeCount = 1;
215 | tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
216 | status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
217 |
218 | // It is possible for AdjustTokenPrivileges to return TRUE and still not succeed.
219 | // So always check for the last error value.
220 | error = GetLastError();
221 | if (!status || (error != ERROR_SUCCESS))
222 | {
223 | return -1;
224 | }
225 |
226 | // close the handle
227 | if (!CloseHandle(hToken))
228 | {
229 | return -1;
230 | }
231 |
232 | return 0;
233 | }
234 |
--------------------------------------------------------------------------------
/MemoryLatencyFunctions.asm:
--------------------------------------------------------------------------------
1 | section .text
2 | bits 64
3 |
4 | global preplatencyarr
5 | global latencytest
6 |
; preplatencyarr - convert an array of 64-bit element indices into absolute addresses.
; Called from C as preplatencyarr(A, element_count):
;   rcx = base address of the array of 64-bit values
;   rdx = number of 64-bit elements to convert
; Each element arr[i] is replaced by (base + arr[i] * 8), so that latencytest
; can follow the chain with plain loads.
; r14 and r15 are saved and restored; flags are modified.
preplatencyarr:
  test rdx, rdx             ; nothing to convert: without this check the
  jz preplatencyarr_done    ; inc/cmp/jne loop below would wrap past the array
  push r15
  push r14
  xor r15, r15              ; array index
preplatencyarr_loop:
  mov r14, [rcx + r15 * 8]  ; load the stored element index
  lea r14, [rcx + r14 * 8]  ; turn it into the address of that element
  mov [rcx + r15 * 8], r14  ; store the address back in place
  inc r15
  cmp rdx, r15
  jne preplatencyarr_loop
  pop r14
  pop r15
preplatencyarr_done:
  ret
21 |
; latencytest - follow a chain of absolute addresses (as produced by preplatencyarr).
; Called from C as latencytest(iterations, A):
;   rcx = number of dependent loads to perform
;   rdx = address of the first chain element
; Returns in rax the sum of all loaded values; returns 0 when rcx is 0.
; r15 is saved and restored; flags are modified.
latencytest:
  push r15
  xor rax, rax
  test rcx, rcx         ; zero iterations: without this check dec/jnz
  jz latencytest_done   ; would wrap and run 2^64 times
  mov r15, [rdx]        ; first hop of the chain
latencytest_loop:
  mov r15, [r15]        ; dependent load: the next address comes from the previous load
  add rax, r15          ; accumulate the loaded values into the return value
  dec rcx
  jnz latencytest_loop
latencytest_done:
  pop r15
  ret
33 |
--------------------------------------------------------------------------------
/MicrobenchmarkForm.resx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 | text/microsoft-resx
110 |
111 |
112 | 2.0
113 |
114 |
115 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
116 |
117 |
118 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
119 |
120 |
--------------------------------------------------------------------------------
/MicrobenchmarkGui.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | AnyCPU
7 | {EA6B854D-FAD1-4212-8953-4F32286E1B57}
8 | WinExe
9 | MicrobenchmarkGui
10 | MicrobenchmarkGui
11 | v4.7.2
12 | 512
13 | true
14 | true
15 |
16 |
17 | x64
18 | true
19 | full
20 | false
21 | x64\Debug\
22 | DEBUG;TRACE
23 | prompt
24 | 4
25 |
26 |
27 | x64
28 | pdbonly
29 | true
30 | x64\Release\
31 | TRACE
32 | prompt
33 | 4
34 | false
35 |
36 |
37 |
38 | packages\Newtonsoft.Json.13.0.3\lib\net45\Newtonsoft.Json.dll
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 | Form
58 |
59 |
60 | BenchmarkSubmissionDialog.cs
61 |
62 |
63 |
64 |
65 |
66 |
67 | Form
68 |
69 |
70 | MicrobenchmarkForm.cs
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 | BenchmarkSubmissionDialog.cs
79 |
80 |
81 | MicrobenchmarkForm.cs
82 |
83 |
84 | ResXFileCodeGenerator
85 | Resources.Designer.cs
86 | Designer
87 |
88 |
89 | True
90 | Resources.resx
91 |
92 |
93 |
94 |
95 | SettingsSingleFileGenerator
96 | Settings.Designer.cs
97 |
98 |
99 | True
100 | Settings.settings
101 | True
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
--------------------------------------------------------------------------------
/MicrobenchmarkGui.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.32630.194
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MicrobenchmarkGui", "MicrobenchmarkGui.csproj", "{EA6B854D-FAD1-4212-8953-4F32286E1B57}"
7 | EndProject
8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BenchmarkDll", "BenchmarkDll.vcxproj", "{2FC1B46D-1C99-4F82-96F4-E99320552268}"
9 | EndProject
10 | Global
11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | Debug|x64 = Debug|x64
13 | Release|x64 = Release|x64
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {EA6B854D-FAD1-4212-8953-4F32286E1B57}.Debug|x64.ActiveCfg = Debug|Any CPU
17 | {EA6B854D-FAD1-4212-8953-4F32286E1B57}.Debug|x64.Build.0 = Debug|Any CPU
18 | {EA6B854D-FAD1-4212-8953-4F32286E1B57}.Release|x64.ActiveCfg = Release|Any CPU
19 | {EA6B854D-FAD1-4212-8953-4F32286E1B57}.Release|x64.Build.0 = Release|Any CPU
20 | {2FC1B46D-1C99-4F82-96F4-E99320552268}.Debug|x64.ActiveCfg = Debug|x64
21 | {2FC1B46D-1C99-4F82-96F4-E99320552268}.Debug|x64.Build.0 = Debug|x64
22 | {2FC1B46D-1C99-4F82-96F4-E99320552268}.Release|x64.ActiveCfg = Release|x64
23 | {2FC1B46D-1C99-4F82-96F4-E99320552268}.Release|x64.Build.0 = Release|x64
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | GlobalSection(ExtensibilityGlobals) = postSolution
29 | SolutionGuid = {8171071B-2507-4C70-864A-5EBA5237C090}
30 | EndGlobalSection
31 | EndGlobal
32 |
--------------------------------------------------------------------------------
/OpCode.cs:
--------------------------------------------------------------------------------
1 | // From LibreHardwareMonitor
2 | // Mozilla Public License 2.0
3 | // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 | // Copyright (C) LibreHardwareMonitor and Contributors
5 | // All Rights Reserved
6 |
7 | using System;
8 | using System.Runtime.InteropServices;
9 | using static System.Windows.Forms.VisualStyles.VisualStyleElement;
10 |
11 | namespace MicrobenchmarkGui
12 | {
/// <summary>
/// Executes the CPUID and RDTSC instructions from managed code by copying small
/// machine-code stubs into executable memory and wrapping them in delegates.
/// Call <see cref="Open"/> before using <see cref="Cpuid"/>, <see cref="Rdtsc"/>
/// or any of the Get* helpers, and <see cref="Close"/> when done.
/// </summary>
internal static class OpCode
{
    /// <summary>Delegate bound to the CPUID stub; null until <see cref="Open"/> is called.</summary>
    public static CpuidDelegate Cpuid;

    /// <summary>Delegate bound to the RDTSC stub; null until <see cref="Open"/> is called.</summary>
    public static RdtscDelegate Rdtsc;

    // Executable buffer holding both stubs (RDTSC first, CPUID right after it).
    private static IntPtr _codeBuffer;
    private static ulong _size;

    // void __stdcall cpuidex(unsigned int index, unsigned int ecxValue,
    //   unsigned int* eax, unsigned int* ebx, unsigned int* ecx,
    //   unsigned int* edx)
    // {
    //   int info[4];
    //   __cpuidex(info, index, ecxValue);
    //   *eax = info[0];
    //   *ebx = info[1];
    //   *ecx = info[2];
    //   *edx = info[3];
    // }

    private static readonly byte[] CpuId32 =
    {
        0x55,                         // push ebp
        0x8B, 0xEC,                   // mov ebp, esp
        0x83, 0xEC, 0x10,             // sub esp, 10h
        0x8B, 0x45, 0x08,             // mov eax, dword ptr [ebp+8]     (index)
        0x8B, 0x4D, 0x0C,             // mov ecx, dword ptr [ebp+0Ch]   (ecxValue)
        0x53,                         // push ebx
        0x0F, 0xA2,                   // cpuid
        0x56,                         // push esi
        0x8D, 0x75, 0xF0,             // lea esi, [ebp-10h]             (info)
        0x89, 0x06,                   // mov dword ptr [esi], eax
        0x8B, 0x45, 0x10,             // mov eax, dword ptr [ebp+10h]   (eax out pointer)
        0x89, 0x5E, 0x04,             // mov dword ptr [esi+4], ebx
        0x89, 0x4E, 0x08,             // mov dword ptr [esi+8], ecx
        0x89, 0x56, 0x0C,             // mov dword ptr [esi+0Ch], edx
        0x8B, 0x4D, 0xF0,             // mov ecx, dword ptr [ebp-10h]   (info[0])
        0x89, 0x08,                   // mov dword ptr [eax], ecx
        0x8B, 0x45, 0x14,             // mov eax, dword ptr [ebp+14h]   (ebx out pointer)
        0x8B, 0x4D, 0xF4,             // mov ecx, dword ptr [ebp-0Ch]   (info[1])
        0x89, 0x08,                   // mov dword ptr [eax], ecx
        0x8B, 0x45, 0x18,             // mov eax, dword ptr [ebp+18h]   (ecx out pointer)
        0x8B, 0x4D, 0xF8,             // mov ecx, dword ptr [ebp-8]     (info[2])
        0x89, 0x08,                   // mov dword ptr [eax], ecx
        0x8B, 0x45, 0x1C,             // mov eax, dword ptr [ebp+1Ch]   (edx out pointer)
        0x8B, 0x4D, 0xFC,             // mov ecx, dword ptr [ebp-4]     (info[3])
        0x5E,                         // pop esi
        0x89, 0x08,                   // mov dword ptr [eax], ecx
        0x5B,                         // pop ebx
        0xC9,                         // leave
        0xC2, 0x18, 0x00              // ret 18h
    };

    // Not selected by Open(), which always uses CpuId64Windows in 64-bit processes.
    private static readonly byte[] CpuId64Linux =
    {
        0x49, 0x89, 0xD2,             // mov r10, rdx
        0x49, 0x89, 0xCB,             // mov r11, rcx
        0x53,                         // push rbx
        0x89, 0xF8,                   // mov eax, edi
        0x89, 0xF1,                   // mov ecx, esi
        0x0F, 0xA2,                   // cpuid
        0x41, 0x89, 0x02,             // mov dword ptr [r10], eax
        0x41, 0x89, 0x1B,             // mov dword ptr [r11], ebx
        0x41, 0x89, 0x08,             // mov dword ptr [r8], ecx
        0x41, 0x89, 0x11,             // mov dword ptr [r9], edx
        0x5B,                         // pop rbx
        0xC3                          // ret
    };

    private static readonly byte[] CpuId64Windows =
    {
        0x48, 0x89, 0x5C, 0x24, 0x08, // mov qword ptr [rsp+8], rbx
        0x8B, 0xC1,                   // mov eax, ecx
        0x8B, 0xCA,                   // mov ecx, edx
        0x0F, 0xA2,                   // cpuid
        0x41, 0x89, 0x00,             // mov dword ptr [r8], eax
        0x48, 0x8B, 0x44, 0x24, 0x28, // mov rax, qword ptr [rsp+28h]
        0x41, 0x89, 0x19,             // mov dword ptr [r9], ebx
        0x48, 0x8B, 0x5C, 0x24, 0x08, // mov rbx, qword ptr [rsp+8]
        0x89, 0x08,                   // mov dword ptr [rax], ecx
        0x48, 0x8B, 0x44, 0x24, 0x30, // mov rax, qword ptr [rsp+30h]
        0x89, 0x10,                   // mov dword ptr [rax], edx
        0xC3                          // ret
    };

    // unsigned __int64 __stdcall rdtsc() {
    //   return __rdtsc();
    // }

    private static readonly byte[] Rdtsc32 =
    {
        0x0F, 0x31,                   // rdtsc
        0xC3                          // ret
    };

    private static readonly byte[] Rdtsc64 =
    {
        0x0F, 0x31,                   // rdtsc
        0x48, 0xC1, 0xE2, 0x20,       // shl rdx, 20h
        0x48, 0x0B, 0xC2,             // or rax, rdx
        0xC3                          // ret
    };

    // NOTE(review): the native stubs do not set a return value, so the bool
    // returned through this delegate is not meaningful - callers should ignore it.
    [UnmanagedFunctionPointer(CallingConvention.StdCall)]
    public delegate bool CpuidDelegate(uint index, uint ecxValue, out uint eax, out uint ebx, out uint ecx, out uint edx);

    [UnmanagedFunctionPointer(CallingConvention.StdCall)]
    public delegate ulong RdtscDelegate();

    /// <summary>Allocation type flags passed to VirtualAlloc / VirtualFree.</summary>
    [Flags]
    internal enum MEM : uint
    {
        MEM_COMMIT = 0x1000,
        MEM_RESERVE = 0x2000,
        MEM_DECOMMIT = 0x4000,
        MEM_RELEASE = 0x8000,
        MEM_RESET = 0x80000,
        MEM_LARGE_PAGES = 0x20000000,
        MEM_PHYSICAL = 0x400000,
        MEM_TOP_DOWN = 0x100000,
        MEM_WRITE_WATCH = 0x200000
    }

    /// <summary>Page protection flags passed to VirtualAlloc.</summary>
    [Flags]
    internal enum PAGE : uint
    {
        PAGE_EXECUTE = 0x10,
        PAGE_EXECUTE_READ = 0x20,
        PAGE_EXECUTE_READWRITE = 0x40,
        PAGE_EXECUTE_WRITECOPY = 0x80,
        PAGE_NOACCESS = 0x01,
        PAGE_READONLY = 0x02,
        PAGE_READWRITE = 0x04,
        PAGE_WRITECOPY = 0x08,
        PAGE_GUARD = 0x100,
        PAGE_NOCACHE = 0x200,
        PAGE_WRITECOMBINE = 0x400
    }

    [DllImport("kernel32.dll", CallingConvention = CallingConvention.Winapi)]
    internal static extern IntPtr VirtualAlloc(IntPtr lpAddress, UIntPtr dwSize, MEM flAllocationType, PAGE flProtect);

    [DllImport("kernel32.dll", CallingConvention = CallingConvention.Winapi)]
    internal static extern bool VirtualFree(IntPtr lpAddress, UIntPtr dwSize, MEM dwFreeType);

    /// <summary>
    /// Allocates executable memory, copies the RDTSC and CPUID stubs for the
    /// current process bitness into it and binds <see cref="Rdtsc"/> and
    /// <see cref="Cpuid"/> to them. Calling it again releases the previous buffer first.
    /// </summary>
    /// <exception cref="InvalidOperationException">The executable buffer could not be allocated.</exception>
    public static void Open()
    {
        byte[] rdTscCode;
        byte[] cpuidCode;
        if (IntPtr.Size == 4)
        {
            rdTscCode = Rdtsc32;
            cpuidCode = CpuId32;
        }
        else
        {
            rdTscCode = Rdtsc64;
            cpuidCode = CpuId64Windows;
        }

        // Release a buffer left over from an earlier Open() so it is not leaked.
        if (_codeBuffer != IntPtr.Zero)
        {
            Close();
        }

        _size = (ulong)(rdTscCode.Length + cpuidCode.Length);

        _codeBuffer = VirtualAlloc(IntPtr.Zero,
            (UIntPtr)_size,
            MEM.MEM_COMMIT | MEM.MEM_RESERVE,
            PAGE.PAGE_EXECUTE_READWRITE);

        if (_codeBuffer == IntPtr.Zero)
        {
            _size = 0;
            throw new InvalidOperationException("VirtualAlloc failed to allocate executable memory for the CPUID/RDTSC stubs.");
        }

        // RDTSC stub sits at the start of the buffer, the CPUID stub directly behind it.
        Marshal.Copy(rdTscCode, 0, _codeBuffer, rdTscCode.Length);
        Rdtsc = Marshal.GetDelegateForFunctionPointer(_codeBuffer, typeof(RdtscDelegate)) as RdtscDelegate;
        IntPtr cpuidAddress = (IntPtr)((long)_codeBuffer + rdTscCode.Length);
        Marshal.Copy(cpuidCode, 0, cpuidAddress, cpuidCode.Length);
        Cpuid = Marshal.GetDelegateForFunctionPointer(cpuidAddress, typeof(CpuidDelegate)) as CpuidDelegate;
    }

    /// <summary>
    /// Clears the delegates and releases the executable buffer. Safe to call
    /// when <see cref="Open"/> was never called or when already closed.
    /// </summary>
    public static void Close()
    {
        Rdtsc = null;
        Cpuid = null;

        if (_codeBuffer != IntPtr.Zero)
        {
            VirtualFree(_codeBuffer, UIntPtr.Zero, MEM.MEM_RELEASE);
            _codeBuffer = IntPtr.Zero;
            _size = 0;
        }
    }

    // Stores value into dst[offset..offset+3], least significant byte first.
    private static void WriteLittleEndian(byte[] dst, int offset, uint value)
    {
        dst[offset] = (byte)value;
        dst[offset + 1] = (byte)(value >> 8);
        dst[offset + 2] = (byte)(value >> 16);
        dst[offset + 3] = (byte)(value >> 24);
    }

    /// <summary>
    /// Gets the CPU manufacturer ID string, from cpuid with eax = 0
    /// </summary>
    /// <returns>Manufacturer ID string (12 ASCII characters taken from ebx, edx, ecx in that order)</returns>
    public static string GetManufacturerId()
    {
        uint eax, ecx, edx, ebx;
        byte[] cpuManufacturerBytes = new byte[12];
        Cpuid(0, 0, out eax, out ebx, out ecx, out edx);

        // The register order ebx, edx, ecx is what spells out the string.
        WriteLittleEndian(cpuManufacturerBytes, 0, ebx);
        WriteLittleEndian(cpuManufacturerBytes, 4, edx);
        WriteLittleEndian(cpuManufacturerBytes, 8, ecx);
        return System.Text.Encoding.ASCII.GetString(cpuManufacturerBytes);
    }

    /// <summary>
    /// Gets the processor name from cpuid leaves 0x80000002 through 0x80000004.
    /// </summary>
    /// <returns>
    /// All 48 bytes of the three leaves decoded as ASCII. Nothing is trimmed,
    /// so any padding bytes the CPU reports are part of the returned string.
    /// </returns>
    public static string GetProcessorName()
    {
        uint[] buffer = new uint[12];
        Cpuid(0x80000002, 0, out buffer[0], out buffer[1], out buffer[2], out buffer[3]);
        Cpuid(0x80000003, 0, out buffer[4], out buffer[5], out buffer[6], out buffer[7]);
        Cpuid(0x80000004, 0, out buffer[8], out buffer[9], out buffer[10], out buffer[11]);

        byte[] dst = new byte[buffer.Length * 4];
        Buffer.BlockCopy(buffer, 0, dst, 0, buffer.Length * 4);
        return System.Text.Encoding.ASCII.GetString(dst);
    }

    /// <summary>
    /// Decodes family, model and stepping from cpuid with eax = 1.
    /// </summary>
    /// <param name="family">family id; the extended family (eax bits 20-27) is added when the base family is 15</param>
    /// <param name="model">model id; the extended model (eax bits 16-19) is used as the high nibble when the base family is 6 or 15</param>
    /// <param name="stepping">stepping id (eax bits 0-3)</param>
    public static void GetProcessorVersion(out byte family, out byte model, out byte stepping)
    {
        uint eax, ecx, edx, ebx;
        Cpuid(1, 0, out eax, out ebx, out ecx, out edx);

        stepping = (byte)(eax & 0xF);
        family = (byte)((eax >> 8) & 0xF);
        model = (byte)((eax >> 4) & 0xF);

        // wikipedia says if family id is 6 or 15, model = model + extended model id shifted left by 4 bits
        // extended model id starts on bit 16
        if (family == 6 || family == 15)
        {
            model += (byte)((eax >> 12) & 0xF0);
        }

        // if family is 15, family = family + extended family
        if (family == 15)
        {
            family += (byte)(eax >> 20);
        }
    }
}
342 | }
343 |
--------------------------------------------------------------------------------
/OpenCL/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/OpenCL/README.md:
--------------------------------------------------------------------------------
1 | # OpenCL™ API Headers
2 |
3 | This repository contains C language headers for the OpenCL API.
4 |
5 | The authoritative public repository for these headers is located at:
6 |
7 | https://github.com/KhronosGroup/OpenCL-Headers
8 |
9 | Issues, proposed fixes for issues, and other suggested changes should be
10 | created using Github.
11 |
12 | ## Branch Structure
13 |
14 | The OpenCL API headers in this repository are Unified headers and are designed
15 | to work with all released OpenCL versions. This differs from previous OpenCL
16 | API headers, where version-specific API headers either existed in separate
17 | branches, or in separate folders in a branch.
18 |
19 | ## Compiling for a Specific OpenCL Version
20 |
21 | By default, the OpenCL API headers in this repository are for the latest
22 | OpenCL version (currently OpenCL 2.2). To use these API headers to target
23 | a different OpenCL version, an application may `#define` the preprocessor
24 | value `CL_TARGET_OPENCL_VERSION` before including the OpenCL API headers.
25 | The `CL_TARGET_OPENCL_VERSION` is a three digit decimal value representing
26 | the OpenCL API version.
27 |
28 | For example, to enforce usage of no more than the OpenCL 1.2 APIs, you may
29 | include the OpenCL API headers as follows:
30 |
31 | ```
32 | #define CL_TARGET_OPENCL_VERSION 120
33 | #include <CL/opencl.h>
34 | ```
35 |
36 | ## Directory Structure
37 |
38 | ```
39 | README.md This file
40 | LICENSE Source license for the OpenCL API headers
41 | CL/ Unified OpenCL API headers tree
42 | ```
43 |
44 | ## License
45 |
46 | See [LICENSE](LICENSE).
47 |
48 | ---
49 |
50 | OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos.
51 |
--------------------------------------------------------------------------------
/OpenCL/include/CL/cl_d3d10.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2008-2020 The Khronos Group Inc.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 |
17 | #ifndef __OPENCL_CL_D3D10_H
18 | #define __OPENCL_CL_D3D10_H
19 |
20 | #include <d3d10.h>
21 | #include <CL/cl.h>
22 | #include <CL/cl_platform.h>
23 |
24 | #ifdef __cplusplus
25 | extern "C" {
26 | #endif
27 |
28 | /******************************************************************************
29 | * cl_khr_d3d10_sharing */
30 | #define cl_khr_d3d10_sharing 1
31 |
32 | typedef cl_uint cl_d3d10_device_source_khr;
33 | typedef cl_uint cl_d3d10_device_set_khr;
34 |
35 | /******************************************************************************/
36 |
37 | /* Error Codes */
38 | #define CL_INVALID_D3D10_DEVICE_KHR -1002
39 | #define CL_INVALID_D3D10_RESOURCE_KHR -1003
40 | #define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
41 | #define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
42 |
43 | /* cl_d3d10_device_source_khr */
44 | #define CL_D3D10_DEVICE_KHR 0x4010
45 | #define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
46 |
47 | /* cl_d3d10_device_set_khr */
48 | #define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
49 | #define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
50 |
51 | /* cl_context_info */
52 | #define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
53 | #define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
54 |
55 | /* cl_mem_info */
56 | #define CL_MEM_D3D10_RESOURCE_KHR 0x4015
57 |
58 | /* cl_image_info */
59 | #define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
60 |
61 | /* cl_command_type */
62 | #define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
63 | #define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
64 |
65 | /******************************************************************************/
66 |
67 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
68 | cl_platform_id platform,
69 | cl_d3d10_device_source_khr d3d_device_source,
70 | void * d3d_object,
71 | cl_d3d10_device_set_khr d3d_device_set,
72 | cl_uint num_entries,
73 | cl_device_id * devices,
74 | cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
75 |
76 | typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
77 | cl_context context,
78 | cl_mem_flags flags,
79 | ID3D10Buffer * resource,
80 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
81 |
82 | typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
83 | cl_context context,
84 | cl_mem_flags flags,
85 | ID3D10Texture2D * resource,
86 | UINT subresource,
87 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
88 |
89 | typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
90 | cl_context context,
91 | cl_mem_flags flags,
92 | ID3D10Texture3D * resource,
93 | UINT subresource,
94 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
95 |
96 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
97 | cl_command_queue command_queue,
98 | cl_uint num_objects,
99 | const cl_mem * mem_objects,
100 | cl_uint num_events_in_wait_list,
101 | const cl_event * event_wait_list,
102 | cl_event * event) CL_API_SUFFIX__VERSION_1_0;
103 |
104 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
105 | cl_command_queue command_queue,
106 | cl_uint num_objects,
107 | const cl_mem * mem_objects,
108 | cl_uint num_events_in_wait_list,
109 | const cl_event * event_wait_list,
110 | cl_event * event) CL_API_SUFFIX__VERSION_1_0;
111 |
112 | #ifdef __cplusplus
113 | }
114 | #endif
115 |
116 | #endif /* __OPENCL_CL_D3D10_H */
117 |
--------------------------------------------------------------------------------
/OpenCL/include/CL/cl_d3d11.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2008-2020 The Khronos Group Inc.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 |
17 | #ifndef __OPENCL_CL_D3D11_H
18 | #define __OPENCL_CL_D3D11_H
19 |
20 | #include <d3d11.h>
21 | #include <CL/cl.h>
22 | #include <CL/cl_platform.h>
23 |
24 | #ifdef __cplusplus
25 | extern "C" {
26 | #endif
27 |
28 | /******************************************************************************
29 | * cl_khr_d3d11_sharing */
30 | #define cl_khr_d3d11_sharing 1
31 |
32 | typedef cl_uint cl_d3d11_device_source_khr;
33 | typedef cl_uint cl_d3d11_device_set_khr;
34 |
35 | /******************************************************************************/
36 |
37 | /* Error Codes */
38 | #define CL_INVALID_D3D11_DEVICE_KHR -1006
39 | #define CL_INVALID_D3D11_RESOURCE_KHR -1007
40 | #define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
41 | #define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
42 |
43 | /* cl_d3d11_device_source_khr */
44 | #define CL_D3D11_DEVICE_KHR 0x4019
45 | #define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
46 |
47 | /* cl_d3d11_device_set_khr */
48 | #define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
49 | #define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
50 |
51 | /* cl_context_info */
52 | #define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
53 | #define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
54 |
55 | /* cl_mem_info */
56 | #define CL_MEM_D3D11_RESOURCE_KHR 0x401E
57 |
58 | /* cl_image_info */
59 | #define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
60 |
61 | /* cl_command_type */
62 | #define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
63 | #define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
64 |
65 | /******************************************************************************/
66 |
67 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
68 | cl_platform_id platform,
69 | cl_d3d11_device_source_khr d3d_device_source,
70 | void * d3d_object,
71 | cl_d3d11_device_set_khr d3d_device_set,
72 | cl_uint num_entries,
73 | cl_device_id * devices,
74 | cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
75 |
76 | typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
77 | cl_context context,
78 | cl_mem_flags flags,
79 | ID3D11Buffer * resource,
80 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
81 |
82 | typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
83 | cl_context context,
84 | cl_mem_flags flags,
85 | ID3D11Texture2D * resource,
86 | UINT subresource,
87 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
88 |
89 | typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
90 | cl_context context,
91 | cl_mem_flags flags,
92 | ID3D11Texture3D * resource,
93 | UINT subresource,
94 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
95 |
96 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
97 | cl_command_queue command_queue,
98 | cl_uint num_objects,
99 | const cl_mem * mem_objects,
100 | cl_uint num_events_in_wait_list,
101 | const cl_event * event_wait_list,
102 | cl_event * event) CL_API_SUFFIX__VERSION_1_2;
103 |
104 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
105 | cl_command_queue command_queue,
106 | cl_uint num_objects,
107 | const cl_mem * mem_objects,
108 | cl_uint num_events_in_wait_list,
109 | const cl_event * event_wait_list,
110 | cl_event * event) CL_API_SUFFIX__VERSION_1_2;
111 |
112 | #ifdef __cplusplus
113 | }
114 | #endif
115 |
116 | #endif /* __OPENCL_CL_D3D11_H */
117 |
--------------------------------------------------------------------------------
/OpenCL/include/CL/cl_dx9_media_sharing.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2008-2020 The Khronos Group Inc.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 |
17 | #ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
18 | #define __OPENCL_CL_DX9_MEDIA_SHARING_H
19 |
20 | #include <CL/cl.h>
21 | #include <CL/cl_platform.h>
22 |
23 | #ifdef __cplusplus
24 | extern "C" {
25 | #endif
26 |
27 | /******************************************************************************/
28 | /* cl_khr_dx9_media_sharing */
29 | #define cl_khr_dx9_media_sharing 1
30 |
31 | typedef cl_uint cl_dx9_media_adapter_type_khr;
32 | typedef cl_uint cl_dx9_media_adapter_set_khr;
33 |
34 | #if defined(_WIN32)
35 | #include <d3d9.h>
36 | typedef struct _cl_dx9_surface_info_khr /* pairs a Direct3D 9 surface pointer with a Win32 handle */
37 | {
38 | IDirect3DSurface9 *resource; /* pointer to the Direct3D 9 surface interface */
39 | HANDLE shared_handle; /* NOTE(review): presumably the shared handle of `resource`; confirm against the cl_khr_dx9_media_sharing spec */
40 | } cl_dx9_surface_info_khr;
41 | #endif
42 |
43 |
44 | /******************************************************************************/
45 |
46 | /* Error Codes */
47 | #define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
48 | #define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
49 | #define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
50 | #define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
51 |
52 | /* cl_dx9_media_adapter_type_khr */
53 | #define CL_ADAPTER_D3D9_KHR 0x2020
54 | #define CL_ADAPTER_D3D9EX_KHR 0x2021
55 | #define CL_ADAPTER_DXVA_KHR 0x2022
56 |
57 | /* cl_dx9_media_adapter_set_khr */
58 | #define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
59 | #define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
60 |
61 | /* cl_context_info */
62 | #define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
63 | #define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
64 | #define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
65 |
66 | /* cl_mem_info */
67 | #define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
68 | #define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
69 |
70 | /* cl_image_info */
71 | #define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
72 |
73 | /* cl_command_type */
74 | #define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
75 | #define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
76 |
77 | /******************************************************************************/
78 |
79 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
80 | cl_platform_id platform,
81 | cl_uint num_media_adapters,
82 | cl_dx9_media_adapter_type_khr * media_adapter_type,
83 | void * media_adapters,
84 | cl_dx9_media_adapter_set_khr media_adapter_set,
85 | cl_uint num_entries,
86 | cl_device_id * devices,
87 | cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
88 |
89 | typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
90 | cl_context context,
91 | cl_mem_flags flags,
92 | cl_dx9_media_adapter_type_khr adapter_type,
93 | void * surface_info,
94 | cl_uint plane,
95 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
96 |
97 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
98 | cl_command_queue command_queue,
99 | cl_uint num_objects,
100 | const cl_mem * mem_objects,
101 | cl_uint num_events_in_wait_list,
102 | const cl_event * event_wait_list,
103 | cl_event * event) CL_API_SUFFIX__VERSION_1_2;
104 |
105 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
106 | cl_command_queue command_queue,
107 | cl_uint num_objects,
108 | const cl_mem * mem_objects,
109 | cl_uint num_events_in_wait_list,
110 | const cl_event * event_wait_list,
111 | cl_event * event) CL_API_SUFFIX__VERSION_1_2;
112 |
113 | #ifdef __cplusplus
114 | }
115 | #endif
116 |
117 | #endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */
118 |
--------------------------------------------------------------------------------
/OpenCL/include/CL/cl_dx9_media_sharing_intel.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2008-2020 The Khronos Group Inc.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 | /*****************************************************************************\
17 |
18 | Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
19 |
20 | THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
24 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
28 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
29 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
30 | MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
32 | File Name: cl_dx9_media_sharing_intel.h
33 |
34 | Abstract:
35 |
36 | Notes:
37 |
38 | \*****************************************************************************/
39 |
40 | #ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
41 | #define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
42 |
43 | #include <CL/cl.h>
44 | #include <CL/cl_platform.h>
45 | #include <d3d9.h>
46 | #include <dxvahd.h>
47 | #include <wtypes.h>
48 | #include <d3d9types.h>
49 |
50 | #ifdef __cplusplus
51 | extern "C" {
52 | #endif
53 |
54 | /***************************************
55 | * cl_intel_dx9_media_sharing extension *
56 | ****************************************/
57 |
58 | #define cl_intel_dx9_media_sharing 1
59 |
60 | typedef cl_uint cl_dx9_device_source_intel;
61 | typedef cl_uint cl_dx9_device_set_intel;
62 |
63 | /* error codes */
64 | #define CL_INVALID_DX9_DEVICE_INTEL -1010
65 | #define CL_INVALID_DX9_RESOURCE_INTEL -1011
66 | #define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012
67 | #define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013
68 |
69 | /* cl_dx9_device_source_intel */
70 | #define CL_D3D9_DEVICE_INTEL 0x4022
71 | #define CL_D3D9EX_DEVICE_INTEL 0x4070
72 | #define CL_DXVA_DEVICE_INTEL 0x4071
73 |
74 | /* cl_dx9_device_set_intel */
75 | #define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024
76 | #define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025
77 |
78 | /* cl_context_info */
79 | #define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
80 | #define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
81 | #define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
82 |
83 | /* cl_mem_info */
84 | #define CL_MEM_DX9_RESOURCE_INTEL 0x4027
85 | #define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074
86 |
87 | /* cl_image_info */
88 | #define CL_IMAGE_DX9_PLANE_INTEL 0x4075
89 |
90 | /* cl_command_type */
91 | #define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A
92 | #define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B
93 | /******************************************************************************/
94 |
95 | extern CL_API_ENTRY cl_int CL_API_CALL
96 | clGetDeviceIDsFromDX9INTEL(
97 | cl_platform_id platform,
98 | cl_dx9_device_source_intel dx9_device_source,
99 | void* dx9_object,
100 | cl_dx9_device_set_intel dx9_device_set,
101 | cl_uint num_entries,
102 | cl_device_id* devices,
103 | cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
104 |
105 | typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
106 | cl_platform_id platform,
107 | cl_dx9_device_source_intel dx9_device_source,
108 | void* dx9_object,
109 | cl_dx9_device_set_intel dx9_device_set,
110 | cl_uint num_entries,
111 | cl_device_id* devices,
112 | cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1;
113 |
114 | extern CL_API_ENTRY cl_mem CL_API_CALL
115 | clCreateFromDX9MediaSurfaceINTEL(
116 | cl_context context,
117 | cl_mem_flags flags,
118 | IDirect3DSurface9* resource,
119 | HANDLE sharedHandle,
120 | UINT plane,
121 | cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
122 |
123 | typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
124 | cl_context context,
125 | cl_mem_flags flags,
126 | IDirect3DSurface9* resource,
127 | HANDLE sharedHandle,
128 | UINT plane,
129 | cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
130 |
131 | extern CL_API_ENTRY cl_int CL_API_CALL
132 | clEnqueueAcquireDX9ObjectsINTEL(
133 | cl_command_queue command_queue,
134 | cl_uint num_objects,
135 | const cl_mem* mem_objects,
136 | cl_uint num_events_in_wait_list,
137 | const cl_event* event_wait_list,
138 | cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
139 |
140 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
141 | cl_command_queue command_queue,
142 | cl_uint num_objects,
143 | const cl_mem* mem_objects,
144 | cl_uint num_events_in_wait_list,
145 | const cl_event* event_wait_list,
146 | cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
147 |
148 | extern CL_API_ENTRY cl_int CL_API_CALL
149 | clEnqueueReleaseDX9ObjectsINTEL(
150 | cl_command_queue command_queue,
151 | cl_uint num_objects,
152 | cl_mem* mem_objects,
153 | cl_uint num_events_in_wait_list,
154 | const cl_event* event_wait_list,
155 | cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
156 |
157 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
158 | cl_command_queue command_queue,
159 | cl_uint num_objects,
160 | cl_mem* mem_objects,
161 | cl_uint num_events_in_wait_list,
162 | const cl_event* event_wait_list,
163 | cl_event* event) CL_EXT_SUFFIX__VERSION_1_1;
164 |
165 | #ifdef __cplusplus
166 | }
167 | #endif
168 |
169 | #endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */
170 |
--------------------------------------------------------------------------------
/OpenCL/include/CL/cl_egl.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2008-2020 The Khronos Group Inc.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 |
17 | #ifndef __OPENCL_CL_EGL_H
18 | #define __OPENCL_CL_EGL_H
19 |
20 | #include <CL/cl.h>
21 |
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 |
26 |
27 | /* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
28 | #define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
29 | #define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
30 | #define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
31 |
32 | /* Error type for clCreateFromEGLImageKHR */
33 | #define CL_INVALID_EGL_OBJECT_KHR -1093
34 | #define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
35 |
36 | /* CLeglImageKHR is an opaque handle to an EGLImage */
37 | typedef void* CLeglImageKHR;
38 |
39 | /* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
40 | typedef void* CLeglDisplayKHR;
41 |
42 | /* CLeglSyncKHR is an opaque handle to an EGLSync object */
43 | typedef void* CLeglSyncKHR;
44 |
45 | /* properties passed to clCreateFromEGLImageKHR */
46 | typedef intptr_t cl_egl_image_properties_khr;
47 |
48 |
49 | #define cl_khr_egl_image 1
50 |
51 | extern CL_API_ENTRY cl_mem CL_API_CALL
52 | clCreateFromEGLImageKHR(cl_context context,
53 | CLeglDisplayKHR egldisplay,
54 | CLeglImageKHR eglimage,
55 | cl_mem_flags flags,
56 | const cl_egl_image_properties_khr * properties,
57 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
58 |
59 | typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
60 | cl_context context,
61 | CLeglDisplayKHR egldisplay,
62 | CLeglImageKHR eglimage,
63 | cl_mem_flags flags,
64 | const cl_egl_image_properties_khr * properties,
65 | cl_int * errcode_ret);
66 |
67 |
68 | extern CL_API_ENTRY cl_int CL_API_CALL
69 | clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
70 | cl_uint num_objects,
71 | const cl_mem * mem_objects,
72 | cl_uint num_events_in_wait_list,
73 | const cl_event * event_wait_list,
74 | cl_event * event) CL_API_SUFFIX__VERSION_1_0;
75 |
76 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
77 | cl_command_queue command_queue,
78 | cl_uint num_objects,
79 | const cl_mem * mem_objects,
80 | cl_uint num_events_in_wait_list,
81 | const cl_event * event_wait_list,
82 | cl_event * event);
83 |
84 |
85 | extern CL_API_ENTRY cl_int CL_API_CALL
86 | clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
87 | cl_uint num_objects,
88 | const cl_mem * mem_objects,
89 | cl_uint num_events_in_wait_list,
90 | const cl_event * event_wait_list,
91 | cl_event * event) CL_API_SUFFIX__VERSION_1_0;
92 |
93 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
94 | cl_command_queue command_queue,
95 | cl_uint num_objects,
96 | const cl_mem * mem_objects,
97 | cl_uint num_events_in_wait_list,
98 | const cl_event * event_wait_list,
99 | cl_event * event);
100 |
101 |
102 | #define cl_khr_egl_event 1
103 |
104 | extern CL_API_ENTRY cl_event CL_API_CALL
105 | clCreateEventFromEGLSyncKHR(cl_context context,
106 | CLeglSyncKHR sync,
107 | CLeglDisplayKHR display,
108 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
109 |
110 | typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
111 | cl_context context,
112 | CLeglSyncKHR sync,
113 | CLeglDisplayKHR display,
114 | cl_int * errcode_ret);
115 |
116 | #ifdef __cplusplus
117 | }
118 | #endif
119 |
120 | #endif /* __OPENCL_CL_EGL_H */
121 |
--------------------------------------------------------------------------------
/OpenCL/include/CL/cl_gl.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2008-2020 The Khronos Group Inc.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 |
17 | #ifndef __OPENCL_CL_GL_H
18 | #define __OPENCL_CL_GL_H
19 |
20 | #include <CL/cl.h>
21 |
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 |
26 | typedef cl_uint cl_gl_object_type;
27 | typedef cl_uint cl_gl_texture_info;
28 | typedef cl_uint cl_gl_platform_info;
29 | typedef struct __GLsync *cl_GLsync;
30 |
31 | /* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
32 | #define CL_GL_OBJECT_BUFFER 0x2000
33 | #define CL_GL_OBJECT_TEXTURE2D 0x2001
34 | #define CL_GL_OBJECT_TEXTURE3D 0x2002
35 | #define CL_GL_OBJECT_RENDERBUFFER 0x2003
36 | #ifdef CL_VERSION_1_2
37 | #define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
38 | #define CL_GL_OBJECT_TEXTURE1D 0x200F
39 | #define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
40 | #define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
41 | #endif
42 |
43 | /* cl_gl_texture_info */
44 | #define CL_GL_TEXTURE_TARGET 0x2004
45 | #define CL_GL_MIPMAP_LEVEL 0x2005
46 | #ifdef CL_VERSION_1_2
47 | #define CL_GL_NUM_SAMPLES 0x2012
48 | #endif
49 |
50 |
51 | extern CL_API_ENTRY cl_mem CL_API_CALL
52 | clCreateFromGLBuffer(cl_context context,
53 | cl_mem_flags flags,
54 | cl_GLuint bufobj,
55 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
56 |
57 | #ifdef CL_VERSION_1_2
58 |
59 | extern CL_API_ENTRY cl_mem CL_API_CALL
60 | clCreateFromGLTexture(cl_context context,
61 | cl_mem_flags flags,
62 | cl_GLenum target,
63 | cl_GLint miplevel,
64 | cl_GLuint texture,
65 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
66 |
67 | #endif
68 |
69 | extern CL_API_ENTRY cl_mem CL_API_CALL
70 | clCreateFromGLRenderbuffer(cl_context context,
71 | cl_mem_flags flags,
72 | cl_GLuint renderbuffer,
73 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
74 |
75 | extern CL_API_ENTRY cl_int CL_API_CALL
76 | clGetGLObjectInfo(cl_mem memobj,
77 | cl_gl_object_type * gl_object_type,
78 | cl_GLuint * gl_object_name) CL_API_SUFFIX__VERSION_1_0;
79 |
80 | extern CL_API_ENTRY cl_int CL_API_CALL
81 | clGetGLTextureInfo(cl_mem memobj,
82 | cl_gl_texture_info param_name,
83 | size_t param_value_size,
84 | void * param_value,
85 | size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
86 |
87 | extern CL_API_ENTRY cl_int CL_API_CALL
88 | clEnqueueAcquireGLObjects(cl_command_queue command_queue,
89 | cl_uint num_objects,
90 | const cl_mem * mem_objects,
91 | cl_uint num_events_in_wait_list,
92 | const cl_event * event_wait_list,
93 | cl_event * event) CL_API_SUFFIX__VERSION_1_0;
94 |
95 | extern CL_API_ENTRY cl_int CL_API_CALL
96 | clEnqueueReleaseGLObjects(cl_command_queue command_queue,
97 | cl_uint num_objects,
98 | const cl_mem * mem_objects,
99 | cl_uint num_events_in_wait_list,
100 | const cl_event * event_wait_list,
101 | cl_event * event) CL_API_SUFFIX__VERSION_1_0;
102 |
103 |
104 | /* Deprecated OpenCL 1.1 APIs */
105 | extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
106 | clCreateFromGLTexture2D(cl_context context,
107 | cl_mem_flags flags,
108 | cl_GLenum target,
109 | cl_GLint miplevel,
110 | cl_GLuint texture,
111 | cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
112 |
113 | extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
114 | clCreateFromGLTexture3D(cl_context context,
115 | cl_mem_flags flags,
116 | cl_GLenum target,
117 | cl_GLint miplevel,
118 | cl_GLuint texture,
119 | cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
120 |
121 | /* cl_khr_gl_sharing extension */
122 |
123 | #define cl_khr_gl_sharing 1
124 |
125 | typedef cl_uint cl_gl_context_info;
126 |
127 | /* Additional Error Codes */
128 | #define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
129 |
130 | /* cl_gl_context_info */
131 | #define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
132 | #define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
133 |
134 | /* Additional cl_context_properties */
135 | #define CL_GL_CONTEXT_KHR 0x2008
136 | #define CL_EGL_DISPLAY_KHR 0x2009
137 | #define CL_GLX_DISPLAY_KHR 0x200A
138 | #define CL_WGL_HDC_KHR 0x200B
139 | #define CL_CGL_SHAREGROUP_KHR 0x200C
140 |
141 | extern CL_API_ENTRY cl_int CL_API_CALL
142 | clGetGLContextInfoKHR(const cl_context_properties * properties,
143 | cl_gl_context_info param_name,
144 | size_t param_value_size,
145 | void * param_value,
146 | size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
147 |
148 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
149 | const cl_context_properties * properties,
150 | cl_gl_context_info param_name,
151 | size_t param_value_size,
152 | void * param_value,
153 | size_t * param_value_size_ret);
154 |
155 | #ifdef __cplusplus
156 | }
157 | #endif
158 |
159 | #endif /* __OPENCL_CL_GL_H */
160 |
--------------------------------------------------------------------------------
/OpenCL/include/CL/cl_gl_ext.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2008-2020 The Khronos Group Inc.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 |
17 | #ifndef __OPENCL_CL_GL_EXT_H
18 | #define __OPENCL_CL_GL_EXT_H
19 |
20 | #ifdef __cplusplus
21 | extern "C" {
22 | #endif
23 |
24 | #include <CL/cl_gl.h>
25 |
26 | /*
27 | * cl_khr_gl_event extension
28 | */
29 | #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
30 |
31 | extern CL_API_ENTRY cl_event CL_API_CALL
32 | clCreateEventFromGLsyncKHR(cl_context context,
33 |                            cl_GLsync sync, /* named `sync` so the parameter no longer shadows its own type name */
34 |                            cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
35 |
36 | #ifdef __cplusplus
37 | }
38 | #endif
39 |
40 | #endif /* __OPENCL_CL_GL_EXT_H */
41 |
--------------------------------------------------------------------------------
/OpenCL/include/CL/cl_half.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2019-2020 The Khronos Group Inc.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 |
17 | /**
18 | * This is a header-only utility library that provides OpenCL host code with
19 | * routines for converting to/from cl_half values.
20 | *
21 | * Example usage:
22 | *
23 | * #include <CL/cl_half.h>
24 | * ...
25 | * cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);
26 | * cl_float f = cl_half_to_float(h);
27 | */
28 |
29 | #ifndef OPENCL_CL_HALF_H
30 | #define OPENCL_CL_HALF_H
31 |
32 | #include
33 |
34 | #include
35 |
36 | #ifdef __cplusplus
37 | extern "C" {
38 | #endif
39 |
40 |
/**
 * Selects how a value that is not exactly representable as a cl_half is
 * rounded by the conversion routines in this header.
 */
typedef enum
{
    CL_HALF_RTE = 0, /* round to nearest, ties to even */
    CL_HALF_RTZ = 1, /* round towards zero */
    CL_HALF_RTP = 2, /* round towards positive infinity */
    CL_HALF_RTN = 3  /* round towards negative infinity */
} cl_half_rounding_mode;
51 |
52 |
53 | /* Private utility macros. */
54 | #define CL_HALF_EXP_MASK 0x7C00
55 | #define CL_HALF_MAX_FINITE_MAG 0x7BFF
56 |
57 |
58 | /*
59 | * Utility to deal with values that overflow when converting to half precision.
60 | */
61 | static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,
62 | uint16_t sign)
63 | {
64 | if (rounding_mode == CL_HALF_RTZ)
65 | {
66 | // Round overflow towards zero -> largest finite number (preserving sign)
67 | return (sign << 15) | CL_HALF_MAX_FINITE_MAG;
68 | }
69 | else if (rounding_mode == CL_HALF_RTP && sign)
70 | {
71 | // Round negative overflow towards positive infinity -> most negative finite number
72 | return (1 << 15) | CL_HALF_MAX_FINITE_MAG;
73 | }
74 | else if (rounding_mode == CL_HALF_RTN && !sign)
75 | {
76 | // Round positive overflow towards negative infinity -> largest finite number
77 | return CL_HALF_MAX_FINITE_MAG;
78 | }
79 |
80 | // Overflow to infinity
81 | return (sign << 15) | CL_HALF_EXP_MASK;
82 | }
83 |
84 | /*
85 | * Utility to deal with values that underflow when converting to half precision.
86 | */
87 | static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,
88 | uint16_t sign)
89 | {
90 | if (rounding_mode == CL_HALF_RTP && !sign)
91 | {
92 | // Round underflow towards positive infinity -> smallest positive value
93 | return (sign << 15) | 1;
94 | }
95 | else if (rounding_mode == CL_HALF_RTN && sign)
96 | {
97 | // Round underflow towards negative infinity -> largest negative value
98 | return (sign << 15) | 1;
99 | }
100 |
101 | // Flush to zero
102 | return (sign << 15);
103 | }
104 |
105 |
106 | /**
107 | * Convert a cl_float to a cl_half.
108 | */
109 | static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)
110 | {
111 | // Type-punning to get direct access to underlying bits
112 | union
113 | {
114 | cl_float f;
115 | uint32_t i;
116 | } f32;
117 | f32.f = f;
118 |
119 | // Extract sign bit
120 | uint16_t sign = f32.i >> 31;
121 |
122 | // Extract FP32 exponent and mantissa
123 | uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;
124 | uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);
125 |
126 | // Remove FP32 exponent bias
127 | int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;
128 |
129 | // Add FP16 exponent bias
130 | uint16_t h_exp = exp + CL_HALF_MAX_EXP - 1;
131 |
132 | // Position of the bit that will become the FP16 mantissa LSB
133 | uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;
134 |
135 | // Check for NaN / infinity
136 | if (f_exp == 0xFF)
137 | {
138 | if (f_mant)
139 | {
140 | // NaN -> propagate mantissa and silence it
141 | uint16_t h_mant = f_mant >> lsb_pos;
142 | h_mant |= 0x200;
143 | return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
144 | }
145 | else
146 | {
147 | // Infinity -> zero mantissa
148 | return (sign << 15) | CL_HALF_EXP_MASK;
149 | }
150 | }
151 |
152 | // Check for zero
153 | if (!f_exp && !f_mant)
154 | {
155 | return (sign << 15);
156 | }
157 |
158 | // Check for overflow
159 | if (exp >= CL_HALF_MAX_EXP)
160 | {
161 | return cl_half_handle_overflow(rounding_mode, sign);
162 | }
163 |
164 | // Check for underflow
165 | if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
166 | {
167 | return cl_half_handle_underflow(rounding_mode, sign);
168 | }
169 |
170 | // Check for value that will become denormal
171 | if (exp < -14)
172 | {
173 | // Denormal -> include the implicit 1 from the FP32 mantissa
174 | h_exp = 0;
175 | f_mant |= 1 << (CL_FLT_MANT_DIG - 1);
176 |
177 | // Mantissa shift amount depends on exponent
178 | lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);
179 | }
180 |
181 | // Generate FP16 mantissa by shifting FP32 mantissa
182 | uint16_t h_mant = f_mant >> lsb_pos;
183 |
184 | // Check whether we need to round
185 | uint32_t halfway = 1 << (lsb_pos - 1);
186 | uint32_t mask = (halfway << 1) - 1;
187 | switch (rounding_mode)
188 | {
189 | case CL_HALF_RTE:
190 | if ((f_mant & mask) > halfway)
191 | {
192 | // More than halfway -> round up
193 | h_mant += 1;
194 | }
195 | else if ((f_mant & mask) == halfway)
196 | {
197 | // Exactly halfway -> round to nearest even
198 | if (h_mant & 0x1)
199 | h_mant += 1;
200 | }
201 | break;
202 | case CL_HALF_RTZ:
203 | // Mantissa has already been truncated -> do nothing
204 | break;
205 | case CL_HALF_RTP:
206 | if ((f_mant & mask) && !sign)
207 | {
208 | // Round positive numbers up
209 | h_mant += 1;
210 | }
211 | break;
212 | case CL_HALF_RTN:
213 | if ((f_mant & mask) && sign)
214 | {
215 | // Round negative numbers down
216 | h_mant += 1;
217 | }
218 | break;
219 | }
220 |
221 | // Check for mantissa overflow
222 | if (h_mant & 0x400)
223 | {
224 | h_exp += 1;
225 | h_mant = 0;
226 | }
227 |
228 | return (sign << 15) | (h_exp << 10) | h_mant;
229 | }
230 |
231 |
232 | /**
233 | * Convert a cl_double to a cl_half.
234 | */
235 | static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)
236 | {
237 | // Type-punning to get direct access to underlying bits
238 | union
239 | {
240 | cl_double d;
241 | uint64_t i;
242 | } f64;
243 | f64.d = d;
244 |
245 | // Extract sign bit
246 | uint16_t sign = f64.i >> 63;
247 |
248 | // Extract FP64 exponent and mantissa
249 | uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;
250 | uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);
251 |
252 | // Remove FP64 exponent bias
253 | int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;
254 |
255 | // Add FP16 exponent bias
256 | uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);
257 |
258 | // Position of the bit that will become the FP16 mantissa LSB
259 | uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;
260 |
261 | // Check for NaN / infinity
262 | if (d_exp == 0x7FF)
263 | {
264 | if (d_mant)
265 | {
266 | // NaN -> propagate mantissa and silence it
267 | uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
268 | h_mant |= 0x200;
269 | return (sign << 15) | CL_HALF_EXP_MASK | h_mant;
270 | }
271 | else
272 | {
273 | // Infinity -> zero mantissa
274 | return (sign << 15) | CL_HALF_EXP_MASK;
275 | }
276 | }
277 |
278 | // Check for zero
279 | if (!d_exp && !d_mant)
280 | {
281 | return (sign << 15);
282 | }
283 |
284 | // Check for overflow
285 | if (exp >= CL_HALF_MAX_EXP)
286 | {
287 | return cl_half_handle_overflow(rounding_mode, sign);
288 | }
289 |
290 | // Check for underflow
291 | if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))
292 | {
293 | return cl_half_handle_underflow(rounding_mode, sign);
294 | }
295 |
296 | // Check for value that will become denormal
297 | if (exp < -14)
298 | {
299 | // Include the implicit 1 from the FP64 mantissa
300 | h_exp = 0;
301 | d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);
302 |
303 | // Mantissa shift amount depends on exponent
304 | lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));
305 | }
306 |
307 | // Generate FP16 mantissa by shifting FP64 mantissa
308 | uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);
309 |
310 | // Check whether we need to round
311 | uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);
312 | uint64_t mask = (halfway << 1) - 1;
313 | switch (rounding_mode)
314 | {
315 | case CL_HALF_RTE:
316 | if ((d_mant & mask) > halfway)
317 | {
318 | // More than halfway -> round up
319 | h_mant += 1;
320 | }
321 | else if ((d_mant & mask) == halfway)
322 | {
323 | // Exactly halfway -> round to nearest even
324 | if (h_mant & 0x1)
325 | h_mant += 1;
326 | }
327 | break;
328 | case CL_HALF_RTZ:
329 | // Mantissa has already been truncated -> do nothing
330 | break;
331 | case CL_HALF_RTP:
332 | if ((d_mant & mask) && !sign)
333 | {
334 | // Round positive numbers up
335 | h_mant += 1;
336 | }
337 | break;
338 | case CL_HALF_RTN:
339 | if ((d_mant & mask) && sign)
340 | {
341 | // Round negative numbers down
342 | h_mant += 1;
343 | }
344 | break;
345 | }
346 |
347 | // Check for mantissa overflow
348 | if (h_mant & 0x400)
349 | {
350 | h_exp += 1;
351 | h_mant = 0;
352 | }
353 |
354 | return (sign << 15) | (h_exp << 10) | h_mant;
355 | }
356 |
357 |
358 | /**
359 | * Convert a cl_half to a cl_float.
360 | */
361 | static inline cl_float cl_half_to_float(cl_half h)
362 | {
363 | // Type-punning to get direct access to underlying bits
364 | union
365 | {
366 | cl_float f;
367 | uint32_t i;
368 | } f32;
369 |
370 | // Extract sign bit
371 | uint16_t sign = h >> 15;
372 |
373 | // Extract FP16 exponent and mantissa
374 | uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
375 | uint16_t h_mant = h & 0x3FF;
376 |
377 | // Remove FP16 exponent bias
378 | int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;
379 |
380 | // Add FP32 exponent bias
381 | uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;
382 |
383 | // Check for NaN / infinity
384 | if (h_exp == 0x1F)
385 | {
386 | if (h_mant)
387 | {
388 | // NaN -> propagate mantissa and silence it
389 | uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);
390 | f_mant |= 0x400000;
391 | f32.i = (sign << 31) | 0x7F800000 | f_mant;
392 | return f32.f;
393 | }
394 | else
395 | {
396 | // Infinity -> zero mantissa
397 | f32.i = (sign << 31) | 0x7F800000;
398 | return f32.f;
399 | }
400 | }
401 |
402 | // Check for zero / denormal
403 | if (h_exp == 0)
404 | {
405 | if (h_mant == 0)
406 | {
407 | // Zero -> zero exponent
408 | f_exp = 0;
409 | }
410 | else
411 | {
412 | // Denormal -> normalize it
413 | // - Shift mantissa to make most-significant 1 implicit
414 | // - Adjust exponent accordingly
415 | uint32_t shift = 0;
416 | while ((h_mant & 0x400) == 0)
417 | {
418 | h_mant <<= 1;
419 | shift++;
420 | }
421 | h_mant &= 0x3FF;
422 | f_exp -= shift - 1;
423 | }
424 | }
425 |
426 | f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);
427 | return f32.f;
428 | }
429 |
430 |
431 | #undef CL_HALF_EXP_MASK
432 | #undef CL_HALF_MAX_FINITE_MAG
433 |
434 |
435 | #ifdef __cplusplus
436 | }
437 | #endif
438 |
439 |
440 | #endif /* OPENCL_CL_HALF_H */
441 |
--------------------------------------------------------------------------------
/OpenCL/include/CL/cl_va_api_media_sharing_intel.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2008-2020 The Khronos Group Inc.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 | /*****************************************************************************\
17 |
18 | Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
19 |
20 | THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
24 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
28 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
29 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
30 | MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
32 | File Name: cl_va_api_media_sharing_intel.h
33 |
34 | Abstract:
35 |
36 | Notes:
37 |
38 | \*****************************************************************************/
39 |
40 |
41 | #ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
42 | #define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
43 |
44 | #include
45 | #include
46 | #include
47 |
48 | #ifdef __cplusplus
49 | extern "C" {
50 | #endif
51 |
52 | /******************************************
53 | * cl_intel_va_api_media_sharing extension *
54 | *******************************************/
55 |
56 | #define cl_intel_va_api_media_sharing 1
57 |
58 | /* error codes */
59 | #define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
60 | #define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
61 | #define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
62 | #define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
63 |
64 | /* cl_va_api_device_source_intel */
65 | #define CL_VA_API_DISPLAY_INTEL 0x4094
66 |
67 | /* cl_va_api_device_set_intel */
68 | #define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
69 | #define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
70 |
71 | /* cl_context_info */
72 | #define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
73 |
74 | /* cl_mem_info */
75 | #define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
76 |
77 | /* cl_image_info */
78 | #define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
79 |
80 | /* cl_command_type */
81 | #define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
82 | #define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
83 |
84 | typedef cl_uint cl_va_api_device_source_intel;
85 | typedef cl_uint cl_va_api_device_set_intel;
86 |
87 | extern CL_API_ENTRY cl_int CL_API_CALL
88 | clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
89 | cl_platform_id platform,
90 | cl_va_api_device_source_intel media_adapter_type,
91 | void* media_adapter,
92 | cl_va_api_device_set_intel media_adapter_set,
93 | cl_uint num_entries,
94 | cl_device_id* devices,
95 | cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
96 |
97 | typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
98 | cl_platform_id platform,
99 | cl_va_api_device_source_intel media_adapter_type,
100 | void* media_adapter,
101 | cl_va_api_device_set_intel media_adapter_set,
102 | cl_uint num_entries,
103 | cl_device_id* devices,
104 | cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
105 |
106 | extern CL_API_ENTRY cl_mem CL_API_CALL
107 | clCreateFromVA_APIMediaSurfaceINTEL(
108 | cl_context context,
109 | cl_mem_flags flags,
110 | VASurfaceID* surface,
111 | cl_uint plane,
112 | cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
113 |
114 | typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
115 | cl_context context,
116 | cl_mem_flags flags,
117 | VASurfaceID* surface,
118 | cl_uint plane,
119 | cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
120 |
121 | extern CL_API_ENTRY cl_int CL_API_CALL
122 | clEnqueueAcquireVA_APIMediaSurfacesINTEL(
123 | cl_command_queue command_queue,
124 | cl_uint num_objects,
125 | const cl_mem* mem_objects,
126 | cl_uint num_events_in_wait_list,
127 | const cl_event* event_wait_list,
128 | cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
129 |
130 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
131 | cl_command_queue command_queue,
132 | cl_uint num_objects,
133 | const cl_mem* mem_objects,
134 | cl_uint num_events_in_wait_list,
135 | const cl_event* event_wait_list,
136 | cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
137 |
138 | extern CL_API_ENTRY cl_int CL_API_CALL
139 | clEnqueueReleaseVA_APIMediaSurfacesINTEL(
140 | cl_command_queue command_queue,
141 | cl_uint num_objects,
142 | const cl_mem* mem_objects,
143 | cl_uint num_events_in_wait_list,
144 | const cl_event* event_wait_list,
145 | cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
146 |
147 | typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
148 | cl_command_queue command_queue,
149 | cl_uint num_objects,
150 | const cl_mem* mem_objects,
151 | cl_uint num_events_in_wait_list,
152 | const cl_event* event_wait_list,
153 | cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
154 |
155 | #ifdef __cplusplus
156 | }
157 | #endif
158 |
159 | #endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */
160 |
--------------------------------------------------------------------------------
/OpenCL/include/CL/cl_version.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2018-2020 The Khronos Group Inc.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 |
17 | #ifndef __CL_VERSION_H
18 | #define __CL_VERSION_H
19 |
20 | /* Detect which version to target */
21 | #if !defined(CL_TARGET_OPENCL_VERSION)
22 | #pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)")
23 | #define CL_TARGET_OPENCL_VERSION 220
24 | #endif
25 | #if CL_TARGET_OPENCL_VERSION != 100 && \
26 | CL_TARGET_OPENCL_VERSION != 110 && \
27 | CL_TARGET_OPENCL_VERSION != 120 && \
28 | CL_TARGET_OPENCL_VERSION != 200 && \
29 | CL_TARGET_OPENCL_VERSION != 210 && \
30 | CL_TARGET_OPENCL_VERSION != 220 && \
31 | CL_TARGET_OPENCL_VERSION != 300
32 | #pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220, 300). Defaulting to 220 (OpenCL 2.2)")
33 | #undef CL_TARGET_OPENCL_VERSION
34 | #define CL_TARGET_OPENCL_VERSION 220
35 | #endif
36 |
37 |
38 | /* OpenCL Version */
39 | #if CL_TARGET_OPENCL_VERSION >= 300 && !defined(CL_VERSION_3_0)
40 | #define CL_VERSION_3_0 1
41 | #endif
42 | #if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
43 | #define CL_VERSION_2_2 1
44 | #endif
45 | #if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
46 | #define CL_VERSION_2_1 1
47 | #endif
48 | #if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
49 | #define CL_VERSION_2_0 1
50 | #endif
51 | #if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
52 | #define CL_VERSION_1_2 1
53 | #endif
54 | #if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
55 | #define CL_VERSION_1_1 1
56 | #endif
57 | #if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
58 | #define CL_VERSION_1_0 1
59 | #endif
60 |
61 | /* Allow deprecated APIs for older OpenCL versions. */
62 | #if CL_TARGET_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS)
63 | #define CL_USE_DEPRECATED_OPENCL_2_2_APIS
64 | #endif
65 | #if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
66 | #define CL_USE_DEPRECATED_OPENCL_2_1_APIS
67 | #endif
68 | #if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
69 | #define CL_USE_DEPRECATED_OPENCL_2_0_APIS
70 | #endif
71 | #if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
72 | #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
73 | #endif
74 | #if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
75 | #define CL_USE_DEPRECATED_OPENCL_1_1_APIS
76 | #endif
77 | #if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
78 | #define CL_USE_DEPRECATED_OPENCL_1_0_APIS
79 | #endif
80 |
81 | #endif /* __CL_VERSION_H */
82 |
--------------------------------------------------------------------------------
/OpenCL/include/CL/opencl.h:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2008-2020 The Khronos Group Inc.
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 |
17 | #ifndef __OPENCL_H
18 | #define __OPENCL_H
19 |
20 | #ifdef __cplusplus
21 | extern "C" {
22 | #endif
23 |
24 | #include
25 | #include
26 | #include
27 | #include
28 |
29 | #ifdef __cplusplus
30 | }
31 | #endif
32 |
33 | #endif /* __OPENCL_H */
34 |
--------------------------------------------------------------------------------
/OpenCL/lib/OpenCL.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clamchowder/MicrobenchmarksGui/08bbe546622b843d62f548590d90b5a871d4a588/OpenCL/lib/OpenCL.lib
--------------------------------------------------------------------------------
/Program.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Threading.Tasks;
5 | using System.Windows.Forms;
6 |
7 | namespace MicrobenchmarkGui
8 | {
9 | static class Program
10 | {
11 | ///
12 | /// The main entry point for the application.
13 | ///
14 | [STAThread]
15 | static void Main()
16 | {
17 | Application.EnableVisualStyles();
18 | Application.SetCompatibleTextRenderingDefault(false);
19 | Application.Run(new MicrobenchmarkForm());
20 | }
21 | }
22 | }
--------------------------------------------------------------------------------
/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.InteropServices;
4 |
5 | // General Information about an assembly is controlled through the following
6 | // set of attributes. Change these attribute values to modify the information
7 | // associated with an assembly.
8 | [assembly: AssemblyTitle("MicrobenchmarkGui")]
9 | [assembly: AssemblyDescription("")]
10 | [assembly: AssemblyConfiguration("")]
11 | [assembly: AssemblyCompany("")]
12 | [assembly: AssemblyProduct("MicrobenchmarkGui")]
13 | [assembly: AssemblyCopyright("Copyright © 2022")]
14 | [assembly: AssemblyTrademark("")]
15 | [assembly: AssemblyCulture("")]
16 |
17 | // Setting ComVisible to false makes the types in this assembly not visible
18 | // to COM components. If you need to access a type in this assembly from
19 | // COM, set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible(false)]
21 |
22 | // The following GUID is for the ID of the typelib if this project is exposed to COM
23 | [assembly: Guid("ea6b854d-fad1-4212-8953-4f32286e1b57")]
24 |
25 | // Version information for an assembly consists of the following four values:
26 | //
27 | // Major Version
28 | // Minor Version
29 | // Build Number
30 | // Revision
31 | //
32 | // You can specify all the values or you can default the Build and Revision Numbers
33 | // by using the '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion("1.0.0.0")]
36 | [assembly: AssemblyFileVersion("1.0.0.0")]
37 |
--------------------------------------------------------------------------------
/Properties/Resources.Designer.cs:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | //
3 | // This code was generated by a tool.
4 | // Runtime Version:4.0.30319.42000
5 | //
6 | // Changes to this file may cause incorrect behavior and will be lost if
7 | // the code is regenerated.
8 | //
9 | //------------------------------------------------------------------------------
10 |
11 |
12 | namespace MicrobenchmarkGui.Properties
13 | {
14 | ///
15 | /// A strongly-typed resource class, for looking up localized strings, etc.
16 | ///
17 | // This class was auto-generated by the StronglyTypedResourceBuilder
18 | // class via a tool like ResGen or Visual Studio.
19 | // To add or remove a member, edit your .ResX file then rerun ResGen
20 | // with the /str option, or rebuild your VS project.
21 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")]
22 | [global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
23 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
24 | internal class Resources
25 | {
26 |
27 | private static global::System.Resources.ResourceManager resourceMan;
28 |
29 | private static global::System.Globalization.CultureInfo resourceCulture;
30 |
31 | [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
32 | internal Resources()
33 | {
34 | }
35 |
36 | ///
37 | /// Returns the cached ResourceManager instance used by this class.
38 | ///
39 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
40 | internal static global::System.Resources.ResourceManager ResourceManager
41 | {
42 | get
43 | {
44 | if ((resourceMan == null))
45 | {
46 | global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("MicrobenchmarkGui.Properties.Resources", typeof(Resources).Assembly);
47 | resourceMan = temp;
48 | }
49 | return resourceMan;
50 | }
51 | }
52 |
53 | ///
54 | /// Overrides the current thread's CurrentUICulture property for all
55 | /// resource lookups using this strongly typed resource class.
56 | ///
57 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
58 | internal static global::System.Globalization.CultureInfo Culture
59 | {
60 | get
61 | {
62 | return resourceCulture;
63 | }
64 | set
65 | {
66 | resourceCulture = value;
67 | }
68 | }
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/Properties/Resources.resx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 | text/microsoft-resx
107 |
108 |
109 | 2.0
110 |
111 |
112 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
113 |
114 |
115 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
116 |
117 |
--------------------------------------------------------------------------------
/Properties/Settings.Designer.cs:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | //
3 | // This code was generated by a tool.
4 | // Runtime Version:4.0.30319.42000
5 | //
6 | // Changes to this file may cause incorrect behavior and will be lost if
7 | // the code is regenerated.
8 | //
9 | //------------------------------------------------------------------------------
10 |
11 |
12 | namespace MicrobenchmarkGui.Properties
13 | {
14 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
15 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "11.0.0.0")]
16 | internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase
17 | {
18 |
19 | private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings())));
20 |
21 | public static Settings Default
22 | {
23 | get
24 | {
25 | return defaultInstance;
26 | }
27 | }
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/Properties/Settings.settings:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Microbenchmark GUI, currently cache and memory benchmark
2 | Started as a GUI project for https://github.com/clamchowder/Microbenchmarks, ended as a simple cache and memory benchmark because covering any more would require a hideous amount of GUI code. I'm not trying to imply the GUI code is anything but hideous, by the way :P
3 |
4 |
5 |
6 | Unlike another well-known cache and memory benchmark that's spelled AIDA, this aims to be a free and more advanced tool. It runs through a lot of test sizes designed to cover most cache capacities. Then, you can look through the results to determine bandwidth and latency for each level in the memory hierarchy.
7 |
8 | This program also provides more control over what's being tested. You can select SSE, AVX, AVX-512, or even MMX. You can test instruction fetch bandwidth, like in the screenshot above. And you can control how many threads are used.
9 |
10 | Non-goals:
11 | - Make results comparable to AIDA's. I don't know exactly what they're doing anyway. Don't try to directly compare results from this program to AIDA's.
12 | - Automatically determine bandwidth and latency at different cache levels. I think this is a shortcoming with AIDA since it seems prone to generating erroneous results with new CPU releases. Zen 3 with 96 MB of vertically stacked L3 is a good example.
13 | - Provide any kind of support. Not enough hours in a day, not enough free time :(
14 |
15 | # Building
16 | Get NASM (https://www.nasm.us/) and make sure it's in your path. Then things should build under Visual Studio 2019 or 2022.
17 |
18 | # Memory Bandwidth Options
19 | Most of these should be self explanatory. Anything that might require more explanation:
20 |
21 | ### Threading Mode
22 | - Private array per thread: Each thread gets its own array. You'll see bandwidth drop off after the sum of private cache sizes is exceeded.
23 | - One array shared by all threads: All threads read from the same array. You'll see bandwidth drop off after cache accessible to one core is exceeded, because data will be duplicated across private caches.
24 |
25 | ### Access Mode
26 | - Data Non-Temporal Read: Non-temporal means accesses don't have good temporal locality. In other words, once a location has been accessed, it's going to be a long time before it'll be accessed again, so don't bother caching it. If checked, `movntdqa` is used to load data. It doesn't seem to behave differently from regular reads, as far as this test is concerned.
27 | - Data Non-Temporal Writes: Non-temporal writes tend to behave very differently and bypass caching, using a write-combining memory model even if the region isn't marked write combining by the OS. This typically results in higher write bandwidth to memory, at the expense of not benefitting from cache.
28 | - Instruction fetch: Fills an array with the specified instructions, terminates it with a `ret`, and jumps to it. You can specify four different ways to fill the array:
29 | - 4B NOPs (0F 1F 40 00): 4 byte NOP recommended for padding in AMD optimization guides for Bulldozer and later. 4 bytes is representative of instruction length in typical integer code. Good for testing instruction fetch bandwidth in terms of IPC.
30 | - 8B NOPs (0F 1F 84 00 00 00 00 00): 8 byte NOP recommended for padding, as with above. 8 bytes is representative of very long instructions, which can come up when dealing with large immediates or AVX/AVX2/AVX-512 ISA extensions. Good for testing instruction fetch bandwidth in bytes/cycle.
31 | - Taken Branch per 16B: Each 16B block has an unconditional jump that jumps to the next 16B block. Good for testing BTB capacity.
32 | - 4B NOPs (66 66 66 90): 4 byte NOP recommended by an old AMD Athlon optimization guide. Strangely, Athlons seem to do fine with the 0F 1F 40 00 NOPs, but some sorta old Intel CPUs benefit from this.
33 |
34 | ### Test Method
35 | Specifies what instruction set extension to use. Memory accesses are aligned.
36 | - SSE: 128-bit accesses, using `movaps`
37 | - AVX: 256-bit accesses, using `vmovaps` with 256-bit YMM registers. May not be available on your CPU. Using AVX might not be beneficial even if your CPU supports it. For example, 256-bit AVX stores are microcoded on Piledriver and suffer from extremely slow throughput, so you should test with SSE in that case.
38 | - AVX-512: 512-bit accesses, using `vmovaps` with 512-bit ZMM registers. May not be available on your CPU.
39 | - MMX: 64-bit accesses, using `movq`. MMX is a rather old instruction set extension introduced in later versions of the original Pentium. It still works on modern CPUs, even if it doesn't see a ton of use.
40 |
41 | # Memory Latency Options
42 |
43 | Memory latency has fewer test options, because it visits 64B cachelines in random order within a specified test size. But there are still a few controls:
44 |
45 | ## Access Mode
46 | - Simple addressing: instruction uses a register's value as a pointer to read from memory
47 | - Indexed addressing (C): C compilers like to compile `current = arr[current]` into an instruction that uses indexed addressing. In other words, the instruction specifies a base register and an index register. The CPU must add them together to get the final address used to access memory. On some CPUs, this indexed addressing mode creates an extra cycle of latency.
48 |
49 | ## Paging Mode
50 | Most applications have memory mapped for them in 4 KB pages, which reduces wasted memory and fragmentation. Memory can also be mapped in 2 MB pages. Windows calls this "Large Pages", while Linux calls it "Huge Pages". CPUs cache virtual to physical address translations in structures called TLBs, or translation lookaside buffers. 2 MB pages let TLB size go further, since each cached translation works with a 2 MB aligned block of memory instead of a 4 KB one.
51 |
52 | You can use 2 MB pages to minimize address translation penalties, letting you see L2 and L3 cache latency more clearly. However, this is a bit tricky to do on Windows. You need to give your account the "Lock pages in memory" privilege:
53 |
54 |
55 |
56 | If you've logged in with a non-local account, you can also give "Everyone" the privilege. You need to reboot for the change to take effect.
57 |
58 | Then if you select Large Pages under Paging Mode, the test will allocate 1 GB (the largest test size for mem latency) upfront and run all test sizes within that. That means you need to have 1 GB of contiguous memory free. If you have a system without much memory that has been running for a while, you might have to reboot before allocation will succeed.
59 |
60 | Have fun!
--------------------------------------------------------------------------------
/TestUtilities.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
namespace MicrobenchmarkGui
{
    /// <summary>
    /// Static helper routines for the benchmark tests.
    /// </summary>
    public static class TestUtilities
    {
        /// <summary>
        /// Scale iterations to reach target time.
        /// </summary>
        /// <param name="lastRunIterations">Last iteration count</param>
        /// <param name="targetTimeMs">Desired run time</param>
        /// <param name="lastTimeMs">Last run time</param>
        /// <returns>
        /// Iteration count to use for the next run. Never less than 1, so a
        /// caller that keeps feeding the result back in cannot get stuck at zero.
        /// </returns>
        public static ulong ScaleIterations(ulong lastRunIterations, float targetTimeMs, float lastTimeMs)
        {
            ulong scaled;
            if (lastTimeMs < 100)
            {
                // The last run was too short to extrapolate from; just grow by a fixed factor.
                scaled = lastRunIterations * 5;
            }
            else
            {
                // Linear extrapolation from the last measurement; the cast truncates toward zero.
                scaled = (ulong)(lastRunIterations * (targetTimeMs / lastTimeMs));
            }

            // Truncation (or a zero input) can yield 0 iterations. Zero is an absorbing
            // state because 0 * 5 == 0 on the short-run branch, so clamp to one iteration.
            return scaled == 0 ? 1UL : scaled;
        }
    }
}
29 |
--------------------------------------------------------------------------------
/app.manifest:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
53 |
54 |
55 | true
56 |
57 |
58 |
59 |
60 |
61 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/framework.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
4 | // Windows Header Files
5 | #include
6 |
--------------------------------------------------------------------------------
/img/guiscreenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clamchowder/MicrobenchmarksGui/08bbe546622b843d62f548590d90b5a871d4a588/img/guiscreenshot.png
--------------------------------------------------------------------------------
/img/guiscreenshot_latency.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clamchowder/MicrobenchmarksGui/08bbe546622b843d62f548590d90b5a871d4a588/img/guiscreenshot_latency.png
--------------------------------------------------------------------------------
/img/lockpages.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/clamchowder/MicrobenchmarksGui/08bbe546622b843d62f548590d90b5a871d4a588/img/lockpages.png
--------------------------------------------------------------------------------
/latencykernel.cl:
--------------------------------------------------------------------------------
// Pointer-chasing latency test over global memory: every load's result is the
// index of the next load, so the loads form one serial dependency chain.
//
// unrolled until terascale no longer saw further improvement (10x unroll)
// assumes count will be a multiple of 10. but it won't be too inaccurate with a big count
// not divisible by 10
//
// A:     index array; A[x] is used as the index of the next element to read
// count: requested number of dependent loads (consumed 10 per loop iteration)
// ret:   ret[0] receives the sum of the visited indices (presumably so the
//        compiler cannot discard the loads)
__kernel void unrolled_latency_test(__global const int* A, int count, __global int* ret) {
    int current = A[0];
    // Must start from a defined value: accumulating into an uninitialized
    // private variable reads an indeterminate value.
    int result = 0;
    for (int i = 0; i < count; i += 10) {
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
    }

    ret[0] = result;
}
32 |
// Variant of the global-memory pointer-chasing test in which the starting
// index is loaded from A[1 + local id] instead of being fixed at A[0].
// NOTE(review): the name suggests this works around an AMD compiler
// vectorization behaviour; confirm against the host code.
//
// A:     index array; A[x] is used as the index of the next element to read
// count: requested number of dependent loads (consumed 10 per loop iteration)
// ret:   ret[0] receives the sum of the visited indices
__kernel void unrolled_latency_test_amdvectorworkaround(__global const int* A, int count, __global int* ret) {
    int start = A[1 + get_local_id(0)];
    int current = A[start];
    // Must start from a defined value: accumulating into an uninitialized
    // private variable reads an indeterminate value.
    int result = 0;
    for (int i = 0; i < count; i += 10) {
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
    }

    ret[0] = result;
}
62 |
// Pointer-chasing latency test with the index array in constant memory.
// Uses a 10x unrolled loop of dependent loads, where each load's result is
// the index of the next load.
//
// A:     index array in the __constant address space
// count: requested number of dependent loads (consumed 10 per loop iteration)
// ret:   ret[0] receives the sum of the visited indices
__kernel void constant_unrolled_latency_test(__constant const int* A, int count, __global int* ret) {
    int current = A[0];
    // Must start from a defined value: accumulating into an uninitialized
    // private variable reads an indeterminate value.
    int result = 0;
    for (int i = 0; i < count; i += 10) {
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
        result += current;
        current = A[current];
    }

    ret[0] = result;
}
92 |
#define local_mem_test_size 1024
// uses local memory (LDS/shmem)
//
// The work-group first copies the first local_mem_test_size ints of A into a
// __local array, then the work-item with local id 0 runs the 10x unrolled
// pointer chase over that local copy.
// NOTE(review): assumes A holds at least local_mem_test_size elements and that
// every copied value is a valid index into the local array; confirm on the host.
//
// A:     index array in global memory (source of the local copy)
// count: requested number of dependent loads (consumed 10 per loop iteration)
// ret:   ret[0] receives the sum of the visited indices
__kernel void local_unrolled_latency_test(__global const int* A, int count, __global int* ret) {
    __local int local_a[local_mem_test_size]; // 4 KB, should be present on all GPUs, amirite?
    // better be fast
    // Each work-item copies a strided share: start at its local id, step by the work-group size.
    for (int i = get_local_id(0); i < local_mem_test_size; i += get_local_size(0))
        local_a[i] = A[i];
    // Make the whole local copy visible before anyone starts chasing through it.
    barrier(CLK_LOCAL_MEM_FENCE);

    // everyone else can chill/get masked off
    if (get_local_id(0) == 0) {
        int current = local_a[0];
        // Must start from a defined value: accumulating into an uninitialized
        // private variable reads an indeterminate value.
        int result = 0;
        for (int i = 0; i < count; i += 10) {
            result += current;
            current = local_a[current];
            result += current;
            current = local_a[current];
            result += current;
            current = local_a[current];
            result += current;
            current = local_a[current];
            result += current;
            current = local_a[current];
            result += current;
            current = local_a[current];
            result += current;
            current = local_a[current];
            result += current;
            current = local_a[current];
            result += current;
            current = local_a[current];
            result += current;
            current = local_a[current];
        }

        ret[0] = result;
    }
}
132 |
// Trivial kernel: each work-item adds one to the element of A at its global id.
__kernel void dummy_add(__global int* A) {
    const size_t gid = get_global_id(0);
    A[gid] += 1;
}
--------------------------------------------------------------------------------
/latencykernel_tex.cl:
--------------------------------------------------------------------------------
// does not work on Fermi
//
// Pointer-chasing latency test through a 1D image buffer: the .x component of
// each texel read is used as the coordinate of the next read, 10 reads per
// loop iteration.
//
// A:     image buffer holding the index chain in the .x channel
// count: requested number of dependent reads (consumed 10 per loop iteration)
// ret:   ret[0] receives the .x value last parked in local memory
//
// NOTE(review): the scratch array has 128 slots and is indexed by local id,
// so this presumably expects a work-group size of at most 128; confirm on the host.
__kernel void tex_latency_test(__read_only image1d_buffer_t A, int count, __global int* ret) {
    __local uint4 sink[128];
    int lane = get_local_id(0);
    uint4 texel = read_imageui(A, 0);
    for (int hop = 0; hop < count; hop += 10) {
        texel = read_imageui(A, texel.x);
        texel = read_imageui(A, texel.x);
        texel = read_imageui(A, texel.x);
        texel = read_imageui(A, texel.x);
        texel = read_imageui(A, texel.x);
        texel = read_imageui(A, texel.x);
        texel = read_imageui(A, texel.x);
        texel = read_imageui(A, texel.x);
        texel = read_imageui(A, texel.x);
        texel = read_imageui(A, texel.x);
        // Park the latest texel in local memory once per unrolled batch.
        sink[lane] = texel;
    }

    ret[0] = sink[lane].x;
}
--------------------------------------------------------------------------------
/packages.config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/pch.c:
--------------------------------------------------------------------------------
1 | // pch.c: source file corresponding to the pre-compiled header
2 |
3 | #include "pch.h"
4 |
5 | // When you are using pre-compiled headers, this source file is necessary for compilation to succeed.
6 |
--------------------------------------------------------------------------------
/pch.h:
--------------------------------------------------------------------------------
1 | // pch.h: This is a precompiled header file.
2 | // Files listed below are compiled only once, improving build performance for future builds.
3 | // This also affects IntelliSense performance, including code completion and many code browsing features.
4 | // However, files listed here are ALL re-compiled if any one of them is updated between builds.
5 | // Do not add files here that you will be updating frequently as this negates the performance advantage.
6 |
7 | #ifndef PCH_H
8 | #define PCH_H
9 |
10 | // add headers that you want to pre-compile here
11 | #include "framework.h"
12 |
13 | #endif //PCH_H
14 |
--------------------------------------------------------------------------------