├── 0001-migration-to-oneAPI.patch ├── 0002-modify-to-run.patch ├── 0003-Do-optimizaton.patch ├── 0004-add-SYCL.patch ├── LICENSE ├── README.md ├── SUPRA_on_oneAPI_Getting_Start_Guide_v0.8.pdf ├── SUPRA_on_oneAPI_ReleaseNotes.pdf ├── images ├── Commit info.PNG ├── PATH modify.png ├── PATH.PNG ├── guie.PNG ├── head change modify.png ├── head change.PNG ├── reset path.PNG ├── reset success modify.png └── reset success.PNG ├── supra-jupyter ├── .ipynb_checkpoints │ ├── SUPRA-jupyter-checkpoint.ipynb │ ├── download_data-checkpoint.sh │ ├── download_source-checkpoint.sh │ ├── gpu_build-checkpoint.sh │ └── gpu_run-checkpoint.sh ├── 0006-modify-for-jupyter-use-oneAPI-beta07.patch ├── SUPRA-jupyter-all-in-one-file.ipynb ├── SUPRA-jupyter.ipynb ├── download_data.sh ├── download_source.sh ├── gpu_build.sh └── gpu_run.sh └── third-party-programs.txt /0002-modify-to-run.patch: -------------------------------------------------------------------------------- 1 | From 4ca60200efcde25f49195c37d2a432b305bf0f57 Mon Sep 17 00:00:00 2001 2 | From: wangyon1 3 | Date: Thu, 4 Feb 2021 15:17:44 +0800 4 | Subject: [PATCH 2/4] modify to run 5 | 6 | --- 7 | CMakeLists.txt | 16 +- 8 | src/SupraLib/AbstractInput.h | 2 + 9 | src/SupraLib/AbstractNode.h | 3 + 10 | src/SupraLib/Beamformer/Beamformer.cpp | 9 +- 11 | .../Beamformer/HilbertEnvelopeNode.cpp | 2 +- 12 | .../Beamformer/HilbertFirEnvelope.dp.cpp | 17 +- 13 | src/SupraLib/Beamformer/LogCompressor.dp.cpp | 39 +++-- 14 | src/SupraLib/Beamformer/RxBeamformerCommon.h | 4 +- 15 | .../Beamformer/RxBeamformerCuda.dp.cpp | 66 +++---- 16 | .../RxSampleBeamformerDelayAndStdDev.h | 8 +- 17 | .../RxSampleBeamformerDelayAndSum.h | 8 +- 18 | src/SupraLib/Beamformer/ScanConverter.dp.cpp | 71 ++++---- 19 | src/SupraLib/Beamformer/WindowFunction.cpp | 3 +- 20 | src/SupraLib/Beamformer/WindowFunction.h | 6 +- 21 | src/SupraLib/CMakeLists.txt | 165 +++--------------- 22 | src/SupraLib/Container.h | 138 +++++---------- 23 | 
src/SupraLib/ContainerFactory.cpp.dp.cpp | 43 ++--- 24 | src/SupraLib/ContainerFactory.h | 4 +- 25 | src/SupraLib/InterfaceFactory.cpp | 34 ++-- 26 | src/SupraLib/utilities/DataType.cpp | 4 +- 27 | src/SupraLib/utilities/FirFilterFactory.h | 7 +- 28 | .../{cudaUtility.h => syclUtility.h} | 6 +- 29 | src/SupraLib/utilities/utility.h | 12 ++ 30 | src/SupraLib/vec.h | 4 +- 31 | 24 files changed, 248 insertions(+), 423 deletions(-) 32 | rename src/SupraLib/utilities/{cudaUtility.h => syclUtility.h} (95%) 33 | 34 | diff --git a/CMakeLists.txt b/CMakeLists.txt 35 | index 528fb8a..f38509f 100644 36 | --- a/CMakeLists.txt 37 | +++ b/CMakeLists.txt 38 | @@ -9,24 +9,18 @@ SET(PATCH_VERSION "0") 39 | MESSAGE( STATUS "CMAKE_GENERATOR: ${CMAKE_GENERATOR}") 40 | MESSAGE( STATUS "CMAKE_CL_64: ${CMAKE_CL_64}") 41 | 42 | -SET(CMAKE_CXX_STANDARD 11) 43 | + 44 | +SET(CMAKE_CXX_STANDARD 17) 45 | SET(CMAKE_CXX_STANDARD_REQUIRED ON) 46 | SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") 47 | 48 | -FIND_PACKAGE( OpenMP REQUIRED) 49 | -if(OPENMP_FOUND) 50 | - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") 51 | - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") 52 | - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") 53 | -endif() 54 | -IF(WIN32) 55 | - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP") 56 | -ENDIF(WIN32) 57 | + 58 | + 59 | 60 | # options for modules to include 61 | OPTION(SUPRA_PROFILING "Add profiling message to logfile" OFF) 62 | OPTION(SUPRA_WARNINGS_AS_ERRORS "Treat all build warnings as errors (NOT on windows)" OFF) 63 | -OPTION(SUPRA_DOWNLOAD_SAMPLEDATA "Whether sample data should be downloaded" ON) 64 | +OPTION(SUPRA_DOWNLOAD_SAMPLEDATA "Whether sample data should be downloaded" OFF) 65 | OPTION(SUPRA_CUDA "Use cuda in SUPRA" ON) 66 | OPTION(SUPRA_CUDA_PORTABLE "Build for all supported CUDA architectures" OFF) 67 | OPTION(SUPRA_TORCH "Use pytorch in SUPRA" OFF) 68 | diff --git a/src/SupraLib/AbstractInput.h 
b/src/SupraLib/AbstractInput.h 69 | index 66b1d5c..a3b8f8d 100644 70 | --- a/src/SupraLib/AbstractInput.h 71 | +++ b/src/SupraLib/AbstractInput.h 72 | @@ -18,7 +18,9 @@ 73 | #include 74 | #include 75 | 76 | +#undef emit 77 | #include 78 | +#define emit 79 | 80 | #include "AbstractNode.h" 81 | #include "utilities/SingleThreadTimer.h" 82 | diff --git a/src/SupraLib/AbstractNode.h b/src/SupraLib/AbstractNode.h 83 | index 33db7b0..291c9ed 100644 84 | --- a/src/SupraLib/AbstractNode.h 85 | +++ b/src/SupraLib/AbstractNode.h 86 | @@ -13,7 +13,10 @@ 87 | #define __ABSTRACTNODE_H__ 88 | 89 | #include 90 | + 91 | +#undef emit 92 | #include 93 | +#define emit 94 | 95 | #include "ConfigurationDictionary.h" 96 | #include "ValueRangeDictionary.h" 97 | diff --git a/src/SupraLib/Beamformer/Beamformer.cpp b/src/SupraLib/Beamformer/Beamformer.cpp 98 | index 799b1ef..e83a29d 100644 99 | --- a/src/SupraLib/Beamformer/Beamformer.cpp 100 | +++ b/src/SupraLib/Beamformer/Beamformer.cpp 101 | @@ -30,6 +30,7 @@ namespace supra 102 | using std::tuple; 103 | 104 | using namespace logging; 105 | + 106 | 107 | Beamformer::Beamformer() 108 | : m_pRxBeamformerParameters(nullptr) 109 | @@ -931,7 +932,7 @@ namespace supra 110 | ((relativeIndex.y - 0.5)*m_txFocusWidth)*scanlinePerpDirY; 111 | vec elementToFocus = scanlineStart3 + m_txFocusDepth*scanlineDir + focusPointFromFocusCenter - elementPos; 112 | double transitTime = m_pTransducer->computeTransitTime(elementIndex, elementToFocus, m_speedOfSoundMMperS, m_correctMatchingLayers); 113 | - maxTransitTime = max(maxTransitTime, transitTime); 114 | + maxTransitTime = std::max(maxTransitTime, transitTime); 115 | } 116 | 117 | for (size_t activeElementIdxX = txAperture.begin.x; activeElementIdxX <= txAperture.end.x; activeElementIdxX++) 118 | @@ -965,7 +966,7 @@ namespace supra 119 | double transitTime = m_pTransducer->computeTransitTime(vec2s{ activeElementIdxX, activeElementIdxY }, elementToFocus, m_speedOfSoundMMperS, m_correctMatchingLayers); 120 | 
double delay = maxTransitTime - transitTime; 121 | params.delays[localElementIdxX][localElementIdxY] = delay; 122 | - maxDelay = max(maxDelay, delay); 123 | + maxDelay = std::max(maxDelay, delay); 124 | } 125 | } 126 | } 127 | @@ -976,7 +977,7 @@ namespace supra 128 | { 129 | auto elementPos = std::get<2>(t); 130 | vec d = (scanlineStart3 - elementPos)*scanlineDir; 131 | - maxTransitTime = max(maxTransitTime, (d.x + d.y + d.z + m_txFocusDepth) / m_speedOfSoundMMperS); 132 | + maxTransitTime = std::max(maxTransitTime, (d.x + d.y + d.z + m_txFocusDepth) / m_speedOfSoundMMperS); 133 | } 134 | 135 | for (size_t activeElementIdxX = txAperture.begin.x; activeElementIdxX <= txAperture.end.x; activeElementIdxX++) 136 | @@ -993,7 +994,7 @@ namespace supra 137 | 138 | double delay = maxTransitTime - transitTime; 139 | params.delays[localElementIdxX][localElementIdxY] = delay; 140 | - maxDelay = max(maxDelay, delay); 141 | + maxDelay = std::max(maxDelay, delay); 142 | } 143 | } 144 | } 145 | diff --git a/src/SupraLib/Beamformer/HilbertEnvelopeNode.cpp b/src/SupraLib/Beamformer/HilbertEnvelopeNode.cpp 146 | index a9aa775..ad8ee3a 100644 147 | --- a/src/SupraLib/Beamformer/HilbertEnvelopeNode.cpp 148 | +++ b/src/SupraLib/Beamformer/HilbertEnvelopeNode.cpp 149 | @@ -16,7 +16,7 @@ 150 | #include 151 | 152 | //TODO remove this later 153 | -#include 154 | +#include 155 | 156 | using namespace std; 157 | 158 | diff --git a/src/SupraLib/Beamformer/HilbertFirEnvelope.dp.cpp b/src/SupraLib/Beamformer/HilbertFirEnvelope.dp.cpp 159 | index f2823f9..af16284 100644 160 | --- a/src/SupraLib/Beamformer/HilbertFirEnvelope.dp.cpp 161 | +++ b/src/SupraLib/Beamformer/HilbertFirEnvelope.dp.cpp 162 | @@ -92,10 +92,9 @@ namespace supra 163 | sycl::range<3> gridSizeFilter(1, static_cast((numSamples + blockSizeFilter[ 1 ] - 1) / blockSizeFilter[ 1 ]), 164 | static_cast((numScanlines + blockSizeFilter[ 2 ] - 1) / blockSizeFilter[ 2 ])); 165 | 166 | - /* 167 | - DPCT1049:27: The workgroup size passed 
to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the workgroup size if needed. 168 | - */ 169 | - inImageData->getStream()->submit([ & ](sycl::handler& cgh) { 170 | + static long hilbert_call_count = 0; 171 | + 172 | + sycl::event hilbert_event = inImageData->getStream()->submit([ & ](sycl::handler& cgh) { 173 | auto inImageData_get_ct0 = inImageData->get(); 174 | auto m_hilbertFilter_get_ct1 = m_hilbertFilter->get(); 175 | auto pEnv_get_ct2 = pEnv->get(); 176 | @@ -105,10 +104,12 @@ namespace supra 177 | kernelFilterDemodulation(inImageData_get_ct0, m_hilbertFilter_get_ct1, pEnv_get_ct2, numSamples, numScanlines, m_filterLength_ct5, item_ct1); 178 | }); 179 | }); 180 | - /* 181 | - DPCT1010:26: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code. 182 | - */ 183 | - cudaSafeCall(0); 184 | + 185 | + hilbert_event.wait(); 186 | + hilbert_call_count++; 187 | + std::string msg = "Hilbert run " + std::to_string(hilbert_call_count) + " times: "; 188 | + Report_time(msg, hilbert_event); 189 | + 190 | 191 | return pEnv; 192 | } 193 | diff --git a/src/SupraLib/Beamformer/LogCompressor.dp.cpp b/src/SupraLib/Beamformer/LogCompressor.dp.cpp 194 | index 7389daf..70bc68b 100644 195 | --- a/src/SupraLib/Beamformer/LogCompressor.dp.cpp 196 | +++ b/src/SupraLib/Beamformer/LogCompressor.dp.cpp 197 | @@ -12,6 +12,7 @@ 198 | #include 199 | #include 200 | #include "LogCompressor.h" 201 | +#include 202 | 203 | #include 204 | #include 205 | @@ -23,11 +24,9 @@ using namespace std; 206 | namespace supra 207 | { 208 | template 209 | - /* 210 | - DPCT1044:29: thrust::unary_function was removed because std::unary_function has been deprecated in C++11. You may need to remove references to typedefs from thrust::unary_function in the class 211 | - definition. 
212 | - */ 213 | - struct thrustLogcompress { 214 | + 215 | + struct thrustLogcompress 216 | + { 217 | WorkType _inScale; 218 | WorkType _scaleOverDenominator; 219 | 220 | @@ -36,12 +35,12 @@ namespace supra 221 | // of the downscaled (_inMax) input signal 222 | thrustLogcompress(double dynamicRange, In inMax, Out outMax, double scale) 223 | : _inScale(static_cast(dynamicRange / inMax)) 224 | - , _scaleOverDenominator(static_cast(scale * outMax / log10(dynamicRange + 1))) 225 | + , _scaleOverDenominator(static_cast(scale * outMax / sycl::log10(dynamicRange + 1))) 226 | {}; 227 | 228 | Out operator()(const In& a) const 229 | { 230 | - WorkType val = log10(abs(static_cast(a))*_inScale + (WorkType)1) * _scaleOverDenominator; 231 | + WorkType val = sycl::log10(std::abs(static_cast(a))*_inScale + (WorkType)1) * _scaleOverDenominator; 232 | return clampCast(val); 233 | } 234 | }; 235 | @@ -66,11 +65,27 @@ namespace supra 236 | } 237 | 238 | thrustLogcompress c(sycl::pow(10, (dynamicRange / 20)), static_cast(inMax), outMax, scale); 239 | - std::transform(thrust::cuda::par.on(inImageData->getStream()), inImageData->get(), inImageData->get() + (width * height * depth), pComprGpu->get(), c); 240 | - /* 241 | - DPCT1010:28: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code. 
242 | - */ 243 | - cudaSafeCall(0); 244 | + 245 | + auto inImageData_t = inImageData->get(); 246 | + auto pComprGpu_t = pComprGpu->get(); 247 | + inImageData->getStream()->wait(); 248 | + 249 | + static long log_call_count = 0; 250 | + static std::chrono::duration log_total_duration(0); 251 | + 252 | + sycl::event log_event = inImageData->getStream()->submit([&] (sycl::handler &h) { 253 | + 254 | + h.parallel_for<>(sycl::range<1>(width * height * depth), [=](sycl::id<1> idx){ 255 | + pComprGpu_t[idx] = c(inImageData_t[idx]); 256 | + }); 257 | + 258 | + }); 259 | + 260 | + inImageData->getStream()->wait(); 261 | + log_event.wait(); 262 | + log_call_count++; 263 | + std::string Log_msg = "Log run " + std::to_string(log_call_count) + " times: "; 264 | + Report_time(Log_msg, log_event); 265 | 266 | return pComprGpu; 267 | } 268 | diff --git a/src/SupraLib/Beamformer/RxBeamformerCommon.h b/src/SupraLib/Beamformer/RxBeamformerCommon.h 269 | index 1527995..f86b5b9 100644 270 | --- a/src/SupraLib/Beamformer/RxBeamformerCommon.h 271 | +++ b/src/SupraLib/Beamformer/RxBeamformerCommon.h 272 | @@ -29,7 +29,7 @@ namespace supra 273 | template 274 | inline T computeDelayDTSPACE_D(T dirX, T dirY, T dirZ, T x_element, T x, T z) 275 | { 276 | - return sqrt((x_element - (x + dirX*z))* 277 | + return sycl::sqrt((x_element - (x + dirX*z))* 278 | (x_element - (x + dirX*z)) + 279 | (dirY*z)*(dirY*z)) + z; 280 | } 281 | @@ -37,7 +37,7 @@ namespace supra 282 | template 283 | inline T computeDelayDTSPACE3D_D(T dirX, T dirY, T dirZ, T x_element, T z_element, T x, T z, T d) 284 | { 285 | - return sqrt( 286 | + return sycl::sqrt( 287 | squ(x_element - (x + dirX*d)) + 288 | squ(z_element - (z + dirZ*d)) + 289 | squ(dirY*d)) + d; 290 | diff --git a/src/SupraLib/Beamformer/RxBeamformerCuda.dp.cpp b/src/SupraLib/Beamformer/RxBeamformerCuda.dp.cpp 291 | index 39f80ff..d97c382 100644 292 | --- a/src/SupraLib/Beamformer/RxBeamformerCuda.dp.cpp 293 | +++ 
b/src/SupraLib/Beamformer/RxBeamformerCuda.dp.cpp 294 | @@ -17,7 +17,8 @@ 295 | #include "RxSampleBeamformerDelayAndStdDev.h" 296 | #include "RxSampleBeamformerTestSignal.h" 297 | #include "RxBeamformerCommon.h" 298 | -#include "utilities/cudaUtility.h" 299 | +#include "utilities/syclUtility.h" 300 | +#include 301 | 302 | //TODO ALL ELEMENT/SCANLINE Y positons are actually Z! Change all variable names accordingly 303 | namespace supra 304 | @@ -25,8 +26,7 @@ namespace supra 305 | RxBeamformerCuda::RxBeamformerCuda(const RxBeamformerParameters & parameters) 306 | : m_windowFunction(nullptr) 307 | { 308 | - dpct::device_ext& dev_ct1 = dpct::get_current_device(); 309 | - sycl::queue& q_ct1 = dev_ct1.default_queue(); 310 | + 311 | m_lastSeenDt = 0; 312 | m_numRxScanlines = parameters.getNumRxScanlines(); 313 | m_rxScanlineLayout = parameters.getRxScanlineLayout(); 314 | @@ -35,6 +35,9 @@ namespace supra 315 | m_speedOfSoundMMperS = parameters.getSpeedOfSoundMMperS(); 316 | m_rxNumDepths = parameters.getRxNumDepths(); 317 | 318 | + dpct::device_ext& dev_ct1 = dpct::get_current_device(); 319 | + sycl::queue& q_ct1 = dev_ct1.default_queue(); 320 | + 321 | // create and fill new buffers 322 | m_pRxDepths = std::unique_ptr>(new Container(LocationGpu, &q_ct1, parameters.getRxDepths())); 323 | 324 | @@ -315,9 +318,7 @@ namespace supra 325 | { 326 | if (interpolateBetweenTransmits) 327 | { 328 | - /* 329 | - DPCT1049:32: The workgroup size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the workgroup size if needed. 330 | - */ 331 | + 332 | stream->submit([ & ](sycl::handler& cgh) { 333 | sycl::accessor x_elemsDTsh_acc_ct1(sycl::range<1>(1024), cgh); 334 | sycl::accessor z_elemsDTsh_acc_ct1(sycl::range<1>(1024), cgh); 335 | @@ -334,9 +335,7 @@ namespace supra 336 | }); 337 | } 338 | else { 339 | - /* 340 | - DPCT1049:33: The workgroup size passed to the SYCL kernel may exceed the limit. 
To get the device limit, query info::device::max_work_group_size. Adjust the workgroup size if needed. 341 | - */ 342 | + 343 | stream->submit([ & ](sycl::handler& cgh) { 344 | sycl::accessor x_elemsDTsh_acc_ct1(sycl::range<1>(1024), cgh); 345 | sycl::accessor z_elemsDTsh_acc_ct1(sycl::range<1>(1024), cgh); 346 | @@ -356,9 +355,7 @@ namespace supra 347 | else { 348 | if (interpolateBetweenTransmits) 349 | { 350 | - /* 351 | - DPCT1049:34: The workgroup size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the workgroup size if needed. 352 | - */ 353 | + 354 | stream->submit([ & ](sycl::handler& cgh) { 355 | sycl::accessor x_elemsDTsh_acc_ct1(sycl::range<1>(1024), cgh); 356 | sycl::accessor z_elemsDTsh_acc_ct1(sycl::range<1>(1024), cgh); 357 | @@ -375,9 +372,7 @@ namespace supra 358 | }); 359 | } 360 | else { 361 | - /* 362 | - DPCT1049:35: The workgroup size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the workgroup size if needed. 363 | - */ 364 | + 365 | stream->submit([ & ](sycl::handler& cgh) { 366 | sycl::accessor x_elemsDTsh_acc_ct1(sycl::range<1>(1024), cgh); 367 | sycl::accessor z_elemsDTsh_acc_ct1(sycl::range<1>(1024), cgh); 368 | @@ -394,10 +389,7 @@ namespace supra 369 | }); 370 | } 371 | } 372 | - /* 373 | - DPCT1010:36: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code. 374 | - */ 375 | - cudaSafeCall(0); 376 | + 377 | } 378 | 379 | template 380 | @@ -411,9 +403,7 @@ namespace supra 381 | { 382 | if (interpolateBetweenTransmits) 383 | { 384 | - /* 385 | - DPCT1049:37: The workgroup size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the workgroup size if needed. 
386 | - */ 387 | + 388 | stream->submit([ & ](sycl::handler& cgh) { 389 | cgh.parallel_for(sycl::nd_range<3>(gridSize * blockSize, blockSize), [ = ](sycl::nd_item<3> item_ct1) { 390 | rxBeamformingDTSPACEKernel(numTransducerElements, numReceivedChannels, numTimesteps, RF, numTxScanlines, numRxScanlines, scanlines, 391 | @@ -422,23 +412,26 @@ namespace supra 392 | }); 393 | } 394 | else { 395 | - /* 396 | - DPCT1049:38: The workgroup size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the workgroup size if needed. 397 | - */ 398 | - stream->submit([ & ](sycl::handler& cgh) { 399 | + 400 | + static long beam_call_count = 0; 401 | + 402 | + sycl::event beam_event = stream->submit([ & ](sycl::handler& cgh) { 403 | cgh.parallel_for(sycl::nd_range<3>(gridSize * blockSize, blockSize), [ = ](sycl::nd_item<3> item_ct1) { 404 | rxBeamformingDTSPACEKernel(numTransducerElements, numReceivedChannels, numTimesteps, RF, numTxScanlines, numRxScanlines, scanlines, 405 | numZs, zs, x_elems, speedOfSound, dt, additionalOffset, F, windowFunction, s, item_ct1); 406 | }); 407 | }); 408 | + 409 | + beam_event.wait(); 410 | + beam_call_count++; 411 | + std::string msg = "Beamforming run " + std::to_string(beam_call_count) + " times: "; 412 | + Report_time(msg, beam_event); 413 | } 414 | } 415 | else { 416 | if (interpolateBetweenTransmits) 417 | { 418 | - /* 419 | - DPCT1049:39: The workgroup size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the workgroup size if needed. 
420 | - */ 421 | + 422 | stream->submit([ & ](sycl::handler& cgh) { 423 | cgh.parallel_for(sycl::nd_range<3>(gridSize * blockSize, blockSize), [ = ](sycl::nd_item<3> item_ct1) { 424 | rxBeamformingDTSPACEKernel(numTransducerElements, numReceivedChannels, numTimesteps, RF, numTxScanlines, numRxScanlines, scanlines, 425 | @@ -447,9 +440,7 @@ namespace supra 426 | }); 427 | } 428 | else { 429 | - /* 430 | - DPCT1049:40: The workgroup size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the workgroup size if needed. 431 | - */ 432 | + 433 | stream->submit([ & ](sycl::handler& cgh) { 434 | cgh.parallel_for(sycl::nd_range<3>(gridSize * blockSize, blockSize), [ = ](sycl::nd_item<3> item_ct1) { 435 | rxBeamformingDTSPACEKernel(numTransducerElements, numReceivedChannels, numTimesteps, RF, numTxScanlines, numRxScanlines, scanlines, 436 | @@ -458,10 +449,7 @@ namespace supra 437 | }); 438 | } 439 | } 440 | - /* 441 | - DPCT1010:41: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code. 442 | - */ 443 | - cudaSafeCall(0); 444 | + 445 | } 446 | 447 | template 448 | @@ -494,7 +482,9 @@ namespace supra 449 | 450 | auto beamformingFunction3D = &rxBeamformingDTspaceCuda3D; 451 | auto beamformingFunction2D = &rxBeamformingDTspaceCuda; 452 | - switch (sampleBeamformer) 453 | + 454 | + // We don't use DelayAndStdDev and TestSignal algorthm, so below code are commented. 
455 | + /*switch (sampleBeamformer) 456 | { 457 | case DelayAndSum: 458 | beamformingFunction3D = &rxBeamformingDTspaceCuda3D; 459 | @@ -512,7 +502,7 @@ namespace supra 460 | default: 461 | beamformingFunction3D = &rxBeamformingDTspaceCuda3D; 462 | beamformingFunction2D = &rxBeamformingDTspaceCuda; 463 | - } 464 | + }*/ 465 | 466 | 467 | convertToDtSpace(dt, speedOfSoundMMperS, rawData->getNumElements()); 468 | diff --git a/src/SupraLib/Beamformer/RxSampleBeamformerDelayAndStdDev.h b/src/SupraLib/Beamformer/RxSampleBeamformerDelayAndStdDev.h 469 | index d72079d..9e5e845 100644 470 | --- a/src/SupraLib/Beamformer/RxSampleBeamformerDelayAndStdDev.h 471 | +++ b/src/SupraLib/Beamformer/RxSampleBeamformerDelayAndStdDev.h 472 | @@ -107,7 +107,7 @@ namespace supra 473 | { 474 | LocationType delayf = initialDelay + 475 | computeDelayDTSPACE3D_D(dirX, dirY, dirZ, x_elem, z_elem, scanline_x, scanline_z, depth) + additionalOffset; 476 | - uint32_t delay = static_cast(::floor(delayf)); 477 | + uint32_t delay = static_cast(sycl::floor(delayf)); 478 | delayf -= delay; 479 | if (delay < (numTimesteps - 1)) 480 | { 481 | @@ -122,7 +122,7 @@ namespace supra 482 | } 483 | else 484 | { 485 | - uint32_t delay = static_cast(::round( 486 | + uint32_t delay = static_cast(sycl::round( 487 | initialDelay + computeDelayDTSPACE3D_D(dirX, dirY, dirZ, x_elem, z_elem, scanline_x, scanline_z, depth)) + additionalOffset); 488 | if (delay < numTimesteps) 489 | { 490 | @@ -204,7 +204,7 @@ namespace supra 491 | { 492 | LocationType delayf = initialDelay + 493 | computeDelayDTSPACE_D(dirX, dirY, dirZ, x_elem, scanline_x, depth) + additionalOffset; 494 | - int32_t delay = static_cast(floor(delayf)); 495 | + int32_t delay = static_cast(sycl::floor(delayf)); 496 | delayf -= delay; 497 | if (delay < (numTimesteps - 1)) 498 | { 499 | @@ -219,7 +219,7 @@ namespace supra 500 | } 501 | else 502 | { 503 | - int32_t delay = static_cast(round( 504 | + int32_t delay = static_cast(sycl::round( 505 | initialDelay 
+ computeDelayDTSPACE_D(dirX, dirY, dirZ, x_elem, scanline_x, depth)) + additionalOffset); 506 | if (delay < numTimesteps) 507 | { 508 | diff --git a/src/SupraLib/Beamformer/RxSampleBeamformerDelayAndSum.h b/src/SupraLib/Beamformer/RxSampleBeamformerDelayAndSum.h 509 | index 0684605..f5330d5 100644 510 | --- a/src/SupraLib/Beamformer/RxSampleBeamformerDelayAndSum.h 511 | +++ b/src/SupraLib/Beamformer/RxSampleBeamformerDelayAndSum.h 512 | @@ -72,7 +72,7 @@ namespace supra 513 | { 514 | LocationType delayf = initialDelay + 515 | computeDelayDTSPACE3D_D(dirX, dirY, dirZ, x_elem, z_elem, scanline_x, scanline_z, depth) + additionalOffset; 516 | - uint32_t delay = static_cast(::floor(delayf)); 517 | + uint32_t delay = static_cast(sycl::floor(delayf)); 518 | delayf -= delay; 519 | if (delay < (numTimesteps - 1)) 520 | { 521 | @@ -87,7 +87,7 @@ namespace supra 522 | } 523 | else 524 | { 525 | - uint32_t delay = static_cast(::round( 526 | + uint32_t delay = static_cast(sycl::round( 527 | initialDelay + computeDelayDTSPACE3D_D(dirX, dirY, dirZ, x_elem, z_elem, scanline_x, scanline_z, depth)) + additionalOffset); 528 | if (delay < numTimesteps) 529 | { 530 | @@ -147,7 +147,7 @@ namespace supra 531 | { 532 | LocationType delayf = initialDelay + 533 | computeDelayDTSPACE_D(dirX, dirY, dirZ, x_elem, scanline_x, depth) + additionalOffset; 534 | - int32_t delay = static_cast(floor(delayf)); 535 | + int32_t delay = static_cast(sycl::floor(delayf)); 536 | delayf -= delay; 537 | if (delay < (numTimesteps - 1)) 538 | { 539 | @@ -162,7 +162,7 @@ namespace supra 540 | } 541 | else 542 | { 543 | - int32_t delay = static_cast(round( 544 | + int32_t delay = static_cast(sycl::round( 545 | initialDelay + computeDelayDTSPACE_D(dirX, dirY, dirZ, x_elem, scanline_x, depth)) + additionalOffset); 546 | if (delay < numTimesteps) 547 | { 548 | diff --git a/src/SupraLib/Beamformer/ScanConverter.dp.cpp b/src/SupraLib/Beamformer/ScanConverter.dp.cpp 549 | index fd49274..43f4504 100644 550 | --- 
a/src/SupraLib/Beamformer/ScanConverter.dp.cpp 551 | +++ b/src/SupraLib/Beamformer/ScanConverter.dp.cpp 552 | @@ -28,7 +28,8 @@ 553 | #include 554 | #include "ScanConverter.h" 555 | #include 556 | -#include 557 | +#include 558 | +#include 559 | #include 560 | 561 | #include 562 | @@ -90,7 +91,7 @@ namespace supra 563 | pointInsideTetrahedron(s2, s3, e1, e4, voxelPos)) 564 | { 565 | 566 | - thrust::pair, bool> params = mapToParameters3D( 567 | + std::pair, bool> params = mapToParameters3D( 568 | scanline1Pos, 569 | scanline2Pos, 570 | scanline3Pos, 571 | @@ -185,7 +186,7 @@ namespace supra 572 | } 573 | 574 | template 575 | - static thrust::pair, bool> mapToParameters3D( 576 | + static std::pair, bool> mapToParameters3D( 577 | const vec3T & a, 578 | const vec3T & ax, 579 | const vec3T & ay, 580 | @@ -215,7 +216,7 @@ namespace supra 581 | 582 | if (dot(lowConnX, highConnX) > 0 || dot(lowConnY, highConnY) > 0) 583 | { 584 | - return thrust::pair, bool>(vec3T{ 0, 0, 0 }, false); 585 | + return std::pair, bool>(vec3T{ 0, 0, 0 }, false); 586 | } 587 | 588 | vec2T dist = { 1e10, 1e10 }; 589 | @@ -273,7 +274,7 @@ namespace supra 590 | vec3T lineBase = (1 - t.y)*planeBaseX1 + t.y*planeBaseX2; 591 | Tf d = norm(x - lineBase); 592 | 593 | - return thrust::pair, bool>(vec3T{ t.x, t.y, d }, true); 594 | + return std::pair, bool>(vec3T{ t.x, t.y, d }, true); 595 | } 596 | }; 597 | 598 | @@ -455,13 +456,13 @@ namespace supra 599 | 600 | if (m_is2D) 601 | { 602 | - sycl::range<3> blockSize(1, 256, 1); 603 | + sycl::range<3> blockSize(1, 8, 16); 604 | sycl::range<3> gridSize(1, static_cast((m_imageSize.y + blockSize[ 1 ] - 1) / blockSize[ 1 ]), 605 | static_cast((m_imageSize.x + blockSize[ 2 ] - 1) / blockSize[ 2 ])); 606 | - /* 607 | - DPCT1049:22: The workgroup size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the workgroup size if needed. 
608 | - */ 609 | - pScanlineData->getStream()->submit([ & ](sycl::handler& cgh) { 610 | + 611 | + static long scan_call_count = 0; 612 | + 613 | + sycl::event scan_event = pScanlineData->getStream()->submit([ & ](sycl::handler& cgh) { 614 | auto m_imageSize_x_ct2 = ( uint32_t )m_imageSize.x; 615 | auto m_imageSize_y_ct3 = ( uint32_t )m_imageSize.y; 616 | auto m_mask_get_ct4 = m_mask->get(); 617 | @@ -476,10 +477,13 @@ namespace supra 618 | pScanlineData_get_ct8, pConv_get_ct9, item_ct1); 619 | }); 620 | }); 621 | - /* 622 | - DPCT1010:20: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code. 623 | - */ 624 | - cudaSafeCall(0); 625 | + 626 | + scan_event.wait(); 627 | + scan_call_count++; 628 | + std::string msg = "Scan run " + std::to_string(scan_call_count) + " times: "; 629 | + Report_time(msg, scan_event); 630 | + 631 | + 632 | } 633 | else 634 | { 635 | @@ -506,10 +510,7 @@ namespace supra 636 | m_sampleIdx_get_ct7, m_weightX_get_ct8, m_weightY_get_ct9, m_weightZ_get_ct10, pScanlineData_get_ct11, pConv_get_ct12, item_ct1); 637 | }); 638 | }); 639 | - /* 640 | - DPCT1010:21: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code. 
641 | - */ 642 | - cudaSafeCall(0); 643 | + 644 | } 645 | return pConv; 646 | } 647 | @@ -535,8 +536,8 @@ namespace supra 648 | 649 | void ScanConverter::updateInternals(const std::shared_ptr& inImageProps) 650 | { 651 | - dpct::device_ext& dev_ct1 = dpct::get_current_device(); 652 | - sycl::queue& q_ct1 = dev_ct1.default_queue(); 653 | + dpct::device_ext& dev_ct1 = dpct::get_current_device(); 654 | + sycl::queue& q_ct1 = dev_ct1.default_queue(); 655 | logging::log_log("Scanconverter: Updating scanconversion internals"); 656 | 657 | //Check the scanline configuration for validity 658 | @@ -675,17 +676,19 @@ namespace supra 659 | m_weightX = make_shared >(ContainerLocation::LocationHost, numelBuffers); 660 | m_weightY = make_shared >(ContainerLocation::LocationHost, numelBuffers); 661 | m_weightZ = make_shared >(ContainerLocation::LocationHost, numelBuffers);*/ 662 | - m_mask = make_shared>(ContainerLocation::LocationGpu, &q_ct1, numelBuffers); 663 | - m_sampleIdx = make_shared>(ContainerLocation::LocationGpu, &q_ct1, numelBuffers); 664 | - m_weightX = make_shared>(ContainerLocation::LocationGpu, &q_ct1, numelBuffers); 665 | - m_weightY = make_shared>(ContainerLocation::LocationGpu, &q_ct1, numelBuffers); 666 | - m_weightZ = make_shared>(ContainerLocation::LocationGpu, &q_ct1, numelBuffers); 667 | + sycl::queue &default_queue=dpct::get_default_queue(); 668 | + m_mask = make_shared>(ContainerLocation::LocationGpu, &default_queue, numelBuffers); 669 | + m_sampleIdx = make_shared>(ContainerLocation::LocationGpu, &default_queue, numelBuffers); 670 | + m_weightX = make_shared>(ContainerLocation::LocationGpu, &default_queue, numelBuffers); 671 | + m_weightY = make_shared>(ContainerLocation::LocationGpu, &default_queue, numelBuffers); 672 | + m_weightZ = make_shared>(ContainerLocation::LocationGpu, &default_queue, numelBuffers); 673 | 674 | //create image mask 675 | - /* 676 | - DPCT1003:24: Migrated API does not return error code. (*, 0) is inserted. 
You may need to rewrite this code. 677 | - */ 678 | - cudaSafeCall((q_ct1.memset(m_mask->get(), 0, m_mask->size() * sizeof(uint8_t)), 0)); 679 | + 680 | + //create image mask 681 | + m_mask->getStream()->submit([&](sycl::handler &h){ 682 | + h.memset(m_mask->get(), 0, m_mask->size()*sizeof(uint8_t)); 683 | + }); 684 | 685 | if (m_is2D) 686 | { 687 | @@ -839,17 +842,11 @@ namespace supra 688 | static_cast(resolution), m_mask_get_ct27, m_sampleIdx_get_ct28, m_weightX_get_ct29, m_weightY_get_ct30, m_weightZ_get_ct31, item_ct1); 689 | }); 690 | }); 691 | - /* 692 | - DPCT1010:26: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code. 693 | - */ 694 | - cudaSafeCall(0); 695 | + 696 | } 697 | } 698 | } 699 | - /* 700 | - DPCT1003:27: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 701 | - */ 702 | - cudaSafeCall((q_ct1.wait(), 0)); 703 | + 704 | } 705 | else 706 | { 707 | diff --git a/src/SupraLib/Beamformer/WindowFunction.cpp b/src/SupraLib/Beamformer/WindowFunction.cpp 708 | index 26ec55b..06bc8f5 100644 709 | --- a/src/SupraLib/Beamformer/WindowFunction.cpp 710 | +++ b/src/SupraLib/Beamformer/WindowFunction.cpp 711 | @@ -42,8 +42,9 @@ namespace supra 712 | } 713 | 714 | //Create the storage for the window functions 715 | + sycl::queue &default_queue=dpct::get_default_queue(); 716 | m_dataGpu = unique_ptr >( 717 | - new Container(LocationGpu, cudaStreamPerThread, m_data)); 718 | + new Container(LocationGpu, &default_queue, m_data)); 719 | 720 | m_gpuFunction = WindowFunctionGpu(m_numEntriesPerFunction, m_dataGpu->get()); 721 | } 722 | diff --git a/src/SupraLib/Beamformer/WindowFunction.h b/src/SupraLib/Beamformer/WindowFunction.h 723 | index 2158d3a..4f69c89 100644 724 | --- a/src/SupraLib/Beamformer/WindowFunction.h 725 | +++ b/src/SupraLib/Beamformer/WindowFunction.h 726 | @@ -21,7 +21,7 @@ 727 | #include 728 | #include 729 | #include 730 | 
-#include 731 | +#include 732 | #include 733 | 734 | namespace supra 735 | @@ -51,10 +51,10 @@ namespace supra 736 | public: 737 | typedef float ElementType; 738 | 739 | - WindowFunctionGpu(const WindowFunctionGpu& a) 740 | + /*WindowFunctionGpu(const WindowFunctionGpu& a) 741 | : m_numEntriesPerFunction(a.m_numEntriesPerFunction) 742 | , m_data(a.m_data) 743 | - , m_scale(a.m_scale) {}; 744 | + , m_scale(a.m_scale) {};*/ 745 | 746 | //Returns the weight of chosen window a the relative index 747 | // relativeIndex has to be normalized to [-1, 1] (inclusive) 748 | diff --git a/src/SupraLib/CMakeLists.txt b/src/SupraLib/CMakeLists.txt 749 | index 22df44e..a2472b9 100644 750 | --- a/src/SupraLib/CMakeLists.txt 751 | +++ b/src/SupraLib/CMakeLists.txt 752 | @@ -58,53 +58,11 @@ IF (SUPRA_DEVICE_CEPHASONICS) 753 | INCLUDE(supraIncludeCephasonics) 754 | ENDIF() 755 | 756 | -IF(SUPRA_CUDA) 757 | - include(supraIncludeCuda) 758 | - set(CUDA_SEPARABLE_COMPILATION ON) 759 | - MESSAGE(STATUS "Found Cuda Version " ${CUDA_VERSION_STRING}) 760 | - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Wno-deprecated-gpu-targets;--use_fast_math;--default-stream=per-thread;-lineinfo") 761 | - IF(CUDA_VERSION_STRING VERSION_GREATER "8.0") 762 | - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--Wno-deprecated-declarations") 763 | - IF(BUILD_SHARED_LIBS AND NOT WIN32) 764 | - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Xcompiler=-fPIC") 765 | - ENDIF() 766 | - ENDIF() 767 | - IF(SUPRA_CUDA_PORTABLE) 768 | - CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS "Common" ) 769 | - ELSE() 770 | - CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS "Auto") 771 | - ENDIF() 772 | - MESSAGE(STATUS "Using nvcc arch flags: ${ARCH_FLAGS}") 773 | - LIST(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS}) 774 | - IF(NOT WIN32) 775 | - # nvcc cannot handle the flag beeing set twice, so only set it if not present 776 | - list(FIND CUDA_NVCC_FLAGS "-std=c++11" CUDA_NVCC_STD_FLAGa) 777 | - list(FIND CUDA_NVCC_FLAGS "--std=c++11" CUDA_NVCC_STD_FLAGb) 778 | - 
IF(CUDA_NVCC_STD_FLAGa EQUAL -1 AND CUDA_NVCC_STD_FLAGb EQUAL -1) 779 | - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};--std=c++11") 780 | - ENDIF() 781 | - ENDIF() 782 | - 783 | - # on win32 cufft is not supported 784 | - IF(EXISTS ${CUDA_CUFFT_LIBRARIES}) 785 | - LIST(APPEND CUDA_LIBRARIES ${CUDA_CUFFT_LIBRARIES}) 786 | - SET(SUPRA_CUFFT TRUE) 787 | - SET(SUPRA_Lib_DEFINES ${SUPRA_Lib_DEFINES} HAVE_CUFFT) 788 | - ENDIF() 789 | - LIST(APPEND CUDA_LIBRARIES ${CUDA_curand_LIBRARY} ${CUDA_nppif_LIBRARY} ${CUDA_nppist_LIBRARY} ${CUDA_nppial_LIBRARY}) 790 | - 791 | - SET(SUPRA_Lib_DEFINES ${SUPRA_Lib_DEFINES} 792 | - HAVE_CUDA) 793 | - SET(SUPRA_Lib_INCLUDEDIRS ${SUPRA_Lib_INCLUDEDIRS} 794 | - ${CUDA_INCLUDE_DIRS}) 795 | - 796 | - IF(CUDA_cublas_LIBRARY) 797 | - SET(SUPRA_CUDA_CUBLAS ON) 798 | - SET(SUPRA_Lib_DEFINES ${SUPRA_Lib_DEFINES} 799 | - HAVE_CUDA_CUBLAS) 800 | - SET(CUDA_LIBRARIES ${CUDA_LIBRARIES} ${CUDA_cublas_LIBRARY}) 801 | - ENDIF() 802 | -ENDIF(SUPRA_CUDA) 803 | + 804 | + 805 | +SET(SUPRA_Lib_DEFINES ${SUPRA_Lib_DEFINES} HAVE_CUDA) 806 | +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++11-narrowing -O2 -g") 807 | + 808 | 809 | #find IGTL Interface 810 | IF(SUPRA_DEVICE_IGTL_OUTPUT OR SUPRA_DEVICE_TRACKING_IGTL) 811 | @@ -129,7 +87,7 @@ ENDIF(SUPRA_PROFILING) 812 | ############################################ 813 | #lib base source files 814 | SET(SUPRA_Lib_SOURCE 815 | - ContainerFactory.cpp 816 | + ContainerFactory.cpp.dp.cpp 817 | SupraManager.cpp 818 | RecordObject.cpp 819 | SyncRecordObject.cpp 820 | @@ -149,7 +107,7 @@ SET(SUPRA_Lib_SOURCE 821 | utilities/tinyxml2/tinyxml2.cpp 822 | utilities/jsoncpp/jsoncpp.cpp 823 | FrequencyLimiterNode.cpp 824 | - StreamSyncNode.cpp 825 | + #StreamSyncNode.cpp 826 | AutoQuitNode.cpp 827 | ExampleNodes/ImageProcessingCpuNode.cpp) 828 | SET(SUPRA_Lib_HEADERS 829 | @@ -172,7 +130,7 @@ SET(SUPRA_Lib_HEADERS 830 | TemporalOffsetNode.h 831 | InterfaceFactory.h 832 | utilities/utility.h 833 | - utilities/cudaUtility.h 834 | 
+ utilities/syclUtility.h 835 | utilities/Buffer.h 836 | utilities/Logging.h 837 | utilities/CallFrequency.h 838 | @@ -190,41 +148,9 @@ SET(SUPRA_Lib_HEADERS 839 | 840 | IF(SUPRA_CUDA) 841 | SET(SUPRA_Lib_SOURCE 842 | - ${SUPRA_Lib_SOURCE} 843 | - ExampleNodes/ImageProcessingCudaNode.cpp 844 | - ExampleNodes/ImageProcessingCuda.cu 845 | - ExampleNodes/ImageProcessingBufferCudaNode.cpp 846 | - ExampleNodes/ImageProcessingBufferCuda.cu 847 | - Processing/TimeGainCompensationNode.cpp 848 | - Processing/TimeGainCompensation.cu 849 | - Processing/FilterSradCuda.cu 850 | - Processing/FilterSradCudaNode.cpp 851 | - Processing/DarkFilterThresholdingCudaNode.cpp 852 | - Processing/DarkFilterThresholdingCuda.cu 853 | - Processing/BilateralFilterCudaNode.cpp 854 | - Processing/BilateralFilterCuda.cu 855 | - Processing/MedianFilterCudaNode.cpp 856 | - Processing/MedianFilterCuda.cu 857 | - NoiseNode.cpp 858 | - NoiseCuda.cu) 859 | + ${SUPRA_Lib_SOURCE} ) 860 | SET(SUPRA_Lib_HEADERS 861 | - ${SUPRA_Lib_HEADERS} 862 | - ExampleNodes/ImageProcessingCudaNode.h 863 | - ExampleNodes/ImageProcessingCuda.h 864 | - ExampleNodes/ImageProcessingBufferCudaNode.h 865 | - ExampleNodes/ImageProcessingBufferCuda.h 866 | - Processing/TimeGainCompensationNode.h 867 | - Processing/TimeGainCompensation.h 868 | - Processing/FilterSradCudaNode.h 869 | - Processing/FilterSradCuda.h 870 | - Processing/DarkFilterThresholdingCudaNode.h 871 | - Processing/DarkFilterThresholdingCuda.h 872 | - Processing/BilateralFilterCudaNode.h 873 | - Processing/BilateralFilterCuda.h 874 | - Processing/MedianFilterCudaNode.h 875 | - Processing/MedianFilterCuda.h 876 | - NoiseNode.h 877 | - NoiseCuda.h) 878 | + ${SUPRA_Lib_HEADERS}) 879 | ENDIF(SUPRA_CUDA) 880 | 881 | SET(SUPRA_Lib_INCLUDEDIRS 882 | @@ -260,23 +186,17 @@ IF(SUPRA_BEAMFORMER) 883 | Beamformer/WindowFunction.cpp 884 | Beamformer/USTransducer.cpp 885 | Beamformer/BeamformingNode.cpp 886 | - Beamformer/IQDemodulatorNode.cpp 887 | 
Beamformer/HilbertFirEnvelopeNode.cpp 888 | Beamformer/LogCompressorNode.cpp 889 | Beamformer/ScanConverterNode.cpp 890 | - Beamformer/TemporalFilterNode.cpp 891 | - Beamformer/RawDelayNode.cpp 892 | Beamformer/RxEventLimiterNode.cpp 893 | InputOutput/UltrasoundInterfaceRawDataMock.cpp 894 | InputOutput/UltrasoundInterfaceBeamformedMock.cpp) 895 | SET(SUPRA_Lib_CUDASOURCE ${SUPRA_Lib_CUDASOURCE} 896 | - Beamformer/RxBeamformerCuda.cu 897 | - Beamformer/IQDemodulator.cu 898 | - Beamformer/HilbertFirEnvelope.cu 899 | - Beamformer/LogCompressor.cu 900 | - Beamformer/ScanConverter.cu 901 | - Beamformer/TemporalFilter.cu 902 | - Beamformer/RawDelay.cu) 903 | + Beamformer/RxBeamformerCuda.dp.cpp 904 | + Beamformer/HilbertFirEnvelope.dp.cpp 905 | + Beamformer/LogCompressor.dp.cpp 906 | + Beamformer/ScanConverter.dp.cpp) 907 | SET(SUPRA_Lib_HEADERS ${SUPRA_Lib_HEADERS} 908 | Beamformer/Sequencer.h 909 | Beamformer/Beamformer.h 910 | @@ -305,55 +225,15 @@ IF(SUPRA_BEAMFORMER) 911 | Beamformer/RxEventLimiterNode.h 912 | InputOutput/UltrasoundInterfaceRawDataMock.h 913 | InputOutput/UltrasoundInterfaceBeamformedMock.h) 914 | - IF(SUPRA_CUFFT) 915 | - SET(SUPRA_Lib_SOURCE ${SUPRA_Lib_SOURCE} 916 | - Beamformer/HilbertEnvelopeNode.cpp) 917 | - SET(SUPRA_Lib_CUDASOURCE ${SUPRA_Lib_CUDASOURCE} 918 | - Beamformer/HilbertEnvelope.cu) 919 | - SET(SUPRA_Lib_HEADERS ${SUPRA_Lib_HEADERS} 920 | - Beamformer/HilbertEnvelope.h 921 | - Beamformer/HilbertEnvelopeNode.h) 922 | - ENDIF(SUPRA_CUFFT) 923 | + 924 | 925 | SET(SUPRA_Lib_DEFINES ${SUPRA_Lib_DEFINES} 926 | HAVE_BEAMFORMER) 927 | - SET(SUPRA_Lib_LIBRARIES ${SUPRA_Lib_LIBRARIES} 928 | - ${CUDA_LIBRARIES}) 929 | + SET(SUPRA_Lib_LIBRARIES ${SUPRA_Lib_LIBRARIES}) 930 | 931 | - IF(SUPRA_CUDA_CUBLAS) 932 | - SET(SUPRA_Lib_HEADERS ${SUPRA_Lib_HEADERS} 933 | - Beamformer/BeamformingMVNode.h 934 | - Beamformer/BeamformingMVpcgNode.h 935 | - Beamformer/RxBeamformerMV.h 936 | - Beamformer/RxBeamformerMVpcg.h) 937 | - SET(SUPRA_Lib_SOURCE 
${SUPRA_Lib_SOURCE} 938 | - Beamformer/BeamformingMVNode.cpp 939 | - Beamformer/BeamformingMVpcgNode.cpp) 940 | - SET(SUPRA_Lib_CUDASOURCE ${SUPRA_Lib_CUDASOURCE} 941 | - Beamformer/RxBeamformerMV.cu 942 | - Beamformer/RxBeamformerMVpcg.cu) 943 | - SET(SUPRA_Lib_DEFINES ${SUPRA_Lib_DEFINES} 944 | - HAVE_BEAMFORMER_MINIMUM_VARIANCE) 945 | - ELSE() 946 | - MESSAGE(STATUS "CUBLAS not found, NOT adding minimum variance beamformer") 947 | - ENDIF() 948 | ENDIF(SUPRA_BEAMFORMER) 949 | 950 | -IF(SUPRA_TORCH) 951 | - SET(SUPRA_Lib_SOURCE ${SUPRA_Lib_SOURCE} 952 | - Processing/TorchInference.cpp 953 | - Processing/TorchNode.cpp) 954 | - SET(SUPRA_Lib_HEADERS ${SUPRA_Lib_HEADERS} 955 | - Processing/TorchInference.h 956 | - Processing/TorchNode.h) 957 | 958 | - SET(SUPRA_Lib_DEFINES ${SUPRA_Lib_DEFINES} 959 | - HAVE_TORCH) 960 | - SET(SUPRA_Lib_INCLUDEDIRS ${SUPRA_Lib_INCLUDEDIRS} 961 | - ${TORCH_INCLUDE_DIRS}) 962 | - SET(SUPRA_Lib_LIBRARIES ${SUPRA_Lib_LIBRARIES} 963 | - ${TORCH_LIBRARIES}) 964 | -ENDIF(SUPRA_TORCH) 965 | 966 | IF(SUPRA_DEVICE_ULTRASOUND_SIM) 967 | SET(SUPRA_Lib_SOURCE ${SUPRA_Lib_SOURCE} 968 | @@ -486,19 +366,14 @@ SOURCE_GROUP(inc FILES ${SUPRA_Lib_HEADERS}) 969 | INCLUDE_DIRECTORIES(SUPRA_Lib 970 | ${SUPRA_Lib_INCLUDEDIRS}) 971 | 972 | -IF(CUDA_FOUND) 973 | -CUDA_ADD_LIBRARY(SUPRA_Lib 974 | - ${SUPRA_Lib_SOURCE} 975 | - ${SUPRA_Lib_HEADERS} 976 | - ${SUPRA_Lib_CUDASOURCE} 977 | -) 978 | -ELSE () 979 | + 980 | ADD_LIBRARY(SUPRA_Lib 981 | ${SUPRA_Lib_SOURCE} 982 | ${SUPRA_Lib_HEADERS} 983 | ${SUPRA_Lib_CUDASOURCE} 984 | ) 985 | -ENDIF(CUDA_FOUND) 986 | + 987 | + 988 | TARGET_INCLUDE_DIRECTORIES(SUPRA_Lib 989 | PUBLIC ${SUPRA_Lib_INCLUDEDIRS} 990 | ) 991 | @@ -511,7 +386,7 @@ ENDIF() 992 | 993 | TARGET_COMPILE_DEFINITIONS(SUPRA_Lib 994 | PRIVATE ${SUPRA_Lib_DEFINES}) 995 | -set_property(TARGET SUPRA_Lib PROPERTY CXX_STANDARD 11) 996 | +set_property(TARGET SUPRA_Lib PROPERTY CXX_STANDARD 17) 997 | set_property(TARGET SUPRA_Lib PROPERTY CXX_STANDARD_REQUIRED 
ON) 998 | 999 | SET(SUPRA_Lib_INCLUDEDIRS ${SUPRA_Lib_INCLUDEDIRS} PARENT_SCOPE) 1000 | diff --git a/src/SupraLib/Container.h b/src/SupraLib/Container.h 1001 | index eac0fca..f154e55 100644 1002 | --- a/src/SupraLib/Container.h 1003 | +++ b/src/SupraLib/Container.h 1004 | @@ -16,7 +16,7 @@ 1005 | #include 1006 | #include "ContainerFactory.h" 1007 | #ifdef HAVE_CUDA 1008 | -#include "utilities/cudaUtility.h" 1009 | +#include "utilities/syclUtility.h" 1010 | #endif 1011 | #include "utilities/DataType.h" 1012 | 1013 | @@ -48,9 +48,7 @@ namespace supra 1014 | #ifndef HAVE_CUDA 1015 | location = LocationHost; 1016 | #endif 1017 | -#ifdef HAVE_CUDA 1018 | - m_creationEvent = nullptr; 1019 | -#endif 1020 | + 1021 | m_numel = numel; 1022 | m_location = location; 1023 | m_associatedStream = associatedStream; 1024 | @@ -64,19 +62,16 @@ namespace supra 1025 | #ifdef HAVE_CUDA 1026 | if(location == LocationGpu) 1027 | { 1028 | - /* 1029 | - DPCT1003:10: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 1030 | - */ 1031 | - cudaSafeCall((associatedStream->memcpy(this->get(), data.data(), this->size() * sizeof(T)), 0)); 1032 | - createAndRecordEvent(); 1033 | + 1034 | + associatedStream->memcpy(this->get(), data.data(), this->size() * sizeof(T)); 1035 | + associatedStream->wait(); 1036 | + 1037 | } 1038 | else if(location == LocationBoth) 1039 | { 1040 | - /* 1041 | - DPCT1003:11: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 
1042 | - */ 1043 | - cudaSafeCall((associatedStream->memcpy(this->get(), data.data(), this->size() * sizeof(T)), 0)); 1044 | - createAndRecordEvent(); 1045 | + 1046 | + associatedStream->memcpy(this->get(), data.data(), this->size() * sizeof(T)); 1047 | + associatedStream->wait(); 1048 | } 1049 | else 1050 | { 1051 | @@ -94,10 +89,8 @@ namespace supra 1052 | :Container(location, associatedStream, dataEnd - dataBegin) 1053 | { 1054 | #ifdef HAVE_CUDA 1055 | - /* 1056 | - DPCT1003:12: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 1057 | - */ 1058 | - cudaSafeCall((associatedStream->memcpy(this->get(), dataBegin, this->size() * sizeof(T)), 0)); 1059 | + 1060 | + associatedStream->memcpy(this->get(), dataBegin, this->size() * sizeof(T)); 1061 | createAndRecordEvent(); 1062 | if (waitFinished) 1063 | { 1064 | @@ -117,35 +110,27 @@ namespace supra 1065 | #ifdef HAVE_CUDA 1066 | else if (source.m_location == LocationHost && location == LocationGpu) 1067 | { 1068 | - /* 1069 | - DPCT1003:13: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 1070 | - */ 1071 | - cudaSafeCall((source.getStream()->memcpy(this->get(), source.get(), source.size() * sizeof(T)), 0)); 1072 | - createAndRecordEvent(); 1073 | + 1074 | + source.getStream()->memcpy(this->get(), source.get(), source.size() * sizeof(T)); 1075 | + source.getStream()->wait(); 1076 | } 1077 | else if (source.m_location == LocationGpu && location == LocationHost) 1078 | { 1079 | - /* 1080 | - DPCT1003:14: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 
1081 | - */ 1082 | - cudaSafeCall((source.getStream()->memcpy(this->get(), source.get(), source.size() * sizeof(T)), 0)); 1083 | - createAndRecordEvent(); 1084 | + 1085 | + source.getStream()->memcpy(this->get(), source.get(), source.size() * sizeof(T)); 1086 | + source.getStream()->wait(); 1087 | } 1088 | else if (source.m_location == LocationGpu && location == LocationGpu) 1089 | { 1090 | - /* 1091 | - DPCT1003:15: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 1092 | - */ 1093 | - cudaSafeCall((source.getStream()->memcpy(this->get(), source.get(), source.size() * sizeof(T)), 0)); 1094 | - createAndRecordEvent(); 1095 | + 1096 | + source.getStream()->memcpy(this->get(), source.get(), source.size() * sizeof(T)); 1097 | + source.getStream()->wait(); 1098 | } 1099 | else 1100 | { 1101 | - /* 1102 | - DPCT1003:16: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 1103 | - */ 1104 | - cudaSafeCall((source.getStream()->memcpy(this->get(), source.get(), source.size() * sizeof(T)), 0)); 1105 | - createAndRecordEvent(); 1106 | + 1107 | + source.getStream()->memcpy(this->get(), source.get(), source.size() * sizeof(T)); 1108 | + source.getStream()->wait(); 1109 | } 1110 | if (waitFinished) 1111 | { 1112 | @@ -158,13 +143,11 @@ namespace supra 1113 | ~Container() 1114 | try { 1115 | #ifdef HAVE_CUDA 1116 | - /* 1117 | - DPCT1027:3: The call to cudaStreamQuery was replaced with 0, because DPC++ currently does not support query operations on queues. 1118 | - */ 1119 | + 1120 | auto ret = 0; 1121 | if (ret != 0 && ret != 600 && ret != 4) 1122 | { 1123 | - cudaSafeCall(ret); 1124 | + syclSafeCall(ret); 1125 | } 1126 | // If the driver is currently unloading, we cannot free the memory in any way. Exit will clean up. 
1127 | else if (ret != 4) 1128 | @@ -206,18 +189,14 @@ namespace supra 1129 | } 1130 | else if(m_location == LocationGpu) 1131 | { 1132 | - /* 1133 | - DPCT1003:17: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 1134 | - */ 1135 | - cudaSafeCall((getStream()->memcpy(ret, this->get(), this->size() * sizeof(T)), 0)); 1136 | - cudaSafeCall(cudaStreamSynchronize(getStream())); 1137 | + 1138 | + getStream()->memcpy(ret, this->get(), this->size() * sizeof(T)); 1139 | + getStream()->wait(); 1140 | } 1141 | else 1142 | { 1143 | - /* 1144 | - DPCT1003:18: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 1145 | - */ 1146 | - cudaSafeCall((dpct::get_default_queue().memcpy(ret, this->get(), this->size() * sizeof(T)).wait(), 0)); 1147 | + 1148 | + dpct::get_default_queue().memcpy(ret, this->get(), this->size() * sizeof(T)).wait(); 1149 | } 1150 | return ret; 1151 | #else 1152 | @@ -229,29 +208,14 @@ namespace supra 1153 | { 1154 | #ifdef HAVE_CUDA 1155 | assert(maxSize >= this->size()); 1156 | - /* 1157 | - DPCT1003:19: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 1158 | - */ 1159 | - cudaSafeCall((dpct::get_default_queue().memcpy(dst, this->get(), this->size() * sizeof(T)).wait(), 0)); 1160 | + 1161 | + dpct::get_default_queue().memcpy(dst, this->get(), this->size() * sizeof(T)).wait(); 1162 | #endif 1163 | } 1164 | 1165 | void waitCreationFinished() 1166 | { 1167 | -#ifdef HAVE_CUDA 1168 | - if (m_creationEvent) 1169 | - { 1170 | - /* 1171 | - DPCT1003:4: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 1172 | - */ 1173 | - cudaSafeCall((m_creationEvent.wait_and_throw(), 0)); 1174 | - /* 1175 | - DPCT1027:5: The call to cudaEventDestroy was replaced with 0, because this call is redundant in DPC++. 
1176 | - */ 1177 | - cudaSafeCall(0); 1178 | - m_creationEvent = nullptr; 1179 | - } 1180 | -#endif 1181 | + m_associatedStream->wait(); 1182 | } 1183 | 1184 | // returns the number of elements that can be stored in this container 1185 | @@ -270,38 +234,18 @@ namespace supra 1186 | private: 1187 | void createAndRecordEvent() 1188 | { 1189 | -#ifdef HAVE_CUDA 1190 | - if (!m_creationEvent) 1191 | - { 1192 | - //cudaSafeCall(cudaEventCreateWithFlags(&m_creationEvent, cudaEventBlockingSync | cudaEventDisableTiming)); 1193 | - /* 1194 | - DPCT1027:6: The call to cudaEventCreateWithFlags was replaced with 0, because this call is redundant in DPC++. 1195 | - */ 1196 | - cudaSafeCall(0); 1197 | - } 1198 | - /* 1199 | - DPCT1012:7: Detected kernel execution time measurement pattern and generated an initial code for time measurements in SYCL. You can change the way time is measured depending on your goals. 1200 | - */ 1201 | - /* 1202 | - DPCT1024:8: The original code returned the error code that was further consumed by the program logic. This original code was replaced with 0. You may need to rewrite the program logic 1203 | - consuming the error code. 1204 | - */ 1205 | - m_creationEvent_ct1 = std::chrono::steady_clock::now(); 1206 | - cudaSafeCall(0); 1207 | -#endif 1208 | + 1209 | } 1210 | 1211 | #ifdef HAVE_CUDA 1212 | void addCallbackStream(std::function func) 1213 | { 1214 | auto funcPointer = new std::function(func); 1215 | - /* 1216 | - DPCT1003:9: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 
1217 | - */ 1218 | - cudaSafeCall((std::async([ & ]() { 1219 | - m_associatedStream->wait(); &(Container::cudaDeleteCallback)(m_associatedStream, 0, funcPointer); 1220 | - }), 1221 | - 0)); 1222 | + 1223 | + std::async([ & ]() { 1224 | + m_associatedStream->wait(); 1225 | + (Container::cudaDeleteCallback)(m_associatedStream, 0, funcPointer); 1226 | + }); 1227 | } 1228 | #endif 1229 | 1230 | @@ -319,10 +263,6 @@ namespace supra 1231 | ContainerStreamType m_associatedStream; 1232 | T* m_buffer; 1233 | 1234 | -#ifdef HAVE_CUDA 1235 | - sycl::event m_creationEvent; 1236 | - std::chrono::time_point m_creationEvent_ct1; 1237 | -#endif 1238 | }; 1239 | } 1240 | 1241 | diff --git a/src/SupraLib/ContainerFactory.cpp.dp.cpp b/src/SupraLib/ContainerFactory.cpp.dp.cpp 1242 | index 464a762..ba7c0c9 100644 1243 | --- a/src/SupraLib/ContainerFactory.cpp.dp.cpp 1244 | +++ b/src/SupraLib/ContainerFactory.cpp.dp.cpp 1245 | @@ -65,8 +65,8 @@ namespace supra 1246 | size_t memoryTotal; 1247 | if (location == LocationGpu || location == LocationBoth) 1248 | { 1249 | - cudaSafeCall(cudaMemGetInfo(&memoryFree, &memoryTotal)); 1250 | - memoryFree = static_cast(std::max(static_cast(memoryFree) - (static_cast(memoryTotal) * 0.02), 0.0)); 1251 | + // SYCL doesn't provide get_free_mem_info api, so we pass it. 1252 | + memoryFree = numBytes; 1253 | } 1254 | else 1255 | #endif 1256 | @@ -114,13 +114,10 @@ namespace supra 1257 | sm_streams.resize(sm_numberStreams); 1258 | for (size_t k = 0; k < sm_numberStreams; k++) 1259 | { 1260 | - /* 1261 | - DPCT1003:32: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 1262 | - */ 1263 | - /* 1264 | - DPCT1025:33: The SYCL queue is created ignoring the flag/priority options. 
1265 | - */ 1266 | - cudaSafeCall(((sm_streams[ k ]) = dpct::get_current_device().create_queue(), 0)); 1267 | + 1268 | + auto property_list = cl::sycl::property_list{cl::sycl::property::queue::enable_profiling()}; 1269 | + sm_streams[k] = new sycl::queue(dpct::get_default_queue().get_context(), dpct::get_default_queue().get_device(), property_list); 1270 | + std::cout << endl << "Selected device: " << sm_streams[k]->get_device().get_info() << endl; 1271 | } 1272 | #else 1273 | sm_streams.resize(sm_numberStreams, 0); 1274 | @@ -129,33 +126,29 @@ namespace supra 1275 | 1276 | uint8_t * ContainerFactory::allocateMemory(size_t numBytes, ContainerLocation location) 1277 | { 1278 | - dpct::device_ext& dev_ct1 = dpct::get_current_device(); 1279 | - sycl::queue& q_ct1 = dev_ct1.default_queue(); 1280 | + dpct::device_ext& dev_ct1 = dpct::get_current_device(); 1281 | + sycl::queue& q_ct1 = dev_ct1.default_queue(); 1282 | + 1283 | + 1284 | uint8_t* buffer = nullptr; 1285 | switch (location) 1286 | { 1287 | case LocationGpu: 1288 | #ifdef HAVE_CUDA 1289 | - /* 1290 | - DPCT1003:34: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 1291 | - */ 1292 | - cudaSafeCall((buffer = ( uint8_t* )sycl::malloc_device(numBytes, q_ct1), 0)); 1293 | + 1294 | + buffer = ( uint8_t* )sycl::malloc_device(numBytes, q_ct1); 1295 | #endif 1296 | break; 1297 | case LocationBoth: 1298 | #ifdef HAVE_CUDA 1299 | - /* 1300 | - DPCT1003:35: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 1301 | - */ 1302 | - cudaSafeCall((buffer = ( uint8_t* )sycl::malloc_shared(numBytes, q_ct1), 0)); 1303 | + 1304 | + buffer = ( uint8_t* )sycl::malloc_shared(numBytes, q_ct1); 1305 | #endif 1306 | break; 1307 | case LocationHost: 1308 | #ifdef HAVE_CUDA 1309 | - /* 1310 | - DPCT1003:36: Migrated API does not return error code. (*, 0) is inserted. You may need to rewrite this code. 
1311 | - */ 1312 | - cudaSafeCall((buffer = ( uint8_t* )sycl::malloc_host(numBytes, q_ct1), 0)); 1313 | + 1314 | + buffer = ( uint8_t* )sycl::malloc_host(numBytes, q_ct1); 1315 | #else 1316 | buffer = new uint8_t[numBytes]; 1317 | #endif 1318 | @@ -243,8 +236,8 @@ namespace supra 1319 | 1320 | void ContainerFactory::freeMemory(uint8_t * pointer, size_t numBytes, ContainerLocation location) 1321 | { 1322 | - dpct::device_ext& dev_ct1 = dpct::get_current_device(); 1323 | - sycl::queue& q_ct1 = dev_ct1.default_queue(); 1324 | + dpct::device_ext& dev_ct1 = dpct::get_current_device(); 1325 | + sycl::queue& q_ct1 = dev_ct1.default_queue(); 1326 | switch (location) 1327 | { 1328 | case LocationGpu: 1329 | diff --git a/src/SupraLib/ContainerFactory.h b/src/SupraLib/ContainerFactory.h 1330 | index e39c914..3f1d4c8 100644 1331 | --- a/src/SupraLib/ContainerFactory.h 1332 | +++ b/src/SupraLib/ContainerFactory.h 1333 | @@ -15,7 +15,7 @@ 1334 | #ifdef HAVE_CUDA 1335 | #include 1336 | #include 1337 | -#include "utilities/cudaUtility.h" 1338 | +#include "utilities/syclUtility.h" 1339 | #endif 1340 | 1341 | #include 1342 | @@ -52,7 +52,7 @@ namespace supra 1343 | private: 1344 | static void initStreams(); 1345 | 1346 | - static constexpr size_t sm_numberStreams = 8; 1347 | + static constexpr size_t sm_numberStreams = 1; 1348 | 1349 | static std::vector sm_streams; 1350 | static size_t sm_streamIndex; 1351 | diff --git a/src/SupraLib/InterfaceFactory.cpp b/src/SupraLib/InterfaceFactory.cpp 1352 | index 7055973..02c97d1 100644 1353 | --- a/src/SupraLib/InterfaceFactory.cpp 1354 | +++ b/src/SupraLib/InterfaceFactory.cpp 1355 | @@ -209,37 +209,37 @@ namespace supra 1356 | { "TemporalOffsetNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1357 | { "FrequencyLimiterNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1358 | { "AutoQuitNode", [](tbb::flow::graph& 
g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1359 | - { "StreamSyncNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1360 | + //{ "StreamSyncNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1361 | { "ImageProcessingCpuNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1362 | #ifdef HAVE_CUDA 1363 | - { "NoiseNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1364 | - { "ImageProcessingCudaNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1365 | - { "ImageProcessingBufferCudaNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1366 | - { "FilterSradCudaNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1367 | - { "TimeGainCompensationNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1368 | - { "DarkFilterThresholdingCudaNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1369 | - { "BilateralFilterCudaNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1370 | - { "MedianFilterCudaNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1371 | + //{ "NoiseNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1372 | + //{ "ImageProcessingCudaNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1373 | + //{ 
"ImageProcessingBufferCudaNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1374 | + //{ "FilterSradCudaNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1375 | + //{ "TimeGainCompensationNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1376 | + //{ "DarkFilterThresholdingCudaNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1377 | + //{ "BilateralFilterCudaNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1378 | + //{ "MedianFilterCudaNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1379 | #endif 1380 | #ifdef HAVE_TORCH 1381 | - { "TorchNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1382 | + //{ "TorchNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1383 | #endif 1384 | #ifdef HAVE_CUFFT 1385 | - { "HilbertEnvelopeNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1386 | + //{ "HilbertEnvelopeNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1387 | #endif 1388 | #ifdef HAVE_BEAMFORMER 1389 | { "BeamformingNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1390 | - { "IQDemodulatorNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1391 | + //{ "IQDemodulatorNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1392 | { "HilbertFirEnvelopeNode", 
[](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1393 | { "LogCompressorNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1394 | { "ScanConverterNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1395 | - { "TemporalFilterNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1396 | - { "RawDelayNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1397 | - { "RxEventLimiterNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1398 | + //{ "TemporalFilterNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1399 | + //{ "RawDelayNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1400 | + //{ "RxEventLimiterNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1401 | #endif 1402 | #ifdef HAVE_BEAMFORMER_MINIMUM_VARIANCE 1403 | - { "BeamformingMVNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1404 | - { "BeamformingMVpcgNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1405 | + //{ "BeamformingMVNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1406 | + //{ "BeamformingMVpcgNode", [](tbb::flow::graph& g, std::string nodeID, bool queueing) { return make_shared(g, nodeID, queueing); } }, 1407 | #endif 1408 | }; 1409 | } 1410 | diff --git a/src/SupraLib/utilities/DataType.cpp b/src/SupraLib/utilities/DataType.cpp 1411 | index 83259b8..bfc44a4 
100644 1412 | --- a/src/SupraLib/utilities/DataType.cpp 1413 | +++ b/src/SupraLib/utilities/DataType.cpp 1414 | @@ -50,10 +50,10 @@ namespace supra 1415 | return TypeUint64; 1416 | } 1417 | #ifdef HAVE_CUDA 1418 | - template <> 1419 | + /*template <> 1420 | DataType DataTypeGet<__half>() { 1421 | return TypeHalf; 1422 | - } 1423 | + }*/ 1424 | #endif 1425 | template <> 1426 | DataType DataTypeGet() { 1427 | diff --git a/src/SupraLib/utilities/FirFilterFactory.h b/src/SupraLib/utilities/FirFilterFactory.h 1428 | index 09283ba..d0aaf44 100644 1429 | --- a/src/SupraLib/utilities/FirFilterFactory.h 1430 | +++ b/src/SupraLib/utilities/FirFilterFactory.h 1431 | @@ -89,7 +89,8 @@ namespace supra 1432 | ElementType omegaBandwidth = static_cast(2 * M_PI* bandwidth / samplingFrequency); 1433 | int halfWidth = ((int)length - 1) / 2; 1434 | 1435 | - auto filter = std::make_shared>(LocationHost, &dpct::get_default_queue(), length); 1436 | + sycl::queue &default_queue=dpct::get_default_queue(); 1437 | + auto filter = std::make_shared>(LocationHost, &default_queue, length); 1438 | 1439 | //determine the filter function 1440 | std::function filterFunction = [&halfWidth](int n) -> ElementType { 1441 | @@ -184,7 +185,7 @@ namespace supra 1442 | break; 1443 | case FilterWindowKaiser: 1444 | windowFunction = [maxN, beta](int n) -> ElementType { 1445 | - double argument = beta * sqrt(1.0 - (2 * (( ElementType )n - maxN / 2) / maxN) * (2 * (( ElementType )n - maxN / 2) / maxN)); 1446 | + double argument = beta * sycl::sqrt(1.0 - (2 * (( ElementType )n - maxN / 2) / maxN) * (2 * (( ElementType )n - maxN / 2) / maxN)); 1447 | return static_cast(bessel0_1stKind(argument) / bessel0_1stKind(beta)); }; 1448 | break; 1449 | case FilterWindowRectangular: 1450 | @@ -211,7 +212,7 @@ namespace supra 1451 | gainR += filter->get()[k] * cos(omega * (ElementType)k); 1452 | gainI += filter->get()[k] * sin(omega * (ElementType)k); 1453 | } 1454 | - ElementType gain = sqrt(gainR*gainR + gainI*gainI); 
1455 | + ElementType gain = sycl::sqrt(gainR*gainR + gainI*gainI); 1456 | for (int k = 0; k < filter->size(); k++) 1457 | { 1458 | filter->get()[k] /= gain; 1459 | diff --git a/src/SupraLib/utilities/cudaUtility.h b/src/SupraLib/utilities/syclUtility.h 1460 | similarity index 95% 1461 | rename from src/SupraLib/utilities/cudaUtility.h 1462 | rename to src/SupraLib/utilities/syclUtility.h 1463 | index 8d5356c..8167b17 100644 1464 | --- a/src/SupraLib/utilities/cudaUtility.h 1465 | +++ b/src/SupraLib/utilities/syclUtility.h 1466 | @@ -56,11 +56,11 @@ namespace supra 1467 | 1468 | /// Verifies a cuda call returned "cudaSuccess". Prints error message otherwise. 1469 | /// returns true if no error occured, false otherwise. 1470 | - #define cudaSafeCall(_err_) cudaSafeCall2(_err_, __FILE__, __LINE__, FUNCNAME_PORTABLE) 1471 | + #define syclSafeCall(_err_) syclSafeCall2(_err_, __FILE__, __LINE__, FUNCNAME_PORTABLE) 1472 | 1473 | /// Verifies a cuda call returned "cudaSuccess". Prints error message otherwise. 1474 | /// returns true if no error occured, false otherwise. Calles by cudaSafeCall 1475 | - inline bool cudaSafeCall2(int err, const char* file, int line, const char* func) { 1476 | + inline bool syclSafeCall2(int err, const char* file, int line, const char* func) { 1477 | 1478 | //#ifdef CUDA_ERROR_CHECK 1479 | /* 1480 | @@ -74,7 +74,7 @@ namespace supra 1481 | /* 1482 | DPCT1009:2: SYCL uses exceptions to report errors and does not use the error codes. The original code was commented out and a warning string was inserted. You need to rewrite this code. 
1483 | */ 1484 | - sprintf(buf, "CUDA Error (in \"%s\", Line: %d, %s): %d - %s\n", file, line, func, err, "cudaGetErrorString not supported" /*cudaGetErrorString(err)*/); 1485 | + sprintf(buf, "SYCL Error (in \"%s\", Line: %d, %s): %d - %s\n", file, line, func, err, "cudaGetErrorString not supported" /*cudaGetErrorString(err)*/); 1486 | printf("%s", buf); 1487 | logging::log_error(buf); 1488 | return false; 1489 | diff --git a/src/SupraLib/utilities/utility.h b/src/SupraLib/utilities/utility.h 1490 | index 729de80..49ec2b7 100644 1491 | --- a/src/SupraLib/utilities/utility.h 1492 | +++ b/src/SupraLib/utilities/utility.h 1493 | @@ -33,6 +33,18 @@ namespace std 1494 | } 1495 | } 1496 | 1497 | +static void Report_time(const std::string &msg, sycl::event e) 1498 | +{ 1499 | + cl::sycl::cl_ulong time_start = 1500 | + e.get_profiling_info(); 1501 | + 1502 | + cl::sycl::cl_ulong time_end = 1503 | + e.get_profiling_info(); 1504 | + 1505 | + double elapsed = (time_end - time_start) / 1e6; 1506 | + std::cout << msg << elapsed << " milliseconds\n"; 1507 | +} 1508 | + 1509 | namespace supra 1510 | { 1511 | using std::to_string; 1512 | diff --git a/src/SupraLib/vec.h b/src/SupraLib/vec.h 1513 | index 8b10d3e..b0f08de 100644 1514 | --- a/src/SupraLib/vec.h 1515 | +++ b/src/SupraLib/vec.h 1516 | @@ -15,7 +15,7 @@ 1517 | #include 1518 | #include 1519 | #include "utilities/utility.h" 1520 | -#include "utilities/cudaUtility.h" 1521 | +#include "utilities/syclUtility.h" 1522 | #include 1523 | 1524 | namespace supra 1525 | @@ -228,7 +228,7 @@ namespace supra 1526 | template 1527 | inline Ta norm(const vec2T& a) 1528 | { 1529 | - return sqrt(a.x*a.x + a.y*a.y); 1530 | + return sycl::sqrt(a.x*a.x + a.y*a.y); 1531 | } 1532 | /// Normalization of a two-vector 1533 | template 1534 | -- 1535 | 2.17.1 1536 | 1537 | -------------------------------------------------------------------------------- /0003-Do-optimizaton.patch: 
-------------------------------------------------------------------------------- 1 | From e6932c3be3dbe1bb791fab4aa143019a23e73c30 Mon Sep 17 00:00:00 2001 2 | From: wangyon1 3 | Date: Thu, 4 Feb 2021 17:00:16 +0800 4 | Subject: [PATCH 3/4] Do optimizaton 5 | 6 | --- 7 | .../Beamformer/HilbertFirEnvelope.dp.cpp | 53 +++++- 8 | src/SupraLib/Beamformer/RxBeamformerCommon.h | 6 + 9 | .../Beamformer/RxBeamformerCuda.dp.cpp | 104 +++++++++++- 10 | .../RxSampleBeamformerDelayAndSum.h | 105 ++++++++++++ 11 | src/SupraLib/Beamformer/WindowFunction.h | 8 +- 12 | src/SupraLib/Beamformer/helper.h | 159 ++++++++++++++++++ 13 | 6 files changed, 425 insertions(+), 10 deletions(-) 14 | create mode 100644 src/SupraLib/Beamformer/helper.h 15 | 16 | diff --git a/src/SupraLib/Beamformer/HilbertFirEnvelope.dp.cpp b/src/SupraLib/Beamformer/HilbertFirEnvelope.dp.cpp 17 | index af16284..99cf7be 100644 18 | --- a/src/SupraLib/Beamformer/HilbertFirEnvelope.dp.cpp 19 | +++ b/src/SupraLib/Beamformer/HilbertFirEnvelope.dp.cpp 20 | @@ -14,6 +14,7 @@ 21 | #include "HilbertFirEnvelope.h" 22 | #include 23 | #include 24 | +#include "helper.h" 25 | 26 | #include 27 | #include 28 | @@ -62,6 +63,54 @@ namespace supra 29 | 30 | } 31 | 32 | + const int H_VEC_SIZE = 4; 33 | + template 34 | + void vec_kernelFilterDemodulation( 35 | + const InputType* __restrict__ signal, 36 | + const HilbertFirEnvelope::WorkType * __restrict__ filter, 37 | + OutputType * __restrict__ out, 38 | + const int numSamples, 39 | + const int numScanlines, 40 | + const int filterLength, 41 | + sycl::nd_item<3> item_ct1) { 42 | + 43 | + int scanlineIdx = item_ct1.get_local_range().get(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2); 44 | + int sampleIdx = item_ct1.get_local_range().get(1) * item_ct1.get_group(1) + item_ct1.get_local_id(1); 45 | + 46 | + scanlineIdx *= H_VEC_SIZE; 47 | + if (scanlineIdx < numScanlines && sampleIdx < numSamples) 48 | + { 49 | + sycl::vec accumulator(0.0); 50 | + 51 | + int startPoint = 
sampleIdx - filterLength / 2; 52 | + int endPoint = sampleIdx + filterLength / 2; 53 | + int currentFilterElement = 0; 54 | + 55 | + for (int currentSample = startPoint; 56 | + currentSample <= endPoint; 57 | + currentSample ++, currentFilterElement++) 58 | + { 59 | + if (currentSample >= 0 && currentSample < numSamples) 60 | + { 61 | + sycl::vec vec_sample(0.0); 62 | + #pragma unroll 63 | + for (int c = 0; c < H_VEC_SIZE; c++) { 64 | + vec_sample[c] = static_cast(signal[scanlineIdx + c + currentSample * numScanlines]) 65 | + * filter[currentFilterElement]; 66 | + } 67 | + accumulator += vec_sample; 68 | + } 69 | + } 70 | + #pragma unroll 71 | + for (int c = 0; c < H_VEC_SIZE; c++) { 72 | + HilbertFirEnvelope::WorkType signalValue = static_cast(signal[scanlineIdx + c + sampleIdx*numScanlines]); 73 | + out[ scanlineIdx + c + sampleIdx * numScanlines ] = sycl::sqrt(squ(signalValue) + squ(accumulator[c])); 74 | + } 75 | + } 76 | + 77 | + } 78 | + 79 | + 80 | HilbertFirEnvelope::HilbertFirEnvelope(size_t filterLength) 81 | : m_filterLength(filterLength) 82 | , m_hilbertFilter(nullptr) 83 | @@ -90,7 +139,7 @@ namespace supra 84 | auto pEnv = make_shared >(LocationGpu, inImageData->getStream(), numScanlines*numSamples); 85 | sycl::range<3> blockSizeFilter(1, 8, 16); 86 | sycl::range<3> gridSizeFilter(1, static_cast((numSamples + blockSizeFilter[ 1 ] - 1) / blockSizeFilter[ 1 ]), 87 | - static_cast((numScanlines + blockSizeFilter[ 2 ] - 1) / blockSizeFilter[ 2 ])); 88 | + static_cast((numScanlines + blockSizeFilter[ 2 ] - 1) / blockSizeFilter[ 2 ] / H_VEC_SIZE)); 89 | 90 | static long hilbert_call_count = 0; 91 | 92 | @@ -101,7 +150,7 @@ namespace supra 93 | auto m_filterLength_ct5 = ( int )m_filterLength; 94 | 95 | cgh.parallel_for(sycl::nd_range<3>(gridSizeFilter * blockSizeFilter, blockSizeFilter), [ = ](sycl::nd_item<3> item_ct1) { 96 | - kernelFilterDemodulation(inImageData_get_ct0, m_hilbertFilter_get_ct1, pEnv_get_ct2, numSamples, numScanlines, m_filterLength_ct5, 
item_ct1); 97 | + vec_kernelFilterDemodulation(inImageData_get_ct0, m_hilbertFilter_get_ct1, pEnv_get_ct2, numSamples, numScanlines, m_filterLength_ct5, item_ct1); 98 | }); 99 | }); 100 | 101 | diff --git a/src/SupraLib/Beamformer/RxBeamformerCommon.h b/src/SupraLib/Beamformer/RxBeamformerCommon.h 102 | index f86b5b9..35b05e0 100644 103 | --- a/src/SupraLib/Beamformer/RxBeamformerCommon.h 104 | +++ b/src/SupraLib/Beamformer/RxBeamformerCommon.h 105 | @@ -34,6 +34,12 @@ namespace supra 106 | (dirY*z)*(dirY*z)) + z; 107 | } 108 | 109 | + const int Vec_SIZE = 8; 110 | + sycl::vec vec_computeDelayDTSPACE_D(float dirX, float dirY, float dirZ, sycl::vec x_element, float x, float z) 111 | + { 112 | + return sycl::sqrt(((x_element - (x + dirX * z)) * (x_element - (x + dirX * z)) + (dirY * z) * (dirY * z))) + z; 113 | + } 114 | + 115 | template 116 | inline T computeDelayDTSPACE3D_D(T dirX, T dirY, T dirZ, T x_element, T z_element, T x, T z, T d) 117 | { 118 | diff --git a/src/SupraLib/Beamformer/RxBeamformerCuda.dp.cpp b/src/SupraLib/Beamformer/RxBeamformerCuda.dp.cpp 119 | index d97c382..7c7a4b7 100644 120 | --- a/src/SupraLib/Beamformer/RxBeamformerCuda.dp.cpp 121 | +++ b/src/SupraLib/Beamformer/RxBeamformerCuda.dp.cpp 122 | @@ -305,6 +305,87 @@ namespace supra 123 | } 124 | } 125 | 126 | + 127 | + const int ROW_SIZE = 2; 128 | + template 129 | + void mixed_rxBeamformingDTSPACEKernel(size_t numTransducerElements, size_t numReceivedChannels, size_t numTimesteps, const RFType* __restrict__ RF, size_t numTxScanlines, size_t numRxScanlines, 130 | + const ScanlineRxParameters3D* __restrict__ scanlinesDT, size_t numDs, const LocationType* __restrict__ dsDT, const LocationType* __restrict__ x_elemsDT, LocationType speedOfSound, 131 | + LocationType dt, uint32_t additionalOffset, LocationType F, const WindowFunctionGpu* __restrict__ windowFunction, ResultType* __restrict__ s, sycl::nd_item<3> item_ct1, float* __restrict__ mdataGpu) 132 | + { 133 | + int r = 
item_ct1.get_local_range().get(1) * item_ct1.get_group(1) + item_ct1.get_local_id(1); //@suppress("Symbol is not resolved") 134 | + //@suppress("Field cannot be resolved") 135 | + int scanlineIdx = item_ct1.get_local_range().get(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2); //@suppress("Symbol is not resolved") 136 | + 137 | + //@suppress("Field cannot be resolved") 138 | + const int row_size = ROW_SIZE; 139 | + r = r * row_size; 140 | + if (r < numDs && scanlineIdx < numRxScanlines) 141 | + { 142 | + sycl::vec d(0.0); 143 | + #pragma unroll 144 | + for (int i = 0; i < row_size; i++) { 145 | + d[i] = dsDT[r + i]; 146 | + } 147 | + // TODO should this also depend on the angle? 148 | + sycl::vec aDT = d / (2*F); 149 | + ScanlineRxParameters3D scanline = scanlinesDT[ scanlineIdx ]; 150 | + LocationType scanline_x = scanline.position.x; 151 | + LocationType dirX = scanline.direction.x; 152 | + LocationType dirY = scanline.direction.y; 153 | + LocationType dirZ = scanline.direction.z; 154 | + LocationType maxElementDistance = static_cast(scanline.maxElementDistance.x); 155 | + 156 | + sycl::vec sInterp(0.0); 157 | + 158 | + int highestWeightIndex; 159 | + if (!interpolateBetweenTransmits) 160 | + { 161 | + highestWeightIndex = 0; 162 | + float highestWeight = scanline.txWeights[ 0 ]; 163 | + for (int k = 1; k < std::extent::value; k++) 164 | + { 165 | + if (scanline.txWeights[ k ] > highestWeight) 166 | + { 167 | + highestWeight = scanline.txWeights[ k ]; 168 | + highestWeightIndex = k; 169 | + } 170 | + } 171 | + } 172 | + 173 | + // now iterate over all four txScanlines to interpolate beamformed scanlines from those transmits 174 | + for (int k = (interpolateBetweenTransmits ? 
0 : highestWeightIndex); 175 | + (interpolateBetweenTransmits && k < std::extent::value) || (!interpolateBetweenTransmits && k == highestWeightIndex); k++) 176 | + { 177 | + if (scanline.txWeights[ k ] > 0.0) 178 | + { 179 | + ScanlineRxParameters3D::TransmitParameters txParams = scanline.txParameters[ k ]; 180 | + uint32_t txScanlineIdx = txParams.txScanlineIdx; 181 | + if (txScanlineIdx >= numTxScanlines) 182 | + { 183 | + // ERROR! 184 | + return; 185 | + } 186 | + 187 | + #pragma unroll 188 | + for (int i = 0; i < row_size; i++) { 189 | + LocationType invMaxElementDistance = 1.f / sycl::min(aDT[i], maxElementDistance); 190 | + sInterp[i] = SampleBeamformer::template vec_sampleBeamform2D(txParams, RF, numTransducerElements, 191 | + numReceivedChannels, numTimesteps, x_elemsDT, scanline_x, dirX, dirY, dirZ, aDT[i], d[i], invMaxElementDistance , speedOfSound, dt, additionalOffset, 192 | + windowFunction, mdataGpu); 193 | + } 194 | + 195 | + } 196 | + } 197 | + 198 | + #pragma unroll 199 | + for (int i = 0; i < row_size; i++) { 200 | + s[scanlineIdx + (r + i) * numRxScanlines] = clampCast(sInterp[i]); 201 | + } 202 | + } 203 | + } 204 | + 205 | + 206 | + 207 | template 208 | void rxBeamformingDTspaceCuda3D(bool interpolateRFlines, bool interpolateBetweenTransmits, size_t numTransducerElements, vec2s elementLayout, size_t numReceivedChannels, size_t numTimesteps, 209 | const RFType* RF, size_t numTxScanlines, size_t numRxScanlines, const ScanlineRxParameters3D* scanlines, size_t numZs, const LocationType* zs, 210 | @@ -395,10 +476,11 @@ namespace supra 211 | template 212 | void rxBeamformingDTspaceCuda(bool interpolateRFlines, bool interpolateBetweenTransmits, size_t numTransducerElements, size_t numReceivedChannels, size_t numTimesteps, const RFType* RF, 213 | size_t numTxScanlines, size_t numRxScanlines, const ScanlineRxParameters3D* scanlines, size_t numZs, const LocationType* zs, const LocationType* x_elems, 214 | - LocationType speedOfSound, LocationType dt, 
uint32_t additionalOffset, LocationType F, const WindowFunctionGpu windowFunction, sycl::queue* stream, ResultType* s) 215 | + LocationType speedOfSound, LocationType dt, uint32_t additionalOffset, LocationType F, const WindowFunctionGpu windowFunction, sycl::queue* stream, ResultType* s, LocationType *mdataGpu) 216 | { 217 | sycl::range<3> blockSize(1, 256, 1); 218 | - sycl::range<3> gridSize(1, static_cast((numZs + blockSize[ 1 ] - 1) / blockSize[ 1 ]), static_cast((numRxScanlines + blockSize[ 2 ] - 1) / blockSize[ 2 ])); 219 | + sycl::range<3> gridSize(1, static_cast((numZs + blockSize[ 1 ] - 1) / blockSize[ 1 ] / ROW_SIZE), 220 | + static_cast((numRxScanlines + blockSize[ 2 ] - 1) / blockSize[ 2 ])); 221 | if (interpolateRFlines) 222 | { 223 | if (interpolateBetweenTransmits) 224 | @@ -417,8 +499,13 @@ namespace supra 225 | 226 | sycl::event beam_event = stream->submit([ & ](sycl::handler& cgh) { 227 | cgh.parallel_for(sycl::nd_range<3>(gridSize * blockSize, blockSize), [ = ](sycl::nd_item<3> item_ct1) { 228 | - rxBeamformingDTSPACEKernel(numTransducerElements, numReceivedChannels, numTimesteps, RF, numTxScanlines, numRxScanlines, scanlines, 229 | - numZs, zs, x_elems, speedOfSound, dt, additionalOffset, F, windowFunction, s, item_ct1); 230 | + /*rxBeamformingDTSPACEKernel(numTransducerElements, numReceivedChannels, numTimesteps, RF, numTxScanlines, numRxScanlines, scanlines, 231 | + numZs, zs, x_elems, speedOfSound, dt, additionalOffset, F, windowFunction, s, item_ct1);*/ 232 | + 233 | + mixed_rxBeamformingDTSPACEKernel(numTransducerElements, numReceivedChannels, numTimesteps, RF, numTxScanlines, numRxScanlines, 234 | + scanlines, numZs, zs, x_elems, speedOfSound, dt, additionalOffset, F, &windowFunction, s, item_ct1, mdataGpu); 235 | + 236 | + 237 | }); 238 | }); 239 | 240 | @@ -480,6 +567,12 @@ namespace supra 241 | m_windowFunction = std::unique_ptr(new WindowFunction(windowType, windowParameter, m_windowFunctionNumEntries)); 242 | } 243 | 244 | + auto 
mdataGpu = (float*) sycl::malloc_device(m_windowFunctionNumEntries * sizeof(windowType), gRawData->getStream()->get_device(), gRawData->getStream()->get_context()); 245 | + gRawData->getStream()->submit([&] (sycl::handler &h) { 246 | + h.memcpy(mdataGpu, m_windowFunction->m_data.data(), m_windowFunctionNumEntries * sizeof(WindowType)); 247 | + }); 248 | + gRawData->getStream()->wait(); 249 | + 250 | auto beamformingFunction3D = &rxBeamformingDTspaceCuda3D; 251 | auto beamformingFunction2D = &rxBeamformingDTspaceCuda; 252 | 253 | @@ -550,7 +643,8 @@ namespace supra 254 | static_cast(fNumber), 255 | *(m_windowFunction->getGpu()), 256 | gRawData->getStream(), 257 | - pData->get() 258 | + pData->get(), 259 | + mdataGpu 260 | ); 261 | } 262 | 263 | diff --git a/src/SupraLib/Beamformer/RxSampleBeamformerDelayAndSum.h b/src/SupraLib/Beamformer/RxSampleBeamformerDelayAndSum.h 264 | index f5330d5..b9c574d 100644 265 | --- a/src/SupraLib/Beamformer/RxSampleBeamformerDelayAndSum.h 266 | +++ b/src/SupraLib/Beamformer/RxSampleBeamformerDelayAndSum.h 267 | @@ -17,6 +17,7 @@ 268 | #include "USImageProperties.h" 269 | #include "WindowFunction.h" 270 | #include "RxBeamformerCommon.h" 271 | +#include "helper.h" 272 | 273 | //TODO ALL ELEMENT/SCANLINE Y positons are actually Z! 
Change all variable names accordingly 274 | namespace supra 275 | @@ -180,6 +181,110 @@ namespace supra 276 | return 0; 277 | } 278 | } 279 | + 280 | + template 281 | + static ResultType vec_sampleBeamform2D( 282 | + ScanlineRxParameters3D::TransmitParameters txParams, 283 | + const RFType* RF, 284 | + uint32_t numTransducerElements, 285 | + uint32_t numReceivedChannels, 286 | + uint32_t numTimesteps, 287 | + const LocationType* x_elemsDT, 288 | + LocationType scanline_x, 289 | + LocationType dirX, 290 | + LocationType dirY, 291 | + LocationType dirZ, 292 | + LocationType aDT, 293 | + LocationType depth, 294 | + LocationType invMaxElementDistance, 295 | + LocationType speedOfSound, 296 | + LocationType dt, 297 | + int32_t additionalOffset, 298 | + const WindowFunctionGpu* __restrict__ windowFunction, 299 | + const float* mdataGpu) 300 | + { 301 | + const int VEC_SIZE = Vec_SIZE; 302 | + float sampleAcum = 0.0f; 303 | + float weightAcum = 0.0f; 304 | + int numAdds = 0; 305 | + LocationType initialDelay = txParams.initialDelay; 306 | + uint32_t txScanlineIdx = txParams.txScanlineIdx; 307 | + 308 | + for (int32_t elemIdxX = txParams.firstActiveElementIndex.x; elemIdxX < txParams.lastActiveElementIndex.x; elemIdxX += VEC_SIZE) 309 | + { 310 | + sycl::vec channelIdx; 311 | + sycl::vec x_elem; 312 | + 313 | + #pragma unroll 314 | + for (int i = 0; i < VEC_SIZE; i +=2) { 315 | + channelIdx[i] = (elemIdxX + i) % numReceivedChannels; 316 | + channelIdx[i+1] = (elemIdxX + i + 1) % numReceivedChannels; 317 | + x_elem[i] = x_elemsDT[elemIdxX + i]; 318 | + x_elem[i + 1] = x_elemsDT[elemIdxX + i + 1]; 319 | + } 320 | + sycl::vec sample; 321 | + sycl::vec mask = (sycl::fabs(x_elem - scanline_x) <= aDT); 322 | + /*sycl spec1.2.1 mentioned: true return -1, false return 0*/ 323 | + mask *= -1; 324 | + numAdds += utils::add_vec(mask); 325 | + 326 | + sycl::vec relativeIndex = (x_elem - scanline_x) * invMaxElementDistance; 327 | + sycl::vec relativeIndexClamped = 
sycl::min(sycl::max(relativeIndex, -1.0f), 1.0f); 328 | + sycl::vec absoluteIndex = windowFunction->m_scale * (relativeIndexClamped + 1.0f); 329 | + sycl::vec absoluteIndex_int = absoluteIndex.convert(); 330 | + sycl::vec weight; 331 | + 332 | + #pragma unroll 333 | + for (int i = 0; i < VEC_SIZE; i += 2 ) { 334 | + weight[i] = mdataGpu[absoluteIndex_int[i]]; 335 | + weight[i + 1] = mdataGpu[absoluteIndex_int[i + 1]]; 336 | + } 337 | + 338 | + weight *= mask.convert(); 339 | + weightAcum += utils::add_vec(weight); 340 | + 341 | + sycl::vec delayf = initialDelay + 342 | + vec_computeDelayDTSPACE_D(dirX, dirY, dirZ, x_elem, scanline_x, depth) + additionalOffset; 343 | + sycl::vec delay = sycl::floor(delayf); 344 | + sycl::vec delay_index = delay.convert() + channelIdx*numTimesteps + 345 | + txScanlineIdx*numReceivedChannels*numTimesteps; 346 | + 347 | + delayf -= delay; 348 | + 349 | + sycl::vec RF_data; 350 | + sycl::vec RF_data_one; 351 | + 352 | + #pragma unroll 353 | + for (int j = 0; j < VEC_SIZE; j += 2) { 354 | + RF_data[j] = (float)RF[delay_index[j]]; 355 | + RF_data[j + 1] = (float)RF[delay_index[j + 1]]; 356 | + RF_data_one[j] = (float)RF[delay_index[j]+1]; 357 | + RF_data_one[j + 1] = (float)RF[delay_index[j + 1]+1]; 358 | + } 359 | + 360 | + sycl::vec mask1 = (delay < (numTimesteps - 1)); 361 | + mask1 *= -1; 362 | + sample = weight * ((1.0f - delayf) * RF_data + 363 | + delayf * RF_data_one) * mask1.convert(); 364 | + sampleAcum += utils::add_vec(sample); 365 | + 366 | + sycl::vec mask2 = (delay < numTimesteps && delayf == 0.0); 367 | + mask2 *= -1; 368 | + sample = weight * RF_data * mask2.convert(); 369 | + sampleAcum += utils::add_vec(sample); 370 | + 371 | + } 372 | + 373 | + if (numAdds > 0) 374 | + { 375 | + return sampleAcum / weightAcum * numAdds; 376 | + } 377 | + else 378 | + { 379 | + return 0; 380 | + } 381 | + } 382 | + 383 | }; 384 | } 385 | 386 | diff --git a/src/SupraLib/Beamformer/WindowFunction.h 
b/src/SupraLib/Beamformer/WindowFunction.h 387 | index 4f69c89..71c2267 100644 388 | --- a/src/SupraLib/Beamformer/WindowFunction.h 389 | +++ b/src/SupraLib/Beamformer/WindowFunction.h 390 | @@ -88,7 +88,7 @@ namespace supra 391 | { 392 | return m_numEntriesPerFunction; 393 | } 394 | - 395 | + float m_scale; 396 | private: 397 | friend WindowFunction; 398 | WindowFunctionGpu(size_t numEntriesPerFunction, const ElementType* data) 399 | @@ -96,7 +96,7 @@ namespace supra 400 | , m_data(data) 401 | , m_scale(static_cast(numEntriesPerFunction - 1)*0.5f) {}; 402 | 403 | - float m_scale; 404 | + 405 | uint32_t m_numEntriesPerFunction; 406 | const ElementType* m_data; 407 | }; 408 | @@ -136,9 +136,11 @@ namespace supra 409 | return 0; 410 | } 411 | } 412 | + 413 | + std::vector m_data; 414 | private: 415 | size_t m_numEntriesPerFunction; 416 | - std::vector m_data; 417 | + 418 | std::unique_ptr > m_dataGpu; 419 | ElementType m_scale; 420 | WindowType m_type; 421 | diff --git a/src/SupraLib/Beamformer/helper.h b/src/SupraLib/Beamformer/helper.h 422 | new file mode 100644 423 | index 0000000..15cfe21 424 | --- /dev/null 425 | +++ b/src/SupraLib/Beamformer/helper.h 426 | @@ -0,0 +1,159 @@ 427 | +//==---------- helper.hpp - SYCL sub_group helper functions ----------------==// 428 | +// 429 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 430 | +// See https://llvm.org/LICENSE.txt for license information. 
431 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 432 | +// 433 | +//===----------------------------------------------------------------------===// 434 | +#include 435 | +#include 436 | +#include 437 | + 438 | +using namespace cl::sycl; 439 | + 440 | +template struct utils { 441 | + static T1 add_vec(const vec &v); 442 | + static bool cmp_vec(const vec &v, const vec &r); 443 | + static std::string stringify_vec(const vec &v); 444 | +}; 445 | +template struct utils { 446 | + static T2 add_vec(const vec &v) { return v.s0(); } 447 | + static bool cmp_vec(const vec &v, const vec &r) { 448 | + return v.s0() == r.s0(); 449 | + } 450 | + static std::string stringify_vec(const vec &v) { 451 | + return std::to_string((T2)v.s0()); 452 | + } 453 | +}; 454 | +template struct utils { 455 | + static T2 add_vec(const vec &v) { return v.s0() + v.s1(); } 456 | + static bool cmp_vec(const vec &v, const vec &r) { 457 | + return v.s0() == r.s0() && v.s1() == r.s1(); 458 | + } 459 | + static std::string stringify_vec(const vec &v) { 460 | + return std::string("(") + std::to_string((T2)v.s0()) + ", " + 461 | + std::to_string((T2)v.s1()) + " )"; 462 | + } 463 | +}; 464 | +template struct utils { 465 | + static T2 add_vec(const vec &v) { 466 | + return v.s0() + v.s1() + v.s2() + v.s3(); 467 | + } 468 | + static bool cmp_vec(const vec &v, const vec &r) { 469 | + return v.s0() == r.s0() && v.s1() == r.s1() && v.s2() == r.s2() && 470 | + v.s3() == r.s3(); 471 | + } 472 | + static std::string stringify_vec(const vec &v) { 473 | + return std::string("(") + std::to_string((T2)v.s0()) + ", " + 474 | + std::to_string((T2)v.s1()) + std::to_string((T2)v.s2()) + ", " + 475 | + std::to_string((T2)v.s3()) + " )"; 476 | + } 477 | +}; 478 | +template struct utils { 479 | + static T2 add_vec(const vec &v) { 480 | + return v.s0() + v.s1() + v.s2() + v.s3() + v.s4() + v.s5() + v.s6() + 481 | + v.s7(); 482 | + } 483 | + static bool cmp_vec(const vec &v, const vec &r) { 484 | + return v.s0() 
== r.s0() && v.s1() == r.s1() && v.s2() == r.s2() && 485 | + v.s3() == r.s3() && v.s4() == r.s4() && v.s5() == r.s5() && 486 | + v.s6() == r.s6() && v.s7() == r.s7(); 487 | + } 488 | + static std::string stringify_vec(const vec &v) { 489 | + return std::string("(") + std::to_string((T2)v.s0()) + ", " + 490 | + std::to_string((T2)v.s1()) + std::to_string((T2)v.s2()) + ", " + 491 | + std::to_string((T2)v.s3()) + std::to_string((T2)v.s4()) + ", " + 492 | + std::to_string((T2)v.s5()) + std::to_string((T2)v.s6()) + ", " + 493 | + std::to_string((T2)v.s7()) + " )"; 494 | + } 495 | +}; 496 | + 497 | +template struct utils { 498 | + static T2 add_vec(const vec &v) { 499 | + return v.s0() + v.s1() + v.s2() + v.s3() + v.s4() + v.s5() + v.s6() + 500 | + v.s7() + v.s8() + v.s9() + v.sA() + v.sB() + v.sC() + v.sD() + 501 | + v.sE() + v.sF(); 502 | + } 503 | + static bool cmp_vec(const vec &v, const vec &r) { 504 | + return v.s0() == r.s0() && v.s1() == r.s1() && v.s2() == r.s2() && 505 | + v.s3() == r.s3() && v.s4() == r.s4() && v.s5() == r.s5() && 506 | + v.s6() == r.s6() && v.s7() == r.s7() && v.s8() == r.s8() && 507 | + v.s9() == r.s9() && v.sA() == r.sA() && v.sB() == r.sB() && 508 | + v.sC() == r.sC() && v.sD() == r.sD() && v.sE() == r.sE() && 509 | + v.sF() == r.sF(); 510 | + } 511 | + static std::string stringify_vec(const vec &v) { 512 | + return std::string("(") + std::to_string((T2)v.s0()) + ", " + 513 | + std::to_string((T2)v.s1()) + std::to_string((T2)v.s2()) + ", " + 514 | + std::to_string((T2)v.s3()) + std::to_string((T2)v.s4()) + ", " + 515 | + std::to_string((T2)v.s5()) + std::to_string((T2)v.s6()) + ", " + 516 | + std::to_string((T2)v.s7()) + std::to_string((T2)v.s8()) + ", " + 517 | + std::to_string((T2)v.s9()) + std::to_string((T2)v.sA()) + ", " + 518 | + std::to_string((T2)v.sB()) + std::to_string((T2)v.sC()) + ", " + 519 | + std::to_string((T2)v.sE()) + std::to_string((T2)v.sD()) + ", " + 520 | + std::to_string((T2)v.sF()) + " )"; 521 | + } 522 | +}; 523 | 
+ 524 | +template 525 | +inline void exit_if_not_equal(T val, T ref, const char *name) { 526 | + if (std::is_floating_point::value) { 527 | + if (std::fabs(val - ref) > 0.01) { 528 | + std::cout << "Unexpected result for " << name << ": " << (double)val 529 | + << " expected value: " << (double)ref << std::endl; 530 | + exit(1); 531 | + } 532 | + } else { 533 | + if ((val - ref) != 0) { 534 | + std::cout << "Unexpected result for " << name << ": " << (long)val 535 | + << " expected value: " << (long)ref << std::endl; 536 | + exit(1); 537 | + } 538 | + } 539 | +} 540 | + 541 | +template <> 542 | +inline void exit_if_not_equal(half val, half ref, const char *name) { 543 | + int16_t cmp_val = reinterpret_cast(val); 544 | + int16_t cmp_ref = reinterpret_cast(ref); 545 | + if (std::abs(cmp_val - cmp_ref) > 1) { 546 | + std::cout << "Unexpected result for " << name << ": " << (float)val 547 | + << " expected value: " << (float)ref << std::endl; 548 | + exit(1); 549 | + } 550 | +} 551 | + 552 | +template 553 | +inline void exit_if_not_equal_vec(vec val, vec ref, const char *name) { 554 | + if (!utils::cmp_vec(ref, val)) { 555 | + std::cout << "Unexpected result for " << name << ": " 556 | + << utils::stringify_vec(val) 557 | + << " expected value: " << utils::stringify_vec(ref) 558 | + << std::endl; 559 | + 560 | + exit(1); 561 | + } 562 | +} 563 | + 564 | +/* CPU returns max number of SG, GPU returns max SG size for 565 | + * CL_DEVICE_MAX_NUM_SUB_GROUPS device parameter. This function aligns the 566 | + * value. 
567 | + * */ 568 | +inline size_t get_sg_size(const device &Device) { 569 | + size_t max_num_sg = Device.get_info(); 570 | + if (Device.get_info() == info::device_type::cpu) { 571 | + size_t max_wg_size = Device.get_info(); 572 | + return max_wg_size / max_num_sg; 573 | + } 574 | + if (Device.get_info() == info::device_type::gpu) { 575 | + return max_num_sg; 576 | + } 577 | + std::cout << "Unexpected deive type" << std::endl; 578 | + exit(1); 579 | +} 580 | + 581 | +inline bool core_sg_supported(const device &Device) { 582 | + return (Device.has_extension("cl_khr_subgroups") || 583 | + Device.get_info().find(" 2.1") != 584 | + string_class::npos); 585 | +} 586 | -- 587 | 2.17.1 588 | 589 | -------------------------------------------------------------------------------- /0004-add-SYCL.patch: -------------------------------------------------------------------------------- 1 | From 0cd01e7a8a1a4052059f7dee668289261c595eaa Mon Sep 17 00:00:00 2001 2 | From: wangyon1 3 | Date: Fri, 5 Feb 2021 09:56:45 +0800 4 | Subject: [PATCH 4/4] add SYCL 5 | 6 | --- 7 | src/CommandlineInterface/CMakeLists.txt | 2 +- 8 | src/GraphicInterface/CMakeLists.txt | 2 +- 9 | src/RestInterface/CMakeLists.txt | 6 +- 10 | src/RosInterface/CMakeLists.txt | 2 +- 11 | src/SupraExecutor/CMakeLists.txt | 2 +- 12 | src/SupraLib/Beamformer/Beamformer.cpp | 2 +- 13 | src/SupraLib/Beamformer/BeamformingNode.cpp | 12 ++-- 14 | src/SupraLib/Beamformer/BeamformingNode.h | 6 +- 15 | .../Beamformer/HilbertFirEnvelope.dp.cpp | 8 +-- 16 | src/SupraLib/Beamformer/LogCompressor.dp.cpp | 7 +-- 17 | ...merCuda.dp.cpp => RxBeamformerSYCL.dp.cpp} | 56 ++++++------------- 18 | ...{RxBeamformerCuda.h => RxBeamformerSYCL.h} | 12 ++-- 19 | src/SupraLib/Beamformer/ScanConverter.dp.cpp | 7 +-- 20 | src/SupraLib/CMakeLists.txt | 4 +- 21 | src/Wrapper/CMakeLists.txt | 2 +- 22 | 15 files changed, 53 insertions(+), 77 deletions(-) 23 | rename src/SupraLib/Beamformer/{RxBeamformerCuda.dp.cpp => RxBeamformerSYCL.dp.cpp} (92%) 24 
| rename src/SupraLib/Beamformer/{RxBeamformerCuda.h => RxBeamformerSYCL.h} (91%) 25 | 26 | diff --git a/src/CommandlineInterface/CMakeLists.txt b/src/CommandlineInterface/CMakeLists.txt 27 | index a15ba72..bc8b110 100644 28 | --- a/src/CommandlineInterface/CMakeLists.txt 29 | +++ b/src/CommandlineInterface/CMakeLists.txt 30 | @@ -33,7 +33,7 @@ TARGET_COMPILE_DEFINITIONS(SUPRA_CMD 31 | TARGET_LINK_LIBRARIES(SUPRA_CMD 32 | ${SUPRA_Lib_LIBRARIES} 33 | ) 34 | -set_property(TARGET SUPRA_CMD PROPERTY CXX_STANDARD 11) 35 | +set_property(TARGET SUPRA_CMD PROPERTY CXX_STANDARD 17) 36 | set_property(TARGET SUPRA_CMD PROPERTY CXX_STANDARD_REQUIRED ON) 37 | 38 | add_dependencies(SUPRA_CMD SUPRA_Lib) 39 | diff --git a/src/GraphicInterface/CMakeLists.txt b/src/GraphicInterface/CMakeLists.txt 40 | index 29a59f5..fa149d9 100644 41 | --- a/src/GraphicInterface/CMakeLists.txt 42 | +++ b/src/GraphicInterface/CMakeLists.txt 43 | @@ -156,7 +156,7 @@ TARGET_LINK_LIBRARIES(SUPRA_GUI 44 | ${CAMPVIS_LIB} 45 | ${NodeEditor_LIBRARIES} 46 | ) 47 | -set_property(TARGET SUPRA_GUI PROPERTY CXX_STANDARD 11) 48 | +set_property(TARGET SUPRA_GUI PROPERTY CXX_STANDARD 17) 49 | set_property(TARGET SUPRA_GUI PROPERTY CXX_STANDARD_REQUIRED ON) 50 | 51 | add_dependencies(SUPRA_GUI SUPRA_Lib NodeEditor) 52 | diff --git a/src/RestInterface/CMakeLists.txt b/src/RestInterface/CMakeLists.txt 53 | index fb8fa95..94a417e 100644 54 | --- a/src/RestInterface/CMakeLists.txt 55 | +++ b/src/RestInterface/CMakeLists.txt 56 | @@ -45,13 +45,13 @@ endif() 57 | if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") 58 | 59 | message("-- configuring clang options") 60 | - set(CMAKE_CXX_FLAGS "-arch x86_64 -std=c++11 -stdlib=libc++ -DBOOST_LOG_DYN_LINK -Wno-deprecated-declarations") 61 | + set(CMAKE_CXX_FLAGS "-arch x86_64 -std=c++17 -stdlib=libc++ -DBOOST_LOG_DYN_LINK -Wno-deprecated-declarations") 62 | 63 | elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") 64 | 65 | message("-- configuring gcc options") 66 | 67 | - set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} -std=gnu++11 -DBOOST_LOG_DYN_LINK") 68 | + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17 -DBOOST_LOG_DYN_LINK") 69 | 70 | endif() 71 | 72 | @@ -129,7 +129,7 @@ TARGET_LINK_LIBRARIES(SUPRA_REST 73 | ) 74 | TARGET_COMPILE_DEFINITIONS(SUPRA_REST 75 | PRIVATE ${SUPRA_Lib_DEFINES}) 76 | -set_property(TARGET SUPRA_REST PROPERTY CXX_STANDARD 11) 77 | +set_property(TARGET SUPRA_REST PROPERTY CXX_STANDARD 17) 78 | set_property(TARGET SUPRA_REST PROPERTY CXX_STANDARD_REQUIRED ON) 79 | 80 | add_dependencies(SUPRA_REST SUPRA_Lib) 81 | diff --git a/src/RosInterface/CMakeLists.txt b/src/RosInterface/CMakeLists.txt 82 | index 37eab21..dd769c6 100644 83 | --- a/src/RosInterface/CMakeLists.txt 84 | +++ b/src/RosInterface/CMakeLists.txt 85 | @@ -39,7 +39,7 @@ TARGET_LINK_LIBRARIES(SUPRA_ROS 86 | ) 87 | TARGET_COMPILE_DEFINITIONS(SUPRA_ROS 88 | PRIVATE ${SUPRA_Lib_DEFINES}) 89 | -set_property(TARGET SUPRA_ROS PROPERTY CXX_STANDARD 11) 90 | +set_property(TARGET SUPRA_ROS PROPERTY CXX_STANDARD 17) 91 | set_property(TARGET SUPRA_ROS PROPERTY CXX_STANDARD_REQUIRED ON) 92 | 93 | add_dependencies(SUPRA_ROS SUPRA_Lib) 94 | diff --git a/src/SupraExecutor/CMakeLists.txt b/src/SupraExecutor/CMakeLists.txt 95 | index 978cb3c..049bea6 100644 96 | --- a/src/SupraExecutor/CMakeLists.txt 97 | +++ b/src/SupraExecutor/CMakeLists.txt 98 | @@ -31,7 +31,7 @@ TARGET_COMPILE_DEFINITIONS(SUPRA_EXECUTOR 99 | TARGET_LINK_LIBRARIES(SUPRA_EXECUTOR 100 | ${SUPRA_Lib_LIBRARIES} 101 | ) 102 | -set_property(TARGET SUPRA_EXECUTOR PROPERTY CXX_STANDARD 11) 103 | +set_property(TARGET SUPRA_EXECUTOR PROPERTY CXX_STANDARD 17) 104 | set_property(TARGET SUPRA_EXECUTOR PROPERTY CXX_STANDARD_REQUIRED ON) 105 | 106 | add_dependencies(SUPRA_EXECUTOR SUPRA_Lib) 107 | diff --git a/src/SupraLib/Beamformer/Beamformer.cpp b/src/SupraLib/Beamformer/Beamformer.cpp 108 | index e83a29d..5489c7f 100644 109 | --- a/src/SupraLib/Beamformer/Beamformer.cpp 110 | +++ b/src/SupraLib/Beamformer/Beamformer.cpp 111 | 
@@ -13,7 +13,7 @@ 112 | // ================================================================================================ 113 | 114 | #include "Beamformer.h" 115 | -#include "RxBeamformerCuda.h" 116 | +#include "RxBeamformerSYCL.h" 117 | 118 | #include 119 | #include 120 | diff --git a/src/SupraLib/Beamformer/BeamformingNode.cpp b/src/SupraLib/Beamformer/BeamformingNode.cpp 121 | index 5a49f7a..6b79102 100644 122 | --- a/src/SupraLib/Beamformer/BeamformingNode.cpp 123 | +++ b/src/SupraLib/Beamformer/BeamformingNode.cpp 124 | @@ -13,7 +13,7 @@ 125 | 126 | #include "USImage.h" 127 | #include "USRawData.h" 128 | -#include "RxBeamformerCuda.h" 129 | +#include "RxBeamformerSYCL.h" 130 | 131 | #include 132 | #include 133 | @@ -146,7 +146,7 @@ namespace supra 134 | } 135 | if (needNewBeamformer) 136 | { 137 | - m_beamformer = std::make_shared(*m_lastSeenBeamformerParameters); 138 | + m_beamformer = std::make_shared(*m_lastSeenBeamformerParameters); 139 | } 140 | 141 | switch (pRawData->getDataType()) 142 | @@ -200,18 +200,18 @@ namespace supra 143 | void BeamformingNode::readBeamformerType() 144 | { 145 | string beamformer = m_configurationDictionary.get("beamformerType"); 146 | - m_beamformerType = RxBeamformerCuda::DelayAndSum; 147 | + m_beamformerType = RxBeamformerSYCL::DelayAndSum; 148 | if (beamformer == "DelayAndSum") 149 | { 150 | - m_beamformerType = RxBeamformerCuda::DelayAndSum; 151 | + m_beamformerType = RxBeamformerSYCL::DelayAndSum; 152 | } 153 | else if (beamformer == "DelayAndStdDev") 154 | { 155 | - m_beamformerType = RxBeamformerCuda::DelayAndStdDev; 156 | + m_beamformerType = RxBeamformerSYCL::DelayAndStdDev; 157 | } 158 | else if (beamformer == "TestSignal") 159 | { 160 | - m_beamformerType = RxBeamformerCuda::TestSignal; 161 | + m_beamformerType = RxBeamformerSYCL::TestSignal; 162 | } 163 | } 164 | 165 | diff --git a/src/SupraLib/Beamformer/BeamformingNode.h b/src/SupraLib/Beamformer/BeamformingNode.h 166 | index 314eb24..72b0109 100644 167 | --- 
a/src/SupraLib/Beamformer/BeamformingNode.h 168 | +++ b/src/SupraLib/Beamformer/BeamformingNode.h 169 | @@ -24,7 +24,7 @@ 170 | #include "RecordObject.h" 171 | #include "SyncRecordObject.h" 172 | #include "RxBeamformerParameters.h" 173 | -#include "RxBeamformerCuda.h" 174 | +#include "RxBeamformerSYCL.h" 175 | 176 | namespace supra 177 | { 178 | @@ -67,7 +67,7 @@ namespace supra 179 | void readBeamformerType(); 180 | void updateImageProperties(std::shared_ptr imageProperties); 181 | 182 | - std::shared_ptr m_beamformer; 183 | + std::shared_ptr m_beamformer; 184 | std::shared_ptr m_lastSeenBeamformerParameters; 185 | 186 | std::shared_ptr m_lastSeenImageProperties; 187 | @@ -81,7 +81,7 @@ namespace supra 188 | double m_windowParameter; 189 | double m_speedOfSoundMMperS; 190 | DataType m_outputType; 191 | - RxBeamformerCuda::RxSampleBeamformer m_beamformerType; 192 | + RxBeamformerSYCL::RxSampleBeamformer m_beamformerType; 193 | bool m_interpolateTransmits; 194 | int32_t m_additionalOffset; 195 | }; 196 | diff --git a/src/SupraLib/Beamformer/HilbertFirEnvelope.dp.cpp b/src/SupraLib/Beamformer/HilbertFirEnvelope.dp.cpp 197 | index 99cf7be..da32ae6 100644 198 | --- a/src/SupraLib/Beamformer/HilbertFirEnvelope.dp.cpp 199 | +++ b/src/SupraLib/Beamformer/HilbertFirEnvelope.dp.cpp 200 | @@ -9,17 +9,15 @@ 201 | // 202 | // ================================================================================================ 203 | 204 | +#include 205 | +#include 206 | + 207 | #include 208 | -#include 209 | #include "HilbertFirEnvelope.h" 210 | #include 211 | #include 212 | #include "helper.h" 213 | 214 | -#include 215 | -#include 216 | -#include 217 | - 218 | #include 219 | 220 | using namespace std; 221 | diff --git a/src/SupraLib/Beamformer/LogCompressor.dp.cpp b/src/SupraLib/Beamformer/LogCompressor.dp.cpp 222 | index 70bc68b..eb33e66 100644 223 | --- a/src/SupraLib/Beamformer/LogCompressor.dp.cpp 224 | +++ b/src/SupraLib/Beamformer/LogCompressor.dp.cpp 225 | @@ -9,14 +9,13 @@ 
226 | // 227 | // ================================================================================================ 228 | 229 | +#include 230 | +#include 231 | + 232 | #include 233 | -#include 234 | #include "LogCompressor.h" 235 | #include 236 | 237 | -#include 238 | -#include 239 | -#include 240 | #include 241 | 242 | using namespace std; 243 | diff --git a/src/SupraLib/Beamformer/RxBeamformerCuda.dp.cpp b/src/SupraLib/Beamformer/RxBeamformerSYCL.dp.cpp 244 | similarity index 92% 245 | rename from src/SupraLib/Beamformer/RxBeamformerCuda.dp.cpp 246 | rename to src/SupraLib/Beamformer/RxBeamformerSYCL.dp.cpp 247 | index 7c7a4b7..f9ac5f7 100644 248 | --- a/src/SupraLib/Beamformer/RxBeamformerCuda.dp.cpp 249 | +++ b/src/SupraLib/Beamformer/RxBeamformerSYCL.dp.cpp 250 | @@ -10,7 +10,7 @@ 251 | // ================================================================================================ 252 | #include 253 | #include 254 | -#include "RxBeamformerCuda.h" 255 | +#include "RxBeamformerSYCL.h" 256 | #include "USImage.h" 257 | #include "USRawData.h" 258 | #include "RxSampleBeamformerDelayAndSum.h" 259 | @@ -23,7 +23,7 @@ 260 | //TODO ALL ELEMENT/SCANLINE Y positons are actually Z! 
Change all variable names accordingly 261 | namespace supra 262 | { 263 | - RxBeamformerCuda::RxBeamformerCuda(const RxBeamformerParameters & parameters) 264 | + RxBeamformerSYCL::RxBeamformerSYCL(const RxBeamformerParameters & parameters) 265 | : m_windowFunction(nullptr) 266 | { 267 | 268 | @@ -47,11 +47,11 @@ namespace supra 269 | m_pRxElementYs = std::unique_ptr>(new Container(LocationGpu, &q_ct1, parameters.getRxElementYs())); 270 | } 271 | 272 | - RxBeamformerCuda::~RxBeamformerCuda() 273 | + RxBeamformerSYCL::~RxBeamformerSYCL() 274 | { 275 | } 276 | 277 | - void RxBeamformerCuda::convertToDtSpace(double dt, double speedOfSoundMMperS, size_t numTransducerElements) const 278 | + void RxBeamformerSYCL::convertToDtSpace(double dt, double speedOfSoundMMperS, size_t numTransducerElements) const 279 | { 280 | if (m_lastSeenDt != dt || m_speedOfSoundMMperS != speedOfSoundMMperS) 281 | { 282 | @@ -387,7 +387,7 @@ namespace supra 283 | 284 | 285 | template 286 | - void rxBeamformingDTspaceCuda3D(bool interpolateRFlines, bool interpolateBetweenTransmits, size_t numTransducerElements, vec2s elementLayout, size_t numReceivedChannels, size_t numTimesteps, 287 | + void rxBeamformingDTspaceSYCL3D(bool interpolateRFlines, bool interpolateBetweenTransmits, size_t numTransducerElements, vec2s elementLayout, size_t numReceivedChannels, size_t numTimesteps, 288 | const RFType* RF, size_t numTxScanlines, size_t numRxScanlines, const ScanlineRxParameters3D* scanlines, size_t numZs, const LocationType* zs, 289 | const LocationType* x_elems, const LocationType* y_elems, LocationType speedOfSound, LocationType dt, uint32_t additionalOffset, LocationType F, 290 | const WindowFunctionGpu windowFunction, sycl::queue* stream, ResultType* s) 291 | @@ -474,7 +474,7 @@ namespace supra 292 | } 293 | 294 | template 295 | - void rxBeamformingDTspaceCuda(bool interpolateRFlines, bool interpolateBetweenTransmits, size_t numTransducerElements, size_t numReceivedChannels, size_t numTimesteps, 
const RFType* RF, 296 | + void rxBeamformingDTspaceSYCL(bool interpolateRFlines, bool interpolateBetweenTransmits, size_t numTransducerElements, size_t numReceivedChannels, size_t numTimesteps, const RFType* RF, 297 | size_t numTxScanlines, size_t numRxScanlines, const ScanlineRxParameters3D* scanlines, size_t numZs, const LocationType* zs, const LocationType* x_elems, 298 | LocationType speedOfSound, LocationType dt, uint32_t additionalOffset, LocationType F, const WindowFunctionGpu windowFunction, sycl::queue* stream, ResultType* s, LocationType *mdataGpu) 299 | { 300 | @@ -540,8 +540,8 @@ namespace supra 301 | } 302 | 303 | template 304 | - shared_ptr RxBeamformerCuda::performRxBeamforming( 305 | - RxBeamformerCuda::RxSampleBeamformer sampleBeamformer, 306 | + shared_ptr RxBeamformerSYCL::performRxBeamforming( 307 | + RxBeamformerSYCL::RxSampleBeamformer sampleBeamformer, 308 | shared_ptr rawData, 309 | double fNumber, 310 | double speedOfSoundMMperS, 311 | @@ -573,29 +573,9 @@ namespace supra 312 | }); 313 | gRawData->getStream()->wait(); 314 | 315 | - auto beamformingFunction3D = &rxBeamformingDTspaceCuda3D; 316 | - auto beamformingFunction2D = &rxBeamformingDTspaceCuda; 317 | + auto beamformingFunction3D = &rxBeamformingDTspaceSYCL3D; 318 | + auto beamformingFunction2D = &rxBeamformingDTspaceSYCL; 319 | 320 | - // We don't use DelayAndStdDev and TestSignal algorthm, so below code are commented. 
321 | - /*switch (sampleBeamformer) 322 | - { 323 | - case DelayAndSum: 324 | - beamformingFunction3D = &rxBeamformingDTspaceCuda3D; 325 | - beamformingFunction2D = &rxBeamformingDTspaceCuda; 326 | - break; 327 | - case DelayAndStdDev: 328 | - beamformingFunction3D = &rxBeamformingDTspaceCuda3D; 329 | - beamformingFunction2D = &rxBeamformingDTspaceCuda; 330 | - break; 331 | - case TestSignal: 332 | - beamformingFunction3D = &rxBeamformingDTspaceCuda3D; 333 | - beamformingFunction2D = &rxBeamformingDTspaceCuda; 334 | - break; 335 | - case INVALID: 336 | - default: 337 | - beamformingFunction3D = &rxBeamformingDTspaceCuda3D; 338 | - beamformingFunction2D = &rxBeamformingDTspaceCuda; 339 | - }*/ 340 | 341 | 342 | convertToDtSpace(dt, speedOfSoundMMperS, rawData->getNumElements()); 343 | @@ -669,8 +649,8 @@ namespace supra 344 | } 345 | 346 | template 347 | - shared_ptr RxBeamformerCuda::performRxBeamforming( 348 | - RxBeamformerCuda::RxSampleBeamformer sampleBeamformer, 349 | + shared_ptr RxBeamformerSYCL::performRxBeamforming( 350 | + RxBeamformerSYCL::RxSampleBeamformer sampleBeamformer, 351 | shared_ptr rawData, 352 | double fNumber, 353 | double speedOfSoundMMperS, 354 | @@ -679,8 +659,8 @@ namespace supra 355 | bool interpolateBetweenTransmits, 356 | int32_t additionalOffset) const; 357 | template 358 | - shared_ptr RxBeamformerCuda::performRxBeamforming( 359 | - RxBeamformerCuda::RxSampleBeamformer sampleBeamformer, 360 | + shared_ptr RxBeamformerSYCL::performRxBeamforming( 361 | + RxBeamformerSYCL::RxSampleBeamformer sampleBeamformer, 362 | shared_ptr rawData, 363 | double fNumber, 364 | double speedOfSoundMMperS, 365 | @@ -689,8 +669,8 @@ namespace supra 366 | bool interpolateBetweenTransmits, 367 | int32_t additionalOffset) const; 368 | template 369 | - shared_ptr RxBeamformerCuda::performRxBeamforming( 370 | - RxBeamformerCuda::RxSampleBeamformer sampleBeamformer, 371 | + shared_ptr RxBeamformerSYCL::performRxBeamforming( 372 | + 
RxBeamformerSYCL::RxSampleBeamformer sampleBeamformer, 373 | shared_ptr rawData, 374 | double fNumber, 375 | double speedOfSoundMMperS, 376 | @@ -699,8 +679,8 @@ namespace supra 377 | bool interpolateBetweenTransmits, 378 | int32_t additionalOffset) const; 379 | template 380 | - shared_ptr RxBeamformerCuda::performRxBeamforming( 381 | - RxBeamformerCuda::RxSampleBeamformer sampleBeamformer, 382 | + shared_ptr RxBeamformerSYCL::performRxBeamforming( 383 | + RxBeamformerSYCL::RxSampleBeamformer sampleBeamformer, 384 | shared_ptr rawData, 385 | double fNumber, 386 | double speedOfSoundMMperS, 387 | diff --git a/src/SupraLib/Beamformer/RxBeamformerCuda.h b/src/SupraLib/Beamformer/RxBeamformerSYCL.h 388 | similarity index 91% 389 | rename from src/SupraLib/Beamformer/RxBeamformerCuda.h 390 | rename to src/SupraLib/Beamformer/RxBeamformerSYCL.h 391 | index 5d9f3cf..495f030 100644 392 | --- a/src/SupraLib/Beamformer/RxBeamformerCuda.h 393 | +++ b/src/SupraLib/Beamformer/RxBeamformerSYCL.h 394 | @@ -9,8 +9,8 @@ 395 | // 396 | // ================================================================================================ 397 | 398 | -#ifndef __RXBEAMFORMERCUDA_H__ 399 | -#define __RXBEAMFORMERCUDA_H__ 400 | +#ifndef __RXBEAMFORMERSYCL_H__ 401 | +#define __RXBEAMFORMERSYCL_H__ 402 | 403 | #include "USImage.h" 404 | #include "WindowFunction.h" 405 | @@ -25,7 +25,7 @@ namespace supra 406 | 407 | using std::shared_ptr; 408 | 409 | - class RxBeamformerCuda 410 | + class RxBeamformerSYCL 411 | { 412 | public: 413 | enum RxSampleBeamformer { 414 | @@ -35,8 +35,8 @@ namespace supra 415 | INVALID 416 | }; 417 | 418 | - RxBeamformerCuda(const RxBeamformerParameters& parameters); 419 | - ~RxBeamformerCuda(); 420 | + RxBeamformerSYCL(const RxBeamformerParameters& parameters); 421 | + ~RxBeamformerSYCL(); 422 | 423 | // perform the receive beamforming 424 | template 425 | @@ -78,4 +78,4 @@ namespace supra 426 | }; 427 | } 428 | 429 | -#endif //!__RXBEAMFORMERCUDA_H__ 430 | +#endif 
//!__RXBEAMFORMERSYCL_H__ 431 | diff --git a/src/SupraLib/Beamformer/ScanConverter.dp.cpp b/src/SupraLib/Beamformer/ScanConverter.dp.cpp 432 | index 43f4504..a16e2d8 100644 433 | --- a/src/SupraLib/Beamformer/ScanConverter.dp.cpp 434 | +++ b/src/SupraLib/Beamformer/ScanConverter.dp.cpp 435 | @@ -24,17 +24,16 @@ 436 | // 437 | // ================================================================================================ 438 | 439 | +#include 440 | +#include 441 | + 442 | #include 443 | -#include 444 | #include "ScanConverter.h" 445 | #include 446 | #include 447 | #include 448 | #include 449 | 450 | -#include 451 | -#include 452 | -#include 453 | #include 454 | 455 | using namespace std; 456 | diff --git a/src/SupraLib/CMakeLists.txt b/src/SupraLib/CMakeLists.txt 457 | index a2472b9..56a7f6a 100644 458 | --- a/src/SupraLib/CMakeLists.txt 459 | +++ b/src/SupraLib/CMakeLists.txt 460 | @@ -193,14 +193,14 @@ IF(SUPRA_BEAMFORMER) 461 | InputOutput/UltrasoundInterfaceRawDataMock.cpp 462 | InputOutput/UltrasoundInterfaceBeamformedMock.cpp) 463 | SET(SUPRA_Lib_CUDASOURCE ${SUPRA_Lib_CUDASOURCE} 464 | - Beamformer/RxBeamformerCuda.dp.cpp 465 | + Beamformer/RxBeamformerSYCL.dp.cpp 466 | Beamformer/HilbertFirEnvelope.dp.cpp 467 | Beamformer/LogCompressor.dp.cpp 468 | Beamformer/ScanConverter.dp.cpp) 469 | SET(SUPRA_Lib_HEADERS ${SUPRA_Lib_HEADERS} 470 | Beamformer/Sequencer.h 471 | Beamformer/Beamformer.h 472 | - Beamformer/RxBeamformerCuda.h 473 | + Beamformer/RxBeamformerSYCL.h 474 | Beamformer/RxBeamformerCommon.h 475 | Beamformer/RxSampleBeamformerDelayAndSum.h 476 | Beamformer/RxSampleBeamformerDelayAndStdDev.h 477 | diff --git a/src/Wrapper/CMakeLists.txt b/src/Wrapper/CMakeLists.txt 478 | index 2562392..749ba26 100644 479 | --- a/src/Wrapper/CMakeLists.txt 480 | +++ b/src/Wrapper/CMakeLists.txt 481 | @@ -28,7 +28,7 @@ TARGET_COMPILE_DEFINITIONS(SUPRA_Wrapper 482 | TARGET_LINK_LIBRARIES(SUPRA_Wrapper 483 | ${SUPRA_Lib_LIBRARIES} 484 | ) 485 | -set_property(TARGET 
SUPRA_Wrapper PROPERTY CXX_STANDARD 11) 486 | +set_property(TARGET SUPRA_Wrapper PROPERTY CXX_STANDARD 17) 487 | set_property(TARGET SUPRA_Wrapper PROPERTY CXX_STANDARD_REQUIRED ON) 488 | 489 | add_dependencies(SUPRA_Wrapper SUPRA_Lib) 490 | -- 491 | 2.17.1 492 | 493 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2020, Intel Corporation 2 | 3 | SPDX-License-Identifier: GPL-2.0-or-later 4 | 5 | 6 | ### GNU LESSER GENERAL PUBLIC LICENSE 7 | 8 | Version 2.1, February 1999 9 | 10 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 11 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 12 | 13 | Everyone is permitted to copy and distribute verbatim copies 14 | of this license document, but changing it is not allowed. 15 | 16 | [This is the first released version of the Lesser GPL. It also counts 17 | as the successor of the GNU Library Public License, version 2, hence 18 | the version number 2.1.] 19 | 20 | ### Preamble 21 | 22 | The licenses for most software are designed to take away your freedom 23 | to share and change it. By contrast, the GNU General Public Licenses 24 | are intended to guarantee your freedom to share and change free 25 | software--to make sure the software is free for all its users. 26 | 27 | This license, the Lesser General Public License, applies to some 28 | specially designated software packages--typically libraries--of the 29 | Free Software Foundation and other authors who decide to use it. You 30 | can use it too, but we suggest you first think carefully about whether 31 | this license or the ordinary General Public License is the better 32 | strategy to use in any particular case, based on the explanations 33 | below. 34 | 35 | When we speak of free software, we are referring to freedom of use, 36 | not price. 
Our General Public Licenses are designed to make sure that 37 | you have the freedom to distribute copies of free software (and charge 38 | for this service if you wish); that you receive source code or can get 39 | it if you want it; that you can change the software and use pieces of 40 | it in new free programs; and that you are informed that you can do 41 | these things. 42 | 43 | To protect your rights, we need to make restrictions that forbid 44 | distributors to deny you these rights or to ask you to surrender these 45 | rights. These restrictions translate to certain responsibilities for 46 | you if you distribute copies of the library or if you modify it. 47 | 48 | For example, if you distribute copies of the library, whether gratis 49 | or for a fee, you must give the recipients all the rights that we gave 50 | you. You must make sure that they, too, receive or can get the source 51 | code. If you link other code with the library, you must provide 52 | complete object files to the recipients, so that they can relink them 53 | with the library after making changes to the library and recompiling 54 | it. And you must show them these terms so they know their rights. 55 | 56 | We protect your rights with a two-step method: (1) we copyright the 57 | library, and (2) we offer you this license, which gives you legal 58 | permission to copy, distribute and/or modify the library. 59 | 60 | To protect each distributor, we want to make it very clear that there 61 | is no warranty for the free library. Also, if the library is modified 62 | by someone else and passed on, the recipients should know that what 63 | they have is not the original version, so that the original author's 64 | reputation will not be affected by problems that might be introduced 65 | by others. 66 | 67 | Finally, software patents pose a constant threat to the existence of 68 | any free program. 
We wish to make sure that a company cannot 69 | effectively restrict the users of a free program by obtaining a 70 | restrictive license from a patent holder. Therefore, we insist that 71 | any patent license obtained for a version of the library must be 72 | consistent with the full freedom of use specified in this license. 73 | 74 | Most GNU software, including some libraries, is covered by the 75 | ordinary GNU General Public License. This license, the GNU Lesser 76 | General Public License, applies to certain designated libraries, and 77 | is quite different from the ordinary General Public License. We use 78 | this license for certain libraries in order to permit linking those 79 | libraries into non-free programs. 80 | 81 | When a program is linked with a library, whether statically or using a 82 | shared library, the combination of the two is legally speaking a 83 | combined work, a derivative of the original library. The ordinary 84 | General Public License therefore permits such linking only if the 85 | entire combination fits its criteria of freedom. The Lesser General 86 | Public License permits more lax criteria for linking other code with 87 | the library. 88 | 89 | We call this license the "Lesser" General Public License because it 90 | does Less to protect the user's freedom than the ordinary General 91 | Public License. It also provides other free software developers Less 92 | of an advantage over competing non-free programs. These disadvantages 93 | are the reason we use the ordinary General Public License for many 94 | libraries. However, the Lesser license provides advantages in certain 95 | special circumstances. 96 | 97 | For example, on rare occasions, there may be a special need to 98 | encourage the widest possible use of a certain library, so that it 99 | becomes a de-facto standard. To achieve this, non-free programs must 100 | be allowed to use the library. 
A more frequent case is that a free 101 | library does the same job as widely used non-free libraries. In this 102 | case, there is little to gain by limiting the free library to free 103 | software only, so we use the Lesser General Public License. 104 | 105 | In other cases, permission to use a particular library in non-free 106 | programs enables a greater number of people to use a large body of 107 | free software. For example, permission to use the GNU C Library in 108 | non-free programs enables many more people to use the whole GNU 109 | operating system, as well as its variant, the GNU/Linux operating 110 | system. 111 | 112 | Although the Lesser General Public License is Less protective of the 113 | users' freedom, it does ensure that the user of a program that is 114 | linked with the Library has the freedom and the wherewithal to run 115 | that program using a modified version of the Library. 116 | 117 | The precise terms and conditions for copying, distribution and 118 | modification follow. Pay close attention to the difference between a 119 | "work based on the library" and a "work that uses the library". The 120 | former contains code derived from the library, whereas the latter must 121 | be combined with the library in order to run. 122 | 123 | ### TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 124 | 125 | **0.** This License Agreement applies to any software library or other 126 | program which contains a notice placed by the copyright holder or 127 | other authorized party saying it may be distributed under the terms of 128 | this Lesser General Public License (also called "this License"). Each 129 | licensee is addressed as "you". 130 | 131 | A "library" means a collection of software functions and/or data 132 | prepared so as to be conveniently linked with application programs 133 | (which use some of those functions and data) to form executables. 
134 | 135 | The "Library", below, refers to any such software library or work 136 | which has been distributed under these terms. A "work based on the 137 | Library" means either the Library or any derivative work under 138 | copyright law: that is to say, a work containing the Library or a 139 | portion of it, either verbatim or with modifications and/or translated 140 | straightforwardly into another language. (Hereinafter, translation is 141 | included without limitation in the term "modification".) 142 | 143 | "Source code" for a work means the preferred form of the work for 144 | making modifications to it. For a library, complete source code means 145 | all the source code for all modules it contains, plus any associated 146 | interface definition files, plus the scripts used to control 147 | compilation and installation of the library. 148 | 149 | Activities other than copying, distribution and modification are not 150 | covered by this License; they are outside its scope. The act of 151 | running a program using the Library is not restricted, and output from 152 | such a program is covered only if its contents constitute a work based 153 | on the Library (independent of the use of the Library in a tool for 154 | writing it). Whether that is true depends on what the Library does and 155 | what the program that uses the Library does. 156 | 157 | **1.** You may copy and distribute verbatim copies of the Library's 158 | complete source code as you receive it, in any medium, provided that 159 | you conspicuously and appropriately publish on each copy an 160 | appropriate copyright notice and disclaimer of warranty; keep intact 161 | all the notices that refer to this License and to the absence of any 162 | warranty; and distribute a copy of this License along with the 163 | Library. 164 | 165 | You may charge a fee for the physical act of transferring a copy, and 166 | you may at your option offer warranty protection in exchange for a 167 | fee. 
168 | 169 | **2.** You may modify your copy or copies of the Library or any 170 | portion of it, thus forming a work based on the Library, and copy and 171 | distribute such modifications or work under the terms of Section 1 172 | above, provided that you also meet all of these conditions: 173 | 174 | - **a)** The modified work must itself be a software library. 175 | - **b)** You must cause the files modified to carry prominent 176 | notices stating that you changed the files and the date of 177 | any change. 178 | - **c)** You must cause the whole of the work to be licensed at no 179 | charge to all third parties under the terms of this License. 180 | - **d)** If a facility in the modified Library refers to a function 181 | or a table of data to be supplied by an application program that 182 | uses the facility, other than as an argument passed when the 183 | facility is invoked, then you must make a good faith effort to 184 | ensure that, in the event an application does not supply such 185 | function or table, the facility still operates, and performs 186 | whatever part of its purpose remains meaningful. 187 | 188 | (For example, a function in a library to compute square roots has 189 | a purpose that is entirely well-defined independent of 190 | the application. Therefore, Subsection 2d requires that any 191 | application-supplied function or table used by this function must 192 | be optional: if the application does not supply it, the square 193 | root function must still compute square roots.) 194 | 195 | These requirements apply to the modified work as a whole. If 196 | identifiable sections of that work are not derived from the Library, 197 | and can be reasonably considered independent and separate works in 198 | themselves, then this License, and its terms, do not apply to those 199 | sections when you distribute them as separate works. 
But when you 200 | distribute the same sections as part of a whole which is a work based 201 | on the Library, the distribution of the whole must be on the terms of 202 | this License, whose permissions for other licensees extend to the 203 | entire whole, and thus to each and every part regardless of who wrote 204 | it. 205 | 206 | Thus, it is not the intent of this section to claim rights or contest 207 | your rights to work written entirely by you; rather, the intent is to 208 | exercise the right to control the distribution of derivative or 209 | collective works based on the Library. 210 | 211 | In addition, mere aggregation of another work not based on the Library 212 | with the Library (or with a work based on the Library) on a volume of 213 | a storage or distribution medium does not bring the other work under 214 | the scope of this License. 215 | 216 | **3.** You may opt to apply the terms of the ordinary GNU General 217 | Public License instead of this License to a given copy of the Library. 218 | To do this, you must alter all the notices that refer to this License, 219 | so that they refer to the ordinary GNU General Public License, version 220 | 2, instead of to this License. (If a newer version than version 2 of 221 | the ordinary GNU General Public License has appeared, then you can 222 | specify that version instead if you wish.) Do not make any other 223 | change in these notices. 224 | 225 | Once this change is made in a given copy, it is irreversible for that 226 | copy, so the ordinary GNU General Public License applies to all 227 | subsequent copies and derivative works made from that copy. 228 | 229 | This option is useful when you wish to copy part of the code of the 230 | Library into a program that is not a library. 
231 | 232 | **4.** You may copy and distribute the Library (or a portion or 233 | derivative of it, under Section 2) in object code or executable form 234 | under the terms of Sections 1 and 2 above provided that you accompany 235 | it with the complete corresponding machine-readable source code, which 236 | must be distributed under the terms of Sections 1 and 2 above on a 237 | medium customarily used for software interchange. 238 | 239 | If distribution of object code is made by offering access to copy from 240 | a designated place, then offering equivalent access to copy the source 241 | code from the same place satisfies the requirement to distribute the 242 | source code, even though third parties are not compelled to copy the 243 | source along with the object code. 244 | 245 | **5.** A program that contains no derivative of any portion of the 246 | Library, but is designed to work with the Library by being compiled or 247 | linked with it, is called a "work that uses the Library". Such a work, 248 | in isolation, is not a derivative work of the Library, and therefore 249 | falls outside the scope of this License. 250 | 251 | However, linking a "work that uses the Library" with the Library 252 | creates an executable that is a derivative of the Library (because it 253 | contains portions of the Library), rather than a "work that uses the 254 | library". The executable is therefore covered by this License. Section 255 | 6 states terms for distribution of such executables. 256 | 257 | When a "work that uses the Library" uses material from a header file 258 | that is part of the Library, the object code for the work may be a 259 | derivative work of the Library even though the source code is not. 260 | Whether this is true is especially significant if the work can be 261 | linked without the Library, or if the work is itself a library. The 262 | threshold for this to be true is not precisely defined by law. 
263 | 264 | If such an object file uses only numerical parameters, data structure 265 | layouts and accessors, and small macros and small inline functions 266 | (ten lines or less in length), then the use of the object file is 267 | unrestricted, regardless of whether it is legally a derivative work. 268 | (Executables containing this object code plus portions of the Library 269 | will still fall under Section 6.) 270 | 271 | Otherwise, if the work is a derivative of the Library, you may 272 | distribute the object code for the work under the terms of Section 6. 273 | Any executables containing that work also fall under Section 6, 274 | whether or not they are linked directly with the Library itself. 275 | 276 | **6.** As an exception to the Sections above, you may also combine or 277 | link a "work that uses the Library" with the Library to produce a work 278 | containing portions of the Library, and distribute that work under 279 | terms of your choice, provided that the terms permit modification of 280 | the work for the customer's own use and reverse engineering for 281 | debugging such modifications. 282 | 283 | You must give prominent notice with each copy of the work that the 284 | Library is used in it and that the Library and its use are covered by 285 | this License. You must supply a copy of this License. If the work 286 | during execution displays copyright notices, you must include the 287 | copyright notice for the Library among them, as well as a reference 288 | directing the user to the copy of this License. 
Also, you must do one 289 | of these things: 290 | 291 | - **a)** Accompany the work with the complete corresponding 292 | machine-readable source code for the Library including whatever 293 | changes were used in the work (which must be distributed under 294 | Sections 1 and 2 above); and, if the work is an executable linked 295 | with the Library, with the complete machine-readable "work that 296 | uses the Library", as object code and/or source code, so that the 297 | user can modify the Library and then relink to produce a modified 298 | executable containing the modified Library. (It is understood that 299 | the user who changes the contents of definitions files in the 300 | Library will not necessarily be able to recompile the application 301 | to use the modified definitions.) 302 | - **b)** Use a suitable shared library mechanism for linking with 303 | the Library. A suitable mechanism is one that (1) uses at run time 304 | a copy of the library already present on the user's computer 305 | system, rather than copying library functions into the executable, 306 | and (2) will operate properly with a modified version of the 307 | library, if the user installs one, as long as the modified version 308 | is interface-compatible with the version that the work was 309 | made with. 310 | - **c)** Accompany the work with a written offer, valid for at least 311 | three years, to give the same user the materials specified in 312 | Subsection 6a, above, for a charge no more than the cost of 313 | performing this distribution. 314 | - **d)** If distribution of the work is made by offering access to 315 | copy from a designated place, offer equivalent access to copy the 316 | above specified materials from the same place. 317 | - **e)** Verify that the user has already received a copy of these 318 | materials or that you have already sent this user a copy. 
319 | 320 | For an executable, the required form of the "work that uses the 321 | Library" must include any data and utility programs needed for 322 | reproducing the executable from it. However, as a special exception, 323 | the materials to be distributed need not include anything that is 324 | normally distributed (in either source or binary form) with the major 325 | components (compiler, kernel, and so on) of the operating system on 326 | which the executable runs, unless that component itself accompanies 327 | the executable. 328 | 329 | It may happen that this requirement contradicts the license 330 | restrictions of other proprietary libraries that do not normally 331 | accompany the operating system. Such a contradiction means you cannot 332 | use both them and the Library together in an executable that you 333 | distribute. 334 | 335 | **7.** You may place library facilities that are a work based on the 336 | Library side-by-side in a single library together with other library 337 | facilities not covered by this License, and distribute such a combined 338 | library, provided that the separate distribution of the work based on 339 | the Library and of the other library facilities is otherwise 340 | permitted, and provided that you do these two things: 341 | 342 | - **a)** Accompany the combined library with a copy of the same work 343 | based on the Library, uncombined with any other 344 | library facilities. This must be distributed under the terms of 345 | the Sections above. 346 | - **b)** Give prominent notice with the combined library of the fact 347 | that part of it is a work based on the Library, and explaining 348 | where to find the accompanying uncombined form of the same work. 349 | 350 | **8.** You may not copy, modify, sublicense, link with, or distribute 351 | the Library except as expressly provided under this License. 
Any 352 | attempt otherwise to copy, modify, sublicense, link with, or 353 | distribute the Library is void, and will automatically terminate your 354 | rights under this License. However, parties who have received copies, 355 | or rights, from you under this License will not have their licenses 356 | terminated so long as such parties remain in full compliance. 357 | 358 | **9.** You are not required to accept this License, since you have not 359 | signed it. However, nothing else grants you permission to modify or 360 | distribute the Library or its derivative works. These actions are 361 | prohibited by law if you do not accept this License. Therefore, by 362 | modifying or distributing the Library (or any work based on the 363 | Library), you indicate your acceptance of this License to do so, and 364 | all its terms and conditions for copying, distributing or modifying 365 | the Library or works based on it. 366 | 367 | **10.** Each time you redistribute the Library (or any work based on 368 | the Library), the recipient automatically receives a license from the 369 | original licensor to copy, distribute, link with or modify the Library 370 | subject to these terms and conditions. You may not impose any further 371 | restrictions on the recipients' exercise of the rights granted herein. 372 | You are not responsible for enforcing compliance by third parties with 373 | this License. 374 | 375 | **11.** If, as a consequence of a court judgment or allegation of 376 | patent infringement or for any other reason (not limited to patent 377 | issues), conditions are imposed on you (whether by court order, 378 | agreement or otherwise) that contradict the conditions of this 379 | License, they do not excuse you from the conditions of this License. 380 | If you cannot distribute so as to satisfy simultaneously your 381 | obligations under this License and any other pertinent obligations, 382 | then as a consequence you may not distribute the Library at all. 
For 383 | example, if a patent license would not permit royalty-free 384 | redistribution of the Library by all those who receive copies directly 385 | or indirectly through you, then the only way you could satisfy both it 386 | and this License would be to refrain entirely from distribution of the 387 | Library. 388 | 389 | If any portion of this section is held invalid or unenforceable under 390 | any particular circumstance, the balance of the section is intended to 391 | apply, and the section as a whole is intended to apply in other 392 | circumstances. 393 | 394 | It is not the purpose of this section to induce you to infringe any 395 | patents or other property right claims or to contest validity of any 396 | such claims; this section has the sole purpose of protecting the 397 | integrity of the free software distribution system which is 398 | implemented by public license practices. Many people have made 399 | generous contributions to the wide range of software distributed 400 | through that system in reliance on consistent application of that 401 | system; it is up to the author/donor to decide if he or she is willing 402 | to distribute software through any other system and a licensee cannot 403 | impose that choice. 404 | 405 | This section is intended to make thoroughly clear what is believed to 406 | be a consequence of the rest of this License. 407 | 408 | **12.** If the distribution and/or use of the Library is restricted in 409 | certain countries either by patents or by copyrighted interfaces, the 410 | original copyright holder who places the Library under this License 411 | may add an explicit geographical distribution limitation excluding 412 | those countries, so that distribution is permitted only in or among 413 | countries not thus excluded. In such case, this License incorporates 414 | the limitation as if written in the body of this License. 
415 | 416 | **13.** The Free Software Foundation may publish revised and/or new 417 | versions of the Lesser General Public License from time to time. Such 418 | new versions will be similar in spirit to the present version, but may 419 | differ in detail to address new problems or concerns. 420 | 421 | Each version is given a distinguishing version number. If the Library 422 | specifies a version number of this License which applies to it and 423 | "any later version", you have the option of following the terms and 424 | conditions either of that version or of any later version published by 425 | the Free Software Foundation. If the Library does not specify a 426 | license version number, you may choose any version ever published by 427 | the Free Software Foundation. 428 | 429 | **14.** If you wish to incorporate parts of the Library into other 430 | free programs whose distribution conditions are incompatible with 431 | these, write to the author to ask for permission. For software which 432 | is copyrighted by the Free Software Foundation, write to the Free 433 | Software Foundation; we sometimes make exceptions for this. Our 434 | decision will be guided by the two goals of preserving the free status 435 | of all derivatives of our free software and of promoting the sharing 436 | and reuse of software generally. 437 | 438 | **NO WARRANTY** 439 | 440 | **15.** BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 441 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 442 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 443 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 444 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 445 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 446 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 447 | LIBRARY IS WITH YOU. 
SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 448 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 449 | 450 | **16.** IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 451 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 452 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 453 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 454 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 455 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 456 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 457 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 458 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 459 | DAMAGES. 460 | 461 | ### END OF TERMS AND CONDITIONS 462 | ------------------------------------------------------------- 463 | 464 | Other names and brands may be claimed as the property of others. 465 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DISCONTINUATION OF PROJECT # 2 | This project will no longer be maintained by Intel. 3 | Intel has ceased development and contributions including, but not limited to, maintenance, bug fixes, new releases, or updates, to this project. 4 | Intel no longer accepts patches to this project. 5 | If you have an ongoing need to use this project, are interested in independently developing it, or would like to maintain patches for the open source software community, please create your own fork of this project. 6 | 7 | # This project provides you the patches to enable SUPRA on Intel GPU using Intel(R) oneAPI Base Toolkit Golden 2021.3.0.(Intel(R) oneAPI DPC++/C++ Compiler 2021.3.0) 8 | ### Note: This project based on Intel(R) oneAPI Base Toolkit Golden 2021.3.0. It should not be used for production. 
9 | 10 | ### 2021.12.9: tested on Intel(R) oneAPI Base Toolkit Golden 2021.4.0, works well. (Download oneAPI 2021.4.0: wget https://registrationcenter-download.intel.com/akdlm/irc_nas/18236/l_BaseKit_p_2021.4.0.3422_offline.sh) 11 | 12 | ## 1. Project Introduction 13 | ### (1) Environment setup 14 | 15 | Please make sure you have installed the Intel GPU driver (very important); you can follow the links below: 16 | 17 | https://www.intel.com/content/www/us/en/develop/documentation/installation-guide-for-intel-oneapi-toolkits-linux/top/prerequisites/install-intel-gpu-drivers.html 18 | 19 | https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-bionic.html 20 | 21 | You need to install Intel oneAPI; please refer to the Intel(R) oneAPI installation guide: https://software.intel.com/content/www/us/en/develop/articles/installation-guide-for-intel-oneapi-toolkits.html 22 | 23 | OS: Ubuntu 18.04 or Ubuntu 20.04 24 | 25 | Hardware: Intel CPU with Gen9 or Intel Tiger Lake Iris Xe GPU. 26 | 27 | This project was tested on an Intel(R) i7-8700K CPU with Intel(R) UHD Graphics 630; please refer to https://ark.intel.com/content/www/us/en/ark/products/126684/intel-core-i7-8700k-processor-12m-cache-up-to-4-70-ghz.html 28 | 29 | This project was tested on an Intel(R) i7-1185G7E CPU with Intel(R) Iris Xe GPU; please refer to https://ark.intel.com/content/www/us/en/ark/products/208076/intel-core-i7-1185g7e-processor-12m-cache-up-to-4-40-ghz.html 30 | 31 | 32 | 33 | ### (2) Patch information 34 | Patch 0001-* contains the result of migrating the CUDA files to DPC++ files with the Intel(R) DPC++ Compatibility Tool. After applying patch 0001-*, you will see a oneapi/ folder which contains the migrated DPC++ files and related header files. 35 | Patch 0002-* contains modifications to the DPC++ files. After applying patches 0001-* and 0002-*, you can build and run SUPRA successfully. 36 | Patch 0003-* contains optimizations to BeamformingNode and HilbertEnvelopeNode.
After applying patch 0003-*, the BeamformingNode and HilbertEnvelopeNode performance will improve. 37 | Patch 0004-* cleans up the code, using sycl in file names and variable names. 38 | 39 | ### 40 | 41 | ### Publications 42 | We have published a paper on IEEE Xplore; you can get more detail about the code from it: https://ieeexplore.ieee.org/document/9460657 43 | 44 | ## 2. Project build steps 45 | There are 2 ways you can build and run our project. The Jupyter notebook provides a simple and quick method to enable SUPRA. If you want to know the complete build process, you can follow the Linux terminal method to compile and run, which also won't take too long. 46 | 47 | ### (1) Jupyter notebook 48 | 49 | Clone this repo to your local machine, go to the supra-jupyter/ folder, launch your Jupyter notebook and open SUPRA-jupyter.ipynb in it. Follow the 50 | hints, and you will enable SUPRA with just a few clicks. 51 | 52 | 53 | ### (2) Linux terminal 54 | 55 | Install the third-party libraries on your machine: 56 | 57 | `apt-get install cmake cmake-gui qt5-default libtbb-dev libopenigtlink-dev git` 58 | 59 | Download this repo to your machine: 60 | 61 | `git clone https://github.com/intel/supra-on-oneapi.git` 62 | 63 | Download SUPRA from GitHub: 64 | 65 | `git clone https://github.com/IFL-CAMP/supra.git` 66 | 67 | Enter the supra folder: 68 | 69 | `cd supra` 70 | 71 | Check the supra commit log: 72 | 73 | `git log` 74 | 75 | 76 | 77 | Find this commit message: 78 | 79 | ![avatar](https://github.com/intel/supra-on-oneapi/raw/master/images/Commit%20info.PNG) 80 | 81 | Reset the supra HEAD to that commit with: 82 | 83 | `git reset --hard 73c930a08a7b1087f5be588863876a648a1add99` 84 | ![avatar](https://github.com/intel/supra-on-oneapi/raw/master/images/reset%20success%20modify.png) 85 | 86 | Apply the patches to SUPRA: 87 | 88 | `git am ../supra-on-oneapi/*.patch` 89 | 90 | 91 | Build and run the SUPRA demo, in the supra directory: 92 | 93 | `mkdir build` 94 | 95 | 96 | `cd build` 97 | 98 | 99 | Set up the environment: 100 | 
101 | `source /opt/intel/oneapi/setvars.sh` 102 | 103 | For the Intel(R) oneAPI Base Toolkit beta07 version, you need to change the $PATH; follow these two steps: 104 | 105 | `echo $PATH` 106 | 107 | It will print: 108 | 109 | ![avatar](https://github.com/intel/supra-on-oneapi/raw/master/images/PATH%20modify.png) 110 | 111 | Copy the whole PATH value except the contents in the red rectangle.
112 | 113 | Reset the PATH variable to the previously copied content with this command: 114 | 115 | `export PATH=` 116 | 117 | It should look like this: 118 | 119 | ![avatar](https://github.com/intel/supra-on-oneapi/raw/master/images/reset%20path.PNG) 120 | 121 | (use the PATH content printed on your own machine; don't copy it from here) 122 | 123 | 124 | Configure project: 125 | 126 | `CC=clang CXX=dpcpp CXXFLAGS="-L${TBBROOT}/lib/intel64/gcc4.8" cmake ..` 127 | 128 | Build: 129 | 130 | `make -j4` 131 | 132 | Download the sample data; in the build directory, create a data folder: 133 | 134 | `mkdir data` 135 | 136 | `cd data` 137 | 138 | `wget http://campar.in.tum.de/files/goeblr/mockData_linearProbe.zip` 139 | 140 | Unzip the sample data: 141 | 142 | `unzip mockData_linearProbe.zip` 143 | 144 | Copy the config file to the data folder: 145 | 146 | `cp ../../config/configDemo.xml .` 147 | 148 | Run the SUPRA GUI from the build folder (run `cd ..` first if you are still in the data folder): 149 | 150 | `src/GraphicInterface/SUPRA_GUI -c data/configDemo.xml -a` 151 | 152 | The SUPRA GUI looks like this: 153 | 154 | ![avatar](https://github.com/intel/supra-on-oneapi/raw/master/images/guie.PNG) 155 | 156 | Check the performance by opening supra.log in the build directory: 157 | 158 | `cat supra.log` 159 | 160 | It will show the performance of every node in milliseconds. 161 | 162 | ## 3.
Additional Note 163 | -------------------------------------------------------------------------------- /SUPRA_on_oneAPI_Getting_Start_Guide_v0.8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/supra-on-oneapi/c823cb52433e9211114ad570fc196aaa3aff9b16/SUPRA_on_oneAPI_Getting_Start_Guide_v0.8.pdf -------------------------------------------------------------------------------- /SUPRA_on_oneAPI_ReleaseNotes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/supra-on-oneapi/c823cb52433e9211114ad570fc196aaa3aff9b16/SUPRA_on_oneAPI_ReleaseNotes.pdf -------------------------------------------------------------------------------- /images/Commit info.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/supra-on-oneapi/c823cb52433e9211114ad570fc196aaa3aff9b16/images/Commit info.PNG -------------------------------------------------------------------------------- /images/PATH modify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/supra-on-oneapi/c823cb52433e9211114ad570fc196aaa3aff9b16/images/PATH modify.png -------------------------------------------------------------------------------- /images/PATH.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/supra-on-oneapi/c823cb52433e9211114ad570fc196aaa3aff9b16/images/PATH.PNG -------------------------------------------------------------------------------- /images/guie.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/supra-on-oneapi/c823cb52433e9211114ad570fc196aaa3aff9b16/images/guie.PNG -------------------------------------------------------------------------------- /images/head 
change modify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/supra-on-oneapi/c823cb52433e9211114ad570fc196aaa3aff9b16/images/head change modify.png -------------------------------------------------------------------------------- /images/head change.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/supra-on-oneapi/c823cb52433e9211114ad570fc196aaa3aff9b16/images/head change.PNG -------------------------------------------------------------------------------- /images/reset path.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/supra-on-oneapi/c823cb52433e9211114ad570fc196aaa3aff9b16/images/reset path.PNG -------------------------------------------------------------------------------- /images/reset success modify.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/supra-on-oneapi/c823cb52433e9211114ad570fc196aaa3aff9b16/images/reset success modify.png -------------------------------------------------------------------------------- /images/reset success.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/supra-on-oneapi/c823cb52433e9211114ad570fc196aaa3aff9b16/images/reset success.PNG -------------------------------------------------------------------------------- /supra-jupyter/.ipynb_checkpoints/download_data-checkpoint.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | echo "Downloading raw ultrasound data,you may need wait minutes..." 
4 | wget http://campar.in.tum.de/files/goeblr/mockData_linearProbe.zip -o /dev/null 5 | wget https://github.com/IFL-CAMP/supra/raw/master/config/configDemo.xml -o /dev/null 6 | 7 | mkdir data 8 | 9 | mv configDemo.xml data 10 | mv mockData_linearProbe.zip data 11 | cd data 12 | unzip mockData_linearProbe.zip -d . 13 | rm mockData_linearProbe.zip 14 | 15 | echo "data ok!" -------------------------------------------------------------------------------- /supra-jupyter/.ipynb_checkpoints/download_source-checkpoint.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | git clone https://github.com/IFL-CAMP/supra.git 4 | 5 | 6 | cd supra 7 | git reset --hard 73c930a08a7b1087f5be588863876a648a1add99 8 | git am --whitespace=nowarn ../../*.patch 9 | git am --whitespace=nowarn ../../supra-jupyter/*.patch -------------------------------------------------------------------------------- /supra-jupyter/.ipynb_checkpoints/gpu_build-checkpoint.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | source /opt/intel/inteloneapi/setvars.sh --force 4 | set -x 5 | dpcpp --version 6 | 7 | cd supra 8 | 9 | echo "Remove old build folder..." 10 | rm -rf build 11 | echo "Remove done." 12 | 13 | echo "Create image output folder..." 14 | mkdir -p build 15 | echo "Create folder done." 16 | cd build 17 | 18 | export PATH=`echo ${PATH} | awk -v RS=: -v ORS=: '/vtune/ {next} {print}'` 19 | 20 | CC=clang CXX=dpcpp cmake .. 21 | make -j4 &> /dev/null 22 | 23 | echo "Build done!" -------------------------------------------------------------------------------- /supra-jupyter/.ipynb_checkpoints/gpu_run-checkpoint.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | source /opt/intel/inteloneapi/setvars.sh --force 3 | 4 | cd supra/build 5 | cp -r ../../data . 
6 | rm -rf RxBeamformer HilbertFirEnvelope LogCompressor ScanConverter 7 | rm supra.log 8 | mkdir RxBeamformer HilbertFirEnvelope LogCompressor ScanConverter 9 | 10 | ./src/CommandlineInterface/SUPRA_CMD data/configDemo.xml -------------------------------------------------------------------------------- /supra-jupyter/SUPRA-jupyter-all-in-one-file.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Welcome to SUPRA on Jupyter\n", 8 | "This document contains the process of using Intel(R) oneAPI Base Toolkit build and run SUPRA on Intel GPU.\n", 9 | "(This file contains all commands you need to run SUPRA on GPU)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "**Intel(R) oneAPI is still in beta phase, this project only for test and should not be used in products.** " 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "## Table of Contents\n", 24 | "1. [SUPRA introduction](#sec-supra)\n", 25 | "2. [Build steps](#sec-steps)\n", 26 | "3. [Performance](#sec-perf)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "\n", 34 | "## 1. SUPRA introduction\n", 35 | "\n", 36 | "The SUPRA is an open source project, your can find detailed information at SUPRA." 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "\n", 44 | "## 2. Build steps" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "#### **Usage Tip:**\n", 52 | "**Create an empty folder in your linux machine(ubuntu 18.04).Copy this file to the folder. 
launch jupyter lab or jupyter notebook and open this file, follow the below command.**" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "### 2.1 Download source code from github and apply patch" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "%%bash\n", 69 | "# Download source code and patches\n", 70 | "git clone https://github.com/IFL-CAMP/supra.git\n", 71 | "git clone https://github.com/intel/supra-on-oneapi.git\n", 72 | " \n", 73 | "cd supra \n", 74 | "git reset --hard 73c930a08a7b1087f5be588863876a648a1add99 \n", 75 | "git am --whitespace=nowarn ../supra-on-oneapi/*.patch \n", 76 | "git am --whitespace=nowarn ../supra-on-oneapi/supra-jupyter/*.patch" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "### 2.1 Download Ultrasound data and unzip files" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "%%bash\n", 93 | "echo \"Downloading raw ultrasound data,you may need wait minutes...\"\n", 94 | "wget http://campar.in.tum.de/files/goeblr/mockData_linearProbe.zip -o /dev/null\n", 95 | "wget https://github.com/IFL-CAMP/supra/raw/master/config/configDemo.xml -o /dev/null\n", 96 | "\n", 97 | "mkdir data\n", 98 | "\n", 99 | "mv configDemo.xml data\n", 100 | "mv mockData_linearProbe.zip data\n", 101 | "\n", 102 | "cd data && unzip mockData_linearProbe.zip -d . 
&& rm mockData_linearProbe.zip\n", 103 | "\n", 104 | "echo \"data ok!\"" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "### Note: If import wget fails, run below command to install wget and re-run above cell" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "!pip install wget" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "### Use below command in your terminal to install Below 3rd libraries if you first run SUPRA on your machine." 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "sudo apt-get install cmake cmake-gui qt5-default libtbb-dev libopenigtlink-dev git" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "#### Build project for GPU" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "%%bash\n", 151 | "# Setup oneAPI environment\n", 152 | "echo \"Build process stat, this may take minutes\"\n", 153 | "source /opt/intel/inteloneapi/setvars.sh --force\n", 154 | "set -x\n", 155 | "dpcpp --version\n", 156 | "\n", 157 | "echo \"Remove old build folder...\"\n", 158 | "cd supra && rm -rf build && mkdir -p build && cd build\n", 159 | "echo \"Remove done.\"\n", 160 | "\n", 161 | "\n", 162 | "export PATH=`echo ${PATH} | awk -v RS=: -v ORS=: '/vtune/ {next} {print}'`\n", 163 | "\n", 164 | "CC=clang CXX=dpcpp cmake .. \n", 165 | "make -j4 &> /dev/null\n", 166 | "echo \"Build done!\"" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "#### Run on GPU in Commandline version\n", 174 | "After run a while, you can interrupt below cell to stop supra running." 
175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "%%bash\n", 184 | "# Run SUPRA on GPU\n", 185 | "source /opt/intel/inteloneapi/setvars.sh --force\n", 186 | "\n", 187 | "cd supra/build\n", 188 | "cp -r ../../data .\n", 189 | "rm -rf RxBeamformer HilbertFirEnvelope LogCompressor ScanConverter\n", 190 | "rm supra.log\n", 191 | "mkdir RxBeamformer HilbertFirEnvelope LogCompressor ScanConverter\n", 192 | "\n", 193 | "./src/CommandlineInterface/SUPRA_CMD data/configDemo.xml" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "#### ====== Note: Run on GPU in Graphic version =======" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "#### Open a terminal, In supra/build foler, run below command to see output.\n", 208 | "\n", 209 | "source /opt/intel/inteloneapi/setvars.sh\n", 210 | "\n", 211 | "src/GraphicInterface/SUPRA_GUI -c data/configDemo.xml -a\n" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "#### ====== End of Note =======" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "#### Display GPU output" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "import matplotlib.pyplot as plt\n", 235 | "%matplotlib inline\n", 236 | "# image path\n", 237 | "fpga_img_path = [\"./supra/build/RxBeamformer/\", \"./supra/build/HilbertFirEnvelope/\", \n", 238 | " \"./supra/build/LogCompressor/\", \"./supra/build/ScanConverter/\"]\n", 239 | "col_labels = [\"RxBeamformer\", \"HilbertFirEnvelope\", \"LogCompressor\", \"ScanConverter\"]\n", 240 | "row_labels = [\"Frame #\" + str(i+1) for i in range(8)]\n", 241 | "\n", 242 | "for row_img_index in range(8):\n", 243 | " \n", 244 | " 
fig, axs = plt.subplots(1, 4, figsize=(10,5))\n", 245 | " \n", 246 | " if row_img_index == 0:\n", 247 | " for i, col in zip(range(4), col_labels):\n", 248 | " axs[i].set_title(col)\n", 249 | " \n", 250 | " for col_img_index in range(4):\n", 251 | " current_img_path = fpga_img_path[col_img_index] + str(row_img_index + 1) + '.png'\n", 252 | " current_img = plt.imread(current_img_path)\n", 253 | " axs[col_img_index].imshow(current_img, \"gray\")\n", 254 | " axs[col_img_index].axis('off')\n", 255 | " fig.suptitle(row_labels[row_img_index], x= -0.05, y=0.5,fontsize = 9)\n", 256 | " plt.tight_layout()\n", 257 | " plt.show()\n", 258 | "plt.show()" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "\n", 266 | "## 3. Performance" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "The performance data were written to ./supra/build/supra.log file. run below cell to check." 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "!cat ./supra/build/supra.log" 283 | ] 284 | } 285 | ], 286 | "metadata": { 287 | "kernelspec": { 288 | "display_name": "Python 3", 289 | "language": "python", 290 | "name": "python3" 291 | }, 292 | "language_info": { 293 | "codemirror_mode": { 294 | "name": "ipython", 295 | "version": 3 296 | }, 297 | "file_extension": ".py", 298 | "mimetype": "text/x-python", 299 | "name": "python", 300 | "nbconvert_exporter": "python", 301 | "pygments_lexer": "ipython3", 302 | "version": "3.6.9" 303 | } 304 | }, 305 | "nbformat": 4, 306 | "nbformat_minor": 4 307 | } 308 | -------------------------------------------------------------------------------- /supra-jupyter/download_data.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | echo "Downloading raw ultrasound data,you may need wait minutes..." 
4 | wget http://campar.in.tum.de/files/goeblr/mockData_linearProbe.zip -o /dev/null 5 | wget https://github.com/IFL-CAMP/supra/raw/master/config/configDemo.xml -o /dev/null 6 | 7 | mkdir data 8 | 9 | mv configDemo.xml data 10 | mv mockData_linearProbe.zip data 11 | cd data 12 | unzip mockData_linearProbe.zip -d . 13 | rm mockData_linearProbe.zip 14 | 15 | echo "data ok!" -------------------------------------------------------------------------------- /supra-jupyter/download_source.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | git clone https://github.com/IFL-CAMP/supra.git 4 | 5 | 6 | cd supra 7 | git reset --hard 73c930a08a7b1087f5be588863876a648a1add99 8 | git am --whitespace=nowarn ../../*.patch 9 | git am --whitespace=nowarn ../../supra-jupyter/*.patch -------------------------------------------------------------------------------- /supra-jupyter/gpu_build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | source /opt/intel/inteloneapi/setvars.sh --force 4 | set -x 5 | dpcpp --version 6 | 7 | cd supra 8 | 9 | echo "Remove old build folder..." 10 | rm -rf build 11 | echo "Remove done." 12 | 13 | echo "Create image output folder..." 14 | mkdir -p build 15 | echo "Create folder done." 16 | cd build 17 | 18 | export PATH=`echo ${PATH} | awk -v RS=: -v ORS=: '/vtune/ {next} {print}'` 19 | 20 | CC=clang CXX=dpcpp cmake .. 21 | make -j4 &> /dev/null 22 | 23 | echo "Build done!" -------------------------------------------------------------------------------- /supra-jupyter/gpu_run.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | source /opt/intel/inteloneapi/setvars.sh --force 3 | 4 | cd supra/build 5 | cp -r ../../data . 
6 | rm -rf RxBeamformer HilbertFirEnvelope LogCompressor ScanConverter 7 | rm supra.log 8 | mkdir RxBeamformer HilbertFirEnvelope LogCompressor ScanConverter 9 | 10 | ./src/CommandlineInterface/SUPRA_CMD data/configDemo.xml -------------------------------------------------------------------------------- /third-party-programs.txt: -------------------------------------------------------------------------------- 1 | Intel® patches for SUPRA Third Party Programs File 2 | 3 | This file contains the list of third party software (“third party programs”) 4 | contained in the Intel software and their required notices and/or license terms. 5 | This third party software, even if included with the distribution of the Intel 6 | software, may be governed by separate license terms, including without limitation, 7 | third party license terms, other Intel software license terms, and open source 8 | software license terms. These separate license terms govern your use of the third 9 | party programs as set forth in the “third-party-programs.txt” file. 10 | 11 | The third party programs and their corresponding required notices and/or license terms are listed below. 12 | 13 | ------------------------------------------------------------- 14 | 15 | 1. SUPRA 16 | 17 | 18 | ### GNU LESSER GENERAL PUBLIC LICENSE 19 | 20 | Version 2.1, February 1999 21 | 22 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 23 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 24 | 25 | Everyone is permitted to copy and distribute verbatim copies 26 | of this license document, but changing it is not allowed. 27 | 28 | [This is the first released version of the Lesser GPL. It also counts 29 | as the successor of the GNU Library Public License, version 2, hence 30 | the version number 2.1.] 31 | 32 | ### Preamble 33 | 34 | The licenses for most software are designed to take away your freedom 35 | to share and change it. 
By contrast, the GNU General Public Licenses 36 | are intended to guarantee your freedom to share and change free 37 | software--to make sure the software is free for all its users. 38 | 39 | This license, the Lesser General Public License, applies to some 40 | specially designated software packages--typically libraries--of the 41 | Free Software Foundation and other authors who decide to use it. You 42 | can use it too, but we suggest you first think carefully about whether 43 | this license or the ordinary General Public License is the better 44 | strategy to use in any particular case, based on the explanations 45 | below. 46 | 47 | When we speak of free software, we are referring to freedom of use, 48 | not price. Our General Public Licenses are designed to make sure that 49 | you have the freedom to distribute copies of free software (and charge 50 | for this service if you wish); that you receive source code or can get 51 | it if you want it; that you can change the software and use pieces of 52 | it in new free programs; and that you are informed that you can do 53 | these things. 54 | 55 | To protect your rights, we need to make restrictions that forbid 56 | distributors to deny you these rights or to ask you to surrender these 57 | rights. These restrictions translate to certain responsibilities for 58 | you if you distribute copies of the library or if you modify it. 59 | 60 | For example, if you distribute copies of the library, whether gratis 61 | or for a fee, you must give the recipients all the rights that we gave 62 | you. You must make sure that they, too, receive or can get the source 63 | code. If you link other code with the library, you must provide 64 | complete object files to the recipients, so that they can relink them 65 | with the library after making changes to the library and recompiling 66 | it. And you must show them these terms so they know their rights. 
67 | 68 | We protect your rights with a two-step method: (1) we copyright the 69 | library, and (2) we offer you this license, which gives you legal 70 | permission to copy, distribute and/or modify the library. 71 | 72 | To protect each distributor, we want to make it very clear that there 73 | is no warranty for the free library. Also, if the library is modified 74 | by someone else and passed on, the recipients should know that what 75 | they have is not the original version, so that the original author's 76 | reputation will not be affected by problems that might be introduced 77 | by others. 78 | 79 | Finally, software patents pose a constant threat to the existence of 80 | any free program. We wish to make sure that a company cannot 81 | effectively restrict the users of a free program by obtaining a 82 | restrictive license from a patent holder. Therefore, we insist that 83 | any patent license obtained for a version of the library must be 84 | consistent with the full freedom of use specified in this license. 85 | 86 | Most GNU software, including some libraries, is covered by the 87 | ordinary GNU General Public License. This license, the GNU Lesser 88 | General Public License, applies to certain designated libraries, and 89 | is quite different from the ordinary General Public License. We use 90 | this license for certain libraries in order to permit linking those 91 | libraries into non-free programs. 92 | 93 | When a program is linked with a library, whether statically or using a 94 | shared library, the combination of the two is legally speaking a 95 | combined work, a derivative of the original library. The ordinary 96 | General Public License therefore permits such linking only if the 97 | entire combination fits its criteria of freedom. The Lesser General 98 | Public License permits more lax criteria for linking other code with 99 | the library. 
100 | 101 | We call this license the "Lesser" General Public License because it 102 | does Less to protect the user's freedom than the ordinary General 103 | Public License. It also provides other free software developers Less 104 | of an advantage over competing non-free programs. These disadvantages 105 | are the reason we use the ordinary General Public License for many 106 | libraries. However, the Lesser license provides advantages in certain 107 | special circumstances. 108 | 109 | For example, on rare occasions, there may be a special need to 110 | encourage the widest possible use of a certain library, so that it 111 | becomes a de-facto standard. To achieve this, non-free programs must 112 | be allowed to use the library. A more frequent case is that a free 113 | library does the same job as widely used non-free libraries. In this 114 | case, there is little to gain by limiting the free library to free 115 | software only, so we use the Lesser General Public License. 116 | 117 | In other cases, permission to use a particular library in non-free 118 | programs enables a greater number of people to use a large body of 119 | free software. For example, permission to use the GNU C Library in 120 | non-free programs enables many more people to use the whole GNU 121 | operating system, as well as its variant, the GNU/Linux operating 122 | system. 123 | 124 | Although the Lesser General Public License is Less protective of the 125 | users' freedom, it does ensure that the user of a program that is 126 | linked with the Library has the freedom and the wherewithal to run 127 | that program using a modified version of the Library. 128 | 129 | The precise terms and conditions for copying, distribution and 130 | modification follow. Pay close attention to the difference between a 131 | "work based on the library" and a "work that uses the library". 
The 132 | former contains code derived from the library, whereas the latter must 133 | be combined with the library in order to run. 134 | 135 | ### TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 136 | 137 | **0.** This License Agreement applies to any software library or other 138 | program which contains a notice placed by the copyright holder or 139 | other authorized party saying it may be distributed under the terms of 140 | this Lesser General Public License (also called "this License"). Each 141 | licensee is addressed as "you". 142 | 143 | A "library" means a collection of software functions and/or data 144 | prepared so as to be conveniently linked with application programs 145 | (which use some of those functions and data) to form executables. 146 | 147 | The "Library", below, refers to any such software library or work 148 | which has been distributed under these terms. A "work based on the 149 | Library" means either the Library or any derivative work under 150 | copyright law: that is to say, a work containing the Library or a 151 | portion of it, either verbatim or with modifications and/or translated 152 | straightforwardly into another language. (Hereinafter, translation is 153 | included without limitation in the term "modification".) 154 | 155 | "Source code" for a work means the preferred form of the work for 156 | making modifications to it. For a library, complete source code means 157 | all the source code for all modules it contains, plus any associated 158 | interface definition files, plus the scripts used to control 159 | compilation and installation of the library. 160 | 161 | Activities other than copying, distribution and modification are not 162 | covered by this License; they are outside its scope. 
The act of 163 | running a program using the Library is not restricted, and output from 164 | such a program is covered only if its contents constitute a work based 165 | on the Library (independent of the use of the Library in a tool for 166 | writing it). Whether that is true depends on what the Library does and 167 | what the program that uses the Library does. 168 | 169 | **1.** You may copy and distribute verbatim copies of the Library's 170 | complete source code as you receive it, in any medium, provided that 171 | you conspicuously and appropriately publish on each copy an 172 | appropriate copyright notice and disclaimer of warranty; keep intact 173 | all the notices that refer to this License and to the absence of any 174 | warranty; and distribute a copy of this License along with the 175 | Library. 176 | 177 | You may charge a fee for the physical act of transferring a copy, and 178 | you may at your option offer warranty protection in exchange for a 179 | fee. 180 | 181 | **2.** You may modify your copy or copies of the Library or any 182 | portion of it, thus forming a work based on the Library, and copy and 183 | distribute such modifications or work under the terms of Section 1 184 | above, provided that you also meet all of these conditions: 185 | 186 | - **a)** The modified work must itself be a software library. 187 | - **b)** You must cause the files modified to carry prominent 188 | notices stating that you changed the files and the date of 189 | any change. 190 | - **c)** You must cause the whole of the work to be licensed at no 191 | charge to all third parties under the terms of this License. 
192 | - **d)** If a facility in the modified Library refers to a function 193 | or a table of data to be supplied by an application program that 194 | uses the facility, other than as an argument passed when the 195 | facility is invoked, then you must make a good faith effort to 196 | ensure that, in the event an application does not supply such 197 | function or table, the facility still operates, and performs 198 | whatever part of its purpose remains meaningful. 199 | 200 | (For example, a function in a library to compute square roots has 201 | a purpose that is entirely well-defined independent of 202 | the application. Therefore, Subsection 2d requires that any 203 | application-supplied function or table used by this function must 204 | be optional: if the application does not supply it, the square 205 | root function must still compute square roots.) 206 | 207 | These requirements apply to the modified work as a whole. If 208 | identifiable sections of that work are not derived from the Library, 209 | and can be reasonably considered independent and separate works in 210 | themselves, then this License, and its terms, do not apply to those 211 | sections when you distribute them as separate works. But when you 212 | distribute the same sections as part of a whole which is a work based 213 | on the Library, the distribution of the whole must be on the terms of 214 | this License, whose permissions for other licensees extend to the 215 | entire whole, and thus to each and every part regardless of who wrote 216 | it. 217 | 218 | Thus, it is not the intent of this section to claim rights or contest 219 | your rights to work written entirely by you; rather, the intent is to 220 | exercise the right to control the distribution of derivative or 221 | collective works based on the Library. 
222 | 223 | In addition, mere aggregation of another work not based on the Library 224 | with the Library (or with a work based on the Library) on a volume of 225 | a storage or distribution medium does not bring the other work under 226 | the scope of this License. 227 | 228 | **3.** You may opt to apply the terms of the ordinary GNU General 229 | Public License instead of this License to a given copy of the Library. 230 | To do this, you must alter all the notices that refer to this License, 231 | so that they refer to the ordinary GNU General Public License, version 232 | 2, instead of to this License. (If a newer version than version 2 of 233 | the ordinary GNU General Public License has appeared, then you can 234 | specify that version instead if you wish.) Do not make any other 235 | change in these notices. 236 | 237 | Once this change is made in a given copy, it is irreversible for that 238 | copy, so the ordinary GNU General Public License applies to all 239 | subsequent copies and derivative works made from that copy. 240 | 241 | This option is useful when you wish to copy part of the code of the 242 | Library into a program that is not a library. 243 | 244 | **4.** You may copy and distribute the Library (or a portion or 245 | derivative of it, under Section 2) in object code or executable form 246 | under the terms of Sections 1 and 2 above provided that you accompany 247 | it with the complete corresponding machine-readable source code, which 248 | must be distributed under the terms of Sections 1 and 2 above on a 249 | medium customarily used for software interchange. 250 | 251 | If distribution of object code is made by offering access to copy from 252 | a designated place, then offering equivalent access to copy the source 253 | code from the same place satisfies the requirement to distribute the 254 | source code, even though third parties are not compelled to copy the 255 | source along with the object code. 
256 | 257 | **5.** A program that contains no derivative of any portion of the 258 | Library, but is designed to work with the Library by being compiled or 259 | linked with it, is called a "work that uses the Library". Such a work, 260 | in isolation, is not a derivative work of the Library, and therefore 261 | falls outside the scope of this License. 262 | 263 | However, linking a "work that uses the Library" with the Library 264 | creates an executable that is a derivative of the Library (because it 265 | contains portions of the Library), rather than a "work that uses the 266 | library". The executable is therefore covered by this License. Section 267 | 6 states terms for distribution of such executables. 268 | 269 | When a "work that uses the Library" uses material from a header file 270 | that is part of the Library, the object code for the work may be a 271 | derivative work of the Library even though the source code is not. 272 | Whether this is true is especially significant if the work can be 273 | linked without the Library, or if the work is itself a library. The 274 | threshold for this to be true is not precisely defined by law. 275 | 276 | If such an object file uses only numerical parameters, data structure 277 | layouts and accessors, and small macros and small inline functions 278 | (ten lines or less in length), then the use of the object file is 279 | unrestricted, regardless of whether it is legally a derivative work. 280 | (Executables containing this object code plus portions of the Library 281 | will still fall under Section 6.) 282 | 283 | Otherwise, if the work is a derivative of the Library, you may 284 | distribute the object code for the work under the terms of Section 6. 285 | Any executables containing that work also fall under Section 6, 286 | whether or not they are linked directly with the Library itself. 
287 | 288 | **6.** As an exception to the Sections above, you may also combine or 289 | link a "work that uses the Library" with the Library to produce a work 290 | containing portions of the Library, and distribute that work under 291 | terms of your choice, provided that the terms permit modification of 292 | the work for the customer's own use and reverse engineering for 293 | debugging such modifications. 294 | 295 | You must give prominent notice with each copy of the work that the 296 | Library is used in it and that the Library and its use are covered by 297 | this License. You must supply a copy of this License. If the work 298 | during execution displays copyright notices, you must include the 299 | copyright notice for the Library among them, as well as a reference 300 | directing the user to the copy of this License. Also, you must do one 301 | of these things: 302 | 303 | - **a)** Accompany the work with the complete corresponding 304 | machine-readable source code for the Library including whatever 305 | changes were used in the work (which must be distributed under 306 | Sections 1 and 2 above); and, if the work is an executable linked 307 | with the Library, with the complete machine-readable "work that 308 | uses the Library", as object code and/or source code, so that the 309 | user can modify the Library and then relink to produce a modified 310 | executable containing the modified Library. (It is understood that 311 | the user who changes the contents of definitions files in the 312 | Library will not necessarily be able to recompile the application 313 | to use the modified definitions.) 314 | - **b)** Use a suitable shared library mechanism for linking with 315 | the Library. 
A suitable mechanism is one that (1) uses at run time 316 | a copy of the library already present on the user's computer 317 | system, rather than copying library functions into the executable, 318 | and (2) will operate properly with a modified version of the 319 | library, if the user installs one, as long as the modified version 320 | is interface-compatible with the version that the work was 321 | made with. 322 | - **c)** Accompany the work with a written offer, valid for at least 323 | three years, to give the same user the materials specified in 324 | Subsection 6a, above, for a charge no more than the cost of 325 | performing this distribution. 326 | - **d)** If distribution of the work is made by offering access to 327 | copy from a designated place, offer equivalent access to copy the 328 | above specified materials from the same place. 329 | - **e)** Verify that the user has already received a copy of these 330 | materials or that you have already sent this user a copy. 331 | 332 | For an executable, the required form of the "work that uses the 333 | Library" must include any data and utility programs needed for 334 | reproducing the executable from it. However, as a special exception, 335 | the materials to be distributed need not include anything that is 336 | normally distributed (in either source or binary form) with the major 337 | components (compiler, kernel, and so on) of the operating system on 338 | which the executable runs, unless that component itself accompanies 339 | the executable. 340 | 341 | It may happen that this requirement contradicts the license 342 | restrictions of other proprietary libraries that do not normally 343 | accompany the operating system. Such a contradiction means you cannot 344 | use both them and the Library together in an executable that you 345 | distribute. 
346 | 347 | **7.** You may place library facilities that are a work based on the 348 | Library side-by-side in a single library together with other library 349 | facilities not covered by this License, and distribute such a combined 350 | library, provided that the separate distribution of the work based on 351 | the Library and of the other library facilities is otherwise 352 | permitted, and provided that you do these two things: 353 | 354 | - **a)** Accompany the combined library with a copy of the same work 355 | based on the Library, uncombined with any other 356 | library facilities. This must be distributed under the terms of 357 | the Sections above. 358 | - **b)** Give prominent notice with the combined library of the fact 359 | that part of it is a work based on the Library, and explaining 360 | where to find the accompanying uncombined form of the same work. 361 | 362 | **8.** You may not copy, modify, sublicense, link with, or distribute 363 | the Library except as expressly provided under this License. Any 364 | attempt otherwise to copy, modify, sublicense, link with, or 365 | distribute the Library is void, and will automatically terminate your 366 | rights under this License. However, parties who have received copies, 367 | or rights, from you under this License will not have their licenses 368 | terminated so long as such parties remain in full compliance. 369 | 370 | **9.** You are not required to accept this License, since you have not 371 | signed it. However, nothing else grants you permission to modify or 372 | distribute the Library or its derivative works. These actions are 373 | prohibited by law if you do not accept this License. Therefore, by 374 | modifying or distributing the Library (or any work based on the 375 | Library), you indicate your acceptance of this License to do so, and 376 | all its terms and conditions for copying, distributing or modifying 377 | the Library or works based on it. 
378 | 379 | **10.** Each time you redistribute the Library (or any work based on 380 | the Library), the recipient automatically receives a license from the 381 | original licensor to copy, distribute, link with or modify the Library 382 | subject to these terms and conditions. You may not impose any further 383 | restrictions on the recipients' exercise of the rights granted herein. 384 | You are not responsible for enforcing compliance by third parties with 385 | this License. 386 | 387 | **11.** If, as a consequence of a court judgment or allegation of 388 | patent infringement or for any other reason (not limited to patent 389 | issues), conditions are imposed on you (whether by court order, 390 | agreement or otherwise) that contradict the conditions of this 391 | License, they do not excuse you from the conditions of this License. 392 | If you cannot distribute so as to satisfy simultaneously your 393 | obligations under this License and any other pertinent obligations, 394 | then as a consequence you may not distribute the Library at all. For 395 | example, if a patent license would not permit royalty-free 396 | redistribution of the Library by all those who receive copies directly 397 | or indirectly through you, then the only way you could satisfy both it 398 | and this License would be to refrain entirely from distribution of the 399 | Library. 400 | 401 | If any portion of this section is held invalid or unenforceable under 402 | any particular circumstance, the balance of the section is intended to 403 | apply, and the section as a whole is intended to apply in other 404 | circumstances. 405 | 406 | It is not the purpose of this section to induce you to infringe any 407 | patents or other property right claims or to contest validity of any 408 | such claims; this section has the sole purpose of protecting the 409 | integrity of the free software distribution system which is 410 | implemented by public license practices. 
Many people have made 411 | generous contributions to the wide range of software distributed 412 | through that system in reliance on consistent application of that 413 | system; it is up to the author/donor to decide if he or she is willing 414 | to distribute software through any other system and a licensee cannot 415 | impose that choice. 416 | 417 | This section is intended to make thoroughly clear what is believed to 418 | be a consequence of the rest of this License. 419 | 420 | **12.** If the distribution and/or use of the Library is restricted in 421 | certain countries either by patents or by copyrighted interfaces, the 422 | original copyright holder who places the Library under this License 423 | may add an explicit geographical distribution limitation excluding 424 | those countries, so that distribution is permitted only in or among 425 | countries not thus excluded. In such case, this License incorporates 426 | the limitation as if written in the body of this License. 427 | 428 | **13.** The Free Software Foundation may publish revised and/or new 429 | versions of the Lesser General Public License from time to time. Such 430 | new versions will be similar in spirit to the present version, but may 431 | differ in detail to address new problems or concerns. 432 | 433 | Each version is given a distinguishing version number. If the Library 434 | specifies a version number of this License which applies to it and 435 | "any later version", you have the option of following the terms and 436 | conditions either of that version or of any later version published by 437 | the Free Software Foundation. If the Library does not specify a 438 | license version number, you may choose any version ever published by 439 | the Free Software Foundation. 440 | 441 | **14.** If you wish to incorporate parts of the Library into other 442 | free programs whose distribution conditions are incompatible with 443 | these, write to the author to ask for permission. 
For software which 444 | is copyrighted by the Free Software Foundation, write to the Free 445 | Software Foundation; we sometimes make exceptions for this. Our 446 | decision will be guided by the two goals of preserving the free status 447 | of all derivatives of our free software and of promoting the sharing 448 | and reuse of software generally. 449 | 450 | **NO WARRANTY** 451 | 452 | **15.** BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 453 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 454 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 455 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 456 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 457 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 458 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 459 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 460 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 461 | 462 | **16.** IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 463 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 464 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 465 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 466 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 467 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 468 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 469 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 470 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 471 | DAMAGES. 472 | 473 | ### END OF TERMS AND CONDITIONS 474 | ------------------------------------------------------------- 475 | 476 | Other names and brands may be claimed as the property of others. 
477 | --------------------------------------------------------------------------------