├── .gitignore ├── runbuild.sh ├── samples ├── lena.png ├── inverse.png ├── phantom.png └── phantom_sinogram.png ├── app ├── CMakeLists.txt └── radon-tf.cpp ├── .clang-format ├── README.md ├── benchmarks ├── CMakeLists.txt └── cpu.cpp ├── CMakeLists.txt ├── radon ├── radon.hpp ├── tensor.hpp └── radon.inl.hpp └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | build 3 | .*sw* 4 | .vscode 5 | perf.* -------------------------------------------------------------------------------- /runbuild.sh: -------------------------------------------------------------------------------- 1 | cmake -B build 2 | cmake --build build -j$(nproc) -------------------------------------------------------------------------------- /samples/lena.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ferasboulala/radon-tf/HEAD/samples/lena.png -------------------------------------------------------------------------------- /samples/inverse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ferasboulala/radon-tf/HEAD/samples/inverse.png -------------------------------------------------------------------------------- /samples/phantom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ferasboulala/radon-tf/HEAD/samples/phantom.png -------------------------------------------------------------------------------- /samples/phantom_sinogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ferasboulala/radon-tf/HEAD/samples/phantom_sinogram.png -------------------------------------------------------------------------------- /app/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(radon-tf 2 | radon-tf.cpp 3 
| ) 4 | 5 | 6 | target_link_libraries(radon-tf -pthread ${OpenCV_LIBS}) -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | IndentWidth: 4 3 | ColumnLimit: 100 4 | AccessModifierOffset: -4 5 | 6 | BinPackArguments: false 7 | BinPackParameters: false 8 | AlignConsecutiveDeclarations: true -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # radon-tf 2 | A simple implementation of the Radon transform. It runs faster when executed with more than one thread. No inverse transform is provided. CPU implementation only. 3 | 4 | ![example](samples/phantom_sinogram.png) 5 | 6 | ## Setup 7 | This is a header-only library. Copy the contents of `radon/` into your project, or add that directory to your include search path. Then simply `#include "radon.hpp"`. 8 | 9 | ## How to Use 10 | Take a look at `app/radon-tf.cpp` or `benchmarks/cpu.cpp`. 
-------------------------------------------------------------------------------- /benchmarks/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(benchmark) 2 | 3 | if (benchmark_FOUND) 4 | function(add_benchmark target_name entry_point) 5 | add_executable(${target_name} ${entry_point}) 6 | target_compile_options(${target_name} PRIVATE "-Wl,--no-as-needed,-lprofiler,--as-needed") 7 | target_link_libraries(${target_name} -pthread ${OpenCV_LIBS} benchmark::benchmark) 8 | endfunction(add_benchmark) 9 | 10 | add_benchmark(radon-cpu-bench cpu.cpp) 11 | endif() -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | project(radon-tf LANGUAGES CXX) 3 | 4 | set(CMAKE_C_COMPILER "gcc-11") 5 | set(CMAKE_CXX_COMPILER "g++-11") 6 | set(CMAKE_BUILD_TYPE Release) 7 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 8 | set(ALL_LIBRARIES) 9 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/bin") 10 | 11 | add_compile_options( 12 | -Ofast 13 | -march=native 14 | -Wall 15 | -Wpedantic 16 | -Werror 17 | -std=c++2a 18 | -std=gnu++2a 19 | -fverbose-asm 20 | -save-temps 21 | ) 22 | 23 | find_package(Threads REQUIRED) 24 | find_package(OpenCV REQUIRED) 25 | 26 | include_directories(${CMAKE_SOURCE_DIR}) 27 | 28 | add_subdirectory(app) 29 | add_subdirectory(benchmarks) -------------------------------------------------------------------------------- /radon/radon.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace radon { 4 | 5 | // Given an input image size and a theta resolution, returns 6 | // the size of the expected sinogram. 7 | inline std::tuple sinogram_size(unsigned rows, 8 | unsigned cols, 9 | unsigned resolution); 10 | 11 | // Returns a sinogram. Use appropriate data types to avoid overflow. 
12 | template 13 | std::unique_ptr transform(const InputType* src, 14 | unsigned rows, 15 | unsigned cols, 16 | unsigned resolution, 17 | unsigned n_threads = 1); 18 | 19 | } // namespace radon 20 | 21 | #include "radon.inl.hpp" 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Feras Boulala 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
// radon/tensor.hpp
//
// NOTE(review): the template argument lists in this file were reconstructed from usage
// (`ravel<2>(...)`, `Tensor<PolarCoordinate, 2>`); confirm them against the repository.
#pragma once

#include <array>
#include <cassert>
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Forces inlining on GCC/Clang and expands to nothing on other compilers.
#ifndef ALWAYS_INLINE
#if defined(__GNUC__) || defined(__clang__)
#define ALWAYS_INLINE __attribute__((always_inline))
#else
#define ALWAYS_INLINE
#endif
#endif

/// Converts a multi-dimensional index into a flat row-major offset (the last dimension varies
/// fastest).
///
/// This function is always inlined because we want to avoid having to recompute the stride
/// values. Also, if the indices are the same, the whole computation can be removed by CSE.
///
/// @param indices One index per dimension; each must be smaller than the matching entry of
///                `sizes` (checked with `assert` only).
/// @param sizes   Extent of every dimension.
/// @return The flat offset of `indices` inside a buffer of `product(sizes)` elements.
template <std::size_t RANK>
ALWAYS_INLINE inline unsigned ravel(const std::array<unsigned, RANK>& indices,
                                    const std::array<unsigned, RANK>& sizes) {
    static_assert(RANK > 0, "ravel requires at least one dimension");
    assert(indices.back() < sizes.back());
    unsigned index = indices.back();
    unsigned stride = sizes.back();
    // Walk the remaining dimensions from the second-to-last down to the first.
    for (std::size_t i = RANK - 1; i-- > 0;) {
        assert(indices[i] < sizes[i]);
        index += stride * indices[i];
        stride *= sizes[i];
    }

    return index;
}

/// Dense, row-major, fixed-rank array stored in a single contiguous `std::vector`.
///
/// No special member functions are declared (Rule of Zero), so the tensor is copyable and
/// cheaply movable.
template <typename T, std::size_t RANK>
class Tensor {
public:
    /// Creates an empty tensor whose extents are all zero.
    Tensor() = default;

    /// Creates a tensor with the given extents, every element set to `filler`.
    Tensor(const std::array<unsigned, RANK>& size, const T& filler = T()) { resize(size, filler); }

    /// Changes the extents to `size`.
    ///
    /// The underlying buffer is resized with `std::vector::resize`: elements that already
    /// existed keep their value at the same *flat* offset and only newly created elements are
    /// set to `filler`.
    ALWAYS_INLINE void resize(const std::array<unsigned, RANK>& size, const T& filler = T()) {
        size_ = size;
        const unsigned new_volume =
            std::accumulate(size.begin(), size.end(), 1u, std::multiplies<unsigned>());
        data_.resize(new_volume, filler);
    }

    /// Element access by multi-dimensional index (bounds are checked with `assert` only).
    ALWAYS_INLINE const T& operator[](const std::array<unsigned, RANK>& indices) const {
        return data_[ravel(indices, size_)];
    }
    ALWAYS_INLINE T& operator[](const std::array<unsigned, RANK>& indices) {
        return data_[ravel(indices, size_)];
    }

    /// Total number of stored elements.
    ALWAYS_INLINE unsigned volume() const { return static_cast<unsigned>(data_.size()); }

    /// Extent of every dimension.
    ALWAYS_INLINE const std::array<unsigned, RANK>& size() const { return size_; }

    /// Pointer to the first element of the contiguous row-major storage.
    ALWAYS_INLINE const T* data() const { return data_.data(); }
    ALWAYS_INLINE T* data() { return data_.data(); }

private:
    std::array<unsigned, RANK> size_{};  // zero-initialised so an empty tensor has defined extents
    std::vector<T> data_;
};
-------------------------------------------------------------------------------- /benchmarks/cpu.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "radon/radon.hpp" 8 | 9 | static cv::Mat phantom_img; 10 | 11 | template 12 | void benchmark_phantom(benchmark::State& state) { 13 | std::unique_ptr src(new InputType[phantom_img.total()]); 14 | for (uint64_t i = 0; i < phantom_img.total(); ++i) { 15 | src.get()[i] = phantom_img.at(i); 16 | } 17 | 18 | static constexpr unsigned N_THETA_BINS = 720; 19 | for (auto _ : state) { 20 | auto sinogram = radon::transform( 21 | src.get(), phantom_img.rows, phantom_img.cols, N_THETA_BINS, 1); 22 | benchmark::DoNotOptimize(sinogram); 23 | benchmark::ClobberMemory(); 24 | } 25 | } 26 | 27 | int main(int argc, char** argv) { 28 | assert(argc == 2); 29 | const std::string phantom_filename = argv[1]; 30 | 31 | phantom_img = cv::imread(phantom_filename, cv::IMREAD_GRAYSCALE); 32 | // Some of these tests cause overflows but that is fine. We want to benchmark the execution, not 33 | // have valid data in the sinogram. 
34 | std::vector benchmarks = { 35 | benchmark::RegisterBenchmark("u8", benchmark_phantom), 36 | benchmark::RegisterBenchmark("u16", benchmark_phantom), 37 | benchmark::RegisterBenchmark("u32", benchmark_phantom), 38 | benchmark::RegisterBenchmark("u64", benchmark_phantom), 39 | benchmark::RegisterBenchmark("f32", benchmark_phantom), 40 | benchmark::RegisterBenchmark("f64", benchmark_phantom), 41 | benchmark::RegisterBenchmark("u8u64", benchmark_phantom), 42 | benchmark::RegisterBenchmark("u64f64", benchmark_phantom), 43 | benchmark::RegisterBenchmark("f64u64", benchmark_phantom), 44 | }; 45 | 46 | static constexpr uint64_t N_ITERATIONS = 4; 47 | for (auto bench : benchmarks) { 48 | bench->Iterations(N_ITERATIONS); 49 | } 50 | 51 | int new_argc = 0; 52 | benchmark::Initialize(&new_argc, &argv[2]); 53 | benchmark::RunSpecifiedBenchmarks(); 54 | } -------------------------------------------------------------------------------- /app/radon-tf.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "radon/radon.hpp" 10 | 11 | static constexpr unsigned RESOLUTION = 720; 12 | using RadonType = double; 13 | 14 | template 15 | static T cv_max(const cv::Mat img) { 16 | T val = std::numeric_limits::min(); 17 | const T *data = (T *)img.data; 18 | for (size_t i = 0; i < img.total(); ++i) { 19 | if (data[i] > val) val = data[i]; 20 | } 21 | 22 | return val; 23 | } 24 | 25 | void print_help(char *progname) { 26 | printf("Usage: %s [-t num_threads] input_file output_file\n", progname); 27 | printf( 28 | "This program reads an input image in grey scale and evaluates the radon transform. 
The " 29 | "output is stored to disk.\n"); 30 | printf("Options:\n"); 31 | printf(" -t num_threads Number of threads to use (default: 1)\n"); 32 | printf(" -h, --help Display this help message\n"); 33 | } 34 | 35 | int main(int argc, char **argv) { 36 | static constexpr uint64_t DEFAULT_N_THREADS = 1; 37 | 38 | int opt; 39 | int num_threads = DEFAULT_N_THREADS; 40 | 41 | while ((opt = getopt(argc, argv, "ht:")) != -1) { 42 | switch (opt) { 43 | case 't': 44 | num_threads = atoi(optarg); 45 | break; 46 | case 'h': 47 | case '?': 48 | print_help(argv[0]); 49 | return 0; 50 | } 51 | } 52 | 53 | if (optind + 2 > argc) { 54 | fprintf(stderr, "Usage: %s [-t num_threads] input_file output_file\n", argv[0]); 55 | return 1; 56 | } 57 | 58 | const char *input_file = argv[optind]; 59 | const char *output_file = argv[optind + 1]; 60 | 61 | printf("Input file: %s\n", input_file); 62 | printf("Output file: %s\n", output_file); 63 | printf("Number of threads: %d\n", num_threads); 64 | 65 | cv::Mat img = cv::imread(input_file, cv::IMREAD_GRAYSCALE); 66 | 67 | std::chrono::high_resolution_clock::time_point start, end; 68 | std::chrono::duration fp_ms; 69 | start = std::chrono::high_resolution_clock::now(); 70 | auto sinogram = 71 | radon::transform(img.data, img.rows, img.cols, RESOLUTION, num_threads); 72 | end = std::chrono::high_resolution_clock::now(); 73 | fp_ms = end - start; 74 | printf("Took %f ms\n", fp_ms.count()); 75 | 76 | const auto [n_rows, n_cols] = radon::sinogram_size(img.rows, img.cols, RESOLUTION); 77 | const auto data = sinogram.get(); 78 | sinogram.release(); 79 | cv::Mat cv_sinogram(n_rows, n_cols, CV_64FC1, data); 80 | 81 | const RadonType largest_intensity = cv_max(cv_sinogram); 82 | cv_sinogram *= 255 / largest_intensity; 83 | cv_sinogram.convertTo(cv_sinogram, CV_8UC1); 84 | 85 | cv::imwrite(output_file, cv_sinogram); 86 | 87 | return 0; 88 | } 89 | -------------------------------------------------------------------------------- /radon/radon.inl.hpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "tensor.hpp" 7 | 8 | namespace { 9 | 10 | struct PolarCoordinate { 11 | double radius; 12 | double theta; 13 | }; 14 | 15 | using AngleCache = Tensor; 16 | 17 | template 18 | struct ThreadData { 19 | const T* src; 20 | T* dst; 21 | unsigned rows, cols; 22 | unsigned start_index, end_index, resolution, diagonal_length; 23 | double dtheta; 24 | const AngleCache* angle_cache; 25 | const std::vector* cosine_table; 26 | }; 27 | 28 | std::vector precompute_cosines(unsigned resolution) { 29 | assert(resolution); 30 | const double dtheta = 2 * M_PI / resolution; 31 | std::vector cosine_table(resolution); 32 | for (unsigned i = 0; i < cosine_table.size(); ++i) { 33 | cosine_table[i] = std::cos(i * dtheta); 34 | } 35 | 36 | return cosine_table; 37 | } 38 | 39 | template 40 | inline T l2_norm(const std::array& size) { 41 | T accumulator = T(0); 42 | for (unsigned i = 0; i < N; ++i) { 43 | accumulator += size[i] * size[i]; 44 | } 45 | 46 | return std::pow(accumulator, double(1) / N); 47 | } 48 | 49 | AngleCache precompute_angle_cache(unsigned cols, unsigned rows) { 50 | const double origin_x = std::round(cols / 2.0); 51 | const double origin_y = std::round(rows / 2.0); 52 | AngleCache angle_cache({cols, rows}); 53 | for (unsigned x = 0; x < angle_cache.size()[0]; ++x) { 54 | for (unsigned y = 0; y < angle_cache.size()[1]; ++y) { 55 | const double delta_x = x - origin_x; 56 | const double delta_y = origin_y - y; 57 | const double radius = l2_norm({delta_x, delta_y}); 58 | const double theta = atan2(delta_y, delta_x); 59 | angle_cache[{x, y}] = {radius, theta}; 60 | } 61 | } 62 | 63 | return angle_cache; 64 | } 65 | 66 | inline double normalize_angle(double angle) { 67 | angle = std::fmod(angle, 2 * M_PI); 68 | if (angle < M_PI) { 69 | angle += 2 * M_PI; 70 | } 71 | 72 | return angle; 73 | } 74 | 75 | #pragma GCC diagnostic push 76 
| #pragma GCC diagnostic ignored "-Wnarrowing" 77 | #pragma GCC diagnostic ignored "-Wsign-compare" 78 | 79 | template 80 | void transform_threaded(ThreadData& thread_data) { 81 | const double diagonal_middle = thread_data.diagonal_length / 2; 82 | for (int y = 0; y < thread_data.rows; ++y) { 83 | for (int x = 0; x < thread_data.cols; ++x) { 84 | const auto polar_coordinate = thread_data.angle_cache->operator[]({x, y}); 85 | const double theta = normalize_angle(polar_coordinate.theta - 86 | thread_data.start_index * thread_data.dtheta); 87 | int theta_index = (theta / (2 * M_PI)) * thread_data.cosine_table->size(); 88 | const T intensity = 89 | thread_data.src[ravel<2>({y, x}, {thread_data.rows, thread_data.cols})]; 90 | for (int i = thread_data.start_index; i < thread_data.end_index; ++i, --theta_index) { 91 | const double x_cart = 92 | polar_coordinate.radius * thread_data.cosine_table->operator[]( 93 | (theta_index + thread_data.cosine_table->size()) % 94 | thread_data.cosine_table->size()); 95 | const int bin_idx = x_cart + diagonal_middle; 96 | thread_data.dst[ravel<2>({i, bin_idx}, 97 | {thread_data.resolution, thread_data.diagonal_length})] += 98 | intensity; 99 | } 100 | } 101 | } 102 | } 103 | 104 | #pragma GCC diagnostic pop 105 | } // namespace 106 | 107 | inline std::tuple radon::sinogram_size(unsigned rows, 108 | unsigned cols, 109 | unsigned resolution) { 110 | const unsigned diagonal_length = l2_norm({cols, rows}); 111 | 112 | return {resolution, diagonal_length}; 113 | } 114 | 115 | template 116 | std::unique_ptr radon::transform( 117 | const InputType* src, unsigned rows, unsigned cols, unsigned resolution, unsigned n_threads) { 118 | const unsigned diagonal_length = l2_norm({cols, rows}); 119 | const uint64_t n_pixels_in_output = diagonal_length * resolution; 120 | const uint64_t n_bytes_in_output = n_pixels_in_output * sizeof(OutputType); 121 | std::unique_ptr output(new OutputType[n_pixels_in_output]); 122 | std::memset(output.get(), 0, 
n_bytes_in_output); 123 | 124 | constexpr bool output_same_as_input_type = std::is_same::value; 125 | const OutputType* copy; 126 | if constexpr (!output_same_as_input_type) { 127 | const uint64_t n_pixels_in_input = rows * cols; 128 | auto new_src = new OutputType[n_pixels_in_input]; 129 | for (uint64_t i = 0; i < n_pixels_in_input; ++i) { 130 | new_src[i] = src[i]; 131 | } 132 | copy = new_src; 133 | } else { 134 | copy = src; 135 | } 136 | 137 | const std::vector cosine_table = precompute_cosines(2 * resolution); 138 | const AngleCache angle_cache = precompute_angle_cache(cols, rows); 139 | 140 | const double dtheta = M_PI / resolution; 141 | const int bin_per_thread = resolution / n_threads; 142 | std::vector threads(n_threads); 143 | std::vector > data; 144 | data.reserve(n_threads); 145 | for (unsigned i = 0; i < n_threads; i++) { 146 | data.push_back(ThreadData{ 147 | .src = copy, 148 | .dst = output.get(), 149 | .rows = rows, 150 | .cols = cols, 151 | .start_index = i * bin_per_thread, 152 | .end_index = (i == (n_threads - 1)) ? resolution : ((i + 1) * bin_per_thread), 153 | .resolution = resolution, 154 | .diagonal_length = diagonal_length, 155 | .dtheta = dtheta, 156 | .angle_cache = &angle_cache, 157 | .cosine_table = &cosine_table}); 158 | threads[i] = std::thread(transform_threaded, std::ref(data[i])); 159 | } 160 | 161 | for (auto& t : threads) { 162 | t.join(); 163 | } 164 | 165 | if constexpr (!output_same_as_input_type) { 166 | delete[] copy; 167 | } 168 | 169 | return output; 170 | } --------------------------------------------------------------------------------