├── .gitignore
├── runbuild.sh
├── samples
    ├── lena.png
    ├── inverse.png
    ├── phantom.png
    └── phantom_sinogram.png
├── app
    ├── CMakeLists.txt
    └── radon-tf.cpp
├── .clang-format
├── README.md
├── benchmarks
    ├── CMakeLists.txt
    └── cpu.cpp
├── CMakeLists.txt
├── radon
    ├── radon.hpp
    ├── tensor.hpp
    └── radon.inl.hpp
└── LICENSE


/.gitignore:
--------------------------------------------------------------------------------
1 | bin
2 | build
3 | .*sw*
4 | .vscode
5 | perf.*


--------------------------------------------------------------------------------
/runbuild.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Configures the project into ./build and then builds it with one job per CPU.
3 | # Run from the repository root (the directory that holds the top-level CMakeLists.txt).
4 | 
5 | # Stop at the first failing command so a broken configure step is not followed by a build.
6 | set -euo pipefail
7 | 
8 | cmake -B build
9 | cmake --build build -j"$(nproc)"


--------------------------------------------------------------------------------
/samples/lena.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ferasboulala/radon-tf/HEAD/samples/lena.png


--------------------------------------------------------------------------------
/samples/inverse.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ferasboulala/radon-tf/HEAD/samples/inverse.png


--------------------------------------------------------------------------------
/samples/phantom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ferasboulala/radon-tf/HEAD/samples/phantom.png


--------------------------------------------------------------------------------
/samples/phantom_sinogram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ferasboulala/radon-tf/HEAD/samples/phantom_sinogram.png


--------------------------------------------------------------------------------
/app/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Command-line tool that computes the sinogram of an image file.
2 | add_executable(radon-tf
3 |   radon-tf.cpp
4 | )
5 | 
6 | # Threads::Threads and OpenCV_LIBS come from the find_package() calls in the top-level
7 | # CMakeLists.txt. PRIVATE: nothing links against this executable.
8 | target_link_libraries(radon-tf PRIVATE Threads::Threads ${OpenCV_LIBS})


--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
1 | BasedOnStyle: Google
2 | IndentWidth: 4
3 | ColumnLimit: 100
4 | AccessModifierOffset: -4
5 | 
6 | BinPackArguments: false
7 | BinPackParameters: false
8 | AlignConsecutiveDeclarations: true


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # radon-tf
 2 | A simple, CPU-only implementation of the Radon transform. It can use more than one thread to run faster. No inverse transform is provided.
 3 | 
 4 | ![example](samples/phantom_sinogram.png)
 5 | 
 6 | ## Setup
 7 | This is a header-only library. Copy the contents of `radon/` into your project, or add that directory to your include search path. Then simply `#include "radon.hpp"`.
 8 | 
 9 | ## How to Use
10 | Take a look at `app/radon-tf.cpp` or `benchmarks/cpu.cpp`.


--------------------------------------------------------------------------------
/benchmarks/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Benchmarks are optional: they are only built when Google Benchmark is installed.
 2 | find_package(benchmark)
 3 | 
 4 | if(benchmark_FOUND)
 5 |     # gperftools' CPU profiler is optional as well. It is only linked when the library is present.
 6 |     find_library(RADON_PROFILER_LIBRARY profiler)
 7 | 
 8 |     # add_benchmark(<target_name> <entry_point>)
 9 |     #
10 |     # Creates the executable <target_name> from the single source file <entry_point> and links it
11 |     # against threads, OpenCV and Google Benchmark.
12 |     # Example: add_benchmark(radon-cpu-bench cpu.cpp)
13 |     function(add_benchmark target_name entry_point)
14 |         add_executable(${target_name} ${entry_point})
15 |         target_link_libraries(${target_name}
16 |             PRIVATE
17 |                 Threads::Threads
18 |                 ${OpenCV_LIBS}
19 |                 benchmark::benchmark
20 |         )
21 |         if(RADON_PROFILER_LIBRARY)
22 |             # These are linker flags, so they belong on the link line rather than in the compile
23 |             # options. --no-as-needed keeps the profiler linked even though no symbol of it is
24 |             # referenced. The whole group is one argument so the three parts stay in order.
25 |             target_link_libraries(${target_name}
26 |                 PRIVATE
27 |                     "-Wl,--no-as-needed,${RADON_PROFILER_LIBRARY},--as-needed"
28 |             )
29 |         endif()
30 |     endfunction()
31 | 
32 |     add_benchmark(radon-cpu-bench cpu.cpp)
33 | endif()


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.12...3.29)
 2 | project(radon-tf LANGUAGES CXX)
 3 | 
 4 | # The compiler is not hard-coded here: assigning CMAKE_CXX_COMPILER after project() is too late
 5 | # for compiler detection. Select one at configure time instead, for example
 6 | #   cmake -B build -DCMAKE_CXX_COMPILER=g++-11
 7 | 
 8 | # Default to an optimized build, but only when the user did not choose a build type and the
 9 | # generator is single-config.
10 | get_property(radon_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
11 | if(NOT radon_is_multi_config AND NOT CMAKE_BUILD_TYPE)
12 |   set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
13 | endif()
14 | 
15 | # C++20 with GNU extensions, which is what the previous "-std=gnu++2a" flag selected.
16 | set(CMAKE_CXX_STANDARD 20)
17 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
18 | set(CMAKE_CXX_EXTENSIONS ON)
19 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/bin")
20 | 
21 | # Developer switches. The first two default to ON to keep the previous behaviour.
22 | option(RADON_WARNINGS_AS_ERRORS "Treat compiler warnings as errors (-Werror)" ON)
23 | option(RADON_NATIVE_ARCH "Generate code for the build machine's CPU (-march=native)" ON)
24 | option(RADON_SAVE_TEMPS "Keep annotated compiler intermediates (-save-temps -fverbose-asm)" OFF)
25 | 
26 | # Directory-scoped on purpose: the targets in app/ and benchmarks/ rely on these flags.
27 | add_compile_options(
28 |   -Ofast
29 |   -Wall
30 |   -Wpedantic
31 | )
32 | if(RADON_WARNINGS_AS_ERRORS)
33 |   add_compile_options(-Werror)
34 | endif()
35 | if(RADON_NATIVE_ARCH)
36 |   add_compile_options(-march=native)
37 | endif()
38 | if(RADON_SAVE_TEMPS)
39 |   add_compile_options(-fverbose-asm -save-temps)
40 | endif()
41 | 
42 | find_package(Threads REQUIRED)
43 | find_package(OpenCV REQUIRED)
44 | 
45 | # Lets sources include the header-only library as "radon/radon.hpp".
46 | include_directories("${PROJECT_SOURCE_DIR}")
47 | 
48 | add_subdirectory(app)
49 | add_subdirectory(benchmarks)


--------------------------------------------------------------------------------
/radon/radon.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <memory>
 4 | #include <tuple>
 5 | 
 6 | namespace radon {
 7 | 
 8 | // Given an input image size and a theta resolution, returns <number of rows, number of columns> in
 9 | // the expected sinogram.
10 | inline std::tuple<unsigned, unsigned> sinogram_size(unsigned rows,
11 |                                                     unsigned cols,
12 |                                                     unsigned resolution);
13 | 
14 | // Returns the sinogram of a row-major `rows` x `cols` image. Use appropriate data types to avoid
15 | // overflow.
16 | //
17 | // src         Pointer to rows * cols pixels, stored row by row.
18 | // resolution  Number of projection angles; consecutive angles are pi / resolution apart.
19 | // n_threads   Number of worker threads the projection angles are split across.
20 | //
21 | // The returned buffer is row-major with the dimensions reported by sinogram_size().
22 | template <typename InputType, typename OutputType = float>
23 | std::unique_ptr<OutputType[]> transform(const InputType* src,
24 |                                         unsigned         rows,
25 |                                         unsigned         cols,
26 |                                         unsigned         resolution,
27 |                                         unsigned         n_threads = 1);
28 | 
29 | }  // namespace radon
30 | 
31 | // The definitions live in a separate file that must come after the declarations above.
32 | #include "radon.inl.hpp"
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Feras Boulala
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/radon/tensor.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <array>
 4 | #include <cassert>
 5 | #include <numeric>
 6 | #include <vector>
 7 | 
 8 | #define ALWAYS_INLINE __attribute__((always_inline))
 9 | 
10 | // Converts a multi-dimensional index into an offset in a flat buffer. The last axis varies
11 | // fastest, so for RANK == 2 the result is indices[0] * sizes[1] + indices[1].
12 | // Every index is checked against its extent with assert(), i.e. only in non-NDEBUG builds.
13 | template <unsigned RANK>
14 | inline unsigned ravel(const std::array<unsigned, RANK>& indices,
15 |                       const std::array<unsigned, RANK>& sizes) {
16 |     static_assert(RANK);
17 |     unsigned axis = RANK - 1;
18 |     assert(indices[axis] < sizes[axis]);
19 |     unsigned offset = indices[axis];
20 |     // Number of elements spanned by one step along the axis about to be processed.
21 |     unsigned stride = sizes[axis];
22 |     // Walk the remaining axes from second-to-last down to the first.
23 |     while (axis-- > 0) {
24 |         assert(indices[axis] < sizes[axis]);
25 |         offset += stride * indices[axis];
26 |         stride *= sizes[axis];
27 |     }
28 | 
29 |     return offset;
30 | }
27 | 
28 | // Dense RANK-dimensional array stored in one contiguous std::vector. Elements are addressed
29 | // with an index array that ravel() turns into a flat offset (last axis varies fastest).
30 | // Copy, move and destruction are the compiler-generated ones.
31 | template <typename T, unsigned RANK>
32 | class Tensor {
33 | public:
34 |     // Creates an empty tensor; every extent is zero.
35 |     Tensor() = default;
36 |     // Creates a tensor with the given per-axis extents, every element equal to `filler`.
37 |     Tensor(const std::array<unsigned, RANK>& size, const T& filler = T()) { resize(size, filler); }
38 | 
39 |     // Changes the extents and resizes the storage to their product. This forwards to
40 |     // std::vector::resize, so elements that already exist keep their values and only newly
41 |     // added elements are set to `filler`.
42 |     ALWAYS_INLINE void resize(const std::array<unsigned, RANK>& size, const T& filler = T()) {
43 |         size_ = size;
44 |         unsigned new_volume = 1;
45 |         for (const unsigned extent : size) {
46 |             new_volume *= extent;
47 |         }
48 |         data_.resize(new_volume, filler);
49 |     }
50 |     // Element access by multi-dimensional index. Bounds are only checked by the asserts in
51 |     // ravel().
52 |     ALWAYS_INLINE const T& operator[](const std::array<unsigned, RANK>& indices) const {
53 |         return data_[ravel<RANK>(indices, size_)];
54 |     }
55 |     ALWAYS_INLINE T& operator[](const std::array<unsigned, RANK>& indices) {
56 |         return data_[ravel<RANK>(indices, size_)];
57 |     }
58 |     // Total number of stored elements.
59 |     ALWAYS_INLINE unsigned volume() const { return data_.size(); }
60 |     // Per-axis extents, returned by value.
61 |     ALWAYS_INLINE const std::array<unsigned, RANK> size() const { return size_; }
62 |     // Pointer to the first element of the flat storage.
63 |     ALWAYS_INLINE const T*                         data() const { return data_.data(); }
64 |     ALWAYS_INLINE T*                               data() { return data_.data(); }
65 | 
66 | private:
67 |     std::array<unsigned, RANK> size_{};  // Zero-initialized so an empty tensor has known extents.
68 |     std::vector<T>             data_;
69 | };


--------------------------------------------------------------------------------
/benchmarks/cpu.cpp:
--------------------------------------------------------------------------------
 1 | #include <benchmark/benchmark.h>
 2 | 
 3 | #include <cassert>
 4 | #include <cstdint>
 5 | #include <cstdio>
 6 | #include <memory>
 7 | #include <opencv2/opencv.hpp>
 8 | #include <string>
 9 | #include <vector>
10 | 
11 | #include "radon/radon.hpp"
 8 | 
 9 | static cv::Mat phantom_img;
10 | 
11 | // Benchmark body: copies the globally loaded phantom image into an InputType buffer once, then
12 | // times single-threaded radon::transform<InputType, OutputType> calls on that buffer.
13 | template <typename InputType, typename OutputType>
14 | void benchmark_phantom(benchmark::State& state) {
15 |     static constexpr unsigned N_THETA_BINS = 720;
16 | 
17 |     // The image is read as 8-bit pixels and widened (or kept) to InputType outside the timed loop.
18 |     std::unique_ptr<InputType[]> pixels(new InputType[phantom_img.total()]);
19 |     InputType* const             raw_pixels = pixels.get();
20 |     for (uint64_t idx = 0; idx < phantom_img.total(); ++idx) {
21 |         raw_pixels[idx] = phantom_img.at<uint8_t>(idx);
22 |     }
23 | 
24 |     for (auto _ : state) {
25 |         auto sinogram = radon::transform<InputType, OutputType>(
26 |             raw_pixels, phantom_img.rows, phantom_img.cols, N_THETA_BINS, 1);
27 |         // Keep the compiler from discarding the result of the call being measured.
28 |         benchmark::DoNotOptimize(sinogram);
29 |         benchmark::ClobberMemory();
30 |     }
31 | }
26 | 
27 | // Usage: <program> phantom_image
28 | // Loads the image as greyscale and runs every registered type combination on it.
29 | // Returns 1 when the argument is missing or the image cannot be read.
30 | int main(int argc, char** argv) {
31 |     // A real check instead of assert(): asserts are compiled out under NDEBUG, which would leave
32 |     // argv[1] unchecked.
33 |     if (argc != 2) {
34 |         std::fprintf(stderr, "Usage: %s phantom_image\n", argc > 0 ? argv[0] : "radon-cpu-bench");
35 |         return 1;
36 |     }
37 |     const std::string phantom_filename = argv[1];
38 | 
39 |     phantom_img = cv::imread(phantom_filename, cv::IMREAD_GRAYSCALE);
40 |     if (phantom_img.empty()) {
41 |         std::fprintf(stderr, "Could not read image: %s\n", phantom_filename.c_str());
42 |         return 1;
43 |     }
44 | 
45 |     // Some of these tests cause overflows but that is fine. We want to benchmark the execution, not
46 |     // have valid data in the sinogram.
47 |     std::vector<benchmark::internal::Benchmark*> benchmarks = {
48 |         benchmark::RegisterBenchmark("u8", benchmark_phantom<uint8_t, uint8_t>),
49 |         benchmark::RegisterBenchmark("u16", benchmark_phantom<uint16_t, uint16_t>),
50 |         benchmark::RegisterBenchmark("u32", benchmark_phantom<uint32_t, uint32_t>),
51 |         benchmark::RegisterBenchmark("u64", benchmark_phantom<uint64_t, uint64_t>),
52 |         benchmark::RegisterBenchmark("f32", benchmark_phantom<float, float>),
53 |         benchmark::RegisterBenchmark("f64", benchmark_phantom<double, double>),
54 |         benchmark::RegisterBenchmark("u8u64", benchmark_phantom<uint8_t, uint64_t>),
55 |         benchmark::RegisterBenchmark("u64f64", benchmark_phantom<uint64_t, double>),
56 |         benchmark::RegisterBenchmark("f64u64", benchmark_phantom<double, uint64_t>),
57 |     };
58 | 
59 |     static constexpr uint64_t N_ITERATIONS = 4;
60 |     for (auto bench : benchmarks) {
61 |         bench->Iterations(N_ITERATIONS);
62 |     }
63 | 
64 |     // The image path is the only argument, so the benchmark library is given an empty argument
65 |     // list (argv[2] is the terminating null pointer).
66 |     int new_argc = 0;
67 |     benchmark::Initialize(&new_argc, &argv[2]);
68 |     benchmark::RunSpecifiedBenchmarks();
69 | }


--------------------------------------------------------------------------------
/app/radon-tf.cpp:
--------------------------------------------------------------------------------
 1 | #include <chrono>
 2 | #include <cstdio>
 3 | #include <cstdlib>
 4 | #include <iostream>
 5 | #include <limits>
 6 | #include <opencv2/opencv.hpp>
 7 | #include <string>
 8 | 
 9 | #include "radon/radon.hpp"
10 | 
11 | static constexpr unsigned RESOLUTION = 720;
12 | using RadonType = double;
13 | 
14 | // Returns the largest element of `img`, reading its buffer as img.total() consecutive values of
15 | // type T. The caller must pass the T that matches the matrix's element type.
16 | // An empty matrix yields std::numeric_limits<T>::lowest().
17 | template <typename T>
18 | static T cv_max(const cv::Mat img) {
19 |     // lowest(), not min(): for floating-point T, min() is the smallest positive value, which
20 |     // would be returned for a matrix holding only zeros or negative numbers.
21 |     T        val = std::numeric_limits<T>::lowest();
22 |     const T *data = reinterpret_cast<const T *>(img.data);
23 |     for (size_t i = 0; i < img.total(); ++i) {
24 |         if (data[i] > val) val = data[i];
25 |     }
26 | 
27 |     return val;
28 | }
24 | 
25 | // Writes the usage line, a short description and the option list to standard output.
26 | // `progname` is substituted into the usage line.
27 | void print_help(char *progname) {
28 |     printf("Usage: %s [-t num_threads] input_file output_file\n", progname);
29 |     // The remaining text has no placeholders, so it is written verbatim.
30 |     fputs(
31 |         "This program reads an input image in grey scale and evaluates the radon transform. The "
32 |         "output is stored to disk.\n",
33 |         stdout);
34 |     fputs(
35 |         "Options:\n"
36 |         "  -t num_threads  Number of threads to use (default: 1)\n"
37 |         "  -h, --help      Display this help message\n",
38 |         stdout);
39 | }
34 | 
35 | // Usage: <program> [-t num_threads] input_file output_file
36 | // Reads input_file as greyscale, computes its sinogram, rescales it to 0..255 and writes it to
37 | // output_file. Returns 0 on success (and for -h), 1 on any usage or I/O error.
38 | int main(int argc, char **argv) {
39 |     static constexpr uint64_t DEFAULT_N_THREADS = 1;
40 | 
41 |     int opt;
42 |     int num_threads = DEFAULT_N_THREADS;
43 | 
44 |     while ((opt = getopt(argc, argv, "ht:")) != -1) {
45 |         switch (opt) {
46 |             case 't': {
47 |                 // Reject anything that is not a positive integer: zero or a negative value
48 |                 // would reach radon::transform as an invalid unsigned thread count.
49 |                 char      *end = nullptr;
50 |                 const long parsed = strtol(optarg, &end, 10);
51 |                 if (end == optarg || *end != '\0' || parsed < 1 ||
52 |                     parsed > std::numeric_limits<int>::max()) {
53 |                     fprintf(stderr, "Invalid number of threads: %s\n", optarg);
54 |                     return 1;
55 |                 }
56 |                 num_threads = static_cast<int>(parsed);
57 |                 break;
58 |             }
59 |             case 'h':
60 |                 print_help(argv[0]);
61 |                 return 0;
62 |             default:
63 |                 // Unknown option or missing option argument.
64 |                 print_help(argv[0]);
65 |                 return 1;
66 |         }
67 |     }
68 | 
69 |     if (optind + 2 > argc) {
70 |         fprintf(stderr, "Usage: %s [-t num_threads] input_file output_file\n", argv[0]);
71 |         return 1;
72 |     }
73 | 
74 |     const char *input_file = argv[optind];
75 |     const char *output_file = argv[optind + 1];
76 | 
77 |     printf("Input file: %s\n", input_file);
78 |     printf("Output file: %s\n", output_file);
79 |     printf("Number of threads: %d\n", num_threads);
80 | 
81 |     cv::Mat img = cv::imread(input_file, cv::IMREAD_GRAYSCALE);
82 |     if (img.empty()) {
83 |         fprintf(stderr, "Could not read input image: %s\n", input_file);
84 |         return 1;
85 |     }
86 | 
87 |     std::chrono::high_resolution_clock::time_point start, end;
88 |     std::chrono::duration<double, std::milli>      fp_ms;
89 |     start = std::chrono::high_resolution_clock::now();
90 |     auto sinogram =
91 |         radon::transform<uint8_t, RadonType>(img.data, img.rows, img.cols, RESOLUTION, num_threads);
92 |     end = std::chrono::high_resolution_clock::now();
93 |     fp_ms = end - start;
94 |     printf("Took %f ms\n", fp_ms.count());
95 | 
96 |     const auto [n_rows, n_cols] = radon::sinogram_size(img.rows, img.cols, RESOLUTION);
97 |     // Non-owning view of the sinogram; `sinogram` keeps ownership and frees the buffer when main
98 |     // returns. CV_64FC1 must match RadonType (double).
99 |     cv::Mat cv_sinogram(n_rows, n_cols, CV_64FC1, sinogram.get());
100 | 
101 |     // Rescale so the brightest bin maps to 255. An all-zero sinogram is written as all zeros
102 |     // instead of dividing by zero.
103 |     const RadonType largest_intensity = cv_max<RadonType>(cv_sinogram);
104 |     const double    scale = largest_intensity > 0 ? 255 / largest_intensity : 0.0;
105 |     cv::Mat         output_image;
106 |     cv_sinogram.convertTo(output_image, CV_8UC1, scale);
107 | 
108 |     if (!cv::imwrite(output_file, output_image)) {
109 |         fprintf(stderr, "Could not write output image: %s\n", output_file);
110 |         return 1;
111 |     }
112 | 
113 |     return 0;
114 | }
89 | 


--------------------------------------------------------------------------------
/radon/radon.inl.hpp:
--------------------------------------------------------------------------------
  1 | #include <cmath>
  2 | #include <cstdint>
  3 | #include <thread>
  4 | #include <vector>
  5 | 
  6 | #include "tensor.hpp"
  7 | 
  8 | namespace {
  9 | 
 10 | struct PolarCoordinate {  // Polar position of one pixel, as filled in by precompute_angle_cache().
 11 |     double radius;  // Distance of the pixel from the origin chosen in precompute_angle_cache().
 12 |     double theta;   // Angle of the pixel in radians: atan2(delta_y, delta_x).
 13 | };
 14 | 
 15 | using AngleCache = Tensor<PolarCoordinate, 2>;  // One entry per pixel, indexed as {x, y}.
 16 | 
 17 | template <typename T>
 18 | struct ThreadData {  // Everything one transform_threaded() call needs; built by radon::transform().
 19 |     const T*                   src;  // Input pixels, read row-major (rows x cols).
 20 |     T*                         dst;  // Sinogram accumulator, row-major (resolution x diagonal_length).
 21 |     unsigned                   rows, cols;  // Dimensions of the input image.
 22 |     unsigned                   start_index, end_index, resolution, diagonal_length;  // This call handles angle bins [start_index, end_index).
 23 |     double                     dtheta;  // Angle step; start_index * dtheta is subtracted from each pixel angle.
 24 |     const AngleCache*          angle_cache;  // Per-pixel polar coordinates; not owned.
 25 |     const std::vector<double>* cosine_table;  // Cosine lookup table; not owned.
 26 | };
 27 | 
 28 | // Builds a lookup table with `resolution` entries; entry k holds cos(k * 2 * pi / resolution),
 29 | // i.e. one full turn sampled at equal steps. `resolution` must be non-zero (asserted).
 30 | std::vector<double> precompute_cosines(unsigned resolution) {
 31 |     assert(resolution);
 32 |     const double step = 2 * M_PI / resolution;
 33 | 
 34 |     std::vector<double> table;
 35 |     table.reserve(resolution);
 36 |     for (unsigned k = 0; k < resolution; ++k) {
 37 |         table.push_back(std::cos(k * step));
 38 |     }
 39 | 
 40 |     return table;
 41 | }
 38 | 
 39 | // Euclidean (L2) norm of an N-component vector: the square root of the sum of squares.
 40 | // The sum is accumulated in T and the root is converted back to T, so for integer T the result
 41 | // is truncated towards zero.
 42 | template <typename T, unsigned N>
 43 | inline T l2_norm(const std::array<T, N>& size) {
 44 |     T accumulator = T(0);
 45 |     for (unsigned i = 0; i < N; ++i) {
 46 |         accumulator += size[i] * size[i];
 47 |     }
 48 | 
 49 |     // Always a square root. The previous exponent 1/N was only correct for N == 2.
 50 |     return static_cast<T>(std::sqrt(static_cast<double>(accumulator)));
 51 | }
 48 | 
 49 | // Computes the polar coordinate of every pixel of a cols x rows image. The origin is
 50 | // (round(cols / 2), round(rows / 2)); delta_y is measured as origin_y - y. The result is
 51 | // indexed as {x, y}.
 52 | AngleCache precompute_angle_cache(unsigned cols, unsigned rows) {
 53 |     const double center_x = std::round(cols / 2.0);
 54 |     const double center_y = std::round(rows / 2.0);
 55 | 
 56 |     AngleCache cache({cols, rows});
 57 |     const auto extents = cache.size();
 58 |     for (unsigned col = 0; col < extents[0]; ++col) {
 59 |         const double dx = col - center_x;
 60 |         for (unsigned row = 0; row < extents[1]; ++row) {
 61 |             const double    dy = center_y - row;
 62 |             PolarCoordinate entry;
 63 |             entry.radius = l2_norm<double, 2>({dx, dy});
 64 |             entry.theta = atan2(dy, dx);
 65 |             cache[{col, row}] = entry;
 66 |         }
 67 |     }
 68 | 
 69 |     return cache;
 70 | }
 65 | 
 66 | // Reduces `angle` with fmod by a full turn, then adds one full turn to any result below pi.
 67 | // The returned value is the same angle modulo 2 * pi and is always positive, but it is NOT
 68 | // confined to [0, 2 * pi): results range up to 3 * pi.
 69 | inline double normalize_angle(double angle) {
 70 |     const double wrapped = std::fmod(angle, 2 * M_PI);
 71 |     return wrapped < M_PI ? wrapped + 2 * M_PI : wrapped;
 72 | }
 74 | 
 75 | // Accumulates the projections for the angle bins [start_index, end_index) of `thread_data`.
 76 | // For every pixel and every angle bin i, the pixel's position along the projection axis is
 77 | // radius * cos(pixel angle - i * dtheta). That position is shifted by half the diagonal and
 78 | // truncated to a bin, and the pixel intensity is added to dst[i][bin].
 79 | // Different calls write disjoint rows of dst as long as their angle-bin ranges do not overlap.
 80 | template <typename T>
 81 | void transform_threaded(ThreadData<T>& thread_data) {
 82 |     const std::vector<double>& cosines = *thread_data.cosine_table;
 83 |     const AngleCache&          angles = *thread_data.angle_cache;
 84 |     const std::int64_t         table_size = static_cast<std::int64_t>(cosines.size());
 85 |     const std::int64_t         n_bins = thread_data.diagonal_length;
 86 |     // Integer division, kept as is so the binning of in-range values does not change.
 87 |     const double diagonal_middle = thread_data.diagonal_length / 2;
 88 | 
 89 |     for (unsigned y = 0; y < thread_data.rows; ++y) {
 90 |         for (unsigned x = 0; x < thread_data.cols; ++x) {
 91 |             const PolarCoordinate polar_coordinate = angles[{x, y}];
 92 |             const double theta = normalize_angle(polar_coordinate.theta -
 93 |                                                  thread_data.start_index * thread_data.dtheta);
 94 |             // Position of the first angle in the cosine table. Each further angle bin moves
 95 |             // one table entry backwards.
 96 |             std::int64_t theta_index =
 97 |                 static_cast<std::int64_t>((theta / (2 * M_PI)) * table_size);
 98 |             const T intensity =
 99 |                 thread_data.src[ravel<2>({y, x}, {thread_data.rows, thread_data.cols})];
100 |             for (unsigned i = thread_data.start_index; i < thread_data.end_index;
101 |                  ++i, --theta_index) {
102 |                 // Wrap into [0, table_size), also for negative theta_index.
103 |                 const std::int64_t wrapped =
104 |                     ((theta_index % table_size) + table_size) % table_size;
105 |                 const double       x_cart = polar_coordinate.radius * cosines[wrapped];
106 |                 const std::int64_t bin_idx = static_cast<std::int64_t>(x_cart + diagonal_middle);
107 |                 // Pixels far from the origin can project just outside the sinogram row.
108 |                 // Writing them would land in a neighbouring row or past the buffer, so skip.
109 |                 if (bin_idx < 0 || bin_idx >= n_bins) {
110 |                     continue;
111 |                 }
112 |                 thread_data.dst[static_cast<std::size_t>(i) * thread_data.diagonal_length +
113 |                                 static_cast<std::size_t>(bin_idx)] += intensity;
114 |             }
115 |         }
116 |     }
117 | }
118 | 
105 | }  // namespace
106 | 
107 | // Sinogram dimensions for a rows x cols image: `resolution` rows (one per projection angle) and
108 | // as many columns as the truncated length of the image diagonal.
109 | inline std::tuple<unsigned, unsigned> radon::sinogram_size(unsigned rows,
110 |                                                            unsigned cols,
111 |                                                            unsigned resolution) {
112 |     const std::array<unsigned, 2> image_extent{cols, rows};
113 | 
114 |     return std::make_tuple(resolution, l2_norm<unsigned, 2>(image_extent));
115 | }
114 | 
115 | // Computes the sinogram of the row-major rows x cols image at `src`.
116 | //
117 | // The result has `resolution` rows, one per projection angle (spaced pi / resolution apart), and
118 | // diagonal_length columns, where diagonal_length is the truncated length of the image diagonal.
119 | // The angle bins are split into contiguous ranges, one per worker thread; the last worker also
120 | // takes the remainder. A thread count of 0 is treated as 1, and no more workers than angle bins
121 | // are started.
122 | //
123 | // The default for OutputType is given on the declaration in radon.hpp; it must not be repeated
124 | // here.
125 | template <typename InputType, typename OutputType>
126 | std::unique_ptr<OutputType[]> radon::transform(
127 |     const InputType* src, unsigned rows, unsigned cols, unsigned resolution, unsigned n_threads) {
128 |     const unsigned diagonal_length = l2_norm<unsigned, 2>({cols, rows});
129 |     // Widen before multiplying so large images do not overflow 32-bit arithmetic.
130 |     const uint64_t n_pixels_in_output = static_cast<uint64_t>(diagonal_length) * resolution;
131 |     // The trailing () value-initializes the array, i.e. every bin starts at zero.
132 |     std::unique_ptr<OutputType[]> output(new OutputType[n_pixels_in_output]());
133 | 
134 |     // The workers accumulate in OutputType, so the input is converted once up front when the
135 |     // types differ. The vector owns that temporary copy and releases it on every exit path.
136 |     std::vector<OutputType> converted;
137 |     const OutputType*       input = nullptr;
138 |     if constexpr (std::is_same<InputType, OutputType>::value) {
139 |         input = src;
140 |     } else {
141 |         const uint64_t n_pixels_in_input = static_cast<uint64_t>(rows) * cols;
142 |         converted.assign(src, src + n_pixels_in_input);
143 |         input = converted.data();
144 |     }
145 | 
146 |     // The table covers a full turn in steps of pi / resolution, matching dtheta below.
147 |     const std::vector<double> cosine_table = precompute_cosines(2 * resolution);
148 |     const AngleCache          angle_cache = precompute_angle_cache(cols, rows);
149 | 
150 |     unsigned n_workers = n_threads;
151 |     if (n_workers > resolution) {
152 |         n_workers = resolution;
153 |     }
154 |     if (n_workers == 0) {
155 |         n_workers = 1;
156 |     }
157 | 
158 |     const double   dtheta = M_PI / resolution;
159 |     const unsigned bins_per_worker = resolution / n_workers;
160 | 
161 |     // Reserved up front: the threads hold references to the elements of `data`, which must not
162 |     // move while they run.
163 |     std::vector<ThreadData<OutputType> > data;
164 |     data.reserve(n_workers);
165 |     std::vector<std::thread> threads;
166 |     threads.reserve(n_workers);
167 |     for (unsigned i = 0; i < n_workers; i++) {
168 |         const bool is_last = (i + 1 == n_workers);
169 |         data.push_back(ThreadData<OutputType>{
170 |             .src = input,
171 |             .dst = output.get(),
172 |             .rows = rows,
173 |             .cols = cols,
174 |             .start_index = i * bins_per_worker,
175 |             .end_index = is_last ? resolution : ((i + 1) * bins_per_worker),
176 |             .resolution = resolution,
177 |             .diagonal_length = diagonal_length,
178 |             .dtheta = dtheta,
179 |             .angle_cache = &angle_cache,
180 |             .cosine_table = &cosine_table});
181 |         threads.emplace_back(transform_threaded<OutputType>, std::ref(data.back()));
182 |     }
183 | 
184 |     for (auto& t : threads) {
185 |         t.join();
186 |     }
187 | 
188 |     return output;
189 | }


--------------------------------------------------------------------------------