├── README.md
├── cpp
│   ├── CMakeLists.txt
│   ├── binary_predictor.cpp
│   ├── binary_predictor.h
│   ├── main.cpp
│   ├── unblur_predictor.cpp
│   ├── unblur_predictor.h
│   ├── unshadow_predictor.cpp
│   ├── unshadow_predictor.h
│   ├── unwrap_predictor.cpp
│   └── unwrap_predictor.h
├── images
│   ├── demo.jpg
│   ├── demo1.jpg
│   ├── demo1.png
│   ├── demo2.png
│   └── demo3.jpg
└── python
    ├── binary_predictor.py
    ├── main.py
    ├── unblur_predictor.py
    ├── unshadow_predictor.py
    └── unwrap_predictor.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
When working on document OCR projects, you often run into paper documents that are warped or shadowed. This program rectifies such documents, which benefits the downstream text detection and recognition tasks.

ONNX model files: https://pan.baidu.com/s/1Bza0jK_MlUSXoLzrpWPGQg (extraction code: fipm)

Adapted from https://www.modelscope.cn/studios/jockerK/DocUnwrap/files
--------------------------------------------------------------------------------
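A minimal end-to-end run mirrors python/main.py; the relative paths below are an assumption (downloaded ONNX weights placed under weights/, script run from the repo root):

```python
import cv2
from unwrap_predictor import UVDocPredictor
from unshadow_predictor import GCDRNET

# Assumed locations for the weights downloaded from the link above.
unwrap_model = UVDocPredictor('weights/uvdoc.onnx')
unshadow_model = GCDRNET('weights/gcnet.onnx', 'weights/drnet.onnx')

img = cv2.imread('images/demo3.jpg')
img = unwrap_model.predict(img)      # dewarp the page first
img = unshadow_model.predict(img)    # then remove shading
cv2.imwrite('out.jpg', img)
```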
/cpp/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.10)
project(test)
set(CMAKE_CXX_STANDARD 17)

add_executable(${PROJECT_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
                               ${CMAKE_CURRENT_SOURCE_DIR}/binary_predictor.cpp
                               ${CMAKE_CURRENT_SOURCE_DIR}/unblur_predictor.cpp
                               ${CMAKE_CURRENT_SOURCE_DIR}/unshadow_predictor.cpp
                               ${CMAKE_CURRENT_SOURCE_DIR}/unwrap_predictor.cpp)

target_include_directories(${PROJECT_NAME}
    PUBLIC "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/include/opencv4"
    PUBLIC "/home/wangbo/libs/mylibs/onnxruntime-linux-x64-1.18.1/include"
    PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
)
target_link_libraries(${PROJECT_NAME}
    "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/lib/libopencv_imgcodecs.so.4.8.0"
    "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/lib/libopencv_highgui.so.4.8.0"
    "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/lib/libopencv_imgproc.so.4.8.0"
    "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/lib/libopencv_core.so.4.8.0"
    "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/lib/libopencv_videoio.so.4.8.0"
    "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/lib/libopencv_video.so.4.8.0"
    "/home/wangbo/libs/mylibs/onnxruntime-linux-x64-1.18.1/lib/libonnxruntime.so.1.18.1"
)
--------------------------------------------------------------------------------
/cpp/binary_predictor.cpp:
--------------------------------------------------------------------------------
#include "binary_predictor.h"


using namespace cv;
using namespace std;
using namespace Ort;


static Mat pad_to_multiple_of_n(const Mat& image, const int n, int* pad_info)
{
    int original_height = image.rows;
    int original_width = image.cols;

    int target_width = ((original_width + n - 1) / n) * n;
    int target_height = ((original_height + n - 1) / n) * n;

    Mat padded_image(target_height, target_width, CV_8UC3, Scalar(255, 255, 255));

    int start_x = (target_width - original_width) / 2;
    int start_y = (target_height - original_height) / 2;

    image.copyTo(padded_image(Rect(start_x, start_y, original_width, original_height)));
    pad_info[0] = start_x;
    pad_info[1] = start_y;
    pad_info[2] = original_height;
    pad_info[3] = original_width;
    return padded_image;
}

UnetCNN::UnetCNN(const string& model_path)
{
    if (model_path.empty())
    {
        std::cout << "onnx path error" << std::endl;
    }

    sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    // load the model
    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
#ifdef _WIN32
    std::wstring w_model_path = converter.from_bytes(model_path);
    ort_session = new Ort::Session(env, w_model_path.c_str(), sessionOptions);
#else
    ort_session = new Ort::Session(env, model_path.c_str(), sessionOptions);
#endif
}

void UnetCNN::preprocess(const Mat& srcimg)
{
    Mat img = pad_to_multiple_of_n(srcimg, 32, this->pad_info);
    this->input_h = img.rows;
    this->input_w = img.cols;
    vector<Mat> bgrChannels(3);
    split(img, bgrChannels);
    for (int c = 0; c < 3; c++)
    {
        bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 255.0);
    }

    const int image_area = this->input_h * this->input_w;
    this->input_image.clear();
    this->input_image.resize(3 * image_area);
    size_t single_chn_size = image_area * sizeof(float);
    // pack as RGB (matches cv2.COLOR_BGR2RGB in the Python version)
    memcpy(this->input_image.data(), (float *)bgrChannels[2].data, single_chn_size);
    memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
    memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[0].data, single_chn_size);
}

Mat UnetCNN::predict(const Mat& srcimg)
{
    this->preprocess(srcimg);
    std::vector<int64_t> input_img_shape = {1, 3, this->input_h, this->input_w};
    Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());

    vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, this->input_names.size(), this->output_names.data(), this->output_names.size());

    std::vector<int64_t> out_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
    const int out_h = out_shape[2];
    const int out_w = out_shape[3];
    float* pred = ort_outputs[0].GetTensorMutableData<float>();
    Mat out = Mat(out_h, out_w, CV_32FC1, pred);
    Mat cropped_image = this->postprocess(out);
    out.release();
    vector<Mat> channel_mats = {cropped_image, cropped_image, cropped_image};
    Mat out_img;
    merge(channel_mats, out_img);
    out_img.convertTo(out_img, CV_8UC3);
    return out_img;
}

Mat UnetCNN::postprocess(Mat& img)
{
    double min_value, max_value;
    minMaxLoc(img, &min_value, &max_value, 0, 0);
    img = 1 - (img - min_value) / (max_value - min_value);
    img = img * 255 + 0.5;
    img.setTo(0, img < 0);
    img.setTo(255, img > 255);
    Mat cropped_image;
    img(Rect(this->pad_info[0], this->pad_info[1], this->pad_info[3], this->pad_info[2])).copyTo(cropped_image);
    return cropped_image;
}
--------------------------------------------------------------------------------
/cpp/binary_predictor.h:
--------------------------------------------------------------------------------
#ifndef BINARY_PREDICTOR_H
#define BINARY_PREDICTOR_H
#include <iostream>
#include <string>
#include <vector>
#include <cstring>
#include <locale>
#include <codecvt>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>  /// uncomment if using CUDA acceleration
#include <onnxruntime_cxx_api.h>


class UnetCNN
{
public:
    UnetCNN(const std::string& model_path);
    cv::Mat predict(const cv::Mat& srcimg);
private:
    void preprocess(const cv::Mat& img);
    std::vector<float> input_image;
    int pad_info[4];
    int input_h;
    int input_w;
    cv::Mat postprocess(cv::Mat& img);

    Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "BINARY PREDICTOR");
    Ort::SessionOptions sessionOptions = Ort::SessionOptions();

    Ort::Session *ort_session = nullptr;
    const std::vector<const char*> input_names = {"input"};
    const std::vector<const char*> output_names = {"output"};

    Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::RunOptions runOptions;
};


#endif
--------------------------------------------------------------------------------
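Note on GPU inference: the headers in this repo keep the CUDA provider include commented out. A sketch of what enabling it could look like against an onnxruntime GPU build (an assumption, not code from this repo):

    // in each constructor, before creating the Ort::Session
    OrtCUDAProviderOptions cuda_options{};                      // defaults select GPU 0
    sessionOptions.AppendExecutionProvider_CUDA(cuda_options);  // requires onnxruntime-gpu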
/cpp/main.cpp:
--------------------------------------------------------------------------------
#include "binary_predictor.h"
#include "unblur_predictor.h"
#include "unshadow_predictor.h"
#include "unwrap_predictor.h"
#include <iostream>


using namespace cv;
using namespace std;


int main()
{
    UnetCNN binary_model("/home/wangbo/doc-undistort/weights/unetcnn.onnx");   ///// make sure the file paths are correct
    NAF_DPM unblur_model("/home/wangbo/doc-undistort/weights/nafdpm.onnx");
    GCDRNET unshadow_model("/home/wangbo/doc-undistort/weights/gcnet.onnx", "/home/wangbo/doc-undistort/weights/drnet.onnx");
    UVDocPredictor unwrap_model("/home/wangbo/doc-undistort/weights/uvdoc.onnx");

    vector<string> task_list = {"unwrap", "unshadow", "unblur", "OpenCvBilateral"};

    string imgpath = "/home/wangbo/doc-undistort/images/demo3.jpg";

    Mat srcimg = imread(imgpath);
    // Mat out_img = unwrap_model.predict(srcimg);

    Mat out_img = srcimg.clone();
    for(string task : task_list)
    {
        ///// switch does not work on strings, hence the if-else chain
        if(task == "unwrap")
        {
            out_img = unwrap_model.predict(out_img);
        }
        else if(task == "unshadow")
        {
            out_img = unshadow_model.predict(out_img);
        }
        else if(task == "unblur")
        {
            out_img = unblur_model.predict(out_img);
        }
        else if(task == "OpenCvBilateral")
        {
            out_img = OpenCvBilateral(out_img);
        }
        else if(task == "binary")
        {
            out_img = binary_model.predict(out_img);
        }
        else
        {
            cout << "task not found" << endl;
        }

    }

    imwrite("out.jpg", out_img);
    return 0;
}
--------------------------------------------------------------------------------
/cpp/unblur_predictor.cpp:
--------------------------------------------------------------------------------
#include "unblur_predictor.h"


using namespace cv;
using namespace std;
using namespace Ort;


NAF_DPM::NAF_DPM(const string& model_path)
{
    if (model_path.empty())
    {
        std::cout << "onnx path error" << std::endl;
    }

    sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    // load the model
    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
#ifdef _WIN32
    std::wstring w_model_path = converter.from_bytes(model_path);
    ort_session = new Ort::Session(env, w_model_path.c_str(), sessionOptions);
#else
    ort_session = new Ort::Session(env, model_path.c_str(), sessionOptions);
#endif
}

void NAF_DPM::preprocess(const Mat& srcimg)
{
    this->input_h = srcimg.rows;
    this->input_w = srcimg.cols;
    vector<Mat> bgrChannels(3);
    split(srcimg, bgrChannels);
    for (int c = 0; c < 3; c++)
    {
        bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 255.0);
    }

    const int image_area = this->input_h * this->input_w;
    this->input_image.clear();
    this->input_image.resize(3 * image_area);
    size_t single_chn_size = image_area * sizeof(float);
    // pack in BGR order (the Python version also feeds BGR directly)
    memcpy(this->input_image.data(), (float *)bgrChannels[0].data, single_chn_size);
    memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
    memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
}

Mat NAF_DPM::predict(const Mat& srcimg)
{
    this->preprocess(srcimg);
    std::vector<int64_t> input_img_shape = {1, 3, this->input_h, this->input_w};
    Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());

    vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, this->input_names.size(), this->output_names.data(), this->output_names.size());

    std::vector<int64_t> out_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
    const int out_h = out_shape[2];
    const int out_w = out_shape[3];
    const int area = out_h * out_w;
    float* pred = ort_outputs[0].GetTensorMutableData<float>();
    Mat bmat = Mat(out_h, out_w, CV_32FC1, pred);
    Mat gmat = Mat(out_h, out_w, CV_32FC1, pred + area);
    Mat rmat = Mat(out_h, out_w, CV_32FC1, pred + area * 2);

    this->postprocess(bmat);
    this->postprocess(gmat);
    this->postprocess(rmat);

    vector<Mat> channel_mats = {bmat, gmat, rmat};
    Mat out_img;
    merge(channel_mats, out_img);
    out_img.convertTo(out_img, CV_8UC3);
    return out_img;
}

void NAF_DPM::postprocess(Mat& img)
{
    img = img * 255 + 0.5;
    img.setTo(0, img < 0);
    img.setTo(255, img > 255);
}


cv::Mat OpenCvBilateral(const cv::Mat& img) {
    cv::Mat img_uint8;
    img.convertTo(img_uint8, CV_8U);

    // bilateral filtering
    cv::Mat bilateral;
    cv::bilateralFilter(img_uint8, bilateral, 9, 75, 75);

    // adaptive histogram equalization (CLAHE on the L channel)
    cv::Mat lab;
    cv::cvtColor(bilateral, lab, cv::COLOR_BGR2Lab);
    std::vector<cv::Mat> lab_planes(3);
    cv::split(lab, lab_planes);
    cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE(3.0, cv::Size(8, 8));
    clahe->apply(lab_planes[0], lab_planes[0]);
    cv::merge(lab_planes, lab);
    cv::Mat enhanced;
    cv::cvtColor(lab, enhanced, cv::COLOR_Lab2BGR);

    // apply a sharpening kernel
    cv::Mat kernel = (cv::Mat_<float>(3, 3) << 0, -1, 0, -1, 5, -1, 0, -1, 0);
    cv::Mat sharpened;
    cv::filter2D(enhanced, sharpened, -1, kernel);

    return sharpened;
}
--------------------------------------------------------------------------------
/cpp/unblur_predictor.h:
--------------------------------------------------------------------------------
#ifndef UNBLUR_PREDICTOR_H
#define UNBLUR_PREDICTOR_H
#include <iostream>
#include <string>
#include <vector>
#include <cstring>
#include <locale>
#include <codecvt>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>  /// uncomment if using CUDA acceleration
#include <onnxruntime_cxx_api.h>


class NAF_DPM
{
public:
    NAF_DPM(const std::string& model_path);
    cv::Mat predict(const cv::Mat& srcimg);
private:
    void preprocess(const cv::Mat& img);
    std::vector<float> input_image;
    int input_h;
    int input_w;
    void postprocess(cv::Mat& img);

    Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "UNBLUR PREDICTOR");
    Ort::SessionOptions sessionOptions = Ort::SessionOptions();

    Ort::Session *ort_session = nullptr;
    const std::vector<const char*> input_names = {"input"};
    const std::vector<const char*> output_names = {"output"};

    Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::RunOptions runOptions;
};

cv::Mat OpenCvBilateral(const cv::Mat& img);

#endif
--------------------------------------------------------------------------------
/cpp/unshadow_predictor.cpp:
--------------------------------------------------------------------------------
#include "unshadow_predictor.h"


using namespace cv;
using namespace std;
using namespace Ort;


GCDRNET::GCDRNET(const string& gcnet_modelpath, const string& drnet_modelpath)
{
    if (gcnet_modelpath.empty() || drnet_modelpath.empty())
    {
        std::cout << "onnx path error" << std::endl;
    }

    sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    // load the models
    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
#ifdef _WIN32
    std::wstring w_model_path = converter.from_bytes(gcnet_modelpath);
    gcnet_session = new Ort::Session(env, w_model_path.c_str(), sessionOptions);
    std::wstring w_model2_path = converter.from_bytes(drnet_modelpath);
    drnet_session = new Ort::Session(env, w_model2_path.c_str(), sessionOptions);
#else
    gcnet_session = new Ort::Session(env, gcnet_modelpath.c_str(), sessionOptions);
    drnet_session = new Ort::Session(env, drnet_modelpath.c_str(), sessionOptions);
#endif
}

Mat GCDRNET::stride_integral(const Mat& srcimg, const int stride)
{
    int h = srcimg.rows;
    int w = srcimg.cols;

    Mat img = srcimg.clone();
    if((h % stride) != 0)
    {
        this->padding_h = stride - (h % stride);
        cv::copyMakeBorder(img, img, padding_h, 0, 0, 0, cv::BORDER_REPLICATE);
    }
    else
    {
        this->padding_h = 0;
    }

    if((w % stride) != 0)
    {
        this->padding_w = stride - (w % stride);
        cv::copyMakeBorder(img, img, 0, 0, padding_w, 0, cv::BORDER_REPLICATE);
    }
    else
    {
        this->padding_w = 0;
    }
    return img;
}

void GCDRNET::preprocess(const Mat& srcimg)
{
    Mat img = this->stride_integral(srcimg, 32);
    this->input_h = img.rows;
    this->input_w = img.cols;
    vector<Mat> bgrChannels(3);
    split(img, bgrChannels);
    for (int c = 0; c < 3; c++)
    {
        bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 255.0);
    }

    const int image_area = this->input_h * this->input_w;
    this->input_image.clear();
    this->input_image.resize(3 * image_area);
    size_t single_chn_size = image_area * sizeof(float);
    memcpy(this->input_image.data(), (float *)bgrChannels[0].data, single_chn_size);
    memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
    memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
}

Mat GCDRNET::predict(const Mat& srcimg)
{
    this->preprocess(srcimg);
    std::vector<int64_t> input_img_shape = {1, 3, this->input_h, this->input_w};
    Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());

    vector<Value> gcnet_outputs = this->gcnet_session->Run(runOptions, this->input_names.data(), &input_tensor_, this->input_names.size(), this->output_names.data(), this->output_names.size());
    float* img_shadow = gcnet_outputs[0].GetTensorMutableData<float>();
    const int len = 3 * this->input_h * this->input_w;
    this->concatenated_input.clear();
    this->concatenated_input.resize(2 * len);
    for(int i=0;i<len;i++)
    {
        this->concatenated_input[i] = this->input_image[i];
        float x = this->input_image[i] / img_shadow[i];
        x = std::min(std::max(x, 0.0f), 1.0f);
        this->concatenated_input[i + len] = x;
    }

    std::vector<int64_t> input2_shape = {1, 2*3, this->input_h, this->input_w};
    Value input2_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->concatenated_input.data(), this->concatenated_input.size(), input2_shape.data(), input2_shape.size());

    vector<Value> drnet_outputs = this->drnet_session->Run(runOptions, this->input_names.data(), &input2_tensor_, this->input_names.size(), this->output_names.data(), this->output_names.size());

    std::vector<int64_t> out_shape = drnet_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
    const int out_h = out_shape[2];
    const int out_w = out_shape[3];
    const int area = out_h * out_w;
    float* pred = drnet_outputs[0].GetTensorMutableData<float>();
    Mat bmat = Mat(out_h, out_w, CV_32FC1, pred);
    Mat gmat = Mat(out_h, out_w, CV_32FC1, pred + area);
    Mat rmat = Mat(out_h, out_w, CV_32FC1, pred + area * 2);
    bmat *= 255;
    gmat *= 255;
    rmat *= 255;

    vector<Mat> channel_mats = {bmat, gmat, rmat};
    Mat out_img;
    merge(channel_mats, out_img);
    Mat enhance_img;
    out_img(Rect(this->padding_w, this->padding_h, out_w-this->padding_w, out_h-this->padding_h)).copyTo(enhance_img);
    enhance_img.convertTo(enhance_img, CV_8UC3);
    return enhance_img;
}
--------------------------------------------------------------------------------
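For reference, the two-stage flow above matches python/unshadow_predictor.py: GCNet estimates a per-pixel shading map S, the relit image clip(I / S, 0, 1) is formed, and DRNet consumes the 6-channel concatenation of the padded input and the relit image. The same composition as a NumPy sketch (illustrative only; I and S are (1, 3, H, W) float32 arrays in [0, 1]):

    import numpy as np
    relit = np.clip(I / S, 0.0, 1.0)                   # undo the estimated shading
    drnet_input = np.concatenate((I, relit), axis=1)   # (1, 6, H, W)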
/cpp/unshadow_predictor.h:
--------------------------------------------------------------------------------
#ifndef UNSHADOW_PREDICTOR_H
#define UNSHADOW_PREDICTOR_H
#include <iostream>
#include <string>
#include <vector>
#include <cstring>
#include <locale>
#include <codecvt>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>  /// uncomment if using CUDA acceleration
#include <onnxruntime_cxx_api.h>


class GCDRNET
{
public:
    GCDRNET(const std::string& gcnet_modelpath, const std::string& drnet_modelpath);
    cv::Mat predict(const cv::Mat& srcimg);
private:
    void preprocess(const cv::Mat& img);
    std::vector<float> input_image;
    std::vector<float> concatenated_input;
    int input_h;
    int input_w;
    int padding_h;
    int padding_w;
    cv::Mat stride_integral(const cv::Mat& img, const int stride);

    Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "UNSHADOW PREDICTOR");
    Ort::SessionOptions sessionOptions = Ort::SessionOptions();

    Ort::Session *gcnet_session = nullptr;
    Ort::Session *drnet_session = nullptr;
    const std::vector<const char*> input_names = {"input"};
    const std::vector<const char*> output_names = {"output"};

    Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::RunOptions runOptions;
};


#endif
--------------------------------------------------------------------------------
/cpp/unwrap_predictor.cpp:
--------------------------------------------------------------------------------
#include "unwrap_predictor.h"


using namespace cv;
using namespace std;
using namespace Ort;


UVDocPredictor::UVDocPredictor(const string& model_path)
{
    if (model_path.empty())
    {
        std::cout << "onnx path error" << std::endl;
    }

    sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    // load the model
    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
#ifdef _WIN32
    std::wstring w_model_path = converter.from_bytes(model_path);
    ort_session = new Ort::Session(env, w_model_path.c_str(), sessionOptions);
#else
    ort_session = new Ort::Session(env, model_path.c_str(), sessionOptions);
#endif
}

void UVDocPredictor::preprocess(Mat& img)
{
    img.convertTo(img, CV_32FC3, 1 / 255.0);

    Mat temp;
    cv::resize(img, temp, cv::Size(this->input_w, this->input_h));
    vector<Mat> bgrChannels(3);
    split(temp, bgrChannels);
    const int image_area = this->input_h * this->input_w;
    this->input_image.clear();
    this->input_image.resize(3 * image_area);
    size_t single_chn_size = image_area * sizeof(float);
    memcpy(this->input_image.data(), (float *)bgrChannels[0].data, single_chn_size);
    memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
    memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
}

Mat UVDocPredictor::predict(const Mat& srcimg)
{
    const int size[2] = {srcimg.cols, srcimg.rows};
    Mat img = srcimg.clone();
    this->preprocess(img);
    std::vector<int64_t> input_img_shape = {1, 3, this->input_h, this->input_w};
    Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());

    vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, this->input_names.size(), this->output_names.data(), this->output_names.size());

    std::vector<int64_t> out_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
    float* output = ort_outputs[0].GetTensorMutableData<float>();
    Mat out_img = this->postprocess(img, size, output, out_shape);
    out_img.convertTo(out_img, CV_8UC3);
    return out_img;
}

static Mat convert3channeltonchw(const Mat& img)
{
    vector<Mat> bgrChannels(3);
    split(img, bgrChannels);
    const int image_area = img.rows * img.cols;
    const vector<int> newsz = {1, 3, img.rows, img.cols};
    Mat dstimg = cv::Mat(newsz, CV_32FC1);
    size_t single_chn_size = image_area * sizeof(float);
    memcpy((float *)dstimg.data, (float *)bgrChannels[0].data, single_chn_size);
    memcpy((float *)dstimg.data + image_area, (float *)bgrChannels[1].data, single_chn_size);
    memcpy((float *)dstimg.data + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
    return dstimg;
}

Mat UVDocPredictor::postprocess(const Mat& img, const int* size, const float* output, vector<int64_t> out_shape)
{
    Mat warped_img = convert3channeltonchw(img);

    Mat upsampled_grid = this->interpolate(output, out_shape, size, true);
    cv::transposeND(upsampled_grid, {0, 2, 3, 1}, upsampled_grid);

    Mat unwarped_img = this->grid_sample(warped_img, upsampled_grid, true);

    float* pdata = (float*)unwarped_img.data;
    const int out_h = unwarped_img.size[2];
    const int out_w = unwarped_img.size[3];
    const int area = out_h * out_w;
    Mat bmat = Mat(out_h, out_w, CV_32FC1, pdata);
    Mat gmat = Mat(out_h, out_w, CV_32FC1, pdata + area);
    Mat rmat = Mat(out_h, out_w, CV_32FC1, pdata + area * 2);
    bmat *= 255;
    gmat *= 255;
    rmat *= 255;
    vector<Mat> channel_mats = {bmat, gmat, rmat};
    Mat out_img;
    merge(channel_mats, out_img);
    return out_img;
}

static float get_pixel_value(const float* pinput, const int H, const int W, const int y, const int x)
{
    if(y < 0 || y >= H || x < 0 || x >= W)
    {
        return 0.f;
    }
    return pinput[y*W+x];
}

Mat UVDocPredictor::interpolate(const float* input_tensor, vector<int64_t> shape, const int* size, const bool align_corners)
{
    const int B = shape[0];
    const int C = shape[1];
    const int H = shape[2];
    const int W = shape[3];
    const int new_H = size[1];
    const int new_W = size[0];
    const vector<int> newsz = {B, C, new_H, new_W};
    Mat dstimg = cv::Mat(newsz, CV_32FC1);
    for(int n=0;n<B;n++)
    {
        for(int cid=0;cid<C;cid++)
        {
            float scale_h = (new_H > 1) ? (float(H - 1) / float(new_H - 1)):0.f;
            float scale_w = (new_W > 1) ? (float(W - 1) / float(new_W - 1)):0.f;
            if(!align_corners)
            {
                scale_h = (float)H / new_H;
                scale_w = (float)W / new_W;
            }
            const float* pinput = input_tensor + n*C*H*W + cid*H*W;
            for(int h=0;h<new_H;h++)
            {
                for(int w=0;w<new_W;w++)
                {
                    // map the destination pixel back to source coordinates
                    const float y = h * scale_h;
                    const float x = w * scale_w;
                    const int y0 = floor(y);
                    const int x0 = floor(x);
                    const int y1 = y0 + 1;
                    const int x1 = x0 + 1;
                    const float denom = (y1-y0)*(x1-x0);

                    const float f_x0_y0 = get_pixel_value(pinput, H, W, y0, x0);
                    const float f_x1_y0 = get_pixel_value(pinput, H, W, y0, x1);
                    const float f_x0_y1 = get_pixel_value(pinput, H, W, y1, x0);
                    const float f_x1_y1 = get_pixel_value(pinput, H, W, y1, x1);

                    const float f = ((y1-y)*(x1-x) / denom) * f_x0_y0 + ((y1-y)*(x-x0) / denom) * f_x1_y0 + ((y-y0)*(x1-x) / denom) * f_x0_y1 + ((y-y0)*(x-x0) / denom) * f_x1_y1;
                    dstimg.ptr<float>(n, cid, h)[w] = f;
                }
            }
        }
    }
    return dstimg;
}

Mat UVDocPredictor::grid_sample(const Mat& input_tensor, const Mat& grid, const bool align_corners)
{
    const int B = input_tensor.size[0];
    const int C = input_tensor.size[1];
    const int H = input_tensor.size[2];
    const int W = input_tensor.size[3];
    const int B_grid = grid.size[0];
    const int H_grid = grid.size[1];
    const int W_grid = grid.size[2];

    if(B != B_grid || H != H_grid || W != W_grid)
    {
        cout<<"Error, Input tensor and grid must have the same spatial dimensions."<<endl;
        return Mat();
    }

    const vector<int> newsz = {B, C, H, W};
    Mat dstimg = cv::Mat(newsz, CV_32FC1);
    for(int n=0;n<B;n++)
    {
        for(int cid=0;cid<C;cid++)
        {
            const float* pinput = (const float*)input_tensor.data + n*C*H*W + cid*H*W;
            for(int h=0;h<H;h++)
            {
                for(int w=0;w<W;w++)
                {
                    // convert normalized grid coordinates in [-1, 1] to pixel coordinates
                    float x = (grid.ptr<float>(n, h, w)[0] + 1) * (W - 1) / 2;
                    float y = (grid.ptr<float>(n, h, w)[1] + 1) * (H - 1) / 2;
                    if(!align_corners)
                    {
                        x = ((grid.ptr<float>(n, h, w)[0] + 1) * W - 1) / 2;
                        y = ((grid.ptr<float>(n, h, w)[1] + 1) * H - 1) / 2;
                    }

                    const int y0 = floor(y);
                    const int x0 = floor(x);
                    const int y1 = y0 + 1;
                    const int x1 = x0 + 1;
                    const float denom = (y1-y0)*(x1-x0);

                    const float f_x0_y0 = get_pixel_value(pinput, H, W, y0, x0);
                    const float f_x1_y0 = get_pixel_value(pinput, H, W, y0, x1);
                    const float f_x0_y1 = get_pixel_value(pinput, H, W, y1, x0);
                    const float f_x1_y1 = get_pixel_value(pinput, H, W, y1, x1);

                    const float f = ((y1-y)*(x1-x) / denom) * f_x0_y0 + ((y1-y)*(x-x0) / denom) * f_x1_y0 + ((y-y0)*(x1-x) / denom) * f_x0_y1 + ((y-y0)*(x-x0) / denom) * f_x1_y1;
                    dstimg.ptr<float>(n, cid, h)[w] = f;
                }
            }
        }
    }
    return dstimg;
}
--------------------------------------------------------------------------------
/cpp/unwrap_predictor.h:
--------------------------------------------------------------------------------
#ifndef UNWRAP_PREDICTOR_H
#define UNWRAP_PREDICTOR_H
#include <iostream>
#include <string>
#include <vector>
#include <cstring>
#include <cmath>
#include <locale>
#include <codecvt>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>  /// uncomment if using CUDA acceleration
#include <onnxruntime_cxx_api.h>


class UVDocPredictor
{
public:
    UVDocPredictor(const std::string& model_path);
    cv::Mat predict(const cv::Mat& srcimg);
private:
    void preprocess(cv::Mat& img);
    std::vector<float> input_image;
    const int input_h = 712;
    const int input_w = 488;
    const int grid_size[2] = {45, 31};
    cv::Mat postprocess(const cv::Mat& img, const int* size, const float* output, std::vector<int64_t> out_shape);
    cv::Mat interpolate(const float* input_tensor, std::vector<int64_t> shape, const int* size, const bool align_corners);
    cv::Mat grid_sample(const cv::Mat& input_tensor, const cv::Mat& grid, const bool align_corners);

    Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "UNWRAP PREDICTOR");
    Ort::SessionOptions sessionOptions = Ort::SessionOptions();

    Ort::Session *ort_session = nullptr;
    const std::vector<const char*> input_names = {"input"};
    const std::vector<const char*> output_names = {"output", "546"};   // "546" is the exported model's unnamed second output node

    Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::RunOptions runOptions;
};


#endif
--------------------------------------------------------------------------------
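UVDocPredictor also works on its own, as in the commented-out line in cpp/main.cpp (the paths are machine-specific):

    UVDocPredictor unwrap_model("/home/wangbo/doc-undistort/weights/uvdoc.onnx");
    cv::Mat srcimg = cv::imread("/home/wangbo/doc-undistort/images/demo3.jpg");
    cv::Mat out = unwrap_model.predict(srcimg);   // dewarped page at the source resolution
    cv::imwrite("unwrap_only.jpg", out);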
/images/demo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/document-undistort-onnxrun/85554ebdf4223bf70e1a6c8c05b48f9e2250b34e/images/demo.jpg
--------------------------------------------------------------------------------
/images/demo1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/document-undistort-onnxrun/85554ebdf4223bf70e1a6c8c05b48f9e2250b34e/images/demo1.jpg
--------------------------------------------------------------------------------
/images/demo1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/document-undistort-onnxrun/85554ebdf4223bf70e1a6c8c05b48f9e2250b34e/images/demo1.png
--------------------------------------------------------------------------------
/images/demo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/document-undistort-onnxrun/85554ebdf4223bf70e1a6c8c05b48f9e2250b34e/images/demo2.png
--------------------------------------------------------------------------------
/images/demo3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/document-undistort-onnxrun/85554ebdf4223bf70e1a6c8c05b48f9e2250b34e/images/demo3.jpg
--------------------------------------------------------------------------------
/python/binary_predictor.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import onnxruntime


def restore_original_size(image, pad_info):
    start_x, start_y, original_height, original_width = pad_info
    cropped_image = image[start_y:start_y + original_height, start_x:start_x + original_width]
    return cropped_image

def pad_to_multiple_of_n(image, n=32):
    original_height, original_width = image.shape[:2]

    # compute the target shape
    target_width = ((original_width + n - 1) // n) * n
    target_height = ((original_height + n - 1) // n) * n

    # create a pure-white background image
    padded_image = np.ones((target_height, target_width, 3), dtype=np.uint8) * 255

    # compute the padding offsets
    start_x = (target_width - original_width) // 2
    start_y = (target_height - original_height) // 2

    # place the original image on the white background
    padded_image[start_y:start_y + original_height, start_x:start_x + original_width] = image

    # return the padded image and the padding info
    return padded_image, (start_x, start_y, original_height, original_width)


class UnetCNN():
    def __init__(self, modelpath):
        so = onnxruntime.SessionOptions()
        so.log_severity_level = 3
        self.session = onnxruntime.InferenceSession(modelpath, so)
        self.input_name = self.session.get_inputs()[0].name

    def preprocess(self, img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img, pad_info = pad_to_multiple_of_n(img)
        # normalize
        img = img.transpose(2, 0, 1) / 255.0
        # add a batch dimension
        img = np.expand_dims(img, axis=0).astype(np.float32)
        # now in the model input format
        return img, pad_info

    def predict(self, img):
        img, pad_info = self.preprocess(img)
        pred = self.session.run(None, {self.input_name: img})[0]
        out_img = self.postprocess(pred, pad_info)
        return out_img.astype(np.uint8)

    def postprocess(self, img, pad_info):
        img = 1 - (img - img.min()) / (img.max() - img.min())
        img = img[0].transpose(1, 2, 0)
        # repeat the single channel three times
        img = np.repeat(img, 3, axis=2)
        img = (img * 255 + 0.5).clip(0, 255)
        img = restore_original_size(img, pad_info)
        return img


if __name__=='__main__':
    model = UnetCNN('weights/unetcnn.onnx')
    img = cv2.imread('images/demo3.jpg')
    out_img = model.predict(img)
    cv2.imwrite('binary_predictor_out.jpg', out_img)
--------------------------------------------------------------------------------
/python/main.py:
--------------------------------------------------------------------------------
import cv2
from binary_predictor import UnetCNN
from unblur_predictor import NAF_DPM, OpenCvBilateral
from unshadow_predictor import GCDRNET
from unwrap_predictor import UVDocPredictor


if __name__=='__main__':
    binary_model = UnetCNN('weights/unetcnn.onnx')
    unblur_model = NAF_DPM('weights/nafdpm.onnx')
    unblur_model2 = OpenCvBilateral()
    unshadow_model = GCDRNET('weights/gcnet.onnx', 'weights/drnet.onnx')
    unwrap_model = UVDocPredictor('weights/uvdoc.onnx')
    model_dict = {"binary": binary_model, "unblur": unblur_model, "unshadow": unshadow_model, "unwrap": unwrap_model, "OpenCvBilateral": unblur_model2}

    task_list = ["unwrap", "unshadow", "unblur", "OpenCvBilateral"]
    srcimg = cv2.imread('images/demo3.jpg')
    out_img = srcimg.copy()
    for task in task_list:
        out_img = model_dict[task].predict(out_img)

    cv2.imwrite('out.jpg', out_img)
--------------------------------------------------------------------------------
/python/unblur_predictor.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import onnxruntime


class NAF_DPM():
    def __init__(self, modelpath):
        so = onnxruntime.SessionOptions()
        so.log_severity_level = 3
        self.session = onnxruntime.InferenceSession(modelpath, so)
        self.input_name = self.session.get_inputs()[0].name

    def preprocess(self, img):
        # normalize
        img = img.transpose(2, 0, 1) / 255.0
        # add a batch dimension
        img = np.expand_dims(img, axis=0).astype(np.float32)
        # now in the model input format
        return img

    def predict(self, img):
        img = self.preprocess(img)
        pred = self.session.run(None, {self.input_name: img})[0]
        out_img = self.postprocess(pred)
        return out_img.astype(np.uint8)

    def postprocess(self, img):
        img = img[0]
        img = (img * 255 + 0.5).clip(0, 255).transpose(1, 2, 0)
        return img


class OpenCvBilateral:
    def __init__(self,):
        pass
    def predict(self, img):
        img = img.astype(np.uint8)
        # bilateral filtering
        bilateral = cv2.bilateralFilter(img, d=9, sigmaColor=75, sigmaSpace=75)
        # adaptive histogram equalization (CLAHE on the L channel)
        lab = cv2.cvtColor(bilateral, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        cl = clahe.apply(l)
        limg = cv2.merge((cl, a, b))
        enhanced = cv2.cvtColor(limg, cv2.COLOR_LAB2BGR)

        # apply a sharpening kernel
        kernel = np.array([[0, -1, 0],
                           [-1, 5, -1],
                           [0, -1, 0]])
        sharpened = cv2.filter2D(enhanced, -1, kernel)
        return sharpened


if __name__=='__main__':
    model = NAF_DPM('weights/nafdpm.onnx')
    model2 = OpenCvBilateral()
    img = cv2.imread('images/demo3.jpg')
    out_img = model.predict(img)
    out_img = model2.predict(out_img)
    cv2.imwrite('unblur_predictor_out.jpg', out_img)
--------------------------------------------------------------------------------
/python/unshadow_predictor.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import onnxruntime


class GCDRNET():
    def __init__(self, gcnet_modelpath, drnet_modelpath):
        so = onnxruntime.SessionOptions()
        so.log_severity_level = 3
        self.gcnet_session = onnxruntime.InferenceSession(gcnet_modelpath, so)
        self.drnet_session = onnxruntime.InferenceSession(drnet_modelpath, so)
        self.gcnet_input_name = self.gcnet_session.get_inputs()[0].name
        self.drnet_input_name = self.drnet_session.get_inputs()[0].name

    def stride_integral(self, img, stride=32):
        h, w = img.shape[:2]

        if (h % stride) != 0:
            padding_h = stride - (h % stride)
            img = cv2.copyMakeBorder(img, padding_h, 0, 0, 0, borderType=cv2.BORDER_REPLICATE)
        else:
            padding_h = 0

        if (w % stride) != 0:
            padding_w = stride - (w % stride)
            img = cv2.copyMakeBorder(img, 0, 0, padding_w, 0, borderType=cv2.BORDER_REPLICATE)
        else:
            padding_w = 0

        return img, padding_h, padding_w

    def preprocess(self, img):
        img, padding_h, padding_w = self.stride_integral(img)
        # normalize
        img = img.transpose(2, 0, 1) / 255.0
        img = np.expand_dims(img, axis=0).astype(np.float32)
        # now in the model input format
        return img, padding_h, padding_w

    def predict(self, img):
        im_padding, padding_h, padding_w = self.preprocess(img.copy())
        img_shadow = im_padding.copy()
        img_shadow = self.gcnet_session.run(None, {self.gcnet_input_name: img_shadow})[0]
        model1_im = np.clip(im_padding / img_shadow, 0, 1)
        # concatenate im_padding and model1_im along the channel axis
        concatenated_input = np.concatenate((im_padding, model1_im), axis=1)
        pred = self.drnet_session.run(None, {self.drnet_input_name: concatenated_input})[0]
        out_img = self.postprocess(pred, padding_h, padding_w)
        return out_img.astype(np.uint8)

    def postprocess(self, pred, padding_h, padding_w):
        pred = np.transpose(pred[0], (1, 2, 0))
        pred = pred * 255
        enhance_img = pred[padding_h:, padding_w:]
        return enhance_img


if __name__=='__main__':
    model = GCDRNET('weights/gcnet.onnx', 'weights/drnet.onnx')
    img = cv2.imread('images/demo3.jpg')
    out_img = model.predict(img)
    cv2.imwrite('unshadow_predictor_out.jpg', out_img)
--------------------------------------------------------------------------------
/python/unwrap_predictor.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import onnxruntime
from scipy.ndimage import map_coordinates


class UVDocPredictor:
    def __init__(self, modelpath):
        so = onnxruntime.SessionOptions()
        so.log_severity_level = 3
        self.session = onnxruntime.InferenceSession(modelpath, so)
        self.input_name = self.session.get_inputs()[0].name
        self.img_size = [488, 712]
        self.grid_size = [45, 31]

    def preprocess(self, img):
        # img_size is (width, height), which is the order cv2.resize expects
        img = cv2.resize(img, self.img_size).transpose(2, 0, 1)
        img = np.expand_dims(img, axis=0)
        return img

    def predict(self, img):
        size = img.shape[:2][::-1]
        img = img.astype(np.float32) / 255
        inp = self.preprocess(img.copy())
        outputs = self.session.run(None, {self.input_name: inp})[0]
        out_img = self.postprocess(img, size, outputs)
        return out_img.astype(np.uint8)

    def postprocess(self, img, size, output):
        # convert the image to an NCHW float array
        warped_img = np.expand_dims(img.transpose(2, 0, 1), axis=0).astype(np.float32)

        # upsample the predicted grid to the original resolution
        upsampled_grid = self.interpolate(output, size=(size[1], size[0]), align_corners=True)
        # reshape the grid to (B, H, W, 2)
        upsampled_grid = upsampled_grid.transpose(0, 2, 3, 1)

        # remap the image
        unwarped_img = self.grid_sample(warped_img, upsampled_grid)

        # convert the result back to HWC in [0, 255]
        return unwarped_img[0].transpose(1, 2, 0) * 255

    def interpolate(self, input_tensor, size, align_corners=True):
        """
        Interpolate function to resize the input tensor (bilinear, order=1).

        Args:
            input_tensor: numpy.ndarray of shape (B, C, H, W)
            size: tuple of int (new_height, new_width)
            align_corners: bool, whether to align corners in bilinear interpolation

        Returns:
            numpy.ndarray of shape (B, C, new_height, new_width)
        """
        B, C, H, W = input_tensor.shape
        new_H, new_W = size
        resized_tensors = []
        for b in range(B):
            resized_channels = []
            for c in range(C):
                # compute the scale factors
                if align_corners:
                    scale_h = (H - 1) / (new_H - 1) if new_H > 1 else 0
                    scale_w = (W - 1) / (new_W - 1) if new_W > 1 else 0
                else:
                    scale_h = H / new_H
                    scale_w = W / new_W

                # build the new coordinate grid
                y, x = np.indices((new_H, new_W), dtype=np.float32)
                y = y * scale_h
                x = x * scale_w

                # bilinear interpolation
                coords = np.stack([y.flatten(), x.flatten()], axis=0)
                # tmp = my_map_coordinates(input_tensor[b, c], coords)
                resized_channel = map_coordinates(input_tensor[b, c], coords, order=1, mode='constant', cval=0.0)
                resized_channel = resized_channel.reshape(new_H, new_W)
                resized_channels.append(resized_channel)

            resized_tensors.append(np.stack(resized_channels, axis=0))

        return np.stack(resized_tensors, axis=0)
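    # Note on conventions: grid_sample below follows the
    # torch.nn.functional.grid_sample semantics. Grid values are normalized to
    # [-1, 1]; with align_corners=True, x_pix = (x + 1) * (W - 1) / 2, so -1
    # maps to the first pixel center and +1 to the last. Samples falling
    # outside the input read as zero (mode='constant').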
    def grid_sample(self, input_tensor, grid, align_corners=True):
        """
        Grid sample function to sample the input tensor using the given grid.

        Args:
            input_tensor: numpy.ndarray of shape (B, C, H, W)
            grid: numpy.ndarray of shape (B, H, W, 2) with values in [-1, 1]
            align_corners: bool, whether to align corners in bilinear interpolation

        Returns:
            numpy.ndarray of shape (B, C, H, W)
        """
        B, C, H, W = input_tensor.shape
        B_grid, H_grid, W_grid, _ = grid.shape

        if B != B_grid or H != H_grid or W != W_grid:
            raise ValueError("Input tensor and grid must have the same spatial dimensions.")

        # Convert grid coordinates from [-1, 1] to [0, W-1] and [0, H-1]
        if align_corners:
            grid[:, :, :, 0] = (grid[:, :, :, 0] + 1) * (W - 1) / 2
            grid[:, :, :, 1] = (grid[:, :, :, 1] + 1) * (H - 1) / 2
        else:
            grid[:, :, :, 0] = ((grid[:, :, :, 0] + 1) * W - 1) / 2
            grid[:, :, :, 1] = ((grid[:, :, :, 1] + 1) * H - 1) / 2

        sampled_tensors = []
        for b in range(B):
            sampled_channels = []
            for c in range(C):
                channel = input_tensor[b, c]
                x_coords = grid[b, :, :, 0].flatten()
                y_coords = grid[b, :, :, 1].flatten()
                coords = np.stack([y_coords, x_coords], axis=-1)
                sampled_channel = map_coordinates(channel, coords.T, order=1, mode='constant', cval=0.0).reshape(H, W)
                sampled_channels.append(sampled_channel)
            sampled_tensors.append(np.stack(sampled_channels, axis=0))

        return np.stack(sampled_tensors, axis=0)


def my_map_coordinates(input, coordinates):
    def set_value(input, x, y):
        # out-of-range samples read as zero, like mode='constant', cval=0.0
        mask = (x >= 0) & (x < input.shape[1]) & (y >= 0) & (y < input.shape[0])
        out = np.zeros(y.shape)
        out[mask] = input[y[mask], x[mask]]
        return out

    y = coordinates[0, :]
    x = coordinates[1, :]
    x0 = np.floor(x).astype(np.int32)
    x1 = x0 + 1
    y0 = np.floor(y).astype(np.int32)
    y1 = y0 + 1

    f_x0_y0 = set_value(input, x0, y0)
    f_x1_y0 = set_value(input, x1, y0)
    f_x0_y1 = set_value(input, x0, y1)
    f_x1_y1 = set_value(input, x1, y1)

    denom = (y1-y0)*(x1-x0)
    f = ((y1-y)*(x1-x) / denom) * f_x0_y0 + ((y1-y)*(x-x0) / denom) * f_x1_y0 + ((y-y0)*(x1-x) / denom) * f_x0_y1 + ((y-y0)*(x-x0) / denom) * f_x1_y1
    return f.astype(np.float32)

if __name__=='__main__':
    model = UVDocPredictor('weights/uvdoc.onnx')
    img = cv2.imread('images/demo3.jpg')
    out_img = model.predict(img)
    cv2.imwrite('unwrap_predictor_out.jpg', out_img)
--------------------------------------------------------------------------------
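Since my_map_coordinates re-implements the bilinear case of scipy.ndimage.map_coordinates, a quick self-check (illustrative only, run inside this module) can confirm the two agree:

    import numpy as np
    from scipy.ndimage import map_coordinates

    rng = np.random.default_rng(0)
    image = rng.random((16, 16)).astype(np.float32)
    coords = rng.uniform(0, 15, size=(2, 100)).astype(np.float32)  # row 0: y, row 1: x

    ref = map_coordinates(image, coords, order=1, mode='constant', cval=0.0)
    ours = my_map_coordinates(image, coords)
    print(np.abs(ref - ours).max())  # expected to be on the order of 1e-7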