├── README.md
├── cpp
│   ├── CMakeLists.txt
│   ├── binary_predictor.cpp
│   ├── binary_predictor.h
│   ├── main.cpp
│   ├── unblur_predictor.cpp
│   ├── unblur_predictor.h
│   ├── unshadow_predictor.cpp
│   ├── unshadow_predictor.h
│   ├── unwrap_predictor.cpp
│   └── unwrap_predictor.h
├── images
│   ├── demo.jpg
│   ├── demo1.jpg
│   ├── demo1.png
│   ├── demo2.png
│   └── demo3.jpg
└── python
    ├── binary_predictor.py
    ├── main.py
    ├── unblur_predictor.py
    ├── unshadow_predictor.py
    └── unwrap_predictor.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
When working on document OCR projects, you often run into paper documents that are warped or shadowed. This program rectifies such documents, which benefits the downstream text detection and recognition tasks.

ONNX model files: https://pan.baidu.com/s/1Bza0jK_MlUSXoLzrpWPGQg (extraction code: fipm)

Adapted from https://www.modelscope.cn/studios/jockerK/DocUnwrap/files
--------------------------------------------------------------------------------
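A minimal end-to-end run mirrors python/main.py; the relative paths below are an assumption (downloaded ONNX weights placed under weights/, script run from the repo root):

```python
import cv2
from unwrap_predictor import UVDocPredictor
from unshadow_predictor import GCDRNET

# Assumed locations for the weights downloaded from the link above.
unwrap_model = UVDocPredictor('weights/uvdoc.onnx')
unshadow_model = GCDRNET('weights/gcnet.onnx', 'weights/drnet.onnx')

img = cv2.imread('images/demo3.jpg')
img = unwrap_model.predict(img)      # dewarp the page first
img = unshadow_model.predict(img)    # then remove shading
cv2.imwrite('out.jpg', img)
```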
/cpp/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.10)
project(test)
set(CMAKE_CXX_STANDARD 17)

add_executable(${PROJECT_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
                               ${CMAKE_CURRENT_SOURCE_DIR}/binary_predictor.cpp
                               ${CMAKE_CURRENT_SOURCE_DIR}/unblur_predictor.cpp
                               ${CMAKE_CURRENT_SOURCE_DIR}/unshadow_predictor.cpp
                               ${CMAKE_CURRENT_SOURCE_DIR}/unwrap_predictor.cpp)

target_include_directories(${PROJECT_NAME}
    PUBLIC "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/include/opencv4"
    PUBLIC "/home/wangbo/libs/mylibs/onnxruntime-linux-x64-1.18.1/include"
    PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
)
target_link_libraries(${PROJECT_NAME}
    "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/lib/libopencv_imgcodecs.so.4.8.0"
    "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/lib/libopencv_highgui.so.4.8.0"
    "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/lib/libopencv_imgproc.so.4.8.0"
    "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/lib/libopencv_core.so.4.8.0"
    "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/lib/libopencv_videoio.so.4.8.0"
    "/home/wangbo/libs/mylibs/opencv-4.8.0/temp_install_dir/lib/libopencv_video.so.4.8.0"
    "/home/wangbo/libs/mylibs/onnxruntime-linux-x64-1.18.1/lib/libonnxruntime.so.1.18.1"
)
--------------------------------------------------------------------------------
/cpp/binary_predictor.cpp:
--------------------------------------------------------------------------------
#include "binary_predictor.h"


using namespace cv;
using namespace std;
using namespace Ort;


static Mat pad_to_multiple_of_n(const Mat& image, const int n, int* pad_info)
{
    int original_height = image.rows;
    int original_width = image.cols;

    int target_width = ((original_width + n - 1) / n) * n;
    int target_height = ((original_height + n - 1) / n) * n;

    Mat padded_image(target_height, target_width, CV_8UC3, Scalar(255, 255, 255));

    int start_x = (target_width - original_width) / 2;
    int start_y = (target_height - original_height) / 2;

    image.copyTo(padded_image(Rect(start_x, start_y, original_width, original_height)));
    pad_info[0] = start_x;
    pad_info[1] = start_y;
    pad_info[2] = original_height;
    pad_info[3] = original_width;
    return padded_image;
}

UnetCNN::UnetCNN(const string& model_path)
{
    if (model_path.empty())
    {
        std::cout << "onnx path error" << std::endl;
    }

    sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    // load the model
    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
#ifdef _WIN32
    std::wstring w_model_path = converter.from_bytes(model_path);
    ort_session = new Ort::Session(env, w_model_path.c_str(), sessionOptions);
#else
    ort_session = new Ort::Session(env, model_path.c_str(), sessionOptions);
#endif
}

void UnetCNN::preprocess(const Mat& srcimg)
{
    Mat img = pad_to_multiple_of_n(srcimg, 32, this->pad_info);
    this->input_h = img.rows;
    this->input_w = img.cols;
    vector<Mat> bgrChannels(3);
    split(img, bgrChannels);
    for (int c = 0; c < 3; c++)
    {
        bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 255.0);
    }

    const int image_area = this->input_h * this->input_w;
    this->input_image.clear();
    this->input_image.resize(3 * image_area);
    size_t single_chn_size = image_area * sizeof(float);
    // pack as RGB (matches cv2.COLOR_BGR2RGB in the Python version)
    memcpy(this->input_image.data(), (float *)bgrChannels[2].data, single_chn_size);
    memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
    memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[0].data, single_chn_size);
}

Mat UnetCNN::predict(const Mat& srcimg)
{
    this->preprocess(srcimg);
    std::vector<int64_t> input_img_shape = {1, 3, this->input_h, this->input_w};
    Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());

    vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, this->input_names.size(), this->output_names.data(), this->output_names.size());

    std::vector<int64_t> out_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
    const int out_h = out_shape[2];
    const int out_w = out_shape[3];
    float* pred = ort_outputs[0].GetTensorMutableData<float>();
    Mat out = Mat(out_h, out_w, CV_32FC1, pred);
    Mat cropped_image = this->postprocess(out);
    out.release();
    vector<Mat> channel_mats = {cropped_image, cropped_image, cropped_image};
    Mat out_img;
    merge(channel_mats, out_img);
    out_img.convertTo(out_img, CV_8UC3);
    return out_img;
}

Mat UnetCNN::postprocess(Mat& img)
{
    double min_value, max_value;
    minMaxLoc(img, &min_value, &max_value, 0, 0);
    img = 1 - (img - min_value) / (max_value - min_value);
    img = img * 255 + 0.5;
    img.setTo(0, img < 0);
    img.setTo(255, img > 255);
    Mat cropped_image;
    img(Rect(this->pad_info[0], this->pad_info[1], this->pad_info[3], this->pad_info[2])).copyTo(cropped_image);
    return cropped_image;
}
--------------------------------------------------------------------------------
/cpp/binary_predictor.h:
--------------------------------------------------------------------------------
#ifndef BINARY_PREDICTOR_H
#define BINARY_PREDICTOR_H
#include <iostream>
#include <string>
#include <vector>
#include <cstring>
#include <locale>
#include <codecvt>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>  /// uncomment if using CUDA acceleration
#include <onnxruntime_cxx_api.h>


class UnetCNN
{
public:
    UnetCNN(const std::string& model_path);
    cv::Mat predict(const cv::Mat& srcimg);
private:
    void preprocess(const cv::Mat& img);
    std::vector<float> input_image;
    int pad_info[4];
    int input_h;
    int input_w;
    cv::Mat postprocess(cv::Mat& img);

    Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "BINARY PREDICTOR");
    Ort::SessionOptions sessionOptions = Ort::SessionOptions();

    Ort::Session *ort_session = nullptr;
    const std::vector<const char*> input_names = {"input"};
    const std::vector<const char*> output_names = {"output"};

    Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::RunOptions runOptions;
};


#endif
--------------------------------------------------------------------------------
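Note on GPU inference: the headers in this repo keep the CUDA provider include commented out. A sketch of what enabling it could look like against an onnxruntime GPU build (an assumption, not code from this repo):

    // in each constructor, before creating the Ort::Session
    OrtCUDAProviderOptions cuda_options{};                      // defaults select GPU 0
    sessionOptions.AppendExecutionProvider_CUDA(cuda_options);  // requires onnxruntime-gpu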
/cpp/main.cpp:
--------------------------------------------------------------------------------
#include "binary_predictor.h"
#include "unblur_predictor.h"
#include "unshadow_predictor.h"
#include "unwrap_predictor.h"
#include <iostream>


using namespace cv;
using namespace std;


int main()
{
    UnetCNN binary_model("/home/wangbo/doc-undistort/weights/unetcnn.onnx");   ///// make sure the file paths are correct
    NAF_DPM unblur_model("/home/wangbo/doc-undistort/weights/nafdpm.onnx");
    GCDRNET unshadow_model("/home/wangbo/doc-undistort/weights/gcnet.onnx", "/home/wangbo/doc-undistort/weights/drnet.onnx");
    UVDocPredictor unwrap_model("/home/wangbo/doc-undistort/weights/uvdoc.onnx");

    vector<string> task_list = {"unwrap", "unshadow", "unblur", "OpenCvBilateral"};

    string imgpath = "/home/wangbo/doc-undistort/images/demo3.jpg";

    Mat srcimg = imread(imgpath);
    // Mat out_img = unwrap_model.predict(srcimg);

    Mat out_img = srcimg.clone();
    for(string task : task_list)
    {
        ///// switch does not work on strings, hence the if-else chain
        if(task == "unwrap")
        {
            out_img = unwrap_model.predict(out_img);
        }
        else if(task == "unshadow")
        {
            out_img = unshadow_model.predict(out_img);
        }
        else if(task == "unblur")
        {
            out_img = unblur_model.predict(out_img);
        }
        else if(task == "OpenCvBilateral")
        {
            out_img = OpenCvBilateral(out_img);
        }
        else if(task == "binary")
        {
            out_img = binary_model.predict(out_img);
        }
        else
        {
            cout << "task not found" << endl;
        }

    }

    imwrite("out.jpg", out_img);
    return 0;
}
--------------------------------------------------------------------------------
/cpp/unblur_predictor.cpp:
--------------------------------------------------------------------------------
#include "unblur_predictor.h"


using namespace cv;
using namespace std;
using namespace Ort;


NAF_DPM::NAF_DPM(const string& model_path)
{
    if (model_path.empty())
    {
        std::cout << "onnx path error" << std::endl;
    }

    sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    // load the model
    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
#ifdef _WIN32
    std::wstring w_model_path = converter.from_bytes(model_path);
    ort_session = new Ort::Session(env, w_model_path.c_str(), sessionOptions);
#else
    ort_session = new Ort::Session(env, model_path.c_str(), sessionOptions);
#endif
}

void NAF_DPM::preprocess(const Mat& srcimg)
{
    this->input_h = srcimg.rows;
    this->input_w = srcimg.cols;
    vector<Mat> bgrChannels(3);
    split(srcimg, bgrChannels);
    for (int c = 0; c < 3; c++)
    {
        bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 255.0);
    }

    const int image_area = this->input_h * this->input_w;
    this->input_image.clear();
    this->input_image.resize(3 * image_area);
    size_t single_chn_size = image_area * sizeof(float);
    // pack in BGR order (the Python version also feeds BGR directly)
    memcpy(this->input_image.data(), (float *)bgrChannels[0].data, single_chn_size);
    memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
    memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
}

Mat NAF_DPM::predict(const Mat& srcimg)
{
    this->preprocess(srcimg);
    std::vector<int64_t> input_img_shape = {1, 3, this->input_h, this->input_w};
    Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());

    vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, this->input_names.size(), this->output_names.data(), this->output_names.size());

    std::vector<int64_t> out_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
    const int out_h = out_shape[2];
    const int out_w = out_shape[3];
    const int area = out_h * out_w;
    float* pred = ort_outputs[0].GetTensorMutableData<float>();
    Mat bmat = Mat(out_h, out_w, CV_32FC1, pred);
    Mat gmat = Mat(out_h, out_w, CV_32FC1, pred + area);
    Mat rmat = Mat(out_h, out_w, CV_32FC1, pred + area * 2);

    this->postprocess(bmat);
    this->postprocess(gmat);
    this->postprocess(rmat);

    vector<Mat> channel_mats = {bmat, gmat, rmat};
    Mat out_img;
    merge(channel_mats, out_img);
    out_img.convertTo(out_img, CV_8UC3);
    return out_img;
}

void NAF_DPM::postprocess(Mat& img)
{
    img = img * 255 + 0.5;
    img.setTo(0, img < 0);
    img.setTo(255, img > 255);
}


cv::Mat OpenCvBilateral(const cv::Mat& img) {
    cv::Mat img_uint8;
    img.convertTo(img_uint8, CV_8U);

    // bilateral filtering
    cv::Mat bilateral;
    cv::bilateralFilter(img_uint8, bilateral, 9, 75, 75);

    // adaptive histogram equalization (CLAHE on the L channel)
    cv::Mat lab;
    cv::cvtColor(bilateral, lab, cv::COLOR_BGR2Lab);
    std::vector<cv::Mat> lab_planes(3);
    cv::split(lab, lab_planes);
    cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE(3.0, cv::Size(8, 8));
    clahe->apply(lab_planes[0], lab_planes[0]);
    cv::merge(lab_planes, lab);
    cv::Mat enhanced;
    cv::cvtColor(lab, enhanced, cv::COLOR_Lab2BGR);

    // apply a sharpening kernel
    cv::Mat kernel = (cv::Mat_<float>(3, 3) << 0, -1, 0, -1, 5, -1, 0, -1, 0);
    cv::Mat sharpened;
    cv::filter2D(enhanced, sharpened, -1, kernel);

    return sharpened;
}
--------------------------------------------------------------------------------
/cpp/unblur_predictor.h:
--------------------------------------------------------------------------------
#ifndef UNBLUR_PREDICTOR_H
#define UNBLUR_PREDICTOR_H
#include <iostream>
#include <string>
#include <vector>
#include <cstring>
#include <locale>
#include <codecvt>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>  /// uncomment if using CUDA acceleration
#include <onnxruntime_cxx_api.h>


class NAF_DPM
{
public:
    NAF_DPM(const std::string& model_path);
    cv::Mat predict(const cv::Mat& srcimg);
private:
    void preprocess(const cv::Mat& img);
    std::vector<float> input_image;
    int input_h;
    int input_w;
    void postprocess(cv::Mat& img);

    Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "UNBLUR PREDICTOR");
    Ort::SessionOptions sessionOptions = Ort::SessionOptions();

    Ort::Session *ort_session = nullptr;
    const std::vector<const char*> input_names = {"input"};
    const std::vector<const char*> output_names = {"output"};

    Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::RunOptions runOptions;
};

cv::Mat OpenCvBilateral(const cv::Mat& img);

#endif
--------------------------------------------------------------------------------
/cpp/unshadow_predictor.cpp:
--------------------------------------------------------------------------------
#include "unshadow_predictor.h"


using namespace cv;
using namespace std;
using namespace Ort;


GCDRNET::GCDRNET(const string& gcnet_modelpath, const string& drnet_modelpath)
{
    if (gcnet_modelpath.empty() || drnet_modelpath.empty())
    {
        std::cout << "onnx path error" << std::endl;
    }

    sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    // load the models
    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
#ifdef _WIN32
    std::wstring w_model_path = converter.from_bytes(gcnet_modelpath);
    gcnet_session = new Ort::Session(env, w_model_path.c_str(), sessionOptions);
    std::wstring w_model2_path = converter.from_bytes(drnet_modelpath);
    drnet_session = new Ort::Session(env, w_model2_path.c_str(), sessionOptions);
#else
    gcnet_session = new Ort::Session(env, gcnet_modelpath.c_str(), sessionOptions);
    drnet_session = new Ort::Session(env, drnet_modelpath.c_str(), sessionOptions);
#endif
}

Mat GCDRNET::stride_integral(const Mat& srcimg, const int stride)
{
    int h = srcimg.rows;
    int w = srcimg.cols;

    Mat img = srcimg.clone();
    if((h % stride) != 0)
    {
        this->padding_h = stride - (h % stride);
        cv::copyMakeBorder(img, img, padding_h, 0, 0, 0, cv::BORDER_REPLICATE);
    }
    else
    {
        this->padding_h = 0;
    }

    if((w % stride) != 0)
    {
        this->padding_w = stride - (w % stride);
        cv::copyMakeBorder(img, img, 0, 0, padding_w, 0, cv::BORDER_REPLICATE);
    }
    else
    {
        this->padding_w = 0;
    }
    return img;
}

void GCDRNET::preprocess(const Mat& srcimg)
{
    Mat img = this->stride_integral(srcimg, 32);
    this->input_h = img.rows;
    this->input_w = img.cols;
    vector<Mat> bgrChannels(3);
    split(img, bgrChannels);
    for (int c = 0; c < 3; c++)
    {
        bgrChannels[c].convertTo(bgrChannels[c], CV_32FC1, 1 / 255.0);
    }

    const int image_area = this->input_h * this->input_w;
    this->input_image.clear();
    this->input_image.resize(3 * image_area);
    size_t single_chn_size = image_area * sizeof(float);
    memcpy(this->input_image.data(), (float *)bgrChannels[0].data, single_chn_size);
    memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
    memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
}

Mat GCDRNET::predict(const Mat& srcimg)
{
    this->preprocess(srcimg);
    std::vector<int64_t> input_img_shape = {1, 3, this->input_h, this->input_w};
    Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());

    vector<Value> gcnet_outputs = this->gcnet_session->Run(runOptions, this->input_names.data(), &input_tensor_, this->input_names.size(), this->output_names.data(), this->output_names.size());
    float* img_shadow = gcnet_outputs[0].GetTensorMutableData<float>();
    const int len = 3 * this->input_h * this->input_w;
    this->concatenated_input.clear();
    this->concatenated_input.resize(2 * len);
    for(int i=0;i<len;i++)
    {
        this->concatenated_input[i] = this->input_image[i];
        float x = this->input_image[i] / img_shadow[i];
        x = std::min(std::max(x, 0.0f), 1.0f);
        this->concatenated_input[i + len] = x;
    }

    std::vector<int64_t> input2_shape = {1, 2*3, this->input_h, this->input_w};
    Value input2_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->concatenated_input.data(), this->concatenated_input.size(), input2_shape.data(), input2_shape.size());

    vector<Value> drnet_outputs = this->drnet_session->Run(runOptions, this->input_names.data(), &input2_tensor_, this->input_names.size(), this->output_names.data(), this->output_names.size());

    std::vector<int64_t> out_shape = drnet_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
    const int out_h = out_shape[2];
    const int out_w = out_shape[3];
    const int area = out_h * out_w;
    float* pred = drnet_outputs[0].GetTensorMutableData<float>();
    Mat bmat = Mat(out_h, out_w, CV_32FC1, pred);
    Mat gmat = Mat(out_h, out_w, CV_32FC1, pred + area);
    Mat rmat = Mat(out_h, out_w, CV_32FC1, pred + area * 2);
    bmat *= 255;
    gmat *= 255;
    rmat *= 255;

    vector<Mat> channel_mats = {bmat, gmat, rmat};
    Mat out_img;
    merge(channel_mats, out_img);
    Mat enhance_img;
    out_img(Rect(this->padding_w, this->padding_h, out_w-this->padding_w, out_h-this->padding_h)).copyTo(enhance_img);
    enhance_img.convertTo(enhance_img, CV_8UC3);
    return enhance_img;
}
--------------------------------------------------------------------------------
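For reference, the two-stage flow above matches python/unshadow_predictor.py: GCNet estimates a per-pixel shading map S, the relit image clip(I / S, 0, 1) is formed, and DRNet consumes the 6-channel concatenation of the padded input and the relit image. The same composition as a NumPy sketch (illustrative only; I and S are (1, 3, H, W) float32 arrays in [0, 1]):

    import numpy as np
    relit = np.clip(I / S, 0.0, 1.0)                   # undo the estimated shading
    drnet_input = np.concatenate((I, relit), axis=1)   # (1, 6, H, W)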
/cpp/unshadow_predictor.h:
--------------------------------------------------------------------------------
#ifndef UNSHADOW_PREDICTOR_H
#define UNSHADOW_PREDICTOR_H
#include <iostream>
#include <string>
#include <vector>
#include <cstring>
#include <locale>
#include <codecvt>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>  /// uncomment if using CUDA acceleration
#include <onnxruntime_cxx_api.h>


class GCDRNET
{
public:
    GCDRNET(const std::string& gcnet_modelpath, const std::string& drnet_modelpath);
    cv::Mat predict(const cv::Mat& srcimg);
private:
    void preprocess(const cv::Mat& img);
    std::vector<float> input_image;
    std::vector<float> concatenated_input;
    int input_h;
    int input_w;
    int padding_h;
    int padding_w;
    cv::Mat stride_integral(const cv::Mat& img, const int stride);

    Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "UNSHADOW PREDICTOR");
    Ort::SessionOptions sessionOptions = Ort::SessionOptions();

    Ort::Session *gcnet_session = nullptr;
    Ort::Session *drnet_session = nullptr;
    const std::vector<const char*> input_names = {"input"};
    const std::vector<const char*> output_names = {"output"};

    Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::RunOptions runOptions;
};


#endif
--------------------------------------------------------------------------------
/cpp/unwrap_predictor.cpp:
--------------------------------------------------------------------------------
#include "unwrap_predictor.h"


using namespace cv;
using namespace std;
using namespace Ort;


UVDocPredictor::UVDocPredictor(const string& model_path)
{
    if (model_path.empty())
    {
        std::cout << "onnx path error" << std::endl;
    }

    sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    // load the model
    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
#ifdef _WIN32
    std::wstring w_model_path = converter.from_bytes(model_path);
    ort_session = new Ort::Session(env, w_model_path.c_str(), sessionOptions);
#else
    ort_session = new Ort::Session(env, model_path.c_str(), sessionOptions);
#endif
}

void UVDocPredictor::preprocess(Mat& img)
{
    img.convertTo(img, CV_32FC3, 1 / 255.0);

    Mat temp;
    cv::resize(img, temp, cv::Size(this->input_w, this->input_h));
    vector<Mat> bgrChannels(3);
    split(temp, bgrChannels);
    const int image_area = this->input_h * this->input_w;
    this->input_image.clear();
    this->input_image.resize(3 * image_area);
    size_t single_chn_size = image_area * sizeof(float);
    memcpy(this->input_image.data(), (float *)bgrChannels[0].data, single_chn_size);
    memcpy(this->input_image.data() + image_area, (float *)bgrChannels[1].data, single_chn_size);
    memcpy(this->input_image.data() + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
}

Mat UVDocPredictor::predict(const Mat& srcimg)
{
    const int size[2] = {srcimg.cols, srcimg.rows};
    Mat img = srcimg.clone();
    this->preprocess(img);
    std::vector<int64_t> input_img_shape = {1, 3, this->input_h, this->input_w};
    Value input_tensor_ = Value::CreateTensor<float>(memory_info_handler, this->input_image.data(), this->input_image.size(), input_img_shape.data(), input_img_shape.size());

    vector<Value> ort_outputs = this->ort_session->Run(runOptions, this->input_names.data(), &input_tensor_, this->input_names.size(), this->output_names.data(), this->output_names.size());

    std::vector<int64_t> out_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
    float* output = ort_outputs[0].GetTensorMutableData<float>();
    Mat out_img = this->postprocess(img, size, output, out_shape);
    out_img.convertTo(out_img, CV_8UC3);
    return out_img;
}

static Mat convert3channeltonchw(const Mat& img)
{
    vector<Mat> bgrChannels(3);
    split(img, bgrChannels);
    const int image_area = img.rows * img.cols;
    const vector<int> newsz = {1, 3, img.rows, img.cols};
    Mat dstimg = cv::Mat(newsz, CV_32FC1);
    size_t single_chn_size = image_area * sizeof(float);
    memcpy((float *)dstimg.data, (float *)bgrChannels[0].data, single_chn_size);
    memcpy((float *)dstimg.data + image_area, (float *)bgrChannels[1].data, single_chn_size);
    memcpy((float *)dstimg.data + image_area * 2, (float *)bgrChannels[2].data, single_chn_size);
    return dstimg;
}

Mat UVDocPredictor::postprocess(const Mat& img, const int* size, const float* output, vector<int64_t> out_shape)
{
    Mat warped_img = convert3channeltonchw(img);

    Mat upsampled_grid = this->interpolate(output, out_shape, size, true);
    cv::transposeND(upsampled_grid, {0, 2, 3, 1}, upsampled_grid);

    Mat unwarped_img = this->grid_sample(warped_img, upsampled_grid, true);

    float* pdata = (float*)unwarped_img.data;
    const int out_h = unwarped_img.size[2];
    const int out_w = unwarped_img.size[3];
    const int area = out_h * out_w;
    Mat bmat = Mat(out_h, out_w, CV_32FC1, pdata);
    Mat gmat = Mat(out_h, out_w, CV_32FC1, pdata + area);
    Mat rmat = Mat(out_h, out_w, CV_32FC1, pdata + area * 2);
    bmat *= 255;
    gmat *= 255;
    rmat *= 255;
    vector<Mat> channel_mats = {bmat, gmat, rmat};
    Mat out_img;
    merge(channel_mats, out_img);
    return out_img;
}

static float get_pixel_value(const float* pinput, const int H, const int W, const int y, const int x)
{
    if(y < 0 || y >= H || x < 0 || x >= W)
    {
        return 0.f;
    }
    return pinput[y*W+x];
}

Mat UVDocPredictor::interpolate(const float* input_tensor, vector<int64_t> shape, const int* size, const bool align_corners)
{
    const int B = shape[0];
    const int C = shape[1];
    const int H = shape[2];
    const int W = shape[3];
    const int new_H = size[1];
    const int new_W = size[0];
    const vector<int> newsz = {B, C, new_H, new_W};
    Mat dstimg = cv::Mat(newsz, CV_32FC1);
    for(int n=0;n<B;n++)
    {
        for(int cid=0;cid<C;cid++)
        {
            float scale_h = (new_H > 1) ? (float(H - 1) / float(new_H - 1)):0.f;
            float scale_w = (new_W > 1) ? (float(W - 1) / float(new_W - 1)):0.f;
            if(!align_corners)
            {
                scale_h = (float)H / new_H;
                scale_w = (float)W / new_W;
            }
            const float* pinput = input_tensor + n*C*H*W + cid*H*W;
            for(int h=0;h<new_H;h++)
            {
                for(int w=0;w<new_W;w++)
                {
                    // map the destination pixel back to source coordinates
                    const float y = h * scale_h;
                    const float x = w * scale_w;
                    const int y0 = floor(y);
                    const int x0 = floor(x);
                    const int y1 = y0 + 1;
                    const int x1 = x0 + 1;
                    const float denom = (y1-y0)*(x1-x0);

                    const float f_x0_y0 = get_pixel_value(pinput, H, W, y0, x0);
                    const float f_x1_y0 = get_pixel_value(pinput, H, W, y0, x1);
                    const float f_x0_y1 = get_pixel_value(pinput, H, W, y1, x0);
                    const float f_x1_y1 = get_pixel_value(pinput, H, W, y1, x1);

                    const float f = ((y1-y)*(x1-x) / denom) * f_x0_y0 + ((y1-y)*(x-x0) / denom) * f_x1_y0 + ((y-y0)*(x1-x) / denom) * f_x0_y1 + ((y-y0)*(x-x0) / denom) * f_x1_y1;
                    dstimg.ptr<float>(n, cid, h)[w] = f;
                }
            }
        }
    }
    return dstimg;
}

Mat UVDocPredictor::grid_sample(const Mat& input_tensor, const Mat& grid, const bool align_corners)
{
    const int B = input_tensor.size[0];
    const int C = input_tensor.size[1];
    const int H = input_tensor.size[2];
    const int W = input_tensor.size[3];
    const int B_grid = grid.size[0];
    const int H_grid = grid.size[1];
    const int W_grid = grid.size[2];

    if(B != B_grid || H != H_grid || W != W_grid)
    {
        cout<<"Error, Input tensor and grid must have the same spatial dimensions."<<endl;
        return Mat();
    }

    const vector<int> newsz = {B, C, H, W};
    Mat dstimg = cv::Mat(newsz, CV_32FC1);
    for(int n=0;n<B;n++)
    {
        for(int cid=0;cid<C;cid++)
        {
            const float* pinput = (const float*)input_tensor.data + n*C*H*W + cid*H*W;
            for(int h=0;h<H;h++)
            {
                for(int w=0;w<W;w++)
                {
                    // convert normalized grid coordinates in [-1, 1] to pixel coordinates
                    float x = (grid.ptr<float>(n, h, w)[0] + 1) * (W - 1) / 2;
                    float y = (grid.ptr<float>(n, h, w)[1] + 1) * (H - 1) / 2;
                    if(!align_corners)
                    {
                        x = ((grid.ptr<float>(n, h, w)[0] + 1) * W - 1) / 2;
                        y = ((grid.ptr<float>(n, h, w)[1] + 1) * H - 1) / 2;
                    }

                    const int y0 = floor(y);
                    const int x0 = floor(x);
                    const int y1 = y0 + 1;
                    const int x1 = x0 + 1;
                    const float denom = (y1-y0)*(x1-x0);

                    const float f_x0_y0 = get_pixel_value(pinput, H, W, y0, x0);
                    const float f_x1_y0 = get_pixel_value(pinput, H, W, y0, x1);
                    const float f_x0_y1 = get_pixel_value(pinput, H, W, y1, x0);
                    const float f_x1_y1 = get_pixel_value(pinput, H, W, y1, x1);

                    const float f = ((y1-y)*(x1-x) / denom) * f_x0_y0 + ((y1-y)*(x-x0) / denom) * f_x1_y0 + ((y-y0)*(x1-x) / denom) * f_x0_y1 + ((y-y0)*(x-x0) / denom) * f_x1_y1;
                    dstimg.ptr<float>(n, cid, h)[w] = f;
                }
            }
        }
    }
    return dstimg;
}
--------------------------------------------------------------------------------
/cpp/unwrap_predictor.h:
--------------------------------------------------------------------------------
#ifndef UNWRAP_PREDICTOR_H
#define UNWRAP_PREDICTOR_H
#include <iostream>
#include <string>
#include <vector>
#include <cstring>
#include <cmath>
#include <locale>
#include <codecvt>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
//#include <cuda_provider_factory.h>  /// uncomment if using CUDA acceleration
#include <onnxruntime_cxx_api.h>


class UVDocPredictor
{
public:
    UVDocPredictor(const std::string& model_path);
    cv::Mat predict(const cv::Mat& srcimg);
private:
    void preprocess(cv::Mat& img);
    std::vector<float> input_image;
    const int input_h = 712;
    const int input_w = 488;
    const int grid_size[2] = {45, 31};
    cv::Mat postprocess(const cv::Mat& img, const int* size, const float* output, std::vector<int64_t> out_shape);
    cv::Mat interpolate(const float* input_tensor, std::vector<int64_t> shape, const int* size, const bool align_corners);
    cv::Mat grid_sample(const cv::Mat& input_tensor, const cv::Mat& grid, const bool align_corners);

    Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "UNWRAP PREDICTOR");
    Ort::SessionOptions sessionOptions = Ort::SessionOptions();

    Ort::Session *ort_session = nullptr;
    const std::vector<const char*> input_names = {"input"};
    const std::vector<const char*> output_names = {"output", "546"};   // "546" is the exported model's unnamed second output node

    Ort::MemoryInfo memory_info_handler = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Ort::RunOptions runOptions;
};


#endif
--------------------------------------------------------------------------------
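UVDocPredictor also works on its own, as in the commented-out line in cpp/main.cpp (the paths are machine-specific):

    UVDocPredictor unwrap_model("/home/wangbo/doc-undistort/weights/uvdoc.onnx");
    cv::Mat srcimg = cv::imread("/home/wangbo/doc-undistort/images/demo3.jpg");
    cv::Mat out = unwrap_model.predict(srcimg);   // dewarped page at the source resolution
    cv::imwrite("unwrap_only.jpg", out);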
/images/demo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/document-undistort-onnxrun/85554ebdf4223bf70e1a6c8c05b48f9e2250b34e/images/demo.jpg
--------------------------------------------------------------------------------
/images/demo1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/document-undistort-onnxrun/85554ebdf4223bf70e1a6c8c05b48f9e2250b34e/images/demo1.jpg
--------------------------------------------------------------------------------
/images/demo1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/document-undistort-onnxrun/85554ebdf4223bf70e1a6c8c05b48f9e2250b34e/images/demo1.png
--------------------------------------------------------------------------------
/images/demo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/document-undistort-onnxrun/85554ebdf4223bf70e1a6c8c05b48f9e2250b34e/images/demo2.png
--------------------------------------------------------------------------------
/images/demo3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpc203/document-undistort-onnxrun/85554ebdf4223bf70e1a6c8c05b48f9e2250b34e/images/demo3.jpg
--------------------------------------------------------------------------------
/python/binary_predictor.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import onnxruntime


def restore_original_size(image, pad_info):
    start_x, start_y, original_height, original_width = pad_info
    cropped_image = image[start_y:start_y + original_height, start_x:start_x + original_width]
    return cropped_image

def pad_to_multiple_of_n(image, n=32):
    original_height, original_width = image.shape[:2]

    # compute the target shape
    target_width = ((original_width + n - 1) // n) * n
    target_height = ((original_height + n - 1) // n) * n

    # create a pure-white background image
    padded_image = np.ones((target_height, target_width, 3), dtype=np.uint8) * 255

    # compute the padding offsets
    start_x = (target_width - original_width) // 2
    start_y = (target_height - original_height) // 2

    # place the original image on the white background
    padded_image[start_y:start_y + original_height, start_x:start_x + original_width] = image

    # return the padded image and the padding info
    return padded_image, (start_x, start_y, original_height, original_width)


class UnetCNN():
    def __init__(self, modelpath):
        so = onnxruntime.SessionOptions()
        so.log_severity_level = 3
        self.session = onnxruntime.InferenceSession(modelpath, so)
        self.input_name = self.session.get_inputs()[0].name

    def preprocess(self, img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img, pad_info = pad_to_multiple_of_n(img)
        # normalize
        img = img.transpose(2, 0, 1) / 255.0
        # add a batch dimension
        img = np.expand_dims(img, axis=0).astype(np.float32)
        # now in the model input format
        return img, pad_info

    def predict(self, img):
        img, pad_info = self.preprocess(img)
        pred = self.session.run(None, {self.input_name: img})[0]
        out_img = self.postprocess(pred, pad_info)
        return out_img.astype(np.uint8)

    def postprocess(self, img, pad_info):
        img = 1 - (img - img.min()) / (img.max() - img.min())
        img = img[0].transpose(1, 2, 0)
        # repeat the single channel three times
        img = np.repeat(img, 3, axis=2)
        img = (img * 255 + 0.5).clip(0, 255)
        img = restore_original_size(img, pad_info)
        return img


if __name__=='__main__':
    model = UnetCNN('weights/unetcnn.onnx')
    img = cv2.imread('images/demo3.jpg')
    out_img = model.predict(img)
    cv2.imwrite('binary_predictor_out.jpg', out_img)
--------------------------------------------------------------------------------
/python/main.py:
--------------------------------------------------------------------------------
import cv2
from binary_predictor import UnetCNN
from unblur_predictor import NAF_DPM, OpenCvBilateral
from unshadow_predictor import GCDRNET
from unwrap_predictor import UVDocPredictor


if __name__=='__main__':
    binary_model = UnetCNN('weights/unetcnn.onnx')
    unblur_model = NAF_DPM('weights/nafdpm.onnx')
    unblur_model2 = OpenCvBilateral()
    unshadow_model = GCDRNET('weights/gcnet.onnx', 'weights/drnet.onnx')
    unwrap_model = UVDocPredictor('weights/uvdoc.onnx')
    model_dict = {"binary": binary_model, "unblur": unblur_model, "unshadow": unshadow_model, "unwrap": unwrap_model, "OpenCvBilateral": unblur_model2}

    task_list = ["unwrap", "unshadow", "unblur", "OpenCvBilateral"]
    srcimg = cv2.imread('images/demo3.jpg')
    out_img = srcimg.copy()
    for task in task_list:
        out_img = model_dict[task].predict(out_img)

    cv2.imwrite('out.jpg', out_img)
--------------------------------------------------------------------------------
/python/unblur_predictor.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import onnxruntime


class NAF_DPM():
    def __init__(self, modelpath):
        so = onnxruntime.SessionOptions()
        so.log_severity_level = 3
        self.session = onnxruntime.InferenceSession(modelpath, so)
        self.input_name = self.session.get_inputs()[0].name

    def preprocess(self, img):
        # normalize
        img = img.transpose(2, 0, 1) / 255.0
        # add a batch dimension
        img = np.expand_dims(img, axis=0).astype(np.float32)
        # now in the model input format
        return img

    def predict(self, img):
        img = self.preprocess(img)
        pred = self.session.run(None, {self.input_name: img})[0]
        out_img = self.postprocess(pred)
        return out_img.astype(np.uint8)

    def postprocess(self, img):
        img = img[0]
        img = (img * 255 + 0.5).clip(0, 255).transpose(1, 2, 0)
        return img


class OpenCvBilateral:
    def __init__(self,):
        pass
    def predict(self, img):
        img = img.astype(np.uint8)
        # bilateral filtering
        bilateral = cv2.bilateralFilter(img, d=9, sigmaColor=75, sigmaSpace=75)
        # adaptive histogram equalization (CLAHE on the L channel)
        lab = cv2.cvtColor(bilateral, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        cl = clahe.apply(l)
        limg = cv2.merge((cl, a, b))
        enhanced = cv2.cvtColor(limg, cv2.COLOR_LAB2BGR)

        # apply a sharpening kernel
        kernel = np.array([[0, -1, 0],
                           [-1, 5, -1],
                           [0, -1, 0]])
        sharpened = cv2.filter2D(enhanced, -1, kernel)
        return sharpened


if __name__=='__main__':
    model = NAF_DPM('weights/nafdpm.onnx')
    model2 = OpenCvBilateral()
    img = cv2.imread('images/demo3.jpg')
    out_img = model.predict(img)
    out_img = model2.predict(out_img)
    cv2.imwrite('unblur_predictor_out.jpg', out_img)
--------------------------------------------------------------------------------
/python/unshadow_predictor.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import onnxruntime


class GCDRNET():
    def __init__(self, gcnet_modelpath, drnet_modelpath):
        so = onnxruntime.SessionOptions()
        so.log_severity_level = 3
        self.gcnet_session = onnxruntime.InferenceSession(gcnet_modelpath, so)
        self.drnet_session = onnxruntime.InferenceSession(drnet_modelpath, so)
        self.gcnet_input_name = self.gcnet_session.get_inputs()[0].name
        self.drnet_input_name = self.drnet_session.get_inputs()[0].name

    def stride_integral(self, img, stride=32):
        h, w = img.shape[:2]

        if (h % stride) != 0:
            padding_h = stride - (h % stride)
            img = cv2.copyMakeBorder(img, padding_h, 0, 0, 0, borderType=cv2.BORDER_REPLICATE)
        else:
            padding_h = 0

        if (w % stride) != 0:
            padding_w = stride - (w % stride)
            img = cv2.copyMakeBorder(img, 0, 0, padding_w, 0, borderType=cv2.BORDER_REPLICATE)
        else:
            padding_w = 0

        return img, padding_h, padding_w

    def preprocess(self, img):
        img, padding_h, padding_w = self.stride_integral(img)
        # normalize
        img = img.transpose(2, 0, 1) / 255.0
        img = np.expand_dims(img, axis=0).astype(np.float32)
        # now in the model input format
        return img, padding_h, padding_w

    def predict(self, img):
        im_padding, padding_h, padding_w = self.preprocess(img.copy())
        img_shadow = im_padding.copy()
        img_shadow = self.gcnet_session.run(None, {self.gcnet_input_name: img_shadow})[0]
        model1_im = np.clip(im_padding / img_shadow, 0, 1)
        # concatenate im_padding and model1_im along the channel axis
        concatenated_input = np.concatenate((im_padding, model1_im), axis=1)
        pred = self.drnet_session.run(None, {self.drnet_input_name: concatenated_input})[0]
        out_img = self.postprocess(pred, padding_h, padding_w)
        return out_img.astype(np.uint8)

    def postprocess(self, pred, padding_h, padding_w):
        pred = np.transpose(pred[0], (1, 2, 0))
        pred = pred * 255
        enhance_img = pred[padding_h:, padding_w:]
        return enhance_img


if __name__=='__main__':
    model = GCDRNET('weights/gcnet.onnx', 'weights/drnet.onnx')
    img = cv2.imread('images/demo3.jpg')
    out_img = model.predict(img)
    cv2.imwrite('unshadow_predictor_out.jpg', out_img)
--------------------------------------------------------------------------------
/python/unwrap_predictor.py:
--------------------------------------------------------------------------------
import cv2
import numpy as np
import onnxruntime
from scipy.ndimage import map_coordinates


class UVDocPredictor:
    def __init__(self, modelpath):
        so = onnxruntime.SessionOptions()
        so.log_severity_level = 3
        self.session = onnxruntime.InferenceSession(modelpath, so)
        self.input_name = self.session.get_inputs()[0].name
        self.img_size = [488, 712]
        self.grid_size = [45, 31]

    def preprocess(self, img):
        # img_size is (width, height), which is the order cv2.resize expects
        img = cv2.resize(img, self.img_size).transpose(2, 0, 1)
        img = np.expand_dims(img, axis=0)
        return img

    def predict(self, img):
        size = img.shape[:2][::-1]
        img = img.astype(np.float32) / 255
        inp = self.preprocess(img.copy())
        outputs = self.session.run(None, {self.input_name: inp})[0]
        out_img = self.postprocess(img, size, outputs)
        return out_img.astype(np.uint8)

    def postprocess(self, img, size, output):
        # convert the image to an NCHW float array
        warped_img = np.expand_dims(img.transpose(2, 0, 1), axis=0).astype(np.float32)

        # upsample the predicted grid to the original resolution
        upsampled_grid = self.interpolate(output, size=(size[1], size[0]), align_corners=True)
        # reshape the grid to (B, H, W, 2)
        upsampled_grid = upsampled_grid.transpose(0, 2, 3, 1)

        # remap the image
        unwarped_img = self.grid_sample(warped_img, upsampled_grid)

        # convert the result back to HWC in [0, 255]
        return unwarped_img[0].transpose(1, 2, 0) * 255

    def interpolate(self, input_tensor, size, align_corners=True):
        """
        Interpolate function to resize the input tensor (bilinear, order=1).

        Args:
            input_tensor: numpy.ndarray of shape (B, C, H, W)
            size: tuple of int (new_height, new_width)
            align_corners: bool, whether to align corners in bilinear interpolation

        Returns:
            numpy.ndarray of shape (B, C, new_height, new_width)
        """
        B, C, H, W = input_tensor.shape
        new_H, new_W = size
        resized_tensors = []
        for b in range(B):
            resized_channels = []
            for c in range(C):
                # compute the scale factors
                if align_corners:
                    scale_h = (H - 1) / (new_H - 1) if new_H > 1 else 0
                    scale_w = (W - 1) / (new_W - 1) if new_W > 1 else 0
                else:
                    scale_h = H / new_H
                    scale_w = W / new_W

                # build the new coordinate grid
                y, x = np.indices((new_H, new_W), dtype=np.float32)
                y = y * scale_h
                x = x * scale_w

                # bilinear interpolation
                coords = np.stack([y.flatten(), x.flatten()], axis=0)
                # tmp = my_map_coordinates(input_tensor[b, c], coords)
                resized_channel = map_coordinates(input_tensor[b, c], coords, order=1, mode='constant', cval=0.0)
                resized_channel = resized_channel.reshape(new_H, new_W)
                resized_channels.append(resized_channel)

            resized_tensors.append(np.stack(resized_channels, axis=0))

        return np.stack(resized_tensors, axis=0)
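    # Note on conventions: grid_sample below follows the
    # torch.nn.functional.grid_sample semantics. Grid values are normalized to
    # [-1, 1]; with align_corners=True, x_pix = (x + 1) * (W - 1) / 2, so -1
    # maps to the first pixel center and +1 to the last. Samples falling
    # outside the input read as zero (mode='constant').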
    def grid_sample(self, input_tensor, grid, align_corners=True):
        """
        Grid sample function to sample the input tensor using the given grid.

        Args:
            input_tensor: numpy.ndarray of shape (B, C, H, W)
            grid: numpy.ndarray of shape (B, H, W, 2) with values in [-1, 1]
            align_corners: bool, whether to align corners in bilinear interpolation

        Returns:
            numpy.ndarray of shape (B, C, H, W)
        """
        B, C, H, W = input_tensor.shape
        B_grid, H_grid, W_grid, _ = grid.shape

        if B != B_grid or H != H_grid or W != W_grid:
            raise ValueError("Input tensor and grid must have the same spatial dimensions.")

        # Convert grid coordinates from [-1, 1] to [0, W-1] and [0, H-1]
        if align_corners:
            grid[:, :, :, 0] = (grid[:, :, :, 0] + 1) * (W - 1) / 2
            grid[:, :, :, 1] = (grid[:, :, :, 1] + 1) * (H - 1) / 2
        else:
            grid[:, :, :, 0] = ((grid[:, :, :, 0] + 1) * W - 1) / 2
            grid[:, :, :, 1] = ((grid[:, :, :, 1] + 1) * H - 1) / 2

        sampled_tensors = []
        for b in range(B):
            sampled_channels = []
            for c in range(C):
                channel = input_tensor[b, c]
                x_coords = grid[b, :, :, 0].flatten()
                y_coords = grid[b, :, :, 1].flatten()
                coords = np.stack([y_coords, x_coords], axis=-1)
                sampled_channel = map_coordinates(channel, coords.T, order=1, mode='constant', cval=0.0).reshape(H, W)
                sampled_channels.append(sampled_channel)
            sampled_tensors.append(np.stack(sampled_channels, axis=0))

        return np.stack(sampled_tensors, axis=0)


def my_map_coordinates(input, coordinates):
    def set_value(input, x, y):
        # out-of-range samples read as zero, like mode='constant', cval=0.0
        mask = (x >= 0) & (x < input.shape[1]) & (y >= 0) & (y < input.shape[0])
        out = np.zeros(y.shape)
        out[mask] = input[y[mask], x[mask]]
        return out

    y = coordinates[0, :]
    x = coordinates[1, :]
    x0 = np.floor(x).astype(np.int32)
    x1 = x0 + 1
    y0 = np.floor(y).astype(np.int32)
    y1 = y0 + 1

    f_x0_y0 = set_value(input, x0, y0)
    f_x1_y0 = set_value(input, x1, y0)
    f_x0_y1 = set_value(input, x0, y1)
    f_x1_y1 = set_value(input, x1, y1)

    denom = (y1-y0)*(x1-x0)
    f = ((y1-y)*(x1-x) / denom) * f_x0_y0 + ((y1-y)*(x-x0) / denom) * f_x1_y0 + ((y-y0)*(x1-x) / denom) * f_x0_y1 + ((y-y0)*(x-x0) / denom) * f_x1_y1
    return f.astype(np.float32)

if __name__=='__main__':
    model = UVDocPredictor('weights/uvdoc.onnx')
    img = cv2.imread('images/demo3.jpg')
    out_img = model.predict(img)
    cv2.imwrite('unwrap_predictor_out.jpg', out_img)
--------------------------------------------------------------------------------
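Since my_map_coordinates re-implements the bilinear case of scipy.ndimage.map_coordinates, a quick self-check (illustrative only, run inside this module) can confirm the two agree:

    import numpy as np
    from scipy.ndimage import map_coordinates

    rng = np.random.default_rng(0)
    image = rng.random((16, 16)).astype(np.float32)
    coords = rng.uniform(0, 15, size=(2, 100)).astype(np.float32)  # row 0: y, row 1: x

    ref = map_coordinates(image, coords, order=1, mode='constant', cval=0.0)
    ours = my_map_coordinates(image, coords)
    print(np.abs(ref - ours).max())  # expected to be on the order of 1e-7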