├── CMakeLists.txt ├── LICENSE ├── Makefile ├── README.md ├── images ├── engineContext.png ├── head.png ├── head.psd ├── inferImp.png ├── inplace_right_way.svg ├── inplace_wrong_way.svg ├── mixmemory.svg ├── onnx_constant.png ├── onnx_product.jpg ├── onnx_proto组成.svg ├── pytorch自定义插件.png ├── pytorch自定义插件json.png ├── shape_right_way.svg ├── shape_wrong_way.svg ├── split_image_demo.jpg ├── tensorRT.gif ├── tensorRT库文件一览.png ├── tensorRT推理路线.svg ├── tensorRT编译推理流程.svg ├── tensor_meomory_mark.png ├── 仿射变换三步走.png ├── 仿射变换旋转.jpg ├── 双线性插值.jpg ├── 双线性插值中心对齐问题.png └── 目标检测后处理开发流程.svg ├── src ├── application │ ├── app_alphapose.cpp │ ├── app_alphapose │ │ ├── alpha_pose.cpp │ │ └── alpha_pose.hpp │ ├── app_alphapose_old │ │ ├── alpha_pose_old.cpp │ │ └── alpha_pose_old.hpp │ ├── app_arcface.cpp │ ├── app_arcface │ │ ├── arcface.cpp │ │ └── arcface.hpp │ ├── app_bert.cpp │ ├── app_centernet.cpp │ ├── app_centernet │ │ ├── centernet.cpp │ │ ├── centernet.hpp │ │ └── centernet_decode.cu │ ├── app_dbface.cpp │ ├── app_dbface │ │ ├── dbface.cpp │ │ ├── dbface.hpp │ │ └── dbface_decode.cu │ ├── app_fall_gcn │ │ ├── fall_gcn.cpp │ │ └── fall_gcn.hpp │ ├── app_fall_recognize.cpp │ ├── app_high_performance.cpp │ ├── app_high_performance │ │ ├── alpha_pose_high_perf.cpp │ │ ├── alpha_pose_high_perf.hpp │ │ ├── high_performance.cpp │ │ ├── high_performance.hpp │ │ ├── yolo_high_perf.cpp │ │ └── yolo_high_perf.hpp │ ├── app_lesson.cpp │ ├── app_plugin.cpp │ ├── app_python │ │ └── interface.cpp │ ├── app_retinaface.cpp │ ├── app_retinaface │ │ ├── retinaface.cpp │ │ ├── retinaface.hpp │ │ └── retinaface_decode.cu │ ├── app_scrfd.cpp │ ├── app_scrfd │ │ ├── scrfd.cpp │ │ ├── scrfd.hpp │ │ └── scrfd_decode.cu │ ├── app_yolo.cpp │ ├── app_yolo │ │ ├── multi_gpu.cpp │ │ ├── multi_gpu.hpp │ │ ├── yolo.cpp │ │ ├── yolo.hpp │ │ └── yolo_decode.cu │ ├── app_yolo_fast.cpp │ ├── app_yolo_fast │ │ ├── yolo_fast.cpp │ │ ├── yolo_fast.hpp │ │ ├── yolov5_decode.cu │ │ └── yolox_decode.cu │ ├── 
app_yolo_gpuptr.cpp │ ├── app_yolo_gpuptr │ │ ├── yolo_gpuptr.cpp │ │ ├── yolo_gpuptr.hpp │ │ └── yolo_gpuptr_decode.cu │ ├── common │ │ ├── face_detector.hpp │ │ └── object_detector.hpp │ ├── test_warpaffine.cpp │ ├── test_yolo_map.cpp │ └── tools │ │ ├── Eigen │ │ ├── CMakeLists.txt │ │ ├── Cholesky │ │ ├── CholmodSupport │ │ ├── Core │ │ ├── Dense │ │ ├── Eigen │ │ ├── Eigenvalues │ │ ├── Geometry │ │ ├── Householder │ │ ├── IterativeLinearSolvers │ │ ├── Jacobi │ │ ├── LU │ │ ├── MetisSupport │ │ ├── OrderingMethods │ │ ├── PaStiXSupport │ │ ├── PardisoSupport │ │ ├── QR │ │ ├── QtAlignedMalloc │ │ ├── SPQRSupport │ │ ├── SVD │ │ ├── Sparse │ │ ├── SparseCholesky │ │ ├── SparseCore │ │ ├── SparseLU │ │ ├── SparseQR │ │ ├── StdDeque │ │ ├── StdList │ │ ├── StdVector │ │ ├── SuperLUSupport │ │ ├── UmfPackSupport │ │ └── src │ │ │ ├── Cholesky │ │ │ ├── LDLT.h │ │ │ ├── LLT.h │ │ │ └── LLT_LAPACKE.h │ │ │ ├── CholmodSupport │ │ │ └── CholmodSupport.h │ │ │ ├── Core │ │ │ ├── Array.h │ │ │ ├── ArrayBase.h │ │ │ ├── ArrayWrapper.h │ │ │ ├── Assign.h │ │ │ ├── AssignEvaluator.h │ │ │ ├── Assign_MKL.h │ │ │ ├── BandMatrix.h │ │ │ ├── Block.h │ │ │ ├── BooleanRedux.h │ │ │ ├── CommaInitializer.h │ │ │ ├── ConditionEstimator.h │ │ │ ├── CoreEvaluators.h │ │ │ ├── CoreIterators.h │ │ │ ├── CwiseBinaryOp.h │ │ │ ├── CwiseNullaryOp.h │ │ │ ├── CwiseTernaryOp.h │ │ │ ├── CwiseUnaryOp.h │ │ │ ├── CwiseUnaryView.h │ │ │ ├── DenseBase.h │ │ │ ├── DenseCoeffsBase.h │ │ │ ├── DenseStorage.h │ │ │ ├── Diagonal.h │ │ │ ├── DiagonalMatrix.h │ │ │ ├── DiagonalProduct.h │ │ │ ├── Dot.h │ │ │ ├── EigenBase.h │ │ │ ├── ForceAlignedAccess.h │ │ │ ├── Fuzzy.h │ │ │ ├── GeneralProduct.h │ │ │ ├── GenericPacketMath.h │ │ │ ├── GlobalFunctions.h │ │ │ ├── IO.h │ │ │ ├── Inverse.h │ │ │ ├── Map.h │ │ │ ├── MapBase.h │ │ │ ├── MathFunctions.h │ │ │ ├── MathFunctionsImpl.h │ │ │ ├── Matrix.h │ │ │ ├── MatrixBase.h │ │ │ ├── NestByValue.h │ │ │ ├── NoAlias.h │ │ │ ├── NumTraits.h │ │ │ ├── 
PermutationMatrix.h │ │ │ ├── PlainObjectBase.h │ │ │ ├── Product.h │ │ │ ├── ProductEvaluators.h │ │ │ ├── Random.h │ │ │ ├── Redux.h │ │ │ ├── Ref.h │ │ │ ├── Replicate.h │ │ │ ├── ReturnByValue.h │ │ │ ├── Reverse.h │ │ │ ├── Select.h │ │ │ ├── SelfAdjointView.h │ │ │ ├── SelfCwiseBinaryOp.h │ │ │ ├── Solve.h │ │ │ ├── SolveTriangular.h │ │ │ ├── SolverBase.h │ │ │ ├── StableNorm.h │ │ │ ├── Stride.h │ │ │ ├── Swap.h │ │ │ ├── Transpose.h │ │ │ ├── Transpositions.h │ │ │ ├── TriangularMatrix.h │ │ │ ├── VectorBlock.h │ │ │ ├── VectorwiseOp.h │ │ │ ├── Visitor.h │ │ │ ├── arch │ │ │ │ ├── AVX │ │ │ │ │ ├── Complex.h │ │ │ │ │ ├── MathFunctions.h │ │ │ │ │ ├── PacketMath.h │ │ │ │ │ └── TypeCasting.h │ │ │ │ ├── AVX512 │ │ │ │ │ ├── MathFunctions.h │ │ │ │ │ └── PacketMath.h │ │ │ │ ├── AltiVec │ │ │ │ │ ├── Complex.h │ │ │ │ │ ├── MathFunctions.h │ │ │ │ │ └── PacketMath.h │ │ │ │ ├── CUDA │ │ │ │ │ ├── Complex.h │ │ │ │ │ ├── Half.h │ │ │ │ │ ├── MathFunctions.h │ │ │ │ │ ├── PacketMath.h │ │ │ │ │ ├── PacketMathHalf.h │ │ │ │ │ └── TypeCasting.h │ │ │ │ ├── Default │ │ │ │ │ └── Settings.h │ │ │ │ ├── NEON │ │ │ │ │ ├── Complex.h │ │ │ │ │ ├── MathFunctions.h │ │ │ │ │ └── PacketMath.h │ │ │ │ ├── SSE │ │ │ │ │ ├── Complex.h │ │ │ │ │ ├── MathFunctions.h │ │ │ │ │ ├── PacketMath.h │ │ │ │ │ └── TypeCasting.h │ │ │ │ └── ZVector │ │ │ │ │ ├── Complex.h │ │ │ │ │ ├── MathFunctions.h │ │ │ │ │ └── PacketMath.h │ │ │ ├── functors │ │ │ │ ├── AssignmentFunctors.h │ │ │ │ ├── BinaryFunctors.h │ │ │ │ ├── NullaryFunctors.h │ │ │ │ ├── StlFunctors.h │ │ │ │ ├── TernaryFunctors.h │ │ │ │ └── UnaryFunctors.h │ │ │ ├── products │ │ │ │ ├── GeneralBlockPanelKernel.h │ │ │ │ ├── GeneralMatrixMatrix.h │ │ │ │ ├── GeneralMatrixMatrixTriangular.h │ │ │ │ ├── GeneralMatrixMatrixTriangular_BLAS.h │ │ │ │ ├── GeneralMatrixMatrix_BLAS.h │ │ │ │ ├── GeneralMatrixVector.h │ │ │ │ ├── GeneralMatrixVector_BLAS.h │ │ │ │ ├── Parallelizer.h │ │ │ │ ├── SelfadjointMatrixMatrix.h │ │ │ │ 
├── SelfadjointMatrixMatrix_BLAS.h │ │ │ │ ├── SelfadjointMatrixVector.h │ │ │ │ ├── SelfadjointMatrixVector_BLAS.h │ │ │ │ ├── SelfadjointProduct.h │ │ │ │ ├── SelfadjointRank2Update.h │ │ │ │ ├── TriangularMatrixMatrix.h │ │ │ │ ├── TriangularMatrixMatrix_BLAS.h │ │ │ │ ├── TriangularMatrixVector.h │ │ │ │ ├── TriangularMatrixVector_BLAS.h │ │ │ │ ├── TriangularSolverMatrix.h │ │ │ │ ├── TriangularSolverMatrix_BLAS.h │ │ │ │ └── TriangularSolverVector.h │ │ │ └── util │ │ │ │ ├── BlasUtil.h │ │ │ │ ├── Constants.h │ │ │ │ ├── DisableStupidWarnings.h │ │ │ │ ├── ForwardDeclarations.h │ │ │ │ ├── MKL_support.h │ │ │ │ ├── Macros.h │ │ │ │ ├── Memory.h │ │ │ │ ├── Meta.h │ │ │ │ ├── NonMPL2.h │ │ │ │ ├── ReenableStupidWarnings.h │ │ │ │ ├── StaticAssert.h │ │ │ │ └── XprHelper.h │ │ │ ├── Eigenvalues │ │ │ ├── ComplexEigenSolver.h │ │ │ ├── ComplexSchur.h │ │ │ ├── ComplexSchur_LAPACKE.h │ │ │ ├── EigenSolver.h │ │ │ ├── GeneralizedEigenSolver.h │ │ │ ├── GeneralizedSelfAdjointEigenSolver.h │ │ │ ├── HessenbergDecomposition.h │ │ │ ├── MatrixBaseEigenvalues.h │ │ │ ├── RealQZ.h │ │ │ ├── RealSchur.h │ │ │ ├── RealSchur_LAPACKE.h │ │ │ ├── SelfAdjointEigenSolver.h │ │ │ ├── SelfAdjointEigenSolver_LAPACKE.h │ │ │ └── Tridiagonalization.h │ │ │ ├── Geometry │ │ │ ├── AlignedBox.h │ │ │ ├── AngleAxis.h │ │ │ ├── EulerAngles.h │ │ │ ├── Homogeneous.h │ │ │ ├── Hyperplane.h │ │ │ ├── OrthoMethods.h │ │ │ ├── ParametrizedLine.h │ │ │ ├── Quaternion.h │ │ │ ├── Rotation2D.h │ │ │ ├── RotationBase.h │ │ │ ├── Scaling.h │ │ │ ├── Transform.h │ │ │ ├── Translation.h │ │ │ ├── Umeyama.h │ │ │ └── arch │ │ │ │ └── Geometry_SSE.h │ │ │ ├── Householder │ │ │ ├── BlockHouseholder.h │ │ │ ├── Householder.h │ │ │ └── HouseholderSequence.h │ │ │ ├── IterativeLinearSolvers │ │ │ ├── BasicPreconditioners.h │ │ │ ├── BiCGSTAB.h │ │ │ ├── ConjugateGradient.h │ │ │ ├── IncompleteCholesky.h │ │ │ ├── IncompleteLUT.h │ │ │ ├── IterativeSolverBase.h │ │ │ ├── LeastSquareConjugateGradient.h │ 
│ │ └── SolveWithGuess.h │ │ │ ├── Jacobi │ │ │ └── Jacobi.h │ │ │ ├── LU │ │ │ ├── Determinant.h │ │ │ ├── FullPivLU.h │ │ │ ├── InverseImpl.h │ │ │ ├── PartialPivLU.h │ │ │ ├── PartialPivLU_LAPACKE.h │ │ │ └── arch │ │ │ │ └── Inverse_SSE.h │ │ │ ├── MetisSupport │ │ │ └── MetisSupport.h │ │ │ ├── OrderingMethods │ │ │ ├── Amd.h │ │ │ ├── Eigen_Colamd.h │ │ │ └── Ordering.h │ │ │ ├── PaStiXSupport │ │ │ └── PaStiXSupport.h │ │ │ ├── PardisoSupport │ │ │ └── PardisoSupport.h │ │ │ ├── QR │ │ │ ├── ColPivHouseholderQR.h │ │ │ ├── ColPivHouseholderQR_LAPACKE.h │ │ │ ├── CompleteOrthogonalDecomposition.h │ │ │ ├── FullPivHouseholderQR.h │ │ │ ├── HouseholderQR.h │ │ │ └── HouseholderQR_LAPACKE.h │ │ │ ├── SPQRSupport │ │ │ └── SuiteSparseQRSupport.h │ │ │ ├── SVD │ │ │ ├── BDCSVD.h │ │ │ ├── JacobiSVD.h │ │ │ ├── JacobiSVD_LAPACKE.h │ │ │ ├── SVDBase.h │ │ │ └── UpperBidiagonalization.h │ │ │ ├── SparseCholesky │ │ │ ├── SimplicialCholesky.h │ │ │ └── SimplicialCholesky_impl.h │ │ │ ├── SparseCore │ │ │ ├── AmbiVector.h │ │ │ ├── CompressedStorage.h │ │ │ ├── ConservativeSparseSparseProduct.h │ │ │ ├── MappedSparseMatrix.h │ │ │ ├── SparseAssign.h │ │ │ ├── SparseBlock.h │ │ │ ├── SparseColEtree.h │ │ │ ├── SparseCompressedBase.h │ │ │ ├── SparseCwiseBinaryOp.h │ │ │ ├── SparseCwiseUnaryOp.h │ │ │ ├── SparseDenseProduct.h │ │ │ ├── SparseDiagonalProduct.h │ │ │ ├── SparseDot.h │ │ │ ├── SparseFuzzy.h │ │ │ ├── SparseMap.h │ │ │ ├── SparseMatrix.h │ │ │ ├── SparseMatrixBase.h │ │ │ ├── SparsePermutation.h │ │ │ ├── SparseProduct.h │ │ │ ├── SparseRedux.h │ │ │ ├── SparseRef.h │ │ │ ├── SparseSelfAdjointView.h │ │ │ ├── SparseSolverBase.h │ │ │ ├── SparseSparseProductWithPruning.h │ │ │ ├── SparseTranspose.h │ │ │ ├── SparseTriangularView.h │ │ │ ├── SparseUtil.h │ │ │ ├── SparseVector.h │ │ │ ├── SparseView.h │ │ │ └── TriangularSolver.h │ │ │ ├── SparseLU │ │ │ ├── SparseLU.h │ │ │ ├── SparseLUImpl.h │ │ │ ├── SparseLU_Memory.h │ │ │ ├── SparseLU_Structs.h │ │ │ ├── 
SparseLU_SupernodalMatrix.h │ │ │ ├── SparseLU_Utils.h │ │ │ ├── SparseLU_column_bmod.h │ │ │ ├── SparseLU_column_dfs.h │ │ │ ├── SparseLU_copy_to_ucol.h │ │ │ ├── SparseLU_gemm_kernel.h │ │ │ ├── SparseLU_heap_relax_snode.h │ │ │ ├── SparseLU_kernel_bmod.h │ │ │ ├── SparseLU_panel_bmod.h │ │ │ ├── SparseLU_panel_dfs.h │ │ │ ├── SparseLU_pivotL.h │ │ │ ├── SparseLU_pruneL.h │ │ │ └── SparseLU_relax_snode.h │ │ │ ├── SparseQR │ │ │ └── SparseQR.h │ │ │ ├── StlSupport │ │ │ ├── StdDeque.h │ │ │ ├── StdList.h │ │ │ ├── StdVector.h │ │ │ └── details.h │ │ │ ├── SuperLUSupport │ │ │ └── SuperLUSupport.h │ │ │ ├── UmfPackSupport │ │ │ └── UmfPackSupport.h │ │ │ ├── misc │ │ │ ├── Image.h │ │ │ ├── Kernel.h │ │ │ ├── RealSvd2x2.h │ │ │ ├── blas.h │ │ │ ├── lapack.h │ │ │ ├── lapacke.h │ │ │ └── lapacke_mangling.h │ │ │ └── plugins │ │ │ ├── ArrayCwiseBinaryOps.h │ │ │ ├── ArrayCwiseUnaryOps.h │ │ │ ├── BlockMethods.h │ │ │ ├── CommonCwiseBinaryOps.h │ │ │ ├── CommonCwiseUnaryOps.h │ │ │ ├── MatrixCwiseBinaryOps.h │ │ │ └── MatrixCwiseUnaryOps.h │ │ ├── auto_download.cpp │ │ ├── deepsort.cpp │ │ ├── deepsort.hpp │ │ ├── pybind11.hpp │ │ ├── zmq_remote_show.cpp │ │ ├── zmq_remote_show.hpp │ │ ├── zmq_u.cpp │ │ └── zmq_u.hpp ├── direct │ ├── direct_classifier.cpp │ ├── direct_mae.cpp │ ├── direct_unet.cpp │ └── direct_yolo.cpp ├── main.cpp └── tensorRT │ ├── builder │ ├── trt_builder.cpp │ └── trt_builder.hpp │ ├── common │ ├── cuda_tools.cpp │ ├── cuda_tools.hpp │ ├── ilogger.cpp │ ├── ilogger.hpp │ ├── infer_controller.hpp │ ├── json.cpp │ ├── json.hpp │ ├── monopoly_allocator.hpp │ ├── preprocess_kernel.cu │ ├── preprocess_kernel.cuh │ ├── trt_tensor.cpp │ └── trt_tensor.hpp │ ├── infer │ ├── trt_infer.cpp │ └── trt_infer.hpp │ ├── onnx │ ├── onnx-ml.pb.cpp │ ├── onnx-ml.pb.h │ ├── onnx-operators-ml.pb.cpp │ ├── onnx-operators-ml.pb.h │ ├── onnx_pb.h │ ├── onnxifi.h │ └── readme.md │ ├── onnx_parser │ ├── ImporterContext.hpp │ ├── LoopHelpers.cpp │ ├── LoopHelpers.hpp │ 
├── ModelImporter.cpp │ ├── ModelImporter.hpp │ ├── NvOnnxParser.cpp │ ├── NvOnnxParser.h │ ├── OnnxAttrs.cpp │ ├── OnnxAttrs.hpp │ ├── RNNHelpers.cpp │ ├── RNNHelpers.hpp │ ├── ShapeTensor.cpp │ ├── ShapeTensor.hpp │ ├── ShapedWeights.cpp │ ├── ShapedWeights.hpp │ ├── Status.hpp │ ├── TensorOrWeights.hpp │ ├── builtin_op_importers.cpp │ ├── builtin_op_importers.hpp │ ├── onnx2trt.hpp │ ├── onnx2trt_common.hpp │ ├── onnx2trt_runtime.hpp │ ├── onnx2trt_utils.cpp │ ├── onnx2trt_utils.hpp │ ├── onnxErrorRecorder.cpp │ ├── onnxErrorRecorder.hpp │ ├── onnx_utils.hpp │ ├── readme.md │ ├── toposort.hpp │ ├── trt_utils.hpp │ └── utils.hpp │ └── onnxplugin │ ├── onnxplugin.cpp │ ├── onnxplugin.hpp │ ├── plugin_binary_io.cpp │ ├── plugin_binary_io.hpp │ └── plugins │ ├── DCNv2.cu │ ├── HSigmoid.cu │ ├── HSwish.cu │ └── ScatterND.cu └── workspace ├── exp ├── face_tracker.mp4 └── fall_video.mp4 ├── face ├── library │ ├── 2ys2.jpg │ ├── 2ys3.jpg │ └── male.jpg └── recognize │ ├── 2ys1.jpg │ ├── 2ys3.jpg │ └── 2ys5.jpg ├── inference ├── car.jpg ├── gril.jpg ├── group.jpg ├── yq.jpg ├── zand.jpg └── zgjr.jpg ├── labels.imagenet.txt ├── pro └── python ├── test_dcnv2.py └── test_hswish.py /images/engineContext.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/engineContext.png -------------------------------------------------------------------------------- /images/head.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/head.png -------------------------------------------------------------------------------- /images/head.psd: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/head.psd -------------------------------------------------------------------------------- /images/inferImp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/inferImp.png -------------------------------------------------------------------------------- /images/onnx_constant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/onnx_constant.png -------------------------------------------------------------------------------- /images/onnx_product.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/onnx_product.jpg -------------------------------------------------------------------------------- /images/pytorch自定义插件.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/pytorch自定义插件.png -------------------------------------------------------------------------------- /images/pytorch自定义插件json.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/pytorch自定义插件json.png -------------------------------------------------------------------------------- /images/shape_right_way.svg: -------------------------------------------------------------------------------- 1 | batch×3×640×640ConstantConstant_0outputfloat32[batch,3,640,640] -------------------------------------------------------------------------------- 
/images/split_image_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/split_image_demo.jpg -------------------------------------------------------------------------------- /images/tensorRT.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/tensorRT.gif -------------------------------------------------------------------------------- /images/tensorRT库文件一览.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/tensorRT库文件一览.png -------------------------------------------------------------------------------- /images/tensor_meomory_mark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/tensor_meomory_mark.png -------------------------------------------------------------------------------- /images/仿射变换三步走.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/仿射变换三步走.png -------------------------------------------------------------------------------- /images/仿射变换旋转.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/仿射变换旋转.jpg -------------------------------------------------------------------------------- /images/双线性插值.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/双线性插值.jpg -------------------------------------------------------------------------------- /images/双线性插值中心对齐问题.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/images/双线性插值中心对齐问题.png -------------------------------------------------------------------------------- /src/application/app_alphapose.cpp: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * @file _main.cpp 4 | * @author 手写AI (zifuture.com:8090) 5 | * @date 2021-07-26 6 | * 7 | * Implements TensorRT-based inference for yolox (NOTE(review): this header reads as shared project boilerplate; the code in this file compiles an FP32 AlphaPose engine and runs it on a single image crop) 8 | * 1. FP32 model compilation and inference 9 | * 2. INT8 model compilation and inference 10 | * 3. Custom plugin implementation, from PyTorch export to inference compilation, with FP16 support 11 | * 12 | * Pre-processing and post-processing are implemented on the CPU (implement them on the GPU yourself if needed) 13 | * One inference pass processes 5 images 14 | * 15 | * We are a group of passionate individual organizers who strive to publish free, high-quality content 16 | * Our blog: http://zifuture.com:8090 17 | * Our Bilibili channel: https://space.bilibili.com/1413433465 18 | * 19 | * To study the TensorRT technology stack in depth, contact us through the QR code on the blog (free; just nudge us for updates) 20 | * Follow us on Bilibili; we publish tutorial videos as circumstances allow (free) 21 | */ 22 | 23 | #include 24 | #include 25 | #include 26 | #include "app_alphapose/alpha_pose.hpp" 27 | 28 | using namespace std; 29 | using namespace cv; 30 | 31 | bool requires(const char* name); // NOTE(review): `requires` is a reserved keyword from C++20 on, so this only compiles with earlier standards 32 | 33 | int app_alphapose(){ 34 | 35 | TRT::set_device(0); 36 | INFO("===================== test alphapose fp32 =================================="); 37 | 38 | const char* name = "alpha-pose-136"; 39 | if(not requires(name)) 40 | return 0; 41 | 42 | string onnx_file = iLogger::format("%s.onnx", name); 43 | string model_file = iLogger::format("%s.FP32.trtmodel", name); 44 | int test_batch_size = 16; 45 | 46 | if(!iLogger::exists(model_file)){ 47 | TRT::compile( 48 | TRT::Mode::FP32, // FP32, FP16, INT8 49 | test_batch_size, // max_batch_size 50 | onnx_file, // source 51 | model_file // save to 52 | ); 53 | } 54 | 55 | Mat image = imread("inference/gril.jpg"); 
56 | auto engine = AlphaPose::create_infer(model_file, 0); 57 | auto box = Rect(158, 104, 176, 693); 58 | auto keys = engine->commit(make_tuple(image, box)).get(); 59 | for(int i = 0; i < keys.size(); ++i){ 60 | float x = keys[i].x; 61 | float y = keys[i].y; 62 | if(keys[i].z > 0.05){ // presumably z is the keypoint confidence (the export script in alpha_pose.hpp concatenates x, y, confidence) - TODO confirm 63 | cv::circle(image, Point(x, y), 1, Scalar(0, 255, 0), -1, 16); 64 | } 65 | } 66 | 67 | auto save_file = "pose.show.jpg"; 68 | INFO("Save to %s", save_file); 69 | 70 | imwrite(save_file, image); 71 | INFO("Done"); 72 | return 0; 73 | } -------------------------------------------------------------------------------- /src/application/app_alphapose/alpha_pose.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ALPHA_POSE_HPP 2 | #define ALPHA_POSE_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | /* Python script that exports "alpha-pose-136.onnx" from AlphaPose (kept here for reference): 11 | 12 | # change AlphaPose-master/configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml 13 | CONV_DIM : 256 -> CONV_DIM : 128 14 | 15 | import torch 16 | import yaml 17 | from easydict import EasyDict as edict 18 | 19 | from alphapose.models import builder 20 | 21 | class Alphapose(torch.nn.Module): 22 | def __init__(self): 23 | super().__init__() 24 | config_file = "configs/halpe_136/resnet/256x192_res50_lr1e-3_2x-regression.yaml" 25 | check_point = "pretrained_models/multi_domain_fast50_regression_256x192.pth" 26 | with open(config_file, "r") as f: 27 | config = edict(yaml.load(f, Loader=yaml.FullLoader)) 28 | 29 | self.pose_model = builder.build_sppe(config.MODEL, preset_cfg=config.DATA_PRESET) 30 | self.pose_model.load_state_dict(torch.load(check_point, map_location="cpu")) 31 | 32 | def forward(self, x): 33 | hm = self.pose_model(x) 34 | 35 | # postprocess 36 | stride = int(x.size(2) / hm.size(2)) 37 | b, c, h, w = map(int, hm.size()) 38 | prob = hm.sigmoid() 39 | confidence, _ = prob.view(-1, c, h * w).max(dim=2, keepdim=True) 40 | prob = prob / prob.sum(dim=[2, 3], keepdim=True) 41 | coordx = 
torch.arange(w, device=prob.device, dtype=torch.float32) 42 | coordy = torch.arange(h, device=prob.device, dtype=torch.float32) 43 | hmx = (prob.sum(dim=2) * coordx).sum(dim=2, keepdim=True) * stride 44 | hmy = (prob.sum(dim=3) * coordy).sum(dim=2, keepdim=True) * stride 45 | return torch.cat([hmx, hmy, confidence], dim=2) 46 | 47 | model = Alphapose().eval() 48 | dummy = torch.zeros(1, 3, 256, 192) 49 | torch.onnx.export( 50 | model, (dummy,), "alpha-pose-136.onnx", input_names=["images"], output_names=["keypoints"], 51 | opset_version=11, 52 | dynamic_axes={ 53 | "images": {0: "batch"}, 54 | "keypoints": {0: "batch"} 55 | } 56 | ) 57 | */ 58 | 59 | // based on https://github.com/MVIG-SJTU/AlphaPose v0.5.0 version 60 | namespace AlphaPose{ 61 | 62 | using namespace std; 63 | using namespace cv; 64 | 65 | typedef tuple Input; 66 | 67 | class Infer{ // pure-virtual inference interface 68 | public: 69 | virtual shared_future> commit(const Input& input) = 0; 70 | virtual vector>> commits(const vector& inputs) = 0; 71 | }; 72 | 73 | shared_ptr create_infer(const string& engine_file, int gpuid); 74 | 75 | }; // namespace AlphaPose 76 | 77 | #endif // ALPHA_POSE_HPP -------------------------------------------------------------------------------- /src/application/app_alphapose_old/alpha_pose_old.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ALPHA_POSE_OLD_HPP 2 | #define ALPHA_POSE_OLD_HPP // guard kept distinct from ALPHA_POSE_HPP (app_alphapose/alpha_pose.hpp) so both headers can be included in one translation unit 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // based on https://github.com/MVIG-SJTU/AlphaPose v0.3.0 version 11 | namespace AlphaPoseOld{ 12 | 13 | using namespace std; 14 | using namespace cv; 15 | 16 | typedef tuple Input; 17 | 18 | class Infer{ // pure-virtual inference interface 19 | public: 20 | virtual shared_future> commit(const Input& input) = 0; 21 | virtual vector>> commits(const vector& inputs) = 0; 22 | }; 23 | 24 | shared_ptr create_infer(const string& engine_file, int gpuid); 25 | 26 | }; // namespace AlphaPoseOld 27 | 28 | #endif // ALPHA_POSE_OLD_HPP 
-------------------------------------------------------------------------------- /src/application/app_arcface/arcface.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ARCFACE_HPP 2 | #define ARCFACE_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace Arcface{ 11 | 12 | using namespace std; 13 | 14 | struct landmarks{ 15 | float points[10]; // presumably five (x, y) landmark pairs - TODO confirm against face_alignment() 16 | }; 17 | 18 | typedef cv::Mat_ feature; 19 | typedef tuple commit_input; 20 | 21 | class Infer{ // pure-virtual inference interface 22 | public: 23 | virtual shared_future commit (const commit_input& input) = 0; 24 | virtual vector> commits(const vector& inputs) = 0; 25 | }; 26 | 27 | cv::Mat face_alignment(const cv::Mat& image, const landmarks& landmark); 28 | shared_ptr create_infer(const string& engine_file, int gpuid=0); 29 | 30 | }; // namespace Arcface 31 | 32 | #endif // ARCFACE_HPP -------------------------------------------------------------------------------- /src/application/app_centernet/centernet.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CENTERNET_HPP 2 | #define CENTERNET_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace CenterNet{ 13 | 14 | using namespace std; 15 | using namespace ObjectDetector; 16 | 17 | void image_to_tensor(const cv::Mat& image, shared_ptr& tensor, int ibatch); 18 | 19 | class Infer{ // pure-virtual inference interface 20 | public: 21 | virtual shared_future commit(const cv::Mat& image) = 0; 22 | virtual vector> commits(const vector& images) = 0; 23 | }; 24 | 25 | shared_ptr create_infer(const string& engine_file, int gpuid, float confidence_threshold=0.25f, float nms_threshold=0.5f); 26 | 27 | }; // namespace CenterNet 28 | 29 | 30 | #endif // CENTERNET_HPP -------------------------------------------------------------------------------- /src/application/app_dbface/dbface.hpp: 
-------------------------------------------------------------------------------- 1 | #ifndef DBFACE_HPP 2 | #define DBFACE_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace DBFace{ 13 | 14 | using namespace std; 15 | using namespace FaceDetector; 16 | 17 | void image_to_tensor(const cv::Mat& image, shared_ptr& tensor, int ibatch); 18 | 19 | class Infer{ // pure-virtual inference interface 20 | public: 21 | virtual shared_future commit(const cv::Mat& image) = 0; 22 | virtual vector> commits(const vector& images) = 0; 23 | }; 24 | 25 | shared_ptr create_infer(const string& engine_file, int gpuid, float confidence_threshold=0.25f, float nms_threshold=0.5f); 26 | 27 | }; // namespace DBFace 28 | 29 | 30 | #endif // DBFACE_HPP -------------------------------------------------------------------------------- /src/application/app_fall_gcn/fall_gcn.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FALL_GCN_HPP 2 | #define FALL_GCN_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace FallGCN{ 11 | 12 | using namespace std; 13 | using namespace cv; 14 | 15 | typedef tuple, Rect> Input; 16 | 17 | enum class FallState : int{ 18 | Fall = 0, 19 | Stand = 1, 20 | UnCertain = 2 21 | }; 22 | 23 | const char* state_name(FallState state); 24 | 25 | class Infer{ // pure-virtual inference interface 26 | public: 27 | virtual shared_future> commit(const Input& input) = 0; 28 | virtual vector>> commits(const vector& inputs) = 0; 29 | }; 30 | 31 | shared_ptr create_infer(const string& engine_file, int gpuid); 32 | 33 | }; // namespace FallGCN 34 | 35 | #endif // FALL_GCN_HPP -------------------------------------------------------------------------------- /src/application/app_high_performance/alpha_pose_high_perf.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ALPHA_POSE_HIGH_PERF_HPP 2 | #define ALPHA_POSE_HIGH_PERF_HPP 3 | 4 | #include 5 | #include 6 | 
#include 7 | #include 8 | #include 9 | #include "high_performance.hpp" 10 | 11 | namespace AlphaPoseHighPerf{ 12 | 13 | using namespace std; 14 | using namespace cv; 15 | using namespace HighPerformance; 16 | 17 | typedef tuple Input; 18 | 19 | class PointArray : public Data, public vector{ 20 | public: 21 | SetupData(PointArray); 22 | }; 23 | /* Pure-virtual inference interface: commit() takes one Input, commits() takes a batch of Inputs. */ 24 | class Infer{ 25 | public: 26 | virtual shared_future commit(const Input& input) = 0; 27 | virtual vector> commits(const vector& inputs) = 0; 28 | }; 29 | 30 | shared_ptr create_infer(const string& engine_file, int gpuid); 31 | 32 | }; // namespace AlphaPoseHighPerf 33 | 34 | #endif // ALPHA_POSE_HIGH_PERF_HPP -------------------------------------------------------------------------------- /src/application/app_high_performance/yolo_high_perf.hpp: -------------------------------------------------------------------------------- 1 | #ifndef YOLO_HIGHPERF_HPP 2 | #define YOLO_HIGHPERF_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "high_performance.hpp" 10 | 11 | /** 12 | * @brief Deliver the best possible performance 13 | * Supports YoloX and YoloV5 (Type below also lists V3 and V7) 14 | */ 15 | namespace YoloHighPerf{ 16 | 17 | using namespace std; 18 | using namespace HighPerformance; 19 | 20 | enum class Type : int{ 21 | V5 = 0, 22 | X = 1, 23 | V3 = 2, 24 | V7 = 3 25 | }; 26 | 27 | struct Box{ 28 | float left, top, right, bottom, confidence; 29 | int class_label; 30 | 31 | Box() = default; 32 | 33 | Box(float left, float top, float right, float bottom, float confidence, int class_label) 34 | :left(left), top(top), right(right), bottom(bottom), confidence(confidence), class_label(class_label){} 35 | }; 36 | 37 | class BoxArray : public Data, public vector{ 38 | public: 39 | SetupData(BoxArray); 40 | }; 41 | 42 | class Infer{ 43 | public: 44 | virtual shared_future commit(const cv::Mat& image) = 0; 45 | virtual vector> commits(const vector& images) = 0; 46 | }; 47 | 48 | shared_ptr create_infer(const string& engine_file, Type type, int gpuid, float
confidence_threshold=0.25f, float nms_threshold=0.5f); 49 | const char* type_name(Type type); 50 | 51 | }; // namespace YoloHighPerf 52 | 53 | #endif // YOLO_HIGHPERF_HPP -------------------------------------------------------------------------------- /src/application/app_plugin.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include "app_yolo/yolo.hpp" 6 | 7 | using namespace std; 8 | // Compiles and loads the hswish plugin engine, feeds it constant inputs and logs the engine output next to a CPU reference value. 9 | static void test_hswish(TRT::Mode mode){ 10 | 11 | // The plugin.onnx can be generated by the following code 12 | // cd workspace 13 | // python test_plugin.py 14 | iLogger::set_log_level(iLogger::LogLevel::Verbose); 15 | TRT::set_device(0); 16 | 17 | auto mode_name = TRT::mode_string(mode); 18 | auto engine_name = iLogger::format("hswish.plugin.%s.trtmodel", mode_name); 19 | TRT::compile( 20 | mode, 3, "hswish.plugin.onnx", engine_name, {} 21 | ); 22 | 23 | auto engine = TRT::load_infer(engine_name); 24 | if(engine == nullptr){INFOE("Engine is nullptr"); return;} // do not dereference a failed load 25 | engine->print(); 26 | auto input0 = engine->input(0); 27 | auto input1 = engine->input(1); 28 | auto output = engine->output(0); 29 | 30 | INFO("offset %d", output->offset(1, 0)); 31 | INFO("input0: %s", input0->shape_string()); 32 | INFO("input1: %s", input1->shape_string()); 33 | INFO("output: %s", output->shape_string()); 34 | 35 | float input0_val = 0.8; 36 | float input1_val = 2; 37 | input0->set_to(input0_val); 38 | input1->set_to(input1_val); 39 | // CPU reference value: relu(hswish(input0) * input1), logged below for comparison with the engine output. 40 | auto hswish = [](float x){float a = x + 3; a=a<0?0:(a>=6?6:a); return x * a / 6;}; 41 | auto relu = [](float x){return max(0.0f, x);}; 42 | float output_real = relu(hswish(input0_val) * input1_val); 43 | 44 | engine->forward(true); 45 | 46 | INFO("output %f, output_real = %f", output->at(0, 0), output_real); 47 | } 48 | 49 | static void test_dcnv2(TRT::Mode mode){ 50 | 51 | // The plugin.onnx can be generated by the following code 52 | // cd workspace 53 | // python test_plugin.py 54 |
iLogger::set_log_level(iLogger::LogLevel::Verbose); 55 | TRT::set_device(0); 56 | 57 | auto mode_name = TRT::mode_string(mode); 58 | auto engine_name = iLogger::format("dcnv2.plugin.%s.trtmodel", mode_name); 59 | TRT::compile( 60 | mode, 1, "dcnv2.plugin.onnx", engine_name, {} 61 | ); 62 | 63 | auto engine = TRT::load_infer(engine_name); 64 | if(engine == nullptr){INFOE("Engine is nullptr"); return;} /* do not dereference a failed load */ 65 | engine->print(); 66 | auto input0 = engine->input(0); 67 | auto input1 = engine->input(1); 68 | auto output = engine->output(0); 69 | 70 | INFO("input0: %s", input0->shape_string()); 71 | INFO("input1: %s", input1->shape_string()); 72 | INFO("output: %s", output->shape_string()); 73 | 74 | float input0_val = 1; 75 | float input1_val = 1; 76 | input0->set_to(input0_val); 77 | input1->set_to(input1_val); 78 | engine->forward(true); 79 | /* Log every element of the output tensor. */ 80 | for(int i = 0; i < output->count(); ++i) 81 | INFO("output[%d] = %f", i, output->cpu()[i]); 82 | } 83 | /* Entry point of the plugin demo; currently only the DCNv2 test (FP32) is enabled. */ 84 | int app_plugin(){ 85 | 86 | //test_hswish(TRT::Mode::FP32); 87 | test_dcnv2(TRT::Mode::FP32); 88 | //test_hswish(TRT::Mode::FP16); 89 | return 0; 90 | } -------------------------------------------------------------------------------- /src/application/app_retinaface/retinaface.hpp: -------------------------------------------------------------------------------- 1 | #ifndef RETINAFACE_HPP 2 | #define RETINAFACE_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "../common/face_detector.hpp" 10 | 11 | namespace RetinaFace{ 12 | 13 | using namespace std; 14 | using namespace FaceDetector; 15 | 16 | class Infer{ 17 | public: 18 | virtual shared_future commit(const cv::Mat& image) = 0; 19 | virtual vector> commits(const vector& images) = 0; 20 | 21 | }; 22 | 23 | tuple crop_face_and_landmark( 24 | const cv::Mat& image, const Box& box, float scale_box=1.5f 25 | ); 26 | 27 | shared_ptr create_infer(const string& engine_file, int gpuid, float confidence_threshold=0.5f, float nms_threshold=0.5f); 28 | 29 | }; // namespace RetinaFace 30 | 31 | #endif //
RETINAFACE_HPP -------------------------------------------------------------------------------- /src/application/app_scrfd/scrfd.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SCRFD_HPP 2 | #define SCRFD_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "../common/face_detector.hpp" 10 | 11 | namespace Scrfd{ 12 | 13 | using namespace std; 14 | using namespace FaceDetector; 15 | 16 | class Infer{ 17 | public: 18 | virtual shared_future commit(const cv::Mat& image) = 0; 19 | virtual vector> commits(const vector& images) = 0; 20 | }; 21 | 22 | tuple crop_face_and_landmark( 23 | const cv::Mat& image, const Box& box, float scale_box=1.5f 24 | ); 25 | 26 | shared_ptr create_infer(const string& engine_file, int gpuid, float confidence_threshold=0.5f, float nms_threshold=0.5f); 27 | 28 | }; // namespace Scrfd 29 | 30 | #endif // SCRFD_HPP -------------------------------------------------------------------------------- /src/application/app_yolo/multi_gpu.cpp: -------------------------------------------------------------------------------- 1 | #include "multi_gpu.hpp" 2 | #include 3 | #include 4 | #include 5 | 6 | namespace Yolo{ 7 | // Shared multi-GPU state: startup() creates one Yolo infer per requested GPU id and stores it in infers_. 8 | class MultiGPUInferImpl{ 9 | public: 10 | virtual bool startup( 11 | const string& engine_file, Type type, const vector gpuids, 12 | float confidence_threshold, float nms_threshold, 13 | NMSMethod nms_method, int max_objects 14 | ){ 15 | if(gpuids.empty()){ 16 | INFOE("gpuids is empty"); 17 | return false; 18 | } 19 | 20 | if(!iLogger::exists(engine_file)){ 21 | INFOE("Engine file %s not exists", engine_file.c_str()); 22 | return false; 23 | } 24 | 25 | infers_.resize(gpuids.size()); 26 | // Create the per-GPU infers in parallel; the pragma requests one OpenMP thread per entry. 27 | #pragma omp parallel for num_threads(infers_.size()) 28 | for(int i = 0; i < gpuids.size(); ++i){ 29 | auto& gpuid = gpuids[i]; 30 | infers_[i] = Yolo::create_infer( 31 | engine_file, type, gpuid, confidence_threshold, 32 | nms_threshold, nms_method, max_objects 33
| ); 34 | } 35 | /* Fail if any of the per-GPU infers could not be created. */ 36 | for(int i = 0; i < gpuids.size(); ++i){ 37 | if(infers_[i] == nullptr){ 38 | INFOE("Infer create failed, gpuid = %d", gpuids[i]); 39 | return false; 40 | } 41 | } 42 | return true; 43 | } 44 | 45 | protected: 46 | vector> infers_; 47 | }; 48 | /* Dispatcher: every commit()/commits() call is forwarded to the next entry of infers_ (cursor_ is incremented and taken modulo infers_.size()). */ 49 | class BalancedImpl : public MultiGPUInfer, public MultiGPUInferImpl{ 50 | public: 51 | int get_gpu_index(){ 52 | return ((cursor_++) + 1) % infers_.size(); 53 | } 54 | 55 | virtual shared_future commit(const cv::Mat& image) override{ 56 | return infers_[get_gpu_index()]->commit(image); 57 | } 58 | /* The whole batch goes to a single infer; it is not split across GPUs. */ 59 | virtual vector> commits(const vector& images) override{ 60 | return infers_[get_gpu_index()]->commits(images); 61 | } 62 | 63 | private: 64 | atomic cursor_{0}; 65 | }; 66 | /* Builds a BalancedImpl and starts it; the returned pointer is reset (empty) when startup() fails. */ 67 | shared_ptr create_multi_gpu_infer( 68 | const string& engine_file, Type type, const vector gpuids, 69 | float confidence_threshold, float nms_threshold, 70 | NMSMethod nms_method, int max_objects 71 | ){ 72 | shared_ptr instance(new BalancedImpl()); 73 | auto impl = std::dynamic_pointer_cast(instance); 74 | if(!impl->startup( 75 | engine_file, type, gpuids, confidence_threshold, nms_threshold, nms_method, max_objects 76 | )){ 77 | instance.reset(); 78 | } 79 | return instance; 80 | } 81 | 82 | }; -------------------------------------------------------------------------------- /src/application/app_yolo/multi_gpu.hpp: -------------------------------------------------------------------------------- 1 | #ifndef YOLO_MULTI_GPU_HPP 2 | #define YOLO_MULTI_GPU_HPP 3 | 4 | #include "yolo.hpp" 5 | 6 | namespace Yolo{ 7 | 8 | class MultiGPUInfer : public Yolo::Infer{}; 9 | 10 | shared_ptr create_multi_gpu_infer( 11 | const string& engine_file, Type type, const vector gpuids, 12 | float confidence_threshold=0.25f, float nms_threshold=0.5f, 13 | NMSMethod nms_method = NMSMethod::FastGPU, int max_objects = 1024 14 | ); 15 | }; 16 | 17 | 18 | #endif // YOLO_MULTI_GPU_HPP
-------------------------------------------------------------------------------- /src/application/app_yolo/yolo.hpp: -------------------------------------------------------------------------------- 1 | #ifndef YOLO_HPP 2 | #define YOLO_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | /** 13 | * @brief Deliver the best possible performance 14 | * Supports YoloX and YoloV5 (Type below also lists V3 and V7) 15 | */ 16 | namespace Yolo{ 17 | 18 | using namespace std; 19 | using namespace ObjectDetector; 20 | 21 | enum class Type : int{ 22 | V5 = 0, 23 | X = 1, 24 | V3 = 2, 25 | V7 = 3 26 | }; 27 | 28 | enum class NMSMethod : int{ 29 | CPU = 0, // General-purpose; use when estimating mAP 30 | FastGPU = 1 // Fast NMS with a small loss of accuracy in corner cases 31 | }; 32 | 33 | void image_to_tensor(const cv::Mat& image, shared_ptr& tensor, Type type, int ibatch); 34 | 35 | class Infer{ 36 | public: 37 | virtual shared_future commit(const cv::Mat& image) = 0; 38 | virtual vector> commits(const vector& images) = 0; 39 | }; 40 | 41 | shared_ptr create_infer( 42 | const string& engine_file, Type type, int gpuid, 43 | float confidence_threshold=0.25f, float nms_threshold=0.5f, 44 | NMSMethod nms_method = NMSMethod::FastGPU, int max_objects = 1024, 45 | bool use_multi_preprocess_stream = false 46 | ); 47 | const char* type_name(Type type); 48 | 49 | }; // namespace Yolo 50 | 51 | #endif // YOLO_HPP -------------------------------------------------------------------------------- /src/application/app_yolo_fast/yolo_fast.hpp: -------------------------------------------------------------------------------- 1 | #ifndef YOLO_FAST_HPP 2 | #define YOLO_FAST_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | /** 13 | * @brief Deliver the best possible performance 14 | * Supports YoloX and YoloV5 15 | */ 16 | namespace YoloFast{ 17 | 18 | using namespace std; 19 | using namespace ObjectDetector; 20 | 21 | enum class Type : int{ 22 | V5_P5 = 0, 23 | V5_P6 = 1, 24 | X = 2 25 | }; 26 | 27 |
struct DecodeMeta{ 28 | int num_anchor; 29 | int num_level; 30 | float w[16], h[16]; 31 | int strides[16]; 32 | 33 | static DecodeMeta v5_p5_default_meta(); 34 | static DecodeMeta v5_p6_default_meta(); 35 | static DecodeMeta x_default_meta(); 36 | }; 37 | 38 | class Infer{ 39 | public: 40 | virtual shared_future commit(const cv::Mat& image) = 0; 41 | virtual vector> commits(const vector& images) = 0; 42 | }; 43 | 44 | void image_to_tensor(const cv::Mat& image, shared_ptr& tensor, Type type, int ibatch); 45 | 46 | shared_ptr create_infer( 47 | const string& engine_file, 48 | Type type, 49 | int gpuid, 50 | float confidence_threshold=0.25f, 51 | float nms_threshold=0.5f, 52 | const DecodeMeta& meta = DecodeMeta::v5_p5_default_meta() 53 | ); 54 | const char* type_name(Type type); 55 | 56 | }; // namespace YoloFast 57 | 58 | #endif // YOLO_FAST_HPP -------------------------------------------------------------------------------- /src/application/app_yolo_gpuptr/yolo_gpuptr.hpp: -------------------------------------------------------------------------------- 1 | #ifndef YOLO_GPUPTR_HPP 2 | #define YOLO_GPUPTR_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | /** 13 | * @brief Deliver the best possible performance 14 | * Supports YoloX and YoloV5 (Type below also lists V3 and V7), and allows inference directly from a GPU image address 15 | */ 16 | namespace YoloGPUPtr{ 17 | 18 | using namespace std; 19 | using namespace ObjectDetector; 20 | 21 | enum class Type : int{ 22 | V5 = 0, 23 | X = 1, 24 | V3 = 2, 25 | V7 = 3 26 | }; 27 | 28 | enum class NMSMethod : int{ 29 | CPU = 0, // General-purpose; use when estimating mAP 30 | FastGPU = 1 // Fast NMS with a small loss of accuracy in corner cases 31 | }; 32 | 33 | enum class ImageType : int{ 34 | CVMat = 0, 35 | GPUYUVNV12 = 1, // nv12 36 | GPUBGR = 2, // BGR 37 | }; 38 | 39 | struct Image{ 40 | ImageType type = ImageType::CVMat; 41 | cv::Mat cvmat; 42 | 43 | // GPU image fields (used by the GPUYUVNV12 and GPUBGR types) 44 | TRT::CUStream stream = nullptr; 45 | uint8_t* device_data = nullptr; 46 | int width = 0, height = 0; 47 | int
device_id = 0; 48 | 49 | Image() = default; 50 | Image(const cv::Mat& cvmat):type(ImageType::CVMat), cvmat(cvmat){} 51 | Image(uint8_t* data_device, int width, int height, int device_id, TRT::CUStream stream, ImageType type) 52 | :type(type), stream(stream), device_data(data_device), width(width), height(height), device_id(device_id){} 53 | /* Dimensions come from cvmat for CVMat images and from width/height for GPU images. */ 54 | int get_width() const{return type == ImageType::CVMat ? cvmat.cols : width;} 55 | int get_height() const{return type == ImageType::CVMat ? cvmat.rows : height;} 56 | cv::Size get_size() const{return cv::Size(get_width(), get_height());} 57 | bool empty() const{return type == ImageType::CVMat ? cvmat.empty() : (device_data == nullptr || width < 1 || height < 1);} 58 | size_t get_data_size() const{ /* size in bytes; the CVMat case assumes 3 bytes per pixel - TODO confirm against callers */ 59 | switch(type){ 60 | case ImageType::CVMat: return (size_t)get_width() * get_height() * 3; /* width/height stay 0 for CVMat images, so read the size from cvmat */ 61 | case ImageType::GPUYUVNV12: return (size_t)width * height * 3 / 2; /* 1.5 bytes per pixel, kept in integer arithmetic */ 62 | case ImageType::GPUBGR: return (size_t)width * height * 3; 63 | default: return 0; 64 | } 65 | } 66 | }; 67 | 68 | void image_to_tensor(const cv::Mat& image, shared_ptr& tensor, Type type, int ibatch); 69 | 70 | class Infer{ 71 | public: 72 | virtual shared_future commit(const Image& image) = 0; 73 | virtual vector> commits(const vector& images) = 0; 74 | virtual void* stream() = 0; // cudaStream_t 75 | }; 76 | 77 | shared_ptr create_infer( 78 | const string& engine_file, Type type, int gpuid, 79 | float confidence_threshold=0.25f, float nms_threshold=0.5f, 80 | NMSMethod nms_method = NMSMethod::FastGPU, int max_objects = 1024 81 | ); 82 | const char* type_name(Type type); 83 | 84 | }; // namespace YoloGPUPtr 85 | 86 | #endif // YOLO_GPUPTR_HPP -------------------------------------------------------------------------------- /src/application/common/face_detector.hpp: -------------------------------------------------------------------------------- 1 | #ifndef FACE_DETECTOR_HPP 2 | #define FACE_DETECTOR_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace FaceDetector{ 8 | 9 | struct Box{ 10 | float left,
top, right, bottom, confidence; 11 | float landmark[10]; 12 | /* width() and height() clamp at 0, so area() is never negative. */ 13 | cv::Rect cvbox() const{return cv::Rect(left, top, right-left, bottom-top);} 14 | float width() const{return std::max(0.0f, right-left);} 15 | float height() const{return std::max(0.0f, bottom-top);} 16 | float area() const{return width() * height();} 17 | float get_left() const {return left;} 18 | void set_left(float value) {left = value;} 19 | float get_top() const {return top;} 20 | void set_top(float value) {top = value;} 21 | float get_right() const {return right;} 22 | void set_right(float value) {right = value;} 23 | float get_bottom() const {return bottom;} 24 | void set_bottom(float value) {bottom = value;} 25 | float get_confidence() const {return confidence;} 26 | void set_confidence(float value){confidence = value;} 27 | }; 28 | 29 | typedef std::vector BoxArray; 30 | }; 31 | 32 | #endif // FACE_DETECTOR_HPP -------------------------------------------------------------------------------- /src/application/common/object_detector.hpp: -------------------------------------------------------------------------------- 1 | #ifndef OBJECT_DETECTOR_HPP 2 | #define OBJECT_DETECTOR_HPP 3 | 4 | #include 5 | 6 | namespace ObjectDetector{ 7 | 8 | struct Box{ 9 | float left, top, right, bottom, confidence; 10 | int class_label; 11 | 12 | Box() = default; 13 | 14 | Box(float left, float top, float right, float bottom, float confidence, int class_label) 15 | :left(left), top(top), right(right), bottom(bottom), confidence(confidence), class_label(class_label){} 16 | }; 17 | 18 | typedef std::vector BoxArray; 19 | }; 20 | 21 | 22 | #endif // OBJECT_DETECTOR_HPP -------------------------------------------------------------------------------- /src/application/tools/Eigen/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(RegexUtils) 2 | test_escape_string_as_regex() 3 | 4 | file(GLOB Eigen_directory_files "*") 5 | 6 | escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR
"${CMAKE_CURRENT_SOURCE_DIR}") 7 | 8 | foreach(f ${Eigen_directory_files}) 9 | if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/src") 10 | list(APPEND Eigen_directory_files_to_install ${f}) 11 | endif() 12 | endforeach(f ${Eigen_directory_files}) 13 | 14 | install(FILES 15 | ${Eigen_directory_files_to_install} 16 | DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel 17 | ) 18 | 19 | install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h") 20 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/Cholesky: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_CHOLESKY_MODULE_H 9 | #define EIGEN_CHOLESKY_MODULE_H 10 | 11 | #include "Core" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | /** \defgroup Cholesky_Module Cholesky module 16 | * 17 | * 18 | * 19 | * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices. 
20 | * Those decompositions are also accessible via the following methods: 21 | * - MatrixBase::llt() 22 | * - MatrixBase::ldlt() 23 | * - SelfAdjointView::llt() 24 | * - SelfAdjointView::ldlt() 25 | * 26 | * \code 27 | * #include 28 | * \endcode 29 | */ 30 | 31 | #include "src/Cholesky/LLT.h" 32 | #include "src/Cholesky/LDLT.h" 33 | #ifdef EIGEN_USE_LAPACKE 34 | #include "src/misc/lapacke.h" 35 | #include "src/Cholesky/LLT_LAPACKE.h" 36 | #endif 37 | 38 | #include "src/Core/util/ReenableStupidWarnings.h" 39 | 40 | #endif // EIGEN_CHOLESKY_MODULE_H 41 | /* vim: set filetype=cpp et sw=2 ts=2 ai: */ 42 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/CholmodSupport: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_CHOLMODSUPPORT_MODULE_H 9 | #define EIGEN_CHOLMODSUPPORT_MODULE_H 10 | 11 | #include "SparseCore" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | extern "C" { 16 | #include 17 | } 18 | 19 | /** \ingroup Support_modules 20 | * \defgroup CholmodSupport_Module CholmodSupport module 21 | * 22 | * This module provides an interface to the Cholmod library which is part of the suitesparse package. 23 | * It provides the two following main factorization classes: 24 | * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization. 25 | * - class CholmodDecomposition: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial).
26 | * 27 | * For the sake of completeness, this module also provides the two following classes: 28 | * - class CholmodSimplicialLLT 29 | * - class CholmodSimplicialLDLT 30 | * Note that these classes do not bring any particular advantage compared to the built-in 31 | * SimplicialLLT and SimplicialLDLT factorization classes. 32 | * 33 | * \code 34 | * #include 35 | * \endcode 36 | * 37 | * In order to use this module, the cholmod headers must be accessible from the include paths, and your binary must be linked to the cholmod library and its dependencies. 38 | * The dependencies depend on how cholmod has been compiled. 39 | * For a CMake-based project, you can use our FindCholmod.cmake module to help you with this task. 40 | * 41 | */ 42 | 43 | #include "src/CholmodSupport/CholmodSupport.h" 44 | 45 | #include "src/Core/util/ReenableStupidWarnings.h" 46 | 47 | #endif // EIGEN_CHOLMODSUPPORT_MODULE_H 48 | 49 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/Dense: -------------------------------------------------------------------------------- 1 | #include "Core" 2 | #include "LU" 3 | #include "Cholesky" 4 | #include "QR" 5 | #include "SVD" 6 | #include "Geometry" 7 | #include "Eigenvalues" 8 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/Eigen: -------------------------------------------------------------------------------- 1 | #include "Dense" 2 | #include "Sparse" 3 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/Eigenvalues: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0.
If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_EIGENVALUES_MODULE_H 9 | #define EIGEN_EIGENVALUES_MODULE_H 10 | 11 | #include "Core" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | #include "Cholesky" 16 | #include "Jacobi" 17 | #include "Householder" 18 | #include "LU" 19 | #include "Geometry" 20 | 21 | /** \defgroup Eigenvalues_Module Eigenvalues module 22 | * 23 | * 24 | * 25 | * This module mainly provides various eigenvalue solvers. 26 | * This module also provides some MatrixBase methods, including: 27 | * - MatrixBase::eigenvalues(), 28 | * - MatrixBase::operatorNorm() 29 | * 30 | * \code 31 | * #include 32 | * \endcode 33 | */ 34 | 35 | #include "src/misc/RealSvd2x2.h" 36 | #include "src/Eigenvalues/Tridiagonalization.h" 37 | #include "src/Eigenvalues/RealSchur.h" 38 | #include "src/Eigenvalues/EigenSolver.h" 39 | #include "src/Eigenvalues/SelfAdjointEigenSolver.h" 40 | #include "src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h" 41 | #include "src/Eigenvalues/HessenbergDecomposition.h" 42 | #include "src/Eigenvalues/ComplexSchur.h" 43 | #include "src/Eigenvalues/ComplexEigenSolver.h" 44 | #include "src/Eigenvalues/RealQZ.h" 45 | #include "src/Eigenvalues/GeneralizedEigenSolver.h" 46 | #include "src/Eigenvalues/MatrixBaseEigenvalues.h" 47 | #ifdef EIGEN_USE_LAPACKE 48 | #include "src/misc/lapacke.h" 49 | #include "src/Eigenvalues/RealSchur_LAPACKE.h" 50 | #include "src/Eigenvalues/ComplexSchur_LAPACKE.h" 51 | #include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h" 52 | #endif 53 | 54 | #include "src/Core/util/ReenableStupidWarnings.h" 55 | 56 | #endif // EIGEN_EIGENVALUES_MODULE_H 57 | /* vim: set filetype=cpp et sw=2 ts=2 ai: */ 58 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/Geometry: -------------------------------------------------------------------------------- 1 | // 
This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_GEOMETRY_MODULE_H 9 | #define EIGEN_GEOMETRY_MODULE_H 10 | 11 | #include "Core" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | #include "SVD" 16 | #include "LU" 17 | #include 18 | 19 | /** \defgroup Geometry_Module Geometry module 20 | * 21 | * This module provides support for: 22 | * - fixed-size homogeneous transformations 23 | * - translation, scaling, 2D and 3D rotations 24 | * - \link Quaternion quaternions \endlink 25 | * - cross products (\ref MatrixBase::cross, \ref MatrixBase::cross3) 26 | * - orthogonal vector generation (\ref MatrixBase::unitOrthogonal) 27 | * - some linear components: \link ParametrizedLine parametrized-lines \endlink and \link Hyperplane hyperplanes \endlink 28 | * - \link AlignedBox axis-aligned bounding boxes \endlink 29 | * - \link umeyama least-squares transformation fitting \endlink 30 | * 31 | * \code 32 | * #include 33 | * \endcode 34 | */ 35 | 36 | #include "src/Geometry/OrthoMethods.h" 37 | #include "src/Geometry/EulerAngles.h" 38 | 39 | #include "src/Geometry/Homogeneous.h" 40 | #include "src/Geometry/RotationBase.h" 41 | #include "src/Geometry/Rotation2D.h" 42 | #include "src/Geometry/Quaternion.h" 43 | #include "src/Geometry/AngleAxis.h" 44 | #include "src/Geometry/Transform.h" 45 | #include "src/Geometry/Translation.h" 46 | #include "src/Geometry/Scaling.h" 47 | #include "src/Geometry/Hyperplane.h" 48 | #include "src/Geometry/ParametrizedLine.h" 49 | #include "src/Geometry/AlignedBox.h" 50 | #include "src/Geometry/Umeyama.h" 51 | 52 | // Use the SSE optimized version whenever possible.
At the moment the 53 | // SSE version doesn't compile when AVX is enabled 54 | #if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX 55 | #include "src/Geometry/arch/Geometry_SSE.h" 56 | #endif 57 | 58 | #include "src/Core/util/ReenableStupidWarnings.h" 59 | 60 | #endif // EIGEN_GEOMETRY_MODULE_H 61 | /* vim: set filetype=cpp et sw=2 ts=2 ai: */ 62 | 63 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/Householder: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_HOUSEHOLDER_MODULE_H 9 | #define EIGEN_HOUSEHOLDER_MODULE_H 10 | 11 | #include "Core" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | /** \defgroup Householder_Module Householder module 16 | * This module provides Householder transformations. 17 | * 18 | * \code 19 | * #include 20 | * \endcode 21 | */ 22 | 23 | #include "src/Householder/Householder.h" 24 | #include "src/Householder/HouseholderSequence.h" 25 | #include "src/Householder/BlockHouseholder.h" 26 | 27 | #include "src/Core/util/ReenableStupidWarnings.h" 28 | 29 | #endif // EIGEN_HOUSEHOLDER_MODULE_H 30 | /* vim: set filetype=cpp et sw=2 ts=2 ai: */ 31 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/IterativeLinearSolvers: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. 
If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H 9 | #define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H 10 | 11 | #include "SparseCore" 12 | #include "OrderingMethods" 13 | 14 | #include "src/Core/util/DisableStupidWarnings.h" 15 | 16 | /** 17 | * \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module 18 | * 19 | * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a square matrix, usually very large and sparse. 20 | * Those solvers are accessible via the following classes: 21 | * - ConjugateGradient for selfadjoint (hermitian) matrices, 22 | * - LeastSquaresConjugateGradient for rectangular least-squares problems, 23 | * - BiCGSTAB for general square matrices. 24 | * 25 | * These iterative solvers are associated with some preconditioners: 26 | * - IdentityPreconditioner - not really useful 27 | * - DiagonalPreconditioner - also called Jacobi preconditioner, works very well on diagonally dominant matrices. 28 | * - IncompleteLUT - incomplete LU factorization with dual thresholding 29 | * 30 | * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
31 | * 32 | \code 33 | #include 34 | \endcode 35 | */ 36 | 37 | #include "src/IterativeLinearSolvers/SolveWithGuess.h" 38 | #include "src/IterativeLinearSolvers/IterativeSolverBase.h" 39 | #include "src/IterativeLinearSolvers/BasicPreconditioners.h" 40 | #include "src/IterativeLinearSolvers/ConjugateGradient.h" 41 | #include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h" 42 | #include "src/IterativeLinearSolvers/BiCGSTAB.h" 43 | #include "src/IterativeLinearSolvers/IncompleteLUT.h" 44 | #include "src/IterativeLinearSolvers/IncompleteCholesky.h" 45 | 46 | #include "src/Core/util/ReenableStupidWarnings.h" 47 | 48 | #endif // EIGEN_ITERATIVELINEARSOLVERS_MODULE_H 49 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/Jacobi: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_JACOBI_MODULE_H 9 | #define EIGEN_JACOBI_MODULE_H 10 | 11 | #include "Core" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | /** \defgroup Jacobi_Module Jacobi module 16 | * This module provides Jacobi and Givens rotations. 17 | * 18 | * \code 19 | * #include 20 | * \endcode 21 | * 22 | * In addition to listed classes, it defines the two following MatrixBase methods to apply a Jacobi or Givens rotation: 23 | * - MatrixBase::applyOnTheLeft() 24 | * - MatrixBase::applyOnTheRight(). 
25 | */ 26 | 27 | #include "src/Jacobi/Jacobi.h" 28 | 29 | #include "src/Core/util/ReenableStupidWarnings.h" 30 | 31 | #endif // EIGEN_JACOBI_MODULE_H 32 | /* vim: set filetype=cpp et sw=2 ts=2 ai: */ 33 | 34 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/LU: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_LU_MODULE_H 9 | #define EIGEN_LU_MODULE_H 10 | 11 | #include "Core" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | /** \defgroup LU_Module LU module 16 | * This module includes %LU decomposition and related notions such as matrix inversion and determinant. 17 | * This module defines the following MatrixBase methods: 18 | * - MatrixBase::inverse() 19 | * - MatrixBase::determinant() 20 | * 21 | * \code 22 | * #include 23 | * \endcode 24 | */ 25 | 26 | #include "src/misc/Kernel.h" 27 | #include "src/misc/Image.h" 28 | #include "src/LU/FullPivLU.h" 29 | #include "src/LU/PartialPivLU.h" 30 | #ifdef EIGEN_USE_LAPACKE 31 | #include "src/misc/lapacke.h" 32 | #include "src/LU/PartialPivLU_LAPACKE.h" 33 | #endif 34 | #include "src/LU/Determinant.h" 35 | #include "src/LU/InverseImpl.h" 36 | 37 | // Use the SSE optimized version whenever possible. 
At the moment the 38 | // SSE version doesn't compile when AVX is enabled 39 | #if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX 40 | #include "src/LU/arch/Inverse_SSE.h" 41 | #endif 42 | 43 | #include "src/Core/util/ReenableStupidWarnings.h" 44 | 45 | #endif // EIGEN_LU_MODULE_H 46 | /* vim: set filetype=cpp et sw=2 ts=2 ai: */ 47 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/MetisSupport: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_METISSUPPORT_MODULE_H 9 | #define EIGEN_METISSUPPORT_MODULE_H 10 | 11 | #include "SparseCore" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | extern "C" { 16 | #include 17 | } 18 | 19 | 20 | /** \ingroup Support_modules 21 | * \defgroup MetisSupport_Module MetisSupport module 22 | * 23 | * \code 24 | * #include 25 | * \endcode 26 | * This module defines an interface to the METIS reordering package (http://glaros.dtc.umn.edu/gkhome/views/metis). 27 | * It can be used just as any other built-in method as explained in \link OrderingMethods_Module here. \endlink 28 | */ 29 | 30 | 31 | #include "src/MetisSupport/MetisSupport.h" 32 | 33 | #include "src/Core/util/ReenableStupidWarnings.h" 34 | 35 | #endif // EIGEN_METISSUPPORT_MODULE_H 36 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/OrderingMethods: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 
3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_ORDERINGMETHODS_MODULE_H 9 | #define EIGEN_ORDERINGMETHODS_MODULE_H 10 | 11 | #include "SparseCore" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | /** 16 | * \defgroup OrderingMethods_Module OrderingMethods module 17 | * 18 | * This module is currently for internal use only 19 | * 20 | * It defines various built-in and external ordering methods for sparse matrices. 21 | * They are typically used to reduce the number of nonzero elements created during 22 | * the sparse matrix decomposition (LLT, LU, QR). 23 | * Precisely, in a preprocessing step, a permutation matrix P is computed using 24 | * those ordering methods and applied to the columns of the matrix. 25 | * Using for instance the sparse Cholesky decomposition, it is expected that 26 | * the number of nonzero elements in LLT(A*P) will be much smaller than in LLT(A). 27 | * 28 | * 29 | * Usage : 30 | * \code 31 | * #include 32 | * \endcode 33 | * 34 | * A simple usage is as a template parameter in the sparse decomposition classes : 35 | * 36 | * \code 37 | * SparseLU > solver; 38 | * \endcode 39 | * 40 | * \code 41 | * SparseQR > solver; 42 | * \endcode 43 | * 44 | * It is possible as well to call directly a particular ordering method for your own purpose, 45 | * \code 46 | * AMDOrdering ordering; 47 | * PermutationMatrix perm; 48 | * SparseMatrix A; 49 | * //Fill the matrix ... 50 | * 51 | * ordering(A, perm); // Call AMD 52 | * \endcode 53 | * 54 | * \note Some of these methods (like AMD or METIS), need the sparsity pattern 55 | * of the input matrix to be symmetric. When the matrix is structurally unsymmetric, 56 | * Eigen computes internally the pattern of \f$A^T*A\f$ before calling the method. 
57 | * If your matrix is already symmetric (at least in structure), you can avoid that 58 | * by calling the method with a SelfAdjointView type. 59 | * 60 | * \code 61 | * // Call the ordering on the pattern of the lower triangular matrix A 62 | * ordering(A.selfadjointView<Lower>(), perm); 63 | * \endcode 64 | */ 65 | 66 | #ifndef EIGEN_MPL2_ONLY 67 | #include "src/OrderingMethods/Amd.h" 68 | #endif 69 | 70 | #include "src/OrderingMethods/Ordering.h" 71 | #include "src/Core/util/ReenableStupidWarnings.h" 72 | 73 | #endif // EIGEN_ORDERINGMETHODS_MODULE_H 74 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/PaStiXSupport: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_PASTIXSUPPORT_MODULE_H 9 | #define EIGEN_PASTIXSUPPORT_MODULE_H 10 | 11 | #include "SparseCore" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | extern "C" { 16 | #include 17 | #include 18 | } 19 | 20 | #ifdef complex 21 | #undef complex 22 | #endif 23 | 24 | /** \ingroup Support_modules 25 | * \defgroup PaStiXSupport_Module PaStiXSupport module 26 | * 27 | * This module provides an interface to the PaSTiX library. 28 | * PaSTiX is a general \b supernodal, \b parallel and \b opensource sparse solver. 29 | * It provides the following main factorization classes: 30 | * - class PastixLLT : a supernodal, parallel LLt Cholesky factorization. 31 | * - class PastixLDLT: a supernodal, parallel LDLt Cholesky factorization. 32 | * - class PastixLU : a supernodal, parallel LU factorization (optimized for a symmetric pattern). 
33 | * 34 | * \code 35 | * #include 36 | * \endcode 37 | * 38 | * In order to use this module, the PaSTiX headers must be accessible from the include paths, and your binary must be linked to the PaSTiX library and its dependencies. 39 | * The dependencies depend on how PaSTiX has been compiled. 40 | * For a cmake based project, you can use our FindPaSTiX.cmake module to help you in this task. 41 | * 42 | */ 43 | 44 | #include "src/PaStiXSupport/PaStiXSupport.h" 45 | 46 | #include "src/Core/util/ReenableStupidWarnings.h" 47 | 48 | #endif // EIGEN_PASTIXSUPPORT_MODULE_H 49 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/PardisoSupport: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_PARDISOSUPPORT_MODULE_H 9 | #define EIGEN_PARDISOSUPPORT_MODULE_H 10 | 11 | #include "SparseCore" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | #include 16 | 17 | /** \ingroup Support_modules 18 | * \defgroup PardisoSupport_Module PardisoSupport module 19 | * 20 | * This module brings support for the Intel(R) MKL PARDISO direct sparse solvers. 21 | * 22 | * \code 23 | * #include 24 | * \endcode 25 | * 26 | * In order to use this module, the MKL headers must be accessible from the include paths, and your binary must be linked to the MKL library and its dependencies. 27 | * See this \ref TopicUsingIntelMKL "page" for more information on MKL-Eigen integration. 
28 | * 29 | */ 30 | 31 | #include "src/PardisoSupport/PardisoSupport.h" 32 | 33 | #include "src/Core/util/ReenableStupidWarnings.h" 34 | 35 | #endif // EIGEN_PARDISOSUPPORT_MODULE_H 36 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/QR: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_QR_MODULE_H 9 | #define EIGEN_QR_MODULE_H 10 | 11 | #include "Core" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | #include "Cholesky" 16 | #include "Jacobi" 17 | #include "Householder" 18 | 19 | /** \defgroup QR_Module QR module 20 | * 21 | * 22 | * 23 | * This module provides various QR decompositions 24 | * This module also provides some MatrixBase methods, including: 25 | * - MatrixBase::householderQr() 26 | * - MatrixBase::colPivHouseholderQr() 27 | * - MatrixBase::fullPivHouseholderQr() 28 | * 29 | * \code 30 | * #include 31 | * \endcode 32 | */ 33 | 34 | #include "src/QR/HouseholderQR.h" 35 | #include "src/QR/FullPivHouseholderQR.h" 36 | #include "src/QR/ColPivHouseholderQR.h" 37 | #include "src/QR/CompleteOrthogonalDecomposition.h" 38 | #ifdef EIGEN_USE_LAPACKE 39 | #include "src/misc/lapacke.h" 40 | #include "src/QR/HouseholderQR_LAPACKE.h" 41 | #include "src/QR/ColPivHouseholderQR_LAPACKE.h" 42 | #endif 43 | 44 | #include "src/Core/util/ReenableStupidWarnings.h" 45 | 46 | #endif // EIGEN_QR_MODULE_H 47 | /* vim: set filetype=cpp et sw=2 ts=2 ai: */ 48 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/QtAlignedMalloc: 
-------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_QTMALLOC_MODULE_H 9 | #define EIGEN_QTMALLOC_MODULE_H 10 | 11 | #include "Core" 12 | 13 | #if (!EIGEN_MALLOC_ALREADY_ALIGNED) 14 | 15 | #include "src/Core/util/DisableStupidWarnings.h" 16 | 17 | void *qMalloc(std::size_t size) 18 | { 19 | return Eigen::internal::aligned_malloc(size); 20 | } 21 | 22 | void qFree(void *ptr) 23 | { 24 | Eigen::internal::aligned_free(ptr); 25 | } 26 | 27 | void *qRealloc(void *ptr, std::size_t size) 28 | { 29 | void* newPtr = Eigen::internal::aligned_malloc(size); 30 | memcpy(newPtr, ptr, size); 31 | Eigen::internal::aligned_free(ptr); 32 | return newPtr; 33 | } 34 | 35 | #include "src/Core/util/ReenableStupidWarnings.h" 36 | 37 | #endif 38 | 39 | #endif // EIGEN_QTMALLOC_MODULE_H 40 | /* vim: set filetype=cpp et sw=2 ts=2 ai: */ 41 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/SPQRSupport: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
7 | 8 | #ifndef EIGEN_SPQRSUPPORT_MODULE_H 9 | #define EIGEN_SPQRSUPPORT_MODULE_H 10 | 11 | #include "SparseCore" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | #include "SuiteSparseQR.hpp" 16 | 17 | /** \ingroup Support_modules 18 | * \defgroup SPQRSupport_Module SuiteSparseQR module 19 | * 20 | * This module provides an interface to the SPQR library, which is part of the suitesparse package. 21 | * 22 | * \code 23 | * #include 24 | * \endcode 25 | * 26 | * In order to use this module, the SPQR headers must be accessible from the include paths, and your binary must be linked to the SPQR library and its dependencies (Cholmod, AMD, COLAMD,...). 27 | * For a cmake based project, you can use our FindSPQR.cmake and FindCholmod.Cmake modules 28 | * 29 | */ 30 | 31 | #include "src/CholmodSupport/CholmodSupport.h" 32 | #include "src/SPQRSupport/SuiteSparseQRSupport.h" 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/SVD: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_SVD_MODULE_H 9 | #define EIGEN_SVD_MODULE_H 10 | 11 | #include "QR" 12 | #include "Householder" 13 | #include "Jacobi" 14 | 15 | #include "src/Core/util/DisableStupidWarnings.h" 16 | 17 | /** \defgroup SVD_Module SVD module 18 | * 19 | * 20 | * 21 | * This module provides SVD decomposition for matrices (both real and complex). 22 | * Two decomposition algorithms are provided: 23 | * - JacobiSVD implementing two-sided Jacobi iterations is numerically very accurate, fast for small matrices, but very slow for larger ones. 
24 | * - BDCSVD implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast for large problems. 25 | * These decompositions are accessible via the respective classes and following MatrixBase methods: 26 | * - MatrixBase::jacobiSvd() 27 | * - MatrixBase::bdcSvd() 28 | * 29 | * \code 30 | * #include 31 | * \endcode 32 | */ 33 | 34 | #include "src/misc/RealSvd2x2.h" 35 | #include "src/SVD/UpperBidiagonalization.h" 36 | #include "src/SVD/SVDBase.h" 37 | #include "src/SVD/JacobiSVD.h" 38 | #include "src/SVD/BDCSVD.h" 39 | #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) 40 | #include "src/misc/lapacke.h" 41 | #include "src/SVD/JacobiSVD_LAPACKE.h" 42 | #endif 43 | 44 | #include "src/Core/util/ReenableStupidWarnings.h" 45 | 46 | #endif // EIGEN_SVD_MODULE_H 47 | /* vim: set filetype=cpp et sw=2 ts=2 ai: */ 48 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/Sparse: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
7 | 8 | #ifndef EIGEN_SPARSE_MODULE_H 9 | #define EIGEN_SPARSE_MODULE_H 10 | 11 | /** \defgroup Sparse_Module Sparse meta-module 12 | * 13 | * Meta-module including all related modules: 14 | * - \ref SparseCore_Module 15 | * - \ref OrderingMethods_Module 16 | * - \ref SparseCholesky_Module 17 | * - \ref SparseLU_Module 18 | * - \ref SparseQR_Module 19 | * - \ref IterativeLinearSolvers_Module 20 | * 21 | \code 22 | #include 23 | \endcode 24 | */ 25 | 26 | #include "SparseCore" 27 | #include "OrderingMethods" 28 | #ifndef EIGEN_MPL2_ONLY 29 | #include "SparseCholesky" 30 | #endif 31 | #include "SparseLU" 32 | #include "SparseQR" 33 | #include "IterativeLinearSolvers" 34 | 35 | #endif // EIGEN_SPARSE_MODULE_H 36 | 37 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/SparseCholesky: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2008-2013 Gael Guennebaud 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | #ifndef EIGEN_SPARSECHOLESKY_MODULE_H 11 | #define EIGEN_SPARSECHOLESKY_MODULE_H 12 | 13 | #include "SparseCore" 14 | #include "OrderingMethods" 15 | 16 | #include "src/Core/util/DisableStupidWarnings.h" 17 | 18 | /** 19 | * \defgroup SparseCholesky_Module SparseCholesky module 20 | * 21 | * This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) matrices. 22 | * Those decompositions are accessible via the following classes: 23 | * - SimplicialLLt, 24 | * - SimplicialLDLt 25 | * 26 | * Such problems can also be solved using the ConjugateGradient solver from the IterativeLinearSolvers module. 
27 | * 28 | * \code 29 | * #include 30 | * \endcode 31 | */ 32 | 33 | #ifdef EIGEN_MPL2_ONLY 34 | #error The SparseCholesky module has nothing to offer in MPL2 only mode 35 | #endif 36 | 37 | #include "src/SparseCholesky/SimplicialCholesky.h" 38 | 39 | #ifndef EIGEN_MPL2_ONLY 40 | #include "src/SparseCholesky/SimplicialCholesky_impl.h" 41 | #endif 42 | 43 | #include "src/Core/util/ReenableStupidWarnings.h" 44 | 45 | #endif // EIGEN_SPARSECHOLESKY_MODULE_H 46 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/SparseCore: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_SPARSECORE_MODULE_H 9 | #define EIGEN_SPARSECORE_MODULE_H 10 | 11 | #include "Core" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | /** 22 | * \defgroup SparseCore_Module SparseCore module 23 | * 24 | * This module provides a sparse matrix representation, and basic associated matrix manipulations 25 | * and operations. 26 | * 27 | * See the \ref TutorialSparse "Sparse tutorial" 28 | * 29 | * \code 30 | * #include 31 | * \endcode 32 | * 33 | * This module depends on: Core. 
34 | */ 35 | 36 | #include "src/SparseCore/SparseUtil.h" 37 | #include "src/SparseCore/SparseMatrixBase.h" 38 | #include "src/SparseCore/SparseAssign.h" 39 | #include "src/SparseCore/CompressedStorage.h" 40 | #include "src/SparseCore/AmbiVector.h" 41 | #include "src/SparseCore/SparseCompressedBase.h" 42 | #include "src/SparseCore/SparseMatrix.h" 43 | #include "src/SparseCore/SparseMap.h" 44 | #include "src/SparseCore/MappedSparseMatrix.h" 45 | #include "src/SparseCore/SparseVector.h" 46 | #include "src/SparseCore/SparseRef.h" 47 | #include "src/SparseCore/SparseCwiseUnaryOp.h" 48 | #include "src/SparseCore/SparseCwiseBinaryOp.h" 49 | #include "src/SparseCore/SparseTranspose.h" 50 | #include "src/SparseCore/SparseBlock.h" 51 | #include "src/SparseCore/SparseDot.h" 52 | #include "src/SparseCore/SparseRedux.h" 53 | #include "src/SparseCore/SparseView.h" 54 | #include "src/SparseCore/SparseDiagonalProduct.h" 55 | #include "src/SparseCore/ConservativeSparseSparseProduct.h" 56 | #include "src/SparseCore/SparseSparseProductWithPruning.h" 57 | #include "src/SparseCore/SparseProduct.h" 58 | #include "src/SparseCore/SparseDenseProduct.h" 59 | #include "src/SparseCore/SparseSelfAdjointView.h" 60 | #include "src/SparseCore/SparseTriangularView.h" 61 | #include "src/SparseCore/TriangularSolver.h" 62 | #include "src/SparseCore/SparsePermutation.h" 63 | #include "src/SparseCore/SparseFuzzy.h" 64 | #include "src/SparseCore/SparseSolverBase.h" 65 | 66 | #include "src/Core/util/ReenableStupidWarnings.h" 67 | 68 | #endif // EIGEN_SPARSECORE_MODULE_H 69 | 70 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/SparseLU: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 
3 | // 4 | // Copyright (C) 2012 Désiré Nuentsa-Wakam 5 | // Copyright (C) 2012 Gael Guennebaud 6 | // 7 | // This Source Code Form is subject to the terms of the Mozilla 8 | // Public License v. 2.0. If a copy of the MPL was not distributed 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10 | 11 | #ifndef EIGEN_SPARSELU_MODULE_H 12 | #define EIGEN_SPARSELU_MODULE_H 13 | 14 | #include "SparseCore" 15 | 16 | /** 17 | * \defgroup SparseLU_Module SparseLU module 18 | * This module defines a supernodal factorization of general sparse matrices. 19 | * The code is fully optimized for supernode-panel updates with specialized kernels. 20 | * Please, see the documentation of the SparseLU class for more details. 21 | */ 22 | 23 | // Ordering interface 24 | #include "OrderingMethods" 25 | 26 | #include "src/SparseLU/SparseLU_gemm_kernel.h" 27 | 28 | #include "src/SparseLU/SparseLU_Structs.h" 29 | #include "src/SparseLU/SparseLU_SupernodalMatrix.h" 30 | #include "src/SparseLU/SparseLUImpl.h" 31 | #include "src/SparseCore/SparseColEtree.h" 32 | #include "src/SparseLU/SparseLU_Memory.h" 33 | #include "src/SparseLU/SparseLU_heap_relax_snode.h" 34 | #include "src/SparseLU/SparseLU_relax_snode.h" 35 | #include "src/SparseLU/SparseLU_pivotL.h" 36 | #include "src/SparseLU/SparseLU_panel_dfs.h" 37 | #include "src/SparseLU/SparseLU_kernel_bmod.h" 38 | #include "src/SparseLU/SparseLU_panel_bmod.h" 39 | #include "src/SparseLU/SparseLU_column_dfs.h" 40 | #include "src/SparseLU/SparseLU_column_bmod.h" 41 | #include "src/SparseLU/SparseLU_copy_to_ucol.h" 42 | #include "src/SparseLU/SparseLU_pruneL.h" 43 | #include "src/SparseLU/SparseLU_Utils.h" 44 | #include "src/SparseLU/SparseLU.h" 45 | 46 | #endif // EIGEN_SPARSELU_MODULE_H 47 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/SparseQR: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, 
a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_SPARSEQR_MODULE_H 9 | #define EIGEN_SPARSEQR_MODULE_H 10 | 11 | #include "SparseCore" 12 | #include "OrderingMethods" 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | /** \defgroup SparseQR_Module SparseQR module 16 | * \brief Provides QR decomposition for sparse matrices 17 | * 18 | * This module provides a simplicial version of the left-looking Sparse QR decomposition. 19 | * The columns of the input matrix should be reordered to limit the fill-in during the 20 | * decomposition. Built-in methods (COLAMD, AMD) or external methods (METIS) can be used to this end. 21 | * See the \link OrderingMethods_Module OrderingMethods\endlink module for the list 22 | * of built-in and external ordering methods. 23 | * 24 | * \code 25 | * #include 26 | * \endcode 27 | * 28 | * 29 | */ 30 | 31 | #include "OrderingMethods" 32 | #include "src/SparseCore/SparseColEtree.h" 33 | #include "src/SparseQR/SparseQR.h" 34 | 35 | #include "src/Core/util/ReenableStupidWarnings.h" 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/StdDeque: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2009 Gael Guennebaud 5 | // Copyright (C) 2009 Hauke Heibel 6 | // 7 | // This Source Code Form is subject to the terms of the Mozilla 8 | // Public License v. 2.0. If a copy of the MPL was not distributed 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
10 | 11 | #ifndef EIGEN_STDDEQUE_MODULE_H 12 | #define EIGEN_STDDEQUE_MODULE_H 13 | 14 | #include "Core" 15 | #include 16 | 17 | #if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ 18 | 19 | #define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) 20 | 21 | #else 22 | 23 | #include "src/StlSupport/StdDeque.h" 24 | 25 | #endif 26 | 27 | #endif // EIGEN_STDDEQUE_MODULE_H 28 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/StdList: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2009 Hauke Heibel 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | #ifndef EIGEN_STDLIST_MODULE_H 11 | #define EIGEN_STDLIST_MODULE_H 12 | 13 | #include "Core" 14 | #include 15 | 16 | #if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ 17 | 18 | #define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) 19 | 20 | #else 21 | 22 | #include "src/StlSupport/StdList.h" 23 | 24 | #endif 25 | 26 | #endif // EIGEN_STDLIST_MODULE_H 27 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/StdVector: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2009 Gael Guennebaud 5 | // Copyright (C) 2009 Hauke Heibel 6 | // 7 | // This Source Code Form is subject to the terms of the Mozilla 8 | // Public License v. 2.0. 
If a copy of the MPL was not distributed 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10 | 11 | #ifndef EIGEN_STDVECTOR_MODULE_H 12 | #define EIGEN_STDVECTOR_MODULE_H 13 | 14 | #include "Core" 15 | #include 16 | 17 | #if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ 18 | 19 | #define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...) 20 | 21 | #else 22 | 23 | #include "src/StlSupport/StdVector.h" 24 | 25 | #endif 26 | 27 | #endif // EIGEN_STDVECTOR_MODULE_H 28 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/SuperLUSupport: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_SUPERLUSUPPORT_MODULE_H 9 | #define EIGEN_SUPERLUSUPPORT_MODULE_H 10 | 11 | #include "SparseCore" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | #ifdef EMPTY 16 | #define EIGEN_EMPTY_WAS_ALREADY_DEFINED 17 | #endif 18 | 19 | typedef int int_t; 20 | #include 21 | #include 22 | #include 23 | 24 | // slu_util.h defines a preprocessor token named EMPTY which is really polluting, 25 | // so we remove it in favor of a SUPERLU_EMPTY token. 26 | // If EMPTY was already defined then we don't undef it. 
27 | 28 | #if defined(EIGEN_EMPTY_WAS_ALREADY_DEFINED) 29 | # undef EIGEN_EMPTY_WAS_ALREADY_DEFINED 30 | #elif defined(EMPTY) 31 | # undef EMPTY 32 | #endif 33 | 34 | #define SUPERLU_EMPTY (-1) 35 | 36 | namespace Eigen { struct SluMatrix; } 37 | 38 | /** \ingroup Support_modules 39 | * \defgroup SuperLUSupport_Module SuperLUSupport module 40 | * 41 | * This module provides an interface to the SuperLU library. 42 | * It provides the following factorization class: 43 | * - class SuperLU: a supernodal sequential LU factorization. 44 | * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods). 45 | * 46 | * \warning This wrapper requires at least versions 4.0 of SuperLU. The 3.x versions are not supported. 47 | * 48 | * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting. 49 | * 50 | * \code 51 | * #include 52 | * \endcode 53 | * 54 | * In order to use this module, the superlu headers must be accessible from the include paths, and your binary must be linked to the superlu library and its dependencies. 55 | * The dependencies depend on how superlu has been compiled. 56 | * For a cmake based project, you can use our FindSuperLU.cmake module to help you in this task. 57 | * 58 | */ 59 | 60 | #include "src/SuperLUSupport/SuperLUSupport.h" 61 | 62 | #include "src/Core/util/ReenableStupidWarnings.h" 63 | 64 | #endif // EIGEN_SUPERLUSUPPORT_MODULE_H 65 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/UmfPackSupport: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. 
If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 7 | 8 | #ifndef EIGEN_UMFPACKSUPPORT_MODULE_H 9 | #define EIGEN_UMFPACKSUPPORT_MODULE_H 10 | 11 | #include "SparseCore" 12 | 13 | #include "src/Core/util/DisableStupidWarnings.h" 14 | 15 | extern "C" { 16 | #include 17 | } 18 | 19 | /** \ingroup Support_modules 20 | * \defgroup UmfPackSupport_Module UmfPackSupport module 21 | * 22 | * This module provides an interface to the UmfPack library which is part of the suitesparse package. 23 | * It provides the following factorization class: 24 | * - class UmfPackLU: a multifrontal sequential LU factorization. 25 | * 26 | * \code 27 | * #include 28 | * \endcode 29 | * 30 | * In order to use this module, the umfpack headers must be accessible from the include paths, and your binary must be linked to the umfpack library and its dependencies. 31 | * The dependencies depend on how umfpack has been compiled. 32 | * For a cmake based project, you can use our FindUmfPack.cmake module to help you in this task. 33 | * 34 | */ 35 | 36 | #include "src/UmfPackSupport/UmfPackSupport.h" 37 | 38 | #include "src/Core/util/ReenableStupidWarnings.h" 39 | 40 | #endif // EIGEN_UMFPACKSUPPORT_MODULE_H 41 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/Assign.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2007 Michael Olbrich 5 | // Copyright (C) 2006-2010 Benoit Jacob 6 | // Copyright (C) 2008 Gael Guennebaud 7 | // 8 | // This Source Code Form is subject to the terms of the Mozilla 9 | // Public License v. 2.0. If a copy of the MPL was not distributed 10 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
11 | 12 | #ifndef EIGEN_ASSIGN_H 13 | #define EIGEN_ASSIGN_H 14 | 15 | namespace Eigen { 16 | 17 | template 18 | template 19 | EIGEN_STRONG_INLINE Derived& DenseBase 20 | ::lazyAssign(const DenseBase& other) 21 | { 22 | enum{ 23 | SameType = internal::is_same::value 24 | }; 25 | 26 | EIGEN_STATIC_ASSERT_LVALUE(Derived) 27 | EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived) 28 | EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) 29 | 30 | eigen_assert(rows() == other.rows() && cols() == other.cols()); 31 | internal::call_assignment_no_alias(derived(),other.derived()); 32 | 33 | return derived(); 34 | } 35 | 36 | template 37 | template 38 | EIGEN_DEVICE_FUNC 39 | EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) 40 | { 41 | internal::call_assignment(derived(), other.derived()); 42 | return derived(); 43 | } 44 | 45 | template 46 | EIGEN_DEVICE_FUNC 47 | EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) 48 | { 49 | internal::call_assignment(derived(), other.derived()); 50 | return derived(); 51 | } 52 | 53 | template 54 | EIGEN_DEVICE_FUNC 55 | EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const MatrixBase& other) 56 | { 57 | internal::call_assignment(derived(), other.derived()); 58 | return derived(); 59 | } 60 | 61 | template 62 | template 63 | EIGEN_DEVICE_FUNC 64 | EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const DenseBase& other) 65 | { 66 | internal::call_assignment(derived(), other.derived()); 67 | return derived(); 68 | } 69 | 70 | template 71 | template 72 | EIGEN_DEVICE_FUNC 73 | EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const EigenBase& other) 74 | { 75 | internal::call_assignment(derived(), other.derived()); 76 | return derived(); 77 | } 78 | 79 | template 80 | template 81 | EIGEN_DEVICE_FUNC 82 | EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const ReturnByValue& other) 83 | { 84 | 
other.derived().evalTo(derived()); 85 | return derived(); 86 | } 87 | 88 | } // end namespace Eigen 89 | 90 | #endif // EIGEN_ASSIGN_H 91 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/DiagonalProduct.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2008 Gael Guennebaud 5 | // Copyright (C) 2007-2009 Benoit Jacob 6 | // 7 | // This Source Code Form is subject to the terms of the Mozilla 8 | // Public License v. 2.0. If a copy of the MPL was not distributed 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10 | 11 | #ifndef EIGEN_DIAGONALPRODUCT_H 12 | #define EIGEN_DIAGONALPRODUCT_H 13 | 14 | namespace Eigen { 15 | 16 | /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal. 17 | */ 18 | template 19 | template 20 | inline const Product 21 | MatrixBase::operator*(const DiagonalBase &a_diagonal) const 22 | { 23 | return Product(derived(),a_diagonal.derived()); 24 | } 25 | 26 | } // end namespace Eigen 27 | 28 | #endif // EIGEN_DIAGONALPRODUCT_H 29 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/MathFunctionsImpl.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) 5 | // Copyright (C) 2016 Gael Guennebaud 6 | // 7 | // This Source Code Form is subject to the terms of the Mozilla 8 | // Public License v. 2.0. If a copy of the MPL was not distributed 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
10 | 11 | #ifndef EIGEN_MATHFUNCTIONSIMPL_H 12 | #define EIGEN_MATHFUNCTIONSIMPL_H 13 | 14 | namespace Eigen { 15 | 16 | namespace internal { 17 | 18 | /** \internal \returns the hyperbolic tan of \a a (coeff-wise) 19 | Doesn't do anything fancy, just a 13/6-degree rational interpolant which 20 | is accurate up to a couple of ulp in the range [-9, 9], outside of which 21 | the tanh(x) = +/-1. 22 | 23 | This implementation works on both scalars and packets. 24 | */ 25 | template 26 | T generic_fast_tanh_float(const T& a_x) 27 | { 28 | // Clamp the inputs to the range [-9, 9] since anything outside 29 | // this range is +/-1.0f in single-precision. 30 | const T plus_9 = pset1(9.f); 31 | const T minus_9 = pset1(-9.f); 32 | // NOTE GCC prior to 6.3 might improperly optimize this max/min 33 | // step such that if a_x is nan, x will be either 9 or -9, 34 | // and tanh will return 1 or -1 instead of nan. 35 | // This is supposed to be fixed in gcc6.3, 36 | // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 37 | const T x = pmax(minus_9,pmin(plus_9,a_x)); 38 | // The monomial coefficients of the numerator polynomial (odd). 39 | const T alpha_1 = pset1(4.89352455891786e-03f); 40 | const T alpha_3 = pset1(6.37261928875436e-04f); 41 | const T alpha_5 = pset1(1.48572235717979e-05f); 42 | const T alpha_7 = pset1(5.12229709037114e-08f); 43 | const T alpha_9 = pset1(-8.60467152213735e-11f); 44 | const T alpha_11 = pset1(2.00018790482477e-13f); 45 | const T alpha_13 = pset1(-2.76076847742355e-16f); 46 | 47 | // The monomial coefficients of the denominator polynomial (even). 48 | const T beta_0 = pset1(4.89352518554385e-03f); 49 | const T beta_2 = pset1(2.26843463243900e-03f); 50 | const T beta_4 = pset1(1.18534705686654e-04f); 51 | const T beta_6 = pset1(1.19825839466702e-06f); 52 | 53 | // Since the polynomials are odd/even, we need x^2. 54 | const T x2 = pmul(x, x); 55 | 56 | // Evaluate the numerator polynomial p. 
57 | T p = pmadd(x2, alpha_13, alpha_11); 58 | p = pmadd(x2, p, alpha_9); 59 | p = pmadd(x2, p, alpha_7); 60 | p = pmadd(x2, p, alpha_5); 61 | p = pmadd(x2, p, alpha_3); 62 | p = pmadd(x2, p, alpha_1); 63 | p = pmul(x, p); 64 | 65 | // Evaluate the denominator polynomial q. 66 | T q = pmadd(x2, beta_6, beta_4); 67 | q = pmadd(x2, q, beta_2); 68 | q = pmadd(x2, q, beta_0); 69 | 70 | // Divide the numerator by the denominator. 71 | return pdiv(p, q); 72 | } 73 | 74 | } // end namespace internal 75 | 76 | } // end namespace Eigen 77 | 78 | #endif // EIGEN_MATHFUNCTIONSIMPL_H 79 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/SelfCwiseBinaryOp.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2009-2010 Gael Guennebaud 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 | 10 | #ifndef EIGEN_SELFCWISEBINARYOP_H 11 | #define EIGEN_SELFCWISEBINARYOP_H 12 | 13 | namespace Eigen { 14 | 15 | // TODO generalize the scalar type of 'other' 16 | 17 | template 18 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) 19 | { 20 | typedef typename Derived::PlainObject PlainObject; 21 | internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); 22 | return derived(); 23 | } 24 | 25 | template 26 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const Scalar& other) 27 | { 28 | typedef typename Derived::PlainObject PlainObject; 29 | internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); 30 | return derived(); 31 | } 32 | 33 | template 34 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const Scalar& other) 35 | { 36 | typedef typename Derived::PlainObject PlainObject; 37 | internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); 38 | return derived(); 39 | } 40 | 41 | template 42 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const Scalar& other) 43 | { 44 | typedef typename Derived::PlainObject PlainObject; 45 | internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); 46 | return derived(); 47 | } 48 | 49 | } // end namespace Eigen 50 | 51 | #endif // EIGEN_SELFCWISEBINARYOP_H 52 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/Swap.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 
3 | // 4 | // Copyright (C) 2006-2008 Benoit Jacob 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | #ifndef EIGEN_SWAP_H 11 | #define EIGEN_SWAP_H 12 | 13 | namespace Eigen { 14 | 15 | namespace internal { 16 | 17 | // Overload default assignPacket behavior for swapping them 18 | template 19 | class generic_dense_assignment_kernel, Specialized> 20 | : public generic_dense_assignment_kernel, BuiltIn> 21 | { 22 | protected: 23 | typedef generic_dense_assignment_kernel, BuiltIn> Base; 24 | using Base::m_dst; 25 | using Base::m_src; 26 | using Base::m_functor; 27 | 28 | public: 29 | typedef typename Base::Scalar Scalar; 30 | typedef typename Base::DstXprType DstXprType; 31 | typedef swap_assign_op Functor; 32 | 33 | EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr) 34 | : Base(dst, src, func, dstExpr) 35 | {} 36 | 37 | template 38 | void assignPacket(Index row, Index col) 39 | { 40 | PacketType tmp = m_src.template packet(row,col); 41 | const_cast(m_src).template writePacket(row,col, m_dst.template packet(row,col)); 42 | m_dst.template writePacket(row,col,tmp); 43 | } 44 | 45 | template 46 | void assignPacket(Index index) 47 | { 48 | PacketType tmp = m_src.template packet(index); 49 | const_cast(m_src).template writePacket(index, m_dst.template packet(index)); 50 | m_dst.template writePacket(index,tmp); 51 | } 52 | 53 | // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael) 54 | template 55 | void assignPacketByOuterInner(Index outer, Index inner) 56 | { 57 | Index row = Base::rowIndexByOuterInner(outer, inner); 58 | Index col = Base::colIndexByOuterInner(outer, inner); 59 | assignPacket(row, col); 60 | } 61 | }; 62 | 
63 | } // namespace internal 64 | 65 | } // end namespace Eigen 66 | 67 | #endif // EIGEN_SWAP_H 68 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/VectorBlock.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2008-2010 Gael Guennebaud 5 | // Copyright (C) 2006-2008 Benoit Jacob 6 | // 7 | // This Source Code Form is subject to the terms of the Mozilla 8 | // Public License v. 2.0. If a copy of the MPL was not distributed 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10 | 11 | #ifndef EIGEN_VECTORBLOCK_H 12 | #define EIGEN_VECTORBLOCK_H 13 | 14 | namespace Eigen { 15 | 16 | namespace internal { 17 | template 18 | struct traits > 19 | : public traits::Flags & RowMajorBit ? 1 : Size, 21 | traits::Flags & RowMajorBit ? Size : 1> > 22 | { 23 | }; 24 | } 25 | 26 | /** \class VectorBlock 27 | * \ingroup Core_Module 28 | * 29 | * \brief Expression of a fixed-size or dynamic-size sub-vector 30 | * 31 | * \tparam VectorType the type of the object in which we are taking a sub-vector 32 | * \tparam Size size of the sub-vector we are taking at compile time (optional) 33 | * 34 | * This class represents an expression of either a fixed-size or dynamic-size sub-vector. 35 | * It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment(Index) and 36 | * most of the time this is the only way it is used. 37 | * 38 | * However, if you want to directly manipulate sub-vector expressions, 39 | * for instance if you want to write a function returning such an expression, you 40 | * will need to use this class.
41 | * 42 | * Here is an example illustrating the dynamic case: 43 | * \include class_VectorBlock.cpp 44 | * Output: \verbinclude class_VectorBlock.out 45 | * 46 | * \note Even though this expression has dynamic size, in the case where \a VectorType 47 | * has fixed size, this expression inherits a fixed maximal size which means that evaluating 48 | * it does not cause a dynamic memory allocation. 49 | * 50 | * Here is an example illustrating the fixed-size case: 51 | * \include class_FixedVectorBlock.cpp 52 | * Output: \verbinclude class_FixedVectorBlock.out 53 | * 54 | * \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index) 55 | */ 56 | template class VectorBlock 57 | : public Block::Flags & RowMajorBit ? 1 : Size, 59 | internal::traits::Flags & RowMajorBit ? Size : 1> 60 | { 61 | typedef Block::Flags & RowMajorBit ? 1 : Size, 63 | internal::traits::Flags & RowMajorBit ? Size : 1> Base; 64 | enum { 65 | IsColVector = !(internal::traits::Flags & RowMajorBit) 66 | }; 67 | public: 68 | EIGEN_DENSE_PUBLIC_INTERFACE(VectorBlock) 69 | 70 | using Base::operator=; 71 | 72 | /** Dynamic-size constructor 73 | */ 74 | EIGEN_DEVICE_FUNC 75 | inline VectorBlock(VectorType& vector, Index start, Index size) 76 | : Base(vector, 77 | IsColVector ? start : 0, IsColVector ? 0 : start, 78 | IsColVector ? size : 1, IsColVector ? 1 : size) 79 | { 80 | EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock); 81 | } 82 | 83 | /** Fixed-size constructor 84 | */ 85 | EIGEN_DEVICE_FUNC 86 | inline VectorBlock(VectorType& vector, Index start) 87 | : Base(vector, IsColVector ? start : 0, IsColVector ? 
0 : start) 88 | { 89 | EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock); 90 | } 91 | }; 92 | 93 | 94 | } // end namespace Eigen 95 | 96 | #endif // EIGEN_VECTORBLOCK_H 97 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/arch/AVX/TypeCasting.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2015 Benoit Steiner 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | #ifndef EIGEN_TYPE_CASTING_AVX_H 11 | #define EIGEN_TYPE_CASTING_AVX_H 12 | 13 | namespace Eigen { 14 | 15 | namespace internal { 16 | 17 | // For now we use SSE to handle integers, so we can't use AVX instructions to cast 18 | // from int to float 19 | template <> 20 | struct type_casting_traits { 21 | enum { 22 | VectorizedCast = 0, 23 | SrcCoeffRatio = 1, 24 | TgtCoeffRatio = 1 25 | }; 26 | }; 27 | 28 | template <> 29 | struct type_casting_traits { 30 | enum { 31 | VectorizedCast = 0, 32 | SrcCoeffRatio = 1, 33 | TgtCoeffRatio = 1 34 | }; 35 | }; 36 | 37 | 38 | 39 | template<> EIGEN_STRONG_INLINE Packet8i pcast(const Packet8f& a) { 40 | return _mm256_cvtps_epi32(a); 41 | } 42 | 43 | template<> EIGEN_STRONG_INLINE Packet8f pcast(const Packet8i& a) { 44 | return _mm256_cvtepi32_ps(a); 45 | } 46 | 47 | } // end namespace internal 48 | 49 | } // end namespace Eigen 50 | 51 | #endif // EIGEN_TYPE_CASTING_AVX_H 52 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/arch/CUDA/MathFunctions.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // 
for linear algebra. 3 | // 4 | // Copyright (C) 2014 Benoit Steiner 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | #ifndef EIGEN_MATH_FUNCTIONS_CUDA_H 11 | #define EIGEN_MATH_FUNCTIONS_CUDA_H 12 | 13 | namespace Eigen { 14 | 15 | namespace internal { 16 | 17 | // Make sure this is only available when targeting a GPU: we don't want to 18 | // introduce conflicts between these packet_traits definitions and the ones 19 | // we'll use on the host side (SSE, AVX, ...) 20 | #if defined(__CUDACC__) && defined(EIGEN_USE_GPU) 21 | template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 22 | float4 plog(const float4& a) 23 | { 24 | return make_float4(logf(a.x), logf(a.y), logf(a.z), logf(a.w)); 25 | } 26 | 27 | template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 28 | double2 plog(const double2& a) 29 | { 30 | using ::log; 31 | return make_double2(log(a.x), log(a.y)); 32 | } 33 | 34 | template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 35 | float4 plog1p(const float4& a) 36 | { 37 | return make_float4(log1pf(a.x), log1pf(a.y), log1pf(a.z), log1pf(a.w)); 38 | } 39 | 40 | template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 41 | double2 plog1p(const double2& a) 42 | { 43 | return make_double2(log1p(a.x), log1p(a.y)); 44 | } 45 | 46 | template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 47 | float4 pexp(const float4& a) 48 | { 49 | return make_float4(expf(a.x), expf(a.y), expf(a.z), expf(a.w)); 50 | } 51 | 52 | template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 53 | double2 pexp(const double2& a) 54 | { 55 | using ::exp; 56 | return make_double2(exp(a.x), exp(a.y)); 57 | } 58 | 59 | template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 60 | float4 psqrt(const float4& a) 61 | { 62 | return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w)); 63 | } 64 | 65 | template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 66 | double2 
psqrt(const double2& a) 67 | { 68 | using ::sqrt; 69 | return make_double2(sqrt(a.x), sqrt(a.y)); 70 | } 71 | 72 | template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 73 | float4 prsqrt(const float4& a) 74 | { 75 | return make_float4(rsqrtf(a.x), rsqrtf(a.y), rsqrtf(a.z), rsqrtf(a.w)); 76 | } 77 | 78 | template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 79 | double2 prsqrt(const double2& a) 80 | { 81 | return make_double2(rsqrt(a.x), rsqrt(a.y)); 82 | } 83 | 84 | 85 | #endif 86 | 87 | } // end namespace internal 88 | 89 | } // end namespace Eigen 90 | 91 | #endif // EIGEN_MATH_FUNCTIONS_CUDA_H 92 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/arch/Default/Settings.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2008-2010 Gael Guennebaud 5 | // Copyright (C) 2006-2008 Benoit Jacob 6 | // 7 | // This Source Code Form is subject to the terms of the Mozilla 8 | // Public License v. 2.0. If a copy of the MPL was not distributed 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10 | 11 | 12 | /* All the parameters defined in this file can be specialized in the 13 | * architecture specific files, and/or by the user. 14 | * More to come... */ 15 | 16 | #ifndef EIGEN_DEFAULT_SETTINGS_H 17 | #define EIGEN_DEFAULT_SETTINGS_H 18 | 19 | /** Defines the maximal loop size to enable meta unrolling of loops. 20 | * Note that the value here is expressed in Eigen's own notion of "number of FLOPS", 21 | * it does not correspond to the number of iterations or the number of instructions 22 | */ 23 | #ifndef EIGEN_UNROLLING_LIMIT 24 | #define EIGEN_UNROLLING_LIMIT 100 25 | #endif 26 | 27 | /** Defines the threshold between a "small" and a "large" matrix. 28 | * This threshold is mainly used to select the proper product implementation. 
29 | */ 30 | #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 31 | #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 32 | #endif 33 | 34 | /** Defines the maximal width of the blocks used in the triangular product and solver 35 | * for vectors (level 2 blas xTRMV and xTRSV). The default is 8. 36 | */ 37 | #ifndef EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 38 | #define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8 39 | #endif 40 | 41 | 42 | /** Defines the default number of registers available for that architecture. 43 | * Currently it must be 8 or 16. Other values will fail. 44 | */ 45 | #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 46 | #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8 47 | #endif 48 | 49 | #endif // EIGEN_DEFAULT_SETTINGS_H 50 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/arch/NEON/MathFunctions.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // This Source Code Form is subject to the terms of the Mozilla 5 | // Public License v. 2.0. If a copy of the MPL was not distributed 6 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
7 | 8 | /* The sin, cos, exp, and log functions of this file come from 9 | * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/ 10 | */ 11 | 12 | #ifndef EIGEN_MATH_FUNCTIONS_NEON_H 13 | #define EIGEN_MATH_FUNCTIONS_NEON_H 14 | 15 | namespace Eigen { 16 | 17 | namespace internal { 18 | 19 | template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED 20 | Packet4f pexp(const Packet4f& _x) 21 | { 22 | Packet4f x = _x; 23 | Packet4f tmp, fx; 24 | 25 | _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f); 26 | _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); 27 | _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f); 28 | _EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647950f); 29 | _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f); 30 | _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f); 31 | _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f); 32 | _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f); 33 | _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f); 34 | _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f); 35 | _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f); 36 | _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f); 37 | _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f); 38 | _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f); 39 | 40 | x = vminq_f32(x, p4f_exp_hi); 41 | x = vmaxq_f32(x, p4f_exp_lo); 42 | 43 | /* express exp(x) as exp(g + n*log(2)) */ 44 | fx = vmlaq_f32(p4f_half, x, p4f_cephes_LOG2EF); 45 | 46 | /* perform a floorf */ 47 | tmp = vcvtq_f32_s32(vcvtq_s32_f32(fx)); 48 | 49 | /* if greater, substract 1 */ 50 | Packet4ui mask = vcgtq_f32(tmp, fx); 51 | mask = vandq_u32(mask, vreinterpretq_u32_f32(p4f_1)); 52 | 53 | fx = vsubq_f32(tmp, vreinterpretq_f32_u32(mask)); 54 | 55 | tmp = vmulq_f32(fx, p4f_cephes_exp_C1); 56 | Packet4f z = vmulq_f32(fx, p4f_cephes_exp_C2); 57 | x = vsubq_f32(x, tmp); 58 | x = vsubq_f32(x, z); 59 | 60 | 
Packet4f y = vmulq_f32(p4f_cephes_exp_p0, x); 61 | z = vmulq_f32(x, x); 62 | y = vaddq_f32(y, p4f_cephes_exp_p1); 63 | y = vmulq_f32(y, x); 64 | y = vaddq_f32(y, p4f_cephes_exp_p2); 65 | y = vmulq_f32(y, x); 66 | y = vaddq_f32(y, p4f_cephes_exp_p3); 67 | y = vmulq_f32(y, x); 68 | y = vaddq_f32(y, p4f_cephes_exp_p4); 69 | y = vmulq_f32(y, x); 70 | y = vaddq_f32(y, p4f_cephes_exp_p5); 71 | 72 | y = vmulq_f32(y, z); 73 | y = vaddq_f32(y, x); 74 | y = vaddq_f32(y, p4f_1); 75 | 76 | /* build 2^n */ 77 | int32x4_t mm; 78 | mm = vcvtq_s32_f32(fx); 79 | mm = vaddq_s32(mm, p4i_0x7f); 80 | mm = vshlq_n_s32(mm, 23); 81 | Packet4f pow2n = vreinterpretq_f32_s32(mm); 82 | 83 | y = vmulq_f32(y, pow2n); 84 | return y; 85 | } 86 | 87 | } // end namespace internal 88 | 89 | } // end namespace Eigen 90 | 91 | #endif // EIGEN_MATH_FUNCTIONS_NEON_H 92 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/arch/SSE/TypeCasting.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2015 Benoit Steiner 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
9 | 10 | #ifndef EIGEN_TYPE_CASTING_SSE_H 11 | #define EIGEN_TYPE_CASTING_SSE_H 12 | 13 | namespace Eigen { 14 | 15 | namespace internal { 16 | 17 | template <> 18 | struct type_casting_traits { 19 | enum { 20 | VectorizedCast = 1, 21 | SrcCoeffRatio = 1, 22 | TgtCoeffRatio = 1 23 | }; 24 | }; 25 | 26 | template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4f& a) { 27 | return _mm_cvttps_epi32(a); 28 | } 29 | 30 | 31 | template <> 32 | struct type_casting_traits { 33 | enum { 34 | VectorizedCast = 1, 35 | SrcCoeffRatio = 1, 36 | TgtCoeffRatio = 1 37 | }; 38 | }; 39 | 40 | template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i& a) { 41 | return _mm_cvtepi32_ps(a); 42 | } 43 | 44 | 45 | template <> 46 | struct type_casting_traits { 47 | enum { 48 | VectorizedCast = 1, 49 | SrcCoeffRatio = 2, 50 | TgtCoeffRatio = 1 51 | }; 52 | }; 53 | 54 | template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet2d& a, const Packet2d& b) { 55 | return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6)); 56 | } 57 | 58 | template <> 59 | struct type_casting_traits { 60 | enum { 61 | VectorizedCast = 1, 62 | SrcCoeffRatio = 1, 63 | TgtCoeffRatio = 2 64 | }; 65 | }; 66 | 67 | template<> EIGEN_STRONG_INLINE Packet2d pcast(const Packet4f& a) { 68 | // Simply discard the second half of the input 69 | return _mm_cvtps_pd(a); 70 | } 71 | 72 | 73 | } // end namespace internal 74 | 75 | } // end namespace Eigen 76 | 77 | #endif // EIGEN_TYPE_CASTING_SSE_H 78 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/functors/TernaryFunctors.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2016 Eugene Brevdo 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. 
If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | #ifndef EIGEN_TERNARY_FUNCTORS_H 11 | #define EIGEN_TERNARY_FUNCTORS_H 12 | 13 | namespace Eigen { 14 | 15 | namespace internal { 16 | 17 | //---------- associative ternary functors ---------- 18 | 19 | 20 | 21 | } // end namespace internal 22 | 23 | } // end namespace Eigen 24 | 25 | #endif // EIGEN_TERNARY_FUNCTORS_H 26 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/util/NonMPL2.h: -------------------------------------------------------------------------------- 1 | #ifdef EIGEN_MPL2_ONLY 2 | #error Including non-MPL2 code in EIGEN_MPL2_ONLY mode 3 | #endif 4 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/Core/util/ReenableStupidWarnings.h: -------------------------------------------------------------------------------- 1 | #ifdef EIGEN_WARNINGS_DISABLED 2 | #undef EIGEN_WARNINGS_DISABLED 3 | 4 | #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS 5 | #ifdef _MSC_VER 6 | #pragma warning( pop ) 7 | #elif defined __INTEL_COMPILER 8 | #pragma warning pop 9 | #elif defined __clang__ 10 | #pragma clang diagnostic pop 11 | #elif defined __GNUC__ && __GNUC__>=6 12 | #pragma GCC diagnostic pop 13 | #endif 14 | 15 | #if defined __NVCC__ 16 | // Don't reenable the diagnostic messages, as it turns out these messages need 17 | // to be disabled at the point of the template instantiation (i.e the user code) 18 | // otherwise they'll be triggered by nvcc. 
19 | // #pragma diag_default code_is_unreachable 20 | // #pragma diag_default initialization_not_reachable 21 | // #pragma diag_default 2651 22 | // #pragma diag_default 2653 23 | #endif 24 | 25 | #endif 26 | 27 | #endif // EIGEN_WARNINGS_DISABLED 28 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/LU/Determinant.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2008 Benoit Jacob 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | #ifndef EIGEN_DETERMINANT_H 11 | #define EIGEN_DETERMINANT_H 12 | 13 | namespace Eigen { 14 | 15 | namespace internal { 16 | 17 | template 18 | inline const typename Derived::Scalar bruteforce_det3_helper 19 | (const MatrixBase& matrix, int a, int b, int c) 20 | { 21 | return matrix.coeff(0,a) 22 | * (matrix.coeff(1,b) * matrix.coeff(2,c) - matrix.coeff(1,c) * matrix.coeff(2,b)); 23 | } 24 | 25 | template 26 | const typename Derived::Scalar bruteforce_det4_helper 27 | (const MatrixBase& matrix, int j, int k, int m, int n) 28 | { 29 | return (matrix.coeff(j,0) * matrix.coeff(k,1) - matrix.coeff(k,0) * matrix.coeff(j,1)) 30 | * (matrix.coeff(m,2) * matrix.coeff(n,3) - matrix.coeff(n,2) * matrix.coeff(m,3)); 31 | } 32 | 33 | template struct determinant_impl 36 | { 37 | static inline typename traits::Scalar run(const Derived& m) 38 | { 39 | if(Derived::ColsAtCompileTime==Dynamic && m.rows()==0) 40 | return typename traits::Scalar(1); 41 | return m.partialPivLu().determinant(); 42 | } 43 | }; 44 | 45 | template struct determinant_impl 46 | { 47 | static inline typename traits::Scalar run(const Derived& m) 48 | { 49 | return m.coeff(0,0); 50 | } 51 
| }; 52 | 53 | template struct determinant_impl 54 | { 55 | static inline typename traits::Scalar run(const Derived& m) 56 | { 57 | return m.coeff(0,0) * m.coeff(1,1) - m.coeff(1,0) * m.coeff(0,1); 58 | } 59 | }; 60 | 61 | template struct determinant_impl 62 | { 63 | static inline typename traits::Scalar run(const Derived& m) 64 | { 65 | return bruteforce_det3_helper(m,0,1,2) 66 | - bruteforce_det3_helper(m,1,0,2) 67 | + bruteforce_det3_helper(m,2,0,1); 68 | } 69 | }; 70 | 71 | template struct determinant_impl 72 | { 73 | static typename traits::Scalar run(const Derived& m) 74 | { 75 | // trick by Martin Costabel to compute 4x4 det with only 30 muls 76 | return bruteforce_det4_helper(m,0,1,2,3) 77 | - bruteforce_det4_helper(m,0,2,1,3) 78 | + bruteforce_det4_helper(m,0,3,1,2) 79 | + bruteforce_det4_helper(m,1,2,0,3) 80 | - bruteforce_det4_helper(m,1,3,0,2) 81 | + bruteforce_det4_helper(m,2,3,0,1); 82 | } 83 | }; 84 | 85 | } // end namespace internal 86 | 87 | /** \lu_module 88 | * 89 | * \returns the determinant of this matrix 90 | */ 91 | template 92 | inline typename internal::traits::Scalar MatrixBase::determinant() const 93 | { 94 | eigen_assert(rows() == cols()); 95 | typedef typename internal::nested_eval::type Nested; 96 | return internal::determinant_impl::type>::run(derived()); 97 | } 98 | 99 | } // end namespace Eigen 100 | 101 | #endif // EIGEN_DETERMINANT_H 102 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/QR/HouseholderQR_LAPACKE.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2011, Intel Corporation. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 
9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | * Neither the name of Intel Corporation nor the names of its contributors may 13 | be used to endorse or promote products derived from this software without 14 | specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 20 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | ******************************************************************************** 28 | * Content : Eigen bindings to LAPACKe 29 | * Householder QR decomposition of a matrix w/o pivoting based on 30 | * LAPACKE_?geqrf function. 
31 | ******************************************************************************** 32 | */ 33 | 34 | #ifndef EIGEN_QR_LAPACKE_H 35 | #define EIGEN_QR_LAPACKE_H 36 | 37 | namespace Eigen { 38 | 39 | namespace internal { 40 | 41 | /** \internal Specialization for the data types supported by LAPACKe */ 42 | 43 | #define EIGEN_LAPACKE_QR_NOPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \ 44 | template \ 45 | struct householder_qr_inplace_blocked \ 46 | { \ 47 | static void run(MatrixQR& mat, HCoeffs& hCoeffs, Index = 32, \ 48 | typename MatrixQR::Scalar* = 0) \ 49 | { \ 50 | lapack_int m = (lapack_int) mat.rows(); \ 51 | lapack_int n = (lapack_int) mat.cols(); \ 52 | lapack_int lda = (lapack_int) mat.outerStride(); \ 53 | lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ 54 | LAPACKE_##LAPACKE_PREFIX##geqrf( matrix_order, m, n, (LAPACKE_TYPE*)mat.data(), lda, (LAPACKE_TYPE*)hCoeffs.data()); \ 55 | hCoeffs.adjointInPlace(); \ 56 | } \ 57 | }; 58 | 59 | EIGEN_LAPACKE_QR_NOPIV(double, double, d) 60 | EIGEN_LAPACKE_QR_NOPIV(float, float, s) 61 | EIGEN_LAPACKE_QR_NOPIV(dcomplex, lapack_complex_double, z) 62 | EIGEN_LAPACKE_QR_NOPIV(scomplex, lapack_complex_float, c) 63 | 64 | } // end namespace internal 65 | 66 | } // end namespace Eigen 67 | 68 | #endif // EIGEN_QR_LAPACKE_H 69 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/SparseCore/MappedSparseMatrix.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2008-2014 Gael Guennebaud 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
9 | 10 | #ifndef EIGEN_MAPPED_SPARSEMATRIX_H 11 | #define EIGEN_MAPPED_SPARSEMATRIX_H 12 | 13 | namespace Eigen { 14 | 15 | /** \deprecated Use Map > 16 | * \class MappedSparseMatrix 17 | * 18 | * \brief Sparse matrix 19 | * 20 | * \param _Scalar the scalar type, i.e. the type of the coefficients 21 | * 22 | * See http://www.netlib.org/linalg/html_templates/node91.html for details on the storage scheme. 23 | * 24 | */ 25 | namespace internal { 26 | template 27 | struct traits > : traits > 28 | {}; 29 | } // end namespace internal 30 | 31 | template 32 | class MappedSparseMatrix 33 | : public Map > 34 | { 35 | typedef Map > Base; 36 | 37 | public: 38 | 39 | typedef typename Base::StorageIndex StorageIndex; 40 | typedef typename Base::Scalar Scalar; 41 | 42 | inline MappedSparseMatrix(Index rows, Index cols, Index nnz, StorageIndex* outerIndexPtr, StorageIndex* innerIndexPtr, Scalar* valuePtr, StorageIndex* innerNonZeroPtr = 0) 43 | : Base(rows, cols, nnz, outerIndexPtr, innerIndexPtr, valuePtr, innerNonZeroPtr) 44 | {} 45 | 46 | /** Empty destructor */ 47 | inline ~MappedSparseMatrix() {} 48 | }; 49 | 50 | namespace internal { 51 | 52 | template 53 | struct evaluator > 54 | : evaluator > > 55 | { 56 | typedef MappedSparseMatrix<_Scalar,_Options,_StorageIndex> XprType; 57 | typedef evaluator > Base; 58 | 59 | evaluator() : Base() {} 60 | explicit evaluator(const XprType &mat) : Base(mat) {} 61 | }; 62 | 63 | } 64 | 65 | } // end namespace Eigen 66 | 67 | #endif // EIGEN_MAPPED_SPARSEMATRIX_H 68 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/SparseCore/SparseDot.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2008 Gael Guennebaud 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. 
If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | #ifndef EIGEN_SPARSE_DOT_H 11 | #define EIGEN_SPARSE_DOT_H 12 | 13 | namespace Eigen { 14 | 15 | template 16 | template 17 | typename internal::traits::Scalar 18 | SparseMatrixBase::dot(const MatrixBase& other) const 19 | { 20 | EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) 21 | EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) 22 | EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) 23 | EIGEN_STATIC_ASSERT((internal::is_same::value), 24 | YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) 25 | 26 | eigen_assert(size() == other.size()); 27 | eigen_assert(other.size()>0 && "you are using a non initialized vector"); 28 | 29 | internal::evaluator thisEval(derived()); 30 | typename internal::evaluator::InnerIterator i(thisEval, 0); 31 | Scalar res(0); 32 | while (i) 33 | { 34 | res += numext::conj(i.value()) * other.coeff(i.index()); 35 | ++i; 36 | } 37 | return res; 38 | } 39 | 40 | template 41 | template 42 | typename internal::traits::Scalar 43 | SparseMatrixBase::dot(const SparseMatrixBase& other) const 44 | { 45 | EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) 46 | EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) 47 | EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) 48 | EIGEN_STATIC_ASSERT((internal::is_same::value), 49 | YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) 50 | 51 | eigen_assert(size() == other.size()); 52 | 53 | internal::evaluator thisEval(derived()); 54 | typename internal::evaluator::InnerIterator i(thisEval, 0); 55 | 56 | internal::evaluator otherEval(other.derived()); 57 | typename internal::evaluator::InnerIterator j(otherEval, 0); 58 | 59 | Scalar res(0); 60 | while (i && j) 61 | { 62 | if (i.index()==j.index()) 63 | { 64 | res += numext::conj(i.value()) * j.value(); 65 | ++i; ++j; 66 | } 67 
| else if (i.index() 76 | inline typename NumTraits::Scalar>::Real 77 | SparseMatrixBase::squaredNorm() const 78 | { 79 | return numext::real((*this).cwiseAbs2().sum()); 80 | } 81 | 82 | template 83 | inline typename NumTraits::Scalar>::Real 84 | SparseMatrixBase::norm() const 85 | { 86 | using std::sqrt; 87 | return sqrt(squaredNorm()); 88 | } 89 | 90 | template 91 | inline typename NumTraits::Scalar>::Real 92 | SparseMatrixBase::blueNorm() const 93 | { 94 | return internal::blueNorm_impl(*this); 95 | } 96 | } // end namespace Eigen 97 | 98 | #endif // EIGEN_SPARSE_DOT_H 99 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/SparseCore/SparseFuzzy.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2008-2014 Gael Guennebaud 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
9 | 10 | #ifndef EIGEN_SPARSE_FUZZY_H 11 | #define EIGEN_SPARSE_FUZZY_H 12 | 13 | namespace Eigen { 14 | 15 | template 16 | template 17 | bool SparseMatrixBase::isApprox(const SparseMatrixBase& other, const RealScalar &prec) const 18 | { 19 | const typename internal::nested_eval::type actualA(derived()); 20 | typename internal::conditional::type, 22 | const PlainObject>::type actualB(other.derived()); 23 | 24 | return (actualA - actualB).squaredNorm() <= prec * prec * numext::mini(actualA.squaredNorm(), actualB.squaredNorm()); 25 | } 26 | 27 | } // end namespace Eigen 28 | 29 | #endif // EIGEN_SPARSE_FUZZY_H 30 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/SparseCore/SparseRedux.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2008-2014 Gael Guennebaud 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
9 | 10 | #ifndef EIGEN_SPARSEREDUX_H 11 | #define EIGEN_SPARSEREDUX_H 12 | 13 | namespace Eigen { 14 | 15 | template 16 | typename internal::traits::Scalar 17 | SparseMatrixBase::sum() const 18 | { 19 | eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix"); 20 | Scalar res(0); 21 | internal::evaluator thisEval(derived()); 22 | for (Index j=0; j::InnerIterator iter(thisEval,j); iter; ++iter) 24 | res += iter.value(); 25 | return res; 26 | } 27 | 28 | template 29 | typename internal::traits >::Scalar 30 | SparseMatrix<_Scalar,_Options,_Index>::sum() const 31 | { 32 | eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix"); 33 | if(this->isCompressed()) 34 | return Matrix::Map(m_data.valuePtr(), m_data.size()).sum(); 35 | else 36 | return Base::sum(); 37 | } 38 | 39 | template 40 | typename internal::traits >::Scalar 41 | SparseVector<_Scalar,_Options,_Index>::sum() const 42 | { 43 | eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix"); 44 | return Matrix::Map(m_data.valuePtr(), m_data.size()).sum(); 45 | } 46 | 47 | } // end namespace Eigen 48 | 49 | #endif // EIGEN_SPARSEREDUX_H 50 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/SparseCore/SparseTranspose.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2008-2015 Gael Guennebaud 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
9 | 10 | #ifndef EIGEN_SPARSETRANSPOSE_H 11 | #define EIGEN_SPARSETRANSPOSE_H 12 | 13 | namespace Eigen { 14 | 15 | namespace internal { 16 | template 17 | class SparseTransposeImpl 18 | : public SparseMatrixBase > 19 | {}; 20 | 21 | template 22 | class SparseTransposeImpl 23 | : public SparseCompressedBase > 24 | { 25 | typedef SparseCompressedBase > Base; 26 | public: 27 | using Base::derived; 28 | typedef typename Base::Scalar Scalar; 29 | typedef typename Base::StorageIndex StorageIndex; 30 | 31 | inline Index nonZeros() const { return derived().nestedExpression().nonZeros(); } 32 | 33 | inline const Scalar* valuePtr() const { return derived().nestedExpression().valuePtr(); } 34 | inline const StorageIndex* innerIndexPtr() const { return derived().nestedExpression().innerIndexPtr(); } 35 | inline const StorageIndex* outerIndexPtr() const { return derived().nestedExpression().outerIndexPtr(); } 36 | inline const StorageIndex* innerNonZeroPtr() const { return derived().nestedExpression().innerNonZeroPtr(); } 37 | 38 | inline Scalar* valuePtr() { return derived().nestedExpression().valuePtr(); } 39 | inline StorageIndex* innerIndexPtr() { return derived().nestedExpression().innerIndexPtr(); } 40 | inline StorageIndex* outerIndexPtr() { return derived().nestedExpression().outerIndexPtr(); } 41 | inline StorageIndex* innerNonZeroPtr() { return derived().nestedExpression().innerNonZeroPtr(); } 42 | }; 43 | } 44 | 45 | template class TransposeImpl 46 | : public internal::SparseTransposeImpl 47 | { 48 | protected: 49 | typedef internal::SparseTransposeImpl Base; 50 | }; 51 | 52 | namespace internal { 53 | 54 | template 55 | struct unary_evaluator, IteratorBased> 56 | : public evaluator_base > 57 | { 58 | typedef typename evaluator::InnerIterator EvalIterator; 59 | public: 60 | typedef Transpose XprType; 61 | 62 | inline Index nonZerosEstimate() const { 63 | return m_argImpl.nonZerosEstimate(); 64 | } 65 | 66 | class InnerIterator : public EvalIterator 67 | { 68 | 
public: 69 | EIGEN_STRONG_INLINE InnerIterator(const unary_evaluator& unaryOp, Index outer) 70 | : EvalIterator(unaryOp.m_argImpl,outer) 71 | {} 72 | 73 | Index row() const { return EvalIterator::col(); } 74 | Index col() const { return EvalIterator::row(); } 75 | }; 76 | 77 | enum { 78 | CoeffReadCost = evaluator::CoeffReadCost, 79 | Flags = XprType::Flags 80 | }; 81 | 82 | explicit unary_evaluator(const XprType& op) :m_argImpl(op.nestedExpression()) {} 83 | 84 | protected: 85 | evaluator m_argImpl; 86 | }; 87 | 88 | } // end namespace internal 89 | 90 | } // end namespace Eigen 91 | 92 | #endif // EIGEN_SPARSETRANSPOSE_H 93 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/SparseLU/SparseLU_Utils.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2012 Désiré Nuentsa-Wakam 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
9 | 10 | 11 | #ifndef EIGEN_SPARSELU_UTILS_H 12 | #define EIGEN_SPARSELU_UTILS_H 13 | 14 | namespace Eigen { 15 | namespace internal { 16 | 17 | /** 18 | * \brief Count Nonzero elements in the factors 19 | */ 20 | template 21 | void SparseLUImpl::countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu) 22 | { 23 | nnzL = 0; 24 | nnzU = (glu.xusub)(n); 25 | Index nsuper = (glu.supno)(n); 26 | Index jlen; 27 | Index i, j, fsupc; 28 | if (n <= 0 ) return; 29 | // For each supernode 30 | for (i = 0; i <= nsuper; i++) 31 | { 32 | fsupc = glu.xsup(i); 33 | jlen = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); 34 | 35 | for (j = fsupc; j < glu.xsup(i+1); j++) 36 | { 37 | nnzL += jlen; 38 | nnzU += j - fsupc + 1; 39 | jlen--; 40 | } 41 | } 42 | } 43 | 44 | /** 45 | * \brief Fix up the data storage lsub for L-subscripts. 46 | * 47 | * It removes the subscripts sets for structural pruning, 48 | * and applies permutation to the remaining subscripts 49 | * 50 | */ 51 | template 52 | void SparseLUImpl::fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu) 53 | { 54 | Index fsupc, i, j, k, jstart; 55 | 56 | StorageIndex nextl = 0; 57 | Index nsuper = (glu.supno)(n); 58 | 59 | // For each supernode 60 | for (i = 0; i <= nsuper; i++) 61 | { 62 | fsupc = glu.xsup(i); 63 | jstart = glu.xlsub(fsupc); 64 | glu.xlsub(fsupc) = nextl; 65 | for (j = jstart; j < glu.xlsub(fsupc + 1); j++) 66 | { 67 | glu.lsub(nextl) = perm_r(glu.lsub(j)); // Now indexed into P*A 68 | nextl++; 69 | } 70 | for (k = fsupc+1; k < glu.xsup(i+1); k++) 71 | glu.xlsub(k) = nextl; // other columns in supernode i 72 | } 73 | 74 | glu.xlsub(n) = nextl; 75 | } 76 | 77 | } // end namespace internal 78 | 79 | } // end namespace Eigen 80 | #endif // EIGEN_SPARSELU_UTILS_H 81 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/SparseLU/SparseLU_relax_snode.h: -------------------------------------------------------------------------------- 1 | // 
This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2012 Désiré Nuentsa-Wakam 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 | 10 | /* This file is a modified version of heap_relax_snode.c file in SuperLU 11 | * -- SuperLU routine (version 3.0) -- 12 | * Univ. of California Berkeley, Xerox Palo Alto Research Center, 13 | * and Lawrence Berkeley National Lab. 14 | * October 15, 2003 15 | * 16 | * Copyright (c) 1994 by Xerox Corporation. All rights reserved. 17 | * 18 | * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY 19 | * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. 20 | * 21 | * Permission is hereby granted to use or copy this program for any 22 | * purpose, provided the above notices are retained on all copies. 23 | * Permission to modify the code and to distribute modified code is 24 | * granted, provided the above notices are retained, and a notice that 25 | * the code was modified is included with the above copyright notice. 26 | */ 27 | 28 | #ifndef SPARSELU_RELAX_SNODE_H 29 | #define SPARSELU_RELAX_SNODE_H 30 | 31 | namespace Eigen { 32 | 33 | namespace internal { 34 | 35 | /** 36 | * \brief Identify the initial relaxed supernodes 37 | * 38 | * This routine is applied to a column elimination tree. 
39 | * It assumes that the matrix has been reordered according to the postorder of the etree 40 | * \param n the number of columns 41 | * \param et elimination tree; et(j) == n marks the dummy root 42 | * \param relax_columns Maximum number of columns allowed in a relaxed snode 43 | * \param descendants Number of descendants of each node in the etree (zeroed and recomputed here) 44 | * \param relax_end on return, relax_end(first column of a relaxed snode) holds its last column; every other entry is emptyIdxLU 45 | */ 46 | template <typename Scalar, typename StorageIndex> 47 | void SparseLUImpl<Scalar,StorageIndex>::relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end) 48 | { 49 | 50 | // compute the number of descendants of each node in the etree 51 | Index parent; 52 | relax_end.setConstant(emptyIdxLU); 53 | descendants.setZero(); 54 | for (Index j = 0; j < n; j++) 55 | { 56 | parent = et(j); 57 | if (parent != n) // not the dummy root 58 | descendants(parent) += descendants(j) + 1; 59 | } 60 | // Identify the relaxed supernodes by postorder traversal of the etree 61 | Index snode_start; // beginning of a snode 62 | for (Index j = 0; j < n; ) 63 | { 64 | parent = et(j); 65 | snode_start = j; 66 | while ( parent != n && descendants(parent) < relax_columns ) 67 | { 68 | j = parent; 69 | parent = et(j); 70 | } 71 | // Found a supernode in postordered etree, j is the last column 72 | relax_end(snode_start) = StorageIndex(j); // Record last column 73 | j++; 74 | // Search for a new leaf; test j < n first so descendants(j) is never read past column n-1 75 | while (j < n && descendants(j) != 0) j++; 76 | } // End postorder traversal of the etree 77 | 78 | } 79 | 80 | } // end namespace internal 81 | 82 | } // end namespace Eigen 83 | #endif 84 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/StlSupport/details.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 
3 | // 4 | // Copyright (C) 2009 Gael Guennebaud 5 | // Copyright (C) 2009 Hauke Heibel 6 | // 7 | // This Source Code Form is subject to the terms of the Mozilla 8 | // Public License v. 2.0. If a copy of the MPL was not distributed 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10 | 11 | #ifndef EIGEN_STL_DETAILS_H 12 | #define EIGEN_STL_DETAILS_H 13 | 14 | #ifndef EIGEN_ALIGNED_ALLOCATOR 15 | #define EIGEN_ALIGNED_ALLOCATOR Eigen::aligned_allocator 16 | #endif 17 | 18 | namespace Eigen { 19 | 20 | // This one is needed to prevent reimplementing the whole std::vector. 21 | template 22 | class aligned_allocator_indirection : public EIGEN_ALIGNED_ALLOCATOR 23 | { 24 | public: 25 | typedef std::size_t size_type; 26 | typedef std::ptrdiff_t difference_type; 27 | typedef T* pointer; 28 | typedef const T* const_pointer; 29 | typedef T& reference; 30 | typedef const T& const_reference; 31 | typedef T value_type; 32 | 33 | template 34 | struct rebind 35 | { 36 | typedef aligned_allocator_indirection other; 37 | }; 38 | 39 | aligned_allocator_indirection() {} 40 | aligned_allocator_indirection(const aligned_allocator_indirection& ) : EIGEN_ALIGNED_ALLOCATOR() {} 41 | aligned_allocator_indirection(const EIGEN_ALIGNED_ALLOCATOR& ) {} 42 | template 43 | aligned_allocator_indirection(const aligned_allocator_indirection& ) {} 44 | template 45 | aligned_allocator_indirection(const EIGEN_ALIGNED_ALLOCATOR& ) {} 46 | ~aligned_allocator_indirection() {} 47 | }; 48 | 49 | #if EIGEN_COMP_MSVC 50 | 51 | // sometimes, MSVC detects, at compile time, that the argument x 52 | // in std::vector::resize(size_t s,T x) won't be aligned and generate an error 53 | // even if this function is never called. Whence this little wrapper. 
54 | #define EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T) \ 55 | typename Eigen::internal::conditional< \ 56 | Eigen::internal::is_arithmetic::value, \ 57 | T, \ 58 | Eigen::internal::workaround_msvc_stl_support \ 59 | >::type 60 | 61 | namespace internal { 62 | template struct workaround_msvc_stl_support : public T 63 | { 64 | inline workaround_msvc_stl_support() : T() {} 65 | inline workaround_msvc_stl_support(const T& other) : T(other) {} 66 | inline operator T& () { return *static_cast(this); } 67 | inline operator const T& () const { return *static_cast(this); } 68 | template 69 | inline T& operator=(const OtherT& other) 70 | { T::operator=(other); return *this; } 71 | inline workaround_msvc_stl_support& operator=(const workaround_msvc_stl_support& other) 72 | { T::operator=(other); return *this; } 73 | }; 74 | } 75 | 76 | #else 77 | 78 | #define EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T) T 79 | 80 | #endif 81 | 82 | } 83 | 84 | #endif // EIGEN_STL_DETAILS_H 85 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/misc/Image.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2009 Benoit Jacob 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
9 | 10 | #ifndef EIGEN_MISC_IMAGE_H 11 | #define EIGEN_MISC_IMAGE_H 12 | 13 | namespace Eigen { 14 | 15 | namespace internal { 16 | 17 | /** \class image_retval_base 18 | * 19 | */ 20 | template 21 | struct traits > 22 | { 23 | typedef typename DecompositionType::MatrixType MatrixType; 24 | typedef Matrix< 25 | typename MatrixType::Scalar, 26 | MatrixType::RowsAtCompileTime, // the image is a subspace of the destination space, whose 27 | // dimension is the number of rows of the original matrix 28 | Dynamic, // we don't know at compile time the dimension of the image (the rank) 29 | MatrixType::Options, 30 | MatrixType::MaxRowsAtCompileTime, // the image matrix will consist of columns from the original matrix, 31 | MatrixType::MaxColsAtCompileTime // so it has the same number of rows and at most as many columns. 32 | > ReturnType; 33 | }; 34 | 35 | template struct image_retval_base 36 | : public ReturnByValue > 37 | { 38 | typedef _DecompositionType DecompositionType; 39 | typedef typename DecompositionType::MatrixType MatrixType; 40 | typedef ReturnByValue Base; 41 | 42 | image_retval_base(const DecompositionType& dec, const MatrixType& originalMatrix) 43 | : m_dec(dec), m_rank(dec.rank()), 44 | m_cols(m_rank == 0 ? 
1 : m_rank), 45 | m_originalMatrix(originalMatrix) 46 | {} 47 | 48 | inline Index rows() const { return m_dec.rows(); } 49 | inline Index cols() const { return m_cols; } 50 | inline Index rank() const { return m_rank; } 51 | inline const DecompositionType& dec() const { return m_dec; } 52 | inline const MatrixType& originalMatrix() const { return m_originalMatrix; } 53 | 54 | template inline void evalTo(Dest& dst) const 55 | { 56 | static_cast*>(this)->evalTo(dst); 57 | } 58 | 59 | protected: 60 | const DecompositionType& m_dec; 61 | Index m_rank, m_cols; 62 | const MatrixType& m_originalMatrix; 63 | }; 64 | 65 | } // end namespace internal 66 | 67 | #define EIGEN_MAKE_IMAGE_HELPERS(DecompositionType) \ 68 | typedef typename DecompositionType::MatrixType MatrixType; \ 69 | typedef typename MatrixType::Scalar Scalar; \ 70 | typedef typename MatrixType::RealScalar RealScalar; \ 71 | typedef Eigen::internal::image_retval_base Base; \ 72 | using Base::dec; \ 73 | using Base::originalMatrix; \ 74 | using Base::rank; \ 75 | using Base::rows; \ 76 | using Base::cols; \ 77 | image_retval(const DecompositionType& dec, const MatrixType& originalMatrix) \ 78 | : Base(dec, originalMatrix) {} 79 | 80 | } // end namespace Eigen 81 | 82 | #endif // EIGEN_MISC_IMAGE_H 83 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/misc/Kernel.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2009 Benoit Jacob 5 | // 6 | // This Source Code Form is subject to the terms of the Mozilla 7 | // Public License v. 2.0. If a copy of the MPL was not distributed 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
9 | 10 | #ifndef EIGEN_MISC_KERNEL_H 11 | #define EIGEN_MISC_KERNEL_H 12 | 13 | namespace Eigen { 14 | 15 | namespace internal { 16 | 17 | /** \class kernel_retval_base 18 | * 19 | */ 20 | template 21 | struct traits > 22 | { 23 | typedef typename DecompositionType::MatrixType MatrixType; 24 | typedef Matrix< 25 | typename MatrixType::Scalar, 26 | MatrixType::ColsAtCompileTime, // the number of rows in the "kernel matrix" 27 | // is the number of cols of the original matrix 28 | // so that the product "matrix * kernel = zero" makes sense 29 | Dynamic, // we don't know at compile-time the dimension of the kernel 30 | MatrixType::Options, 31 | MatrixType::MaxColsAtCompileTime, // see explanation for 2nd template parameter 32 | MatrixType::MaxColsAtCompileTime // the kernel is a subspace of the domain space, 33 | // whose dimension is the number of columns of the original matrix 34 | > ReturnType; 35 | }; 36 | 37 | template struct kernel_retval_base 38 | : public ReturnByValue > 39 | { 40 | typedef _DecompositionType DecompositionType; 41 | typedef ReturnByValue Base; 42 | 43 | explicit kernel_retval_base(const DecompositionType& dec) 44 | : m_dec(dec), 45 | m_rank(dec.rank()), 46 | m_cols(m_rank==dec.cols() ? 
1 : dec.cols() - m_rank) 47 | {} 48 | 49 | inline Index rows() const { return m_dec.cols(); } 50 | inline Index cols() const { return m_cols; } 51 | inline Index rank() const { return m_rank; } 52 | inline const DecompositionType& dec() const { return m_dec; } 53 | 54 | template inline void evalTo(Dest& dst) const 55 | { 56 | static_cast*>(this)->evalTo(dst); 57 | } 58 | 59 | protected: 60 | const DecompositionType& m_dec; 61 | Index m_rank, m_cols; 62 | }; 63 | 64 | } // end namespace internal 65 | 66 | #define EIGEN_MAKE_KERNEL_HELPERS(DecompositionType) \ 67 | typedef typename DecompositionType::MatrixType MatrixType; \ 68 | typedef typename MatrixType::Scalar Scalar; \ 69 | typedef typename MatrixType::RealScalar RealScalar; \ 70 | typedef Eigen::internal::kernel_retval_base Base; \ 71 | using Base::dec; \ 72 | using Base::rank; \ 73 | using Base::rows; \ 74 | using Base::cols; \ 75 | kernel_retval(const DecompositionType& dec) : Base(dec) {} 76 | 77 | } // end namespace Eigen 78 | 79 | #endif // EIGEN_MISC_KERNEL_H 80 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/misc/RealSvd2x2.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2009-2010 Benoit Jacob 5 | // Copyright (C) 2013-2016 Gael Guennebaud 6 | // 7 | // This Source Code Form is subject to the terms of the Mozilla 8 | // Public License v. 2.0. If a copy of the MPL was not distributed 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
10 | 11 | #ifndef EIGEN_REALSVD2X2_H 12 | #define EIGEN_REALSVD2X2_H 13 | 14 | namespace Eigen { 15 | 16 | namespace internal { 17 | 18 | template 19 | void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q, 20 | JacobiRotation *j_left, 21 | JacobiRotation *j_right) 22 | { 23 | using std::sqrt; 24 | using std::abs; 25 | Matrix m; 26 | m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)), 27 | numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q)); 28 | JacobiRotation rot1; 29 | RealScalar t = m.coeff(0,0) + m.coeff(1,1); 30 | RealScalar d = m.coeff(1,0) - m.coeff(0,1); 31 | 32 | if(abs(d) < (std::numeric_limits::min)()) 33 | { 34 | rot1.s() = RealScalar(0); 35 | rot1.c() = RealScalar(1); 36 | } 37 | else 38 | { 39 | // If d!=0, then t/d cannot overflow because the magnitude of the 40 | // entries forming d are not too small compared to the ones forming t. 41 | RealScalar u = t / d; 42 | RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u)); 43 | rot1.s() = RealScalar(1) / tmp; 44 | rot1.c() = u / tmp; 45 | } 46 | m.applyOnTheLeft(0,1,rot1); 47 | j_right->makeJacobi(m,0,1); 48 | *j_left = rot1 * j_right->transpose(); 49 | } 50 | 51 | } // end namespace internal 52 | 53 | } // end namespace Eigen 54 | 55 | #endif // EIGEN_REALSVD2X2_H 56 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/misc/lapacke_mangling.h: -------------------------------------------------------------------------------- 1 | #ifndef LAPACK_HEADER_INCLUDED 2 | #define LAPACK_HEADER_INCLUDED 3 | 4 | #ifndef LAPACK_GLOBAL 5 | #if defined(LAPACK_GLOBAL_PATTERN_LC) || defined(ADD_) 6 | #define LAPACK_GLOBAL(lcname,UCNAME) lcname##_ 7 | #elif defined(LAPACK_GLOBAL_PATTERN_UC) || defined(UPPER) 8 | #define LAPACK_GLOBAL(lcname,UCNAME) UCNAME 9 | #elif defined(LAPACK_GLOBAL_PATTERN_MC) || defined(NOCHANGE) 10 | #define LAPACK_GLOBAL(lcname,UCNAME) lcname 11 | #else 12 | #define 
LAPACK_GLOBAL(lcname,UCNAME) lcname##_ 13 | #endif 14 | #endif 15 | 16 | #endif 17 | 18 | -------------------------------------------------------------------------------- /src/application/tools/Eigen/src/plugins/MatrixCwiseUnaryOps.h: -------------------------------------------------------------------------------- 1 | // This file is part of Eigen, a lightweight C++ template library 2 | // for linear algebra. 3 | // 4 | // Copyright (C) 2008-2009 Gael Guennebaud 5 | // Copyright (C) 2006-2008 Benoit Jacob 6 | // 7 | // This Source Code Form is subject to the terms of the Mozilla 8 | // Public License v. 2.0. If a copy of the MPL was not distributed 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 10 | 11 | // This file is included into the body of the base classes supporting matrix specific coefficient-wise functions. 12 | // This include MatrixBase and SparseMatrixBase. 13 | 14 | 15 | typedef CwiseUnaryOp, const Derived> CwiseAbsReturnType; 16 | typedef CwiseUnaryOp, const Derived> CwiseAbs2ReturnType; 17 | typedef CwiseUnaryOp, const Derived> CwiseSqrtReturnType; 18 | typedef CwiseUnaryOp, const Derived> CwiseSignReturnType; 19 | typedef CwiseUnaryOp, const Derived> CwiseInverseReturnType; 20 | 21 | /// \returns an expression of the coefficient-wise absolute value of \c *this 22 | /// 23 | /// Example: \include MatrixBase_cwiseAbs.cpp 24 | /// Output: \verbinclude MatrixBase_cwiseAbs.out 25 | /// 26 | EIGEN_DOC_UNARY_ADDONS(cwiseAbs,absolute value) 27 | /// 28 | /// \sa cwiseAbs2() 29 | /// 30 | EIGEN_DEVICE_FUNC 31 | EIGEN_STRONG_INLINE const CwiseAbsReturnType 32 | cwiseAbs() const { return CwiseAbsReturnType(derived()); } 33 | 34 | /// \returns an expression of the coefficient-wise squared absolute value of \c *this 35 | /// 36 | /// Example: \include MatrixBase_cwiseAbs2.cpp 37 | /// Output: \verbinclude MatrixBase_cwiseAbs2.out 38 | /// 39 | EIGEN_DOC_UNARY_ADDONS(cwiseAbs2,squared absolute value) 40 | /// 41 | /// \sa cwiseAbs() 42 
| /// 43 | EIGEN_DEVICE_FUNC 44 | EIGEN_STRONG_INLINE const CwiseAbs2ReturnType 45 | cwiseAbs2() const { return CwiseAbs2ReturnType(derived()); } 46 | 47 | /// \returns an expression of the coefficient-wise square root of *this. 48 | /// 49 | /// Example: \include MatrixBase_cwiseSqrt.cpp 50 | /// Output: \verbinclude MatrixBase_cwiseSqrt.out 51 | /// 52 | EIGEN_DOC_UNARY_ADDONS(cwiseSqrt,square-root) 53 | /// 54 | /// \sa cwisePow(), cwiseSquare() 55 | /// 56 | EIGEN_DEVICE_FUNC 57 | inline const CwiseSqrtReturnType 58 | cwiseSqrt() const { return CwiseSqrtReturnType(derived()); } 59 | 60 | /// \returns an expression of the coefficient-wise signum of *this. 61 | /// 62 | /// Example: \include MatrixBase_cwiseSign.cpp 63 | /// Output: \verbinclude MatrixBase_cwiseSign.out 64 | /// 65 | EIGEN_DOC_UNARY_ADDONS(cwiseSign,sign function) 66 | /// 67 | EIGEN_DEVICE_FUNC 68 | inline const CwiseSignReturnType 69 | cwiseSign() const { return CwiseSignReturnType(derived()); } 70 | 71 | 72 | /// \returns an expression of the coefficient-wise inverse of *this. 
73 | /// 74 | /// Example: \include MatrixBase_cwiseInverse.cpp 75 | /// Output: \verbinclude MatrixBase_cwiseInverse.out 76 | /// 77 | EIGEN_DOC_UNARY_ADDONS(cwiseInverse,inverse) 78 | /// 79 | /// \sa cwiseProduct() 80 | /// 81 | EIGEN_DEVICE_FUNC 82 | inline const CwiseInverseReturnType 83 | cwiseInverse() const { return CwiseInverseReturnType(derived()); } 84 | 85 | 86 | -------------------------------------------------------------------------------- /src/application/tools/auto_download.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | using namespace std; 6 | 7 | bool requires(const char* name) { 8 | 9 | auto onnx_file = iLogger::format("%s.onnx", name); 10 | if (not iLogger::exists(onnx_file)) { 11 | INFO("Auto download %s", onnx_file.c_str()); 12 | system(iLogger::format("wget http://zifuture.com:1556/fs/25.shared/%s", onnx_file.c_str()).c_str()); 13 | } 14 | 15 | bool exists = iLogger::exists(onnx_file); 16 | if (not exists) { 17 | INFOE("Download %s failed", onnx_file.c_str()); 18 | } 19 | return exists; 20 | } -------------------------------------------------------------------------------- /src/application/tools/deepsort.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef DEEPSORT_HPP 3 | #define DEEPSORT_HPP 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace DeepSORT { 10 | 11 | struct Box{ 12 | float left, top, right, bottom; 13 | cv::Mat feature; 14 | 15 | Box() = default; 16 | Box(float left, float top, float right, float bottom):left(left), top(top), right(right), bottom(bottom){} 17 | const float width() const{return right - left;} 18 | const float height() const{return bottom - top;} 19 | const cv::Point2f center() const{return cv::Point2f((left+right)/2, (top+bottom)/2);} 20 | }; 21 | 22 | template 23 | inline Box convert_to_box(const _T& b){ 24 | return Box(b.left, b.top, b.right, b.bottom); 25 | } 26 | 27 | 
template 28 | inline cv::Rect convert_box_to_rect(const _T& b){ 29 | return cv::Rect(b.left, b.top, b.right-b.left, b.bottom-b.top); 30 | } 31 | 32 | enum class State : int{ 33 | Tentative = 1, 34 | Confirmed = 2, 35 | Deleted = 3 36 | }; 37 | 38 | struct TrackerConfig{ 39 | 40 | int max_age = 150; 41 | int nhit = 3; 42 | float distance_threshold = 100; 43 | int nbuckets = 0; 44 | bool has_feature = false; 45 | 46 | // kalman 47 | // /** 初始状态 **/ 48 | float initiate_state[8]; 49 | 50 | // /** 每一侦的运动量协方差,下一侦 = 当前帧 + 运动量 **/ 51 | float per_frame_motion[8]; 52 | 53 | // /** 测量噪声,把输入映射到测量空间中后的噪声 **/ 54 | float noise[4]; 55 | 56 | void set_initiate_state(const std::vector& values); 57 | void set_per_frame_motion(const std::vector& values); 58 | void set_noise(const std::vector& values); 59 | 60 | TrackerConfig(); 61 | }; 62 | 63 | typedef std::vector BBoxes; 64 | 65 | class TrackObject{ 66 | public: 67 | virtual int id() const = 0; 68 | virtual State state() const = 0; 69 | virtual Box predict_box() const = 0; 70 | virtual Box last_position() const = 0; 71 | virtual bool is_confirmed() const = 0; 72 | virtual int time_since_update() const = 0; 73 | virtual std::vector trace_line() const = 0; 74 | virtual int trace_size() const = 0; 75 | virtual Box& location(int time_since_update=0) = 0; 76 | virtual const cv::Mat& feature_bucket() const = 0; 77 | }; 78 | 79 | class Tracker{ 80 | public: 81 | virtual std::vector get_objects() = 0; 82 | virtual void update(const BBoxes& boxes) = 0; 83 | }; 84 | 85 | std::shared_ptr create_tracker( 86 | const TrackerConfig& config = TrackerConfig() 87 | ); 88 | 89 | } 90 | 91 | #endif // DEEPSORT_HPP -------------------------------------------------------------------------------- /src/application/tools/zmq_remote_show.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "zmq_remote_show.hpp" 3 | #include "zmq_u.hpp" 4 | #include 5 | 6 | using namespace std; 7 | 8 | class ZMQRemoteShowImpl 
: public ZMQRemoteShow{ 9 | public: 10 | bool listen(const char* url){ 11 | try{ 12 | context_.reset(new zmq::context_t()); 13 | socket_.reset(new zmq::socket_t(*context_.get(), zmq::socket_type::rep)); 14 | socket_->bind(url); 15 | return true; 16 | }catch(zmq::error_t err){ 17 | INFOE("ZMQ exception: %s", err.what()); 18 | socket_.reset(); 19 | context_.reset(); 20 | } 21 | return false; 22 | } 23 | 24 | virtual void post(const void* data, int size) override{ 25 | 26 | if(size < 1 || data == nullptr){ 27 | INFOE("Null data to post"); 28 | return; 29 | } 30 | 31 | zmq::message_t msg; 32 | socket_->recv(msg); 33 | socket_->send(zmq::message_t(data, size)); 34 | } 35 | 36 | virtual void post(const cv::Mat& image) override{ 37 | 38 | vector data; 39 | cv::imencode(".jpg", image, data); 40 | post(data.data(), data.size()); 41 | } 42 | 43 | private: 44 | shared_ptr context_; 45 | shared_ptr socket_; 46 | }; 47 | 48 | std::shared_ptr create_zmq_remote_show(const char* listen){ 49 | 50 | shared_ptr instance(new ZMQRemoteShowImpl()); 51 | if(!instance->listen(listen)){ 52 | instance.reset(); 53 | } 54 | return instance; 55 | } 56 | -------------------------------------------------------------------------------- /src/application/tools/zmq_remote_show.hpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef ZMQ_REMOTE_SHOW_HPP 4 | #define ZMQ_REMOTE_SHOW_HPP 5 | 6 | #include 7 | #include 8 | 9 | class ZMQRemoteShow{ 10 | public: 11 | virtual void post(const void* data, int size) = 0; 12 | virtual void post(const cv::Mat& image) = 0; 13 | }; 14 | 15 | std::shared_ptr create_zmq_remote_show(const char* listen="tcp://0.0.0.0:15556"); 16 | 17 | #endif // ZMQ_REMOTE_SHOW_HPP -------------------------------------------------------------------------------- /src/direct/direct_classifier.cpp: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * onnx导出项目在这里: 4 | * 
https://github.com/shouxieai/tensorrt-pro-sample-python-classifier 5 | */ 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | int direct_classifier(){ 12 | 13 | if(!iLogger::exists("classifier.onnx")){ 14 | INFOE("classifier.onnx not found, reference: https://github.com/shouxieai/tensorrt-pro-sample-python-classifier"); 15 | return -1; 16 | } 17 | 18 | TRT::set_device(0); 19 | if(!iLogger::exists("classifier.trtmodel")){ 20 | TRT::compile( 21 | TRT::Mode::FP32, 22 | 1, 23 | "classifier.onnx", 24 | "classifier.trtmodel" 25 | ); 26 | INFO("Compile done"); 27 | } 28 | 29 | auto engine = TRT::load_infer("classifier.trtmodel"); 30 | if(engine == nullptr){ 31 | INFOE("Engine is nullptr"); 32 | return -1; 33 | } 34 | 35 | auto image = cv::imread("dog.jpg"); 36 | float mean[] = {0.485, 0.456, 0.406}; 37 | float std[] = {0.229, 0.224, 0.225}; 38 | engine->input()->set_norm_mat(0, image, mean, std); 39 | engine->forward(); 40 | 41 | float* prob = engine->output()->cpu(); 42 | int num_classes = engine->output()->channel(); 43 | int predict_label = std::max_element(prob, prob + num_classes) - prob; 44 | auto labels = iLogger::split_string(iLogger::load_text_file("labels.imagenet.txt"), "\n"); 45 | auto predict_name = labels[predict_label]; 46 | float confidence = prob[predict_label]; 47 | 48 | INFO("Predict: %s, confidence = %f, label = %d", predict_name.c_str(), confidence, predict_label); 49 | return 0; 50 | } -------------------------------------------------------------------------------- /src/direct/direct_mae.cpp: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * onnx导出项目在这里: 4 | * https://github.com/shouxieai/MAE-pytorch 5 | * 实验mae的特征提取功能 6 | * Masked Autoencoders Are Scalable Vision Learners 7 | * 8 | * onnx下载: 9 | * 链接:https://pan.baidu.com/s/1r0e82KQj99ue7sNBawNvUQ 10 | * 提取码:sxai 11 | */ 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | int direct_mae(){ 18 | 19 | TRT::set_device(0); 20 | TRT::compile( 21 | 
TRT::Mode::FP32, 22 | 1, 23 | "mae.onnx", "mae.trtmodel" 24 | ); 25 | INFO("Done"); 26 | 27 | auto engine = TRT::load_infer("mae.trtmodel"); 28 | auto image = cv::imread("test.jpg"); 29 | cv::cvtColor(image, image, cv::COLOR_BGR2RGB); 30 | 31 | float mean[] = {0.485, 0.456, 0.406}; 32 | float std[] = {0.229, 0.224, 0.225}; 33 | engine->input()->set_norm_mat(0, image, mean, std); 34 | engine->forward(); 35 | 36 | std::cout << engine->output()->shape_string() << std::endl; 37 | engine->output()->save_to_file("test.binary"); 38 | return 0; 39 | } -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int app_yolo(); 8 | int app_yolo_gpuptr(); 9 | int app_alphapose(); 10 | int app_fall_recognize(); 11 | int app_retinaface(); 12 | int app_arcface(); 13 | int app_arcface_video(); 14 | int app_arcface_tracker(); 15 | int app_scrfd(); 16 | int app_high_performance(); 17 | int app_lesson(); 18 | int app_plugin(); 19 | int app_yolo_fast(); 20 | int app_centernet(); 21 | int app_dbface(); 22 | int app_bert(); 23 | int direct_yolo(); 24 | int direct_unet(); 25 | int direct_mae(); 26 | int direct_classifier(); 27 | int test_warpaffine(); 28 | int test_yolo_map(); 29 | 30 | int main(int argc, char** argv){ 31 | 32 | const char* method = "yolo"; 33 | if(argc > 1){ 34 | method = argv[1]; 35 | } 36 | 37 | if(strcmp(method, "yolo") == 0){ 38 | app_yolo(); 39 | }else if(strcmp(method, "yolo_gpuptr") == 0){ 40 | app_yolo_gpuptr(); 41 | }else if(strcmp(method, "yolo_fast") == 0){ 42 | app_yolo_fast(); 43 | }else if(strcmp(method, "dyolo") == 0){ 44 | direct_yolo(); 45 | }else if(strcmp(method, "dunet") == 0){ 46 | direct_unet(); 47 | }else if(strcmp(method, "dmae") == 0){ 48 | direct_mae(); 49 | }else if(strcmp(method, "dclassifier") == 0){ 50 | direct_classifier(); 51 | }else if(strcmp(method, "bert") 
== 0){ 52 | app_bert(); 53 | }else if(strcmp(method, "centernet") == 0){ 54 | app_centernet(); 55 | }else if(strcmp(method, "dbface") == 0){ 56 | app_dbface(); 57 | }else if(strcmp(method, "alphapose") == 0){ 58 | app_alphapose(); 59 | }else if(strcmp(method, "fall_recognize") == 0){ 60 | app_fall_recognize(); 61 | }else if(strcmp(method, "retinaface") == 0){ 62 | app_retinaface(); 63 | }else if(strcmp(method, "arcface") == 0){ 64 | app_arcface(); 65 | }else if(strcmp(method, "arcface_video") == 0){ 66 | app_arcface_video(); 67 | }else if(strcmp(method, "arcface_tracker") == 0){ 68 | app_arcface_tracker(); 69 | }else if(strcmp(method, "scrfd") == 0){ 70 | app_scrfd(); 71 | }else if(strcmp(method, "test_warpaffine") == 0){ 72 | test_warpaffine(); 73 | }else if(strcmp(method, "test_yolo_map") == 0){ 74 | test_yolo_map(); 75 | }else if(strcmp(method, "high_perf") == 0){ 76 | app_high_performance(); 77 | }else if(strcmp(method, "lesson") == 0){ 78 | app_lesson(); 79 | }else if(strcmp(method, "plugin") == 0){ 80 | app_plugin(); 81 | }else{ 82 | printf("Unknow method: %s\n", method); 83 | printf( 84 | "Help: \n" 85 | " ./pro method[yolo、alphapose、fall、retinaface、arcface、arcface_video、arcface_tracker]\n" 86 | "\n" 87 | " ./pro yolo\n" 88 | " ./pro alphapose\n" 89 | " ./pro fall\n" 90 | ); 91 | } 92 | return 0; 93 | } 94 | -------------------------------------------------------------------------------- /src/tensorRT/builder/trt_builder.hpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef TRT_BUILDER_HPP 4 | #define TRT_BUILDER_HPP 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | namespace TRT { 12 | 13 | typedef std::function& files, std::shared_ptr& tensor)> Int8Process; 14 | typedef std::function(const std::string& name, const std::vector& shape)> LayerHookFuncReshape; 15 | 16 | enum class ModelSourceType : int{ 17 | OnnX, 18 | OnnXData 19 | }; 20 | 21 | class ModelSource { 22 | public: 23 | 
ModelSource() = default; 24 | ModelSource(const std::string& onnxmodel); 25 | ModelSource(const char* onnxmodel); 26 | ModelSourceType type() const; 27 | std::string onnxmodel() const; 28 | std::string descript() const; 29 | const void* onnx_data() const; 30 | size_t onnx_data_size() const; 31 | 32 | static ModelSource onnx(const std::string& file){ 33 | ModelSource output; 34 | output.onnxmodel_ = file; 35 | output.type_ = ModelSourceType::OnnX; 36 | return output; 37 | } 38 | 39 | static ModelSource onnx_data(const void* ptr, size_t size){ 40 | ModelSource output; 41 | output.onnx_data_ = ptr; 42 | output.onnx_data_size_ = size; 43 | output.type_ = ModelSourceType::OnnXData; 44 | return output; 45 | } 46 | 47 | private: 48 | std::string onnxmodel_; 49 | const void* onnx_data_ = nullptr; 50 | size_t onnx_data_size_ = 0; 51 | ModelSourceType type_; 52 | }; 53 | 54 | enum class CompileOutputType : int{ 55 | File, 56 | Memory 57 | }; 58 | 59 | class CompileOutput{ 60 | public: 61 | CompileOutput(CompileOutputType type = CompileOutputType::Memory); 62 | CompileOutput(const std::string& file); 63 | CompileOutput(const char* file); 64 | void set_data(const std::vector& data); 65 | void set_data(std::vector&& data); 66 | 67 | const std::vector& data() const{return data_;}; 68 | CompileOutputType type() const{return type_;} 69 | std::string file() const{return file_;} 70 | 71 | private: 72 | CompileOutputType type_ = CompileOutputType::Memory; 73 | std::vector data_; 74 | std::string file_; 75 | }; 76 | 77 | class InputDims { 78 | public: 79 | InputDims() = default; 80 | 81 | // 当为-1时,保留导入时的网络结构尺寸 82 | InputDims(const std::initializer_list& dims); 83 | InputDims(const std::vector& dims); 84 | 85 | const std::vector& dims() const; 86 | 87 | private: 88 | std::vector dims_; 89 | }; 90 | 91 | enum class Mode : int { 92 | FP32, 93 | FP16, 94 | INT8 95 | }; 96 | 97 | const char* mode_string(Mode type); 98 | 99 | void set_layer_hook_reshape(const LayerHookFuncReshape& func); 
100 | 101 | /** 当处于INT8模式时,int8process必须制定 102 | int8ImageDirectory和int8EntropyCalibratorFile指定一个即可 103 | 如果初次生成,指定了int8EntropyCalibratorFile,calibrator会保存到int8EntropyCalibratorFile指定的文件 104 | 如果已经生成过,指定了int8EntropyCalibratorFile,calibrator会从int8EntropyCalibratorFile指定的文件加载,而不是 105 | 从int8ImageDirectory读取图片再重新生成 106 | 当处于FP32或者FP16时,int8process、int8ImageDirectory、int8EntropyCalibratorFile都不需要指定 107 | 对于嵌入式设备,请把maxWorkspaceSize设置小一点,比如128MB = 1ul << 27 108 | **/ 109 | bool compile( 110 | Mode mode, 111 | unsigned int maxBatchSize, 112 | const ModelSource& source, 113 | const CompileOutput& saveto, 114 | const std::vector inputsDimsSetup = {}, 115 | Int8Process int8process = nullptr, 116 | const std::string& int8ImageDirectory = "", 117 | const std::string& int8EntropyCalibratorFile = "", 118 | const size_t maxWorkspaceSize = 1ul << 30 // 1ul << 30 = 1GB 119 | ); 120 | }; 121 | 122 | #endif //TRT_BUILDER_HPP -------------------------------------------------------------------------------- /src/tensorRT/common/cuda_tools.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * 系统关于CUDA的功能函数 4 | */ 5 | 6 | 7 | #include "cuda_tools.hpp" 8 | 9 | namespace CUDATools{ 10 | bool check_driver(CUresult e, const char* call, int line, const char *file) { 11 | if (e != CUDA_SUCCESS) { 12 | 13 | const char* message = nullptr; 14 | const char* name = nullptr; 15 | cuGetErrorString(e, &message); 16 | cuGetErrorName(e, &name); 17 | INFOE("CUDA Driver error %s # %s, code = %s [ %d ] in file %s:%d", call, message, name, e, file, line); 18 | return false; 19 | } 20 | return true; 21 | } 22 | 23 | bool check_runtime(cudaError_t e, const char* call, int line, const char *file){ 24 | if (e != cudaSuccess) { 25 | INFOE("CUDA Runtime error %s # %s, code = %s [ %d ] in file %s:%d", call, cudaGetErrorString(e), cudaGetErrorName(e), e, file, line); 26 | return false; 27 | } 28 | return true; 29 | } 30 | 31 | bool check_device_id(int device_id){ 32 | 
int device_count = -1; 33 | checkCudaRuntime(cudaGetDeviceCount(&device_count)); 34 | if(device_id < 0 || device_id >= device_count){ 35 | INFOE("Invalid device id: %d, count = %d", device_id, device_count); 36 | return false; 37 | } 38 | return true; 39 | } 40 | 41 | int current_device_id(){ 42 | int device_id = 0; 43 | checkCudaRuntime(cudaGetDevice(&device_id)); 44 | return device_id; 45 | } 46 | 47 | dim3 grid_dims(int numJobs) { 48 | int numBlockThreads = numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS; 49 | return dim3(((numJobs + numBlockThreads - 1) / (float)numBlockThreads)); 50 | } 51 | 52 | dim3 block_dims(int numJobs) { 53 | return numJobs < GPU_BLOCK_THREADS ? numJobs : GPU_BLOCK_THREADS; 54 | } 55 | 56 | std::string device_capability(int device_id){ 57 | cudaDeviceProp prop; 58 | checkCudaRuntime(cudaGetDeviceProperties(&prop, device_id)); 59 | return iLogger::format("%d.%d", prop.major, prop.minor); 60 | } 61 | 62 | std::string device_name(int device_id){ 63 | cudaDeviceProp prop; 64 | checkCudaRuntime(cudaGetDeviceProperties(&prop, device_id)); 65 | return prop.name; 66 | } 67 | 68 | std::string device_description(){ 69 | 70 | cudaDeviceProp prop; 71 | size_t free_mem, total_mem; 72 | int device_id = 0; 73 | 74 | checkCudaRuntime(cudaGetDevice(&device_id)); 75 | checkCudaRuntime(cudaGetDeviceProperties(&prop, device_id)); 76 | checkCudaRuntime(cudaMemGetInfo(&free_mem, &total_mem)); 77 | 78 | return iLogger::format( 79 | "[ID %d]<%s>[arch %d.%d][GMEM %.2f GB/%.2f GB]", 80 | device_id, prop.name, prop.major, prop.minor, 81 | free_mem / 1024.0f / 1024.0f / 1024.0f, 82 | total_mem / 1024.0f / 1024.0f / 1024.0f 83 | ); 84 | } 85 | 86 | AutoDevice::AutoDevice(int device_id){ 87 | 88 | cudaGetDevice(&old_); 89 | checkCudaRuntime(cudaSetDevice(device_id)); 90 | } 91 | 92 | AutoDevice::~AutoDevice(){ 93 | checkCudaRuntime(cudaSetDevice(old_)); 94 | } 95 | } -------------------------------------------------------------------------------- 
/src/tensorRT/common/cuda_tools.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CUDA_TOOLS_HPP 2 | #define CUDA_TOOLS_HPP 3 | 4 | /* 5 | * 系统关于CUDA的功能函数 6 | */ 7 | 8 | #include 9 | #include 10 | #include "ilogger.hpp" 11 | 12 | #define GPU_BLOCK_THREADS 512 13 | 14 | 15 | #define KernelPositionBlock \ 16 | int position = (blockDim.x * blockIdx.x + threadIdx.x); \ 17 | if (position >= (edge)) return; 18 | 19 | 20 | #define checkCudaDriver(call) CUDATools::check_driver(call, #call, __LINE__, __FILE__) 21 | #define checkCudaRuntime(call) CUDATools::check_runtime(call, #call, __LINE__, __FILE__) 22 | 23 | #define checkCudaKernel(...) \ 24 | __VA_ARGS__; \ 25 | do{cudaError_t cudaStatus = cudaPeekAtLastError(); \ 26 | if (cudaStatus != cudaSuccess){ \ 27 | INFOE("launch failed: %s", cudaGetErrorString(cudaStatus)); \ 28 | }} while(0); 29 | 30 | 31 | #define Assert(op) \ 32 | do{ \ 33 | bool cond = !(!(op)); \ 34 | if(!cond){ \ 35 | INFOF("Assert failed, " #op); \ 36 | } \ 37 | }while(false) 38 | 39 | 40 | struct CUctx_st; 41 | struct CUstream_st; 42 | 43 | typedef CUstream_st* ICUStream; 44 | typedef CUctx_st* ICUContext; 45 | typedef void* ICUDeviceptr; 46 | typedef int DeviceID; 47 | 48 | namespace CUDATools{ 49 | bool check_driver(CUresult e, const char* call, int iLine, const char *szFile); 50 | bool check_runtime(cudaError_t e, const char* call, int iLine, const char *szFile); 51 | bool check_device_id(int device_id); 52 | int current_device_id(); 53 | 54 | dim3 grid_dims(int numJobs); 55 | dim3 block_dims(int numJobs); 56 | 57 | // return 8.6 etc. 
58 | std::string device_capability(int device_id); 59 | std::string device_name(int device_id); 60 | std::string device_description(); 61 | 62 | class AutoDevice{ 63 | public: 64 | AutoDevice(int device_id = 0); 65 | virtual ~AutoDevice(); 66 | 67 | private: 68 | int old_ = -1; 69 | }; 70 | } 71 | 72 | 73 | #endif // CUDA_TOOLS_HPP -------------------------------------------------------------------------------- /src/tensorRT/common/preprocess_kernel.cuh: -------------------------------------------------------------------------------- 1 | #ifndef PREPROCESS_KERNEL_CUH 2 | #define PREPROCESS_KERNEL_CUH 3 | 4 | #include "cuda_tools.hpp" 5 | 6 | namespace CUDAKernel{ 7 | 8 | enum class NormType : int{ 9 | None = 0, 10 | MeanStd = 1, 11 | AlphaBeta = 2 12 | }; 13 | 14 | enum class ChannelType : int{ 15 | None = 0, 16 | Invert = 1 17 | }; 18 | 19 | struct Norm{ 20 | float mean[3]; 21 | float std[3]; 22 | float alpha, beta; 23 | NormType type = NormType::None; 24 | ChannelType channel_type = ChannelType::None; 25 | 26 | // out = (x * alpha - mean) / std 27 | static Norm mean_std(const float mean[3], const float std[3], float alpha = 1/255.0f, ChannelType channel_type=ChannelType::None); 28 | 29 | // out = x * alpha + beta 30 | static Norm alpha_beta(float alpha, float beta = 0, ChannelType channel_type=ChannelType::None); 31 | 32 | // None 33 | static Norm None(); 34 | }; 35 | 36 | void resize_bilinear_and_normalize( 37 | uint8_t* src, int src_line_size, int src_width, int src_height, float* dst, int dst_width, int dst_height, 38 | const Norm& norm, 39 | cudaStream_t stream); 40 | 41 | void warp_affine_bilinear_and_normalize_plane( 42 | uint8_t* src, int src_line_size, int src_width, int src_height, 43 | float* dst , int dst_width, int dst_height, 44 | float* matrix_2_3, uint8_t const_value, const Norm& norm, 45 | cudaStream_t stream); 46 | 47 | void warp_affine_bilinear_and_normalize_focus( 48 | uint8_t* src, int src_line_size, int src_width, int src_height, 49 | float* 
dst , int dst_width, int dst_height, 50 | float* matrix_2_3, uint8_t const_value, const Norm& norm, 51 | cudaStream_t stream); 52 | 53 | // 可以用来图像校正、图像旋转等等 (测试比cpu快10倍以上) 54 | // 使用示范: 55 | // float* matrix_3_3 = nullptr; 56 | // size_t matrix_bytes = 3 * 3 * sizeof(f32); 57 | // checkCudaRuntime(cudaMalloc(&matrix_3_3, matrix_bytes)); 58 | // checkCudaRuntime(cudaMemset(matrix_3_3, 0, matrix_bytes)); 59 | // 60 | // #左上、右上、右下、左下 原图像四个点的坐标 61 | // cv::Point2f src_points[] = { 62 | // vctvctPoints[nImageIdx][0], 63 | // vctvctPoints[nImageIdx][1], 64 | // vctvctPoints[nImageIdx][2], 65 | // vctvctPoints[nImageIdx][3]}; 66 | // 67 | // #左上、右上、左下、右下(Z 字形排列) 目标图像四个点的坐标 68 | // cv::Point2f dst_points[] = { 69 | // cv::Point2f(0, 0), 70 | // cv::Point2f(nw-1, 0), 71 | // cv::Point2f(0, nh-1), 72 | // cv::Point2f(nw-1, nh-1) }; 73 | // 利用opencv 得到变换矩阵 dst -> src 的 矩阵 74 | // cv::Mat Perspect_Matrix = cv::getPerspectiveTransform(dst_points, src_points); 75 | // Perspect_Matrix.convertTo(Perspect_Matrix, CV_32FC1); 76 | // 拷贝到 gpu 77 | // checkCudaRuntime(cudaMemcpy(matrix_3_3, Perspect_Matrix.data, matrix_bytes, cudaMemcpyHostToDevice)); 78 | void warp_perspective( 79 | uint8_t* src, int src_line_size, int src_width, int src_height, float* dst, int dst_width, int dst_height, 80 | float* matrix_3_3, uint8_t const_value, const Norm& norm, cudaStream_t stream 81 | ); 82 | 83 | void norm_feature( 84 | float* feature_array, int num_feature, int feature_length, 85 | cudaStream_t stream 86 | ); 87 | 88 | void convert_nv12_to_bgr_invoke( 89 | const uint8_t* y, const uint8_t* uv, int width, int height, 90 | int linesize, uint8_t* dst, 91 | cudaStream_t stream); 92 | }; 93 | 94 | #endif // PREPROCESS_KERNEL_CUH -------------------------------------------------------------------------------- /src/tensorRT/infer/trt_infer.hpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef TRT_INFER_HPP 4 | #define TRT_INFER_HPP 5 | 6 | #include 7 
| #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace TRT { 13 | 14 | class Infer { 15 | public: 16 | virtual void forward(bool sync = true) = 0; 17 | virtual int get_max_batch_size() = 0; 18 | virtual void set_stream(CUStream stream) = 0; 19 | virtual CUStream get_stream() = 0; 20 | virtual void synchronize() = 0; 21 | virtual size_t get_device_memory_size() = 0; 22 | virtual std::shared_ptr get_workspace() = 0; 23 | virtual std::shared_ptr input (int index = 0) = 0; 24 | virtual std::shared_ptr output(int index = 0) = 0; 25 | virtual std::shared_ptr tensor(const std::string& name) = 0; 26 | virtual std::string get_input_name (int index = 0) = 0; 27 | virtual std::string get_output_name(int index = 0) = 0; 28 | virtual bool is_output_name(const std::string& name) = 0; 29 | virtual bool is_input_name (const std::string& name) = 0; 30 | virtual int num_output() = 0; 31 | virtual int num_input() = 0; 32 | virtual void print() = 0; 33 | virtual int device() = 0; 34 | virtual void set_input (int index, std::shared_ptr tensor) = 0; 35 | virtual void set_output(int index, std::shared_ptr tensor) = 0; 36 | virtual std::shared_ptr> serial_engine() = 0; 37 | }; 38 | yb7h 39 | struct DeviceMemorySummary { 40 | size_t total; 41 | size_t available; 42 | }; 43 | 44 | DeviceMemorySummary get_current_device_summary(); 45 | int get_device_count(); 46 | int get_device(); 47 | 48 | void set_device(int device_id); 49 | std::shared_ptr load_infer_from_memory(const void* pdata, size_t size); 50 | std::shared_ptr load_infer(const std::string& file); 51 | bool init_nv_plugins(); 52 | 53 | }; //TRTInfer 54 | 55 | 56 | #endif //TRT_INFER_HPP -------------------------------------------------------------------------------- /src/tensorRT/onnx/onnx_pb.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) ONNX Project Contributors. 2 | // Licensed under the MIT license. 
3 | 4 | #ifndef ONNX_ONNX_PB_H 5 | #define ONNX_ONNX_PB_H 6 | 7 | // Defines ONNX_EXPORT and ONNX_IMPORT. On Windows, this corresponds to 8 | // different declarations (dllexport and dllimport). On Linux/Mac, it just 9 | // resolves to the same "default visibility" setting. 10 | #if defined(_MSC_VER) 11 | #if defined(ONNX_BUILD_SHARED_LIBS) || defined(ONNX_BUILD_MAIN_LIB) 12 | #define ONNX_EXPORT __declspec(dllexport) 13 | #define ONNX_IMPORT __declspec(dllimport) 14 | #else 15 | #define ONNX_EXPORT 16 | #define ONNX_IMPORT 17 | #endif 18 | #else 19 | #if defined(__GNUC__) 20 | #define ONNX_EXPORT __attribute__((__visibility__("default"))) 21 | #else 22 | #define ONNX_EXPORT 23 | #endif 24 | #define ONNX_IMPORT ONNX_EXPORT 25 | #endif 26 | 27 | // ONNX_API is a macro that, depends on whether you are building the 28 | // main ONNX library or not, resolves to either ONNX_EXPORT or 29 | // ONNX_IMPORT. 30 | // 31 | // This is used in e.g. ONNX's protobuf files: when building the main library, 32 | // it is defined as ONNX_EXPORT to fix a Windows global-variable-in-dll 33 | // issue, and for anyone dependent on ONNX it will be defined as 34 | // ONNX_IMPORT. ONNX_BUILD_MAIN_LIB can also be set when being built 35 | // statically if ONNX is being linked into a shared library that wants 36 | // to export the ONNX APIs and classes. 37 | // 38 | // More details on Windows dllimport / dllexport can be found at 39 | // https://msdn.microsoft.com/en-us/library/3y1sfaz2.aspx 40 | // 41 | // This solution is similar to 42 | // https://github.com/pytorch/pytorch/blob/master/caffe2/core/common.h 43 | #define ONNX_API 44 | #include "onnx-ml.pb.h" 45 | 46 | #endif // ! 
ONNX_ONNX_PB_H 47 | -------------------------------------------------------------------------------- /src/tensorRT/onnx/readme.md: -------------------------------------------------------------------------------- 1 | # ONNX 2 | - 这几个文件来自于对ONNX的编译后提取的结果,由protoc生成的cpp 3 | - https://github.com/onnx/onnx -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/LoopHelpers.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "LoopHelpers.hpp" 6 | #include "onnx2trt_utils.hpp" 7 | 8 | namespace onnx2trt 9 | { 10 | 11 | nvinfer1::ITensor* addLoopCounter(IImporterContext* ctx, nvinfer1::ILoop* loop, int32_t initial) 12 | { 13 | nvinfer1::ITensor* initialTensor = addConstantScalar(ctx, initial, ::onnx::TensorProto::INT32, nvinfer1::Dims{1, 1})->getOutput(0); 14 | nvinfer1::ITensor* one = addConstantScalar(ctx, 1, ::onnx::TensorProto::INT32, nvinfer1::Dims{1, 1})->getOutput(0); 15 | 16 | auto counter = loop->addRecurrence(*initialTensor); 17 | nvinfer1::ITensor* addOne = ctx->network()->addElementWise(*counter->getOutput(0), *one, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0); 18 | counter->setInput(1, *addOne); 19 | return counter->getOutput(0); 20 | } 21 | 22 | } // namespace onnx2trt 23 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/LoopHelpers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | #include "ImporterContext.hpp" 10 | 11 | namespace onnx2trt 12 | { 13 | 14 | nvinfer1::ITensor* addLoopCounter(IImporterContext* ctx, nvinfer1::ILoop* loop, int32_t initial = 0); 15 | 16 | } // namespace onnx2trt 17 | -------------------------------------------------------------------------------- 
/src/tensorRT/onnx_parser/ModelImporter.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "ImporterContext.hpp" 8 | #include "NvInferPlugin.h" 9 | #include "NvOnnxParser.h" 10 | #include "builtin_op_importers.hpp" 11 | #include "utils.hpp" 12 | 13 | namespace onnx2trt 14 | { 15 | 16 | Status parseGraph(IImporterContext* ctx, const ::onnx::GraphProto& graph, bool deserializingINetwork = false, int* currentNode = nullptr); 17 | 18 | class ModelImporter : public nvonnxparser::IParser 19 | { 20 | protected: 21 | string_map _op_importers; 22 | virtual Status importModel(::onnx::ModelProto const& model); 23 | 24 | private: 25 | ImporterContext _importer_ctx; 26 | std::list<::onnx::ModelProto> _onnx_models; // Needed for ownership of weights 27 | int _current_node; 28 | std::vector _errors; 29 | std::vector _input_dims; 30 | 31 | public: 32 | ModelImporter(nvinfer1::INetworkDefinition* network, nvinfer1::ILogger* logger, const std::vector& input_dims) 33 | : _op_importers(getBuiltinOpImporterMap()) 34 | , _importer_ctx(network, logger) 35 | , _input_dims(input_dims) 36 | { 37 | } 38 | bool parseWithWeightDescriptors(void const* serialized_onnx_model, size_t serialized_onnx_model_size) override; 39 | bool parse(void const* serialized_onnx_model, size_t serialized_onnx_model_size, const char* model_path = nullptr) override; 40 | bool supportsModel(void const* serialized_onnx_model, size_t serialized_onnx_model_size, 41 | SubGraphCollection_t& sub_graph_collection, const char* model_path = nullptr) override; 42 | 43 | bool supportsOperator(const char* op_name) const override; 44 | void destroy() override 45 | { 46 | delete this; 47 | } 48 | // virtual void registerOpImporter(std::string op, 49 | // NodeImporter const &node_importer) override { 50 | // // Note: This allows existing importers to be replaced 51 | // _op_importers[op] = node_importer; 
52 | //} 53 | // virtual Status const &setInput(const char *name, 54 | // nvinfer1::ITensor *input) override; 55 | // virtual Status const& setOutput(const char* name, nvinfer1::ITensor** output) override; 56 | int getNbErrors() const override 57 | { 58 | return _errors.size(); 59 | } 60 | nvonnxparser::IParserError const* getError(int index) const override 61 | { 62 | assert(0 <= index && index < (int) _errors.size()); 63 | return &_errors[index]; 64 | } 65 | void clearErrors() override 66 | { 67 | _errors.clear(); 68 | } 69 | 70 | //...LG: Move the implementation to .cpp 71 | bool parseFromFile(const char* onnxModelFile, int verbosity) override; 72 | bool parseFromData(const void* onnx_data, size_t size, int verbosity) override; 73 | }; 74 | 75 | } // namespace onnx2trt 76 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/NvOnnxParser.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "NvOnnxParser.h" 6 | #include "ModelImporter.hpp" 7 | 8 | extern "C" void* createNvOnnxParser_INTERNAL(void* network_, void* logger_, int version, const std::vector& input_dims) 9 | { 10 | auto network = static_cast(network_); 11 | auto logger = static_cast(logger_); 12 | return new onnx2trt::ModelImporter(network, logger, input_dims); 13 | } 14 | 15 | extern "C" int getNvOnnxParserVersion() 16 | { 17 | return NV_ONNX_PARSER_VERSION; 18 | } -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/OnnxAttrs.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "ImporterContext.hpp" 13 | 14 | class OnnxAttrs 15 | { 16 | template 17 | using string_map = std::unordered_map; 18 | typedef 
string_map<::onnx::AttributeProto const*> AttrMap; 19 | AttrMap _attrs; 20 | onnx2trt::IImporterContext* mCtx; 21 | 22 | public: 23 | explicit OnnxAttrs(::onnx::NodeProto const& onnx_node, onnx2trt::IImporterContext* ctx) 24 | : mCtx{ctx} 25 | { 26 | for (auto const& attr : onnx_node.attribute()) 27 | { 28 | _attrs.insert({attr.name(), &attr}); 29 | } 30 | } 31 | 32 | bool count(const std::string& key) const 33 | { 34 | return _attrs.count(key); 35 | } 36 | 37 | ::onnx::AttributeProto const* at(std::string key) const 38 | { 39 | if (!_attrs.count(key)) 40 | { 41 | throw std::out_of_range("Attribute not found: " + key); 42 | } 43 | return _attrs.at(key); 44 | } 45 | 46 | ::onnx::AttributeProto::AttributeType type(const std::string& key) const 47 | { 48 | return this->at(key)->type(); 49 | } 50 | 51 | 52 | template 53 | T get(const std::string& key) const; 54 | 55 | template 56 | T get(const std::string& key, T const& default_value) const 57 | { 58 | return _attrs.count(key) ? this->get(key) : default_value; 59 | } 60 | }; 61 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/RNNHelpers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "TensorOrWeights.hpp" 12 | #include "ImporterContext.hpp" 13 | 14 | namespace onnx2trt 15 | { 16 | 17 | nvinfer1::ITensor* addRNNInput(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, std::vector& inputs, const std::string& direction); 18 | 19 | // Zeros out invalid timesteps in toMask. 
maxLen must be provided if reverse is true 20 | nvinfer1::ITensor* clearMissingSequenceElements(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, nvinfer1::ITensor* toMask, nvinfer1::ITensor* maxLen, bool reverse = false, nvinfer1::ITensor* counter = nullptr); 21 | 22 | // Returns a bool tensor which is true during valid timesteps 23 | nvinfer1::ITensor* getRaggedMask(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, nvinfer1::ITensor* maxLen = nullptr, bool reverse = false, nvinfer1::ITensor* counter = nullptr); 24 | 25 | // Selects between prevH and Ht to forward previous hidden state through invalid timesteps 26 | nvinfer1::ITensor* maskRNNHidden(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, nvinfer1::ITensor* prevH, nvinfer1::ITensor* Ht, nvinfer1::ITensor* maxLen = nullptr, bool reverse = false, nvinfer1::ITensor* counter = nullptr); 27 | 28 | // Splits a bidirectional hidden state into forward and reverse passes, masks each using maskRNNHidden, then concatenates 29 | nvinfer1::ITensor* maskBidirRNNHidden(IImporterContext* ctx, const ::onnx::NodeProto& node, nvinfer1::ILoop* loop, nvinfer1::ITensor* seqLens, nvinfer1::ITensor* maxLen, nvinfer1::ITensor* Ht1, nvinfer1::ITensor* Ht, nvinfer1::ITensor* singlePassShape); 30 | 31 | } // namespace onnx2trt 32 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/ShapedWeights.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | namespace onnx2trt 11 | { 12 | 13 | class ShapedWeights 14 | { 15 | public: 16 | using DataType = int32_t; 17 | 18 | static ShapedWeights empty(DataType type); 19 | 20 | ShapedWeights(); 21 | 22 | explicit 
ShapedWeights(DataType type, void* values, nvinfer1::Dims shape_); 23 | 24 | size_t count() const; 25 | 26 | size_t size_bytes() const; 27 | 28 | const char* getName() const; 29 | 30 | void setName(const char* name); 31 | 32 | explicit operator bool() const; 33 | 34 | operator nvinfer1::Weights() const; 35 | 36 | template 37 | T& at(size_t index) 38 | { 39 | assert(index >= 0 && (index * sizeof(T)) < size_bytes()); 40 | return static_cast(values)[index]; 41 | } 42 | 43 | template 44 | const T& at(size_t index) const 45 | { 46 | assert(index >= 0 && (index * sizeof(T)) < size_bytes()); 47 | return static_cast(values)[index]; 48 | } 49 | 50 | public: 51 | DataType type; 52 | void* values; 53 | nvinfer1::Dims shape; 54 | const char* name{}; 55 | }; 56 | 57 | class IImporterContext; 58 | bool transposeWeights(ShapedWeights const& weights, nvinfer1::Permutation const& perm, ShapedWeights* result, IImporterContext* ctx); 59 | 60 | } // namespace onnx2trt 61 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/builtin_op_importers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "onnx2trt.hpp" 8 | #include "utils.hpp" 9 | 10 | namespace onnx2trt 11 | { 12 | 13 | string_map& getBuiltinOpImporterMap(); 14 | 15 | } // namespace onnx2trt 16 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnx2trt.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "NvOnnxParser.h" 8 | #include "ShapedWeights.hpp" 9 | #include "Status.hpp" 10 | #include "TensorOrWeights.hpp" 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace onnx2trt 21 | { 22 | 23 | 
class IImporterContext; 24 | 25 | // TODO: Find ABI-safe alternative approach for this: 26 | // Can't use std::vector 27 | // Can't use ::onnx::NodeProto 28 | // Can't use std::function 29 | typedef ValueOrStatus> NodeImportResult; 30 | typedef std::function& inputs)> 32 | NodeImporter; 33 | 34 | template 35 | using StringMap = std::unordered_map; 36 | 37 | class IImporterContext 38 | { 39 | public: 40 | virtual nvinfer1::INetworkDefinition* network() = 0; 41 | virtual StringMap& tensors() = 0; 42 | virtual StringMap& tensorLocations() = 0; 43 | virtual StringMap& tensorRangeMins() = 0; 44 | virtual StringMap& tensorRangeMaxes() = 0; 45 | virtual StringMap& layerPrecisions() = 0; 46 | virtual std::unordered_set& unsupportedShapeTensors() = 0; 47 | virtual StringMap& loopTensors() = 0; 48 | virtual void setOnnxFileLocation(std::string location) = 0; 49 | virtual std::string getOnnxFileLocation() = 0; 50 | virtual void registerTensor(TensorOrWeights tensor, const std::string& basename) = 0; 51 | virtual void registerLayer(nvinfer1::ILayer* layer, const std::string& basename) = 0; 52 | virtual ShapedWeights createTempWeights(ShapedWeights::DataType type, nvinfer1::Dims shape, uint8_t value = 0) = 0; 53 | virtual int64_t getOpsetVersion(const char* domain = "") const = 0; 54 | virtual nvinfer1::ILogger& logger() = 0; 55 | virtual bool hasError() const = 0; 56 | virtual nvinfer1::IErrorRecorder* getErrorRecorder() const = 0; 57 | 58 | protected: 59 | virtual ~IImporterContext() 60 | { 61 | } 62 | }; 63 | 64 | } // namespace onnx2trt 65 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnx2trt_common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #if NV_TENSORRT_MAJOR < 4 11 | namespace nvinfer1 12 | { 13 | 14 | enum class PluginFormat : uint8_t 15 | { 16 | 
kNCHW = 0, //!< NCHW 17 | kNC2HW2 = 1, //!< NCHW with 2-element packed channels 18 | kNHWC8 = 2 //!< NHWC with 8-element packed channels (C 19 | //! must be a multiple of 8) 20 | }; 21 | // from NvInfer.h 22 | class IPluginExt : public IPlugin 23 | { 24 | public: 25 | virtual int getTensorRTVersion() const noexcept 26 | { 27 | return NV_TENSORRT_VERSION; 28 | } 29 | virtual bool supportsFormat(DataType type, PluginFormat format) const noexcept = 0; 30 | virtual void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, 31 | DataType type, PluginFormat format, int maxBatchSize) noexcept 32 | = 0; 33 | 34 | protected: 35 | void configure( 36 | const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) noexcept final 37 | { 38 | try 39 | { 40 | DataType type = nvinfer1::DataType::kFLOAT; 41 | PluginFormat format = nvinfer1::PluginFormat::kLINEAR; 42 | return this->configureWithFormat(inputDims, nbInputs, outputDims, nbOutputs, type, format, maxBatchSize); 43 | } 44 | catch (const std::exception& e) 45 | { 46 | nvinfer1::getLogger()->log(nvinfer1::ILogger::Severity::kERROR, e.what().c_str()); 47 | } 48 | } 49 | virtual ~IPluginExt() 50 | { 51 | } 52 | }; 53 | 54 | } // namespace nvinfer1 55 | #endif 56 | 57 | namespace onnx2trt 58 | { 59 | 60 | struct IOwnable 61 | { 62 | virtual void destroy() = 0; 63 | 64 | protected: 65 | virtual ~IOwnable() 66 | { 67 | } 68 | }; 69 | 70 | struct OwnableDeleter 71 | { 72 | void operator()(IOwnable* obj) const 73 | { 74 | obj->destroy(); 75 | } 76 | }; 77 | 78 | using UniqueOwnable = std::unique_ptr; 79 | class Plugin; 80 | class PluginV2; 81 | 82 | } // namespace onnx2trt 83 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnx2trt_runtime.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma 
once 6 | 7 | #include "onnx2trt_common.hpp" 8 | 9 | namespace onnx2trt 10 | { 11 | 12 | typedef Plugin* (*plugin_deserializer)(const void* serialData, size_t serialLength); 13 | 14 | } // namespace onnx2trt 15 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnxErrorRecorder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #include "onnxErrorRecorder.hpp" 6 | #include 7 | 8 | namespace onnx2trt 9 | { 10 | 11 | 12 | ONNXParserErrorRecorder* ONNXParserErrorRecorder::create( 13 | nvinfer1::ILogger* logger, nvinfer1::IErrorRecorder* otherRecorder) 14 | { 15 | try 16 | { 17 | auto recorder = new ONNXParserErrorRecorder(logger, otherRecorder); 18 | if (recorder) 19 | { 20 | recorder->incRefCount(); 21 | } 22 | return recorder; 23 | } 24 | catch (const std::exception& e) 25 | { 26 | logError(logger, e.what()); 27 | return nullptr; 28 | } 29 | } 30 | 31 | void ONNXParserErrorRecorder::destroy(ONNXParserErrorRecorder*& recorder) 32 | { 33 | if (recorder) 34 | { 35 | recorder->decRefCount(); 36 | recorder = nullptr; 37 | } 38 | } 39 | 40 | void ONNXParserErrorRecorder::logError(nvinfer1::ILogger* logger, const char* str) 41 | { 42 | if (logger) 43 | { 44 | logger->log(ILogger::Severity::kERROR, str); 45 | } 46 | } 47 | 48 | ONNXParserErrorRecorder::ONNXParserErrorRecorder( 49 | nvinfer1::ILogger* logger, nvinfer1::IErrorRecorder* otherRecorder) 50 | : mUserRecorder(otherRecorder) 51 | , mLogger(logger) 52 | { 53 | if (mUserRecorder) 54 | { 55 | mUserRecorder->incRefCount(); 56 | } 57 | } 58 | 59 | ONNXParserErrorRecorder::~ONNXParserErrorRecorder() noexcept 60 | { 61 | if (mUserRecorder) 62 | { 63 | mUserRecorder->decRefCount(); 64 | } 65 | } 66 | 67 | void ONNXParserErrorRecorder::clear() noexcept 68 | { 69 | try 70 | { 71 | // grab a lock so that there is no addition while clearing. 
72 | std::lock_guard guard(mStackLock); 73 | mErrorStack.clear(); 74 | } 75 | catch (const std::exception& e) 76 | { 77 | logError(mLogger, e.what()); 78 | } 79 | }; 80 | 81 | bool ONNXParserErrorRecorder::reportError( 82 | nvinfer1::ErrorCode val, nvinfer1::IErrorRecorder::ErrorDesc desc) noexcept 83 | { 84 | try 85 | { 86 | std::lock_guard guard(mStackLock); 87 | mErrorStack.push_back(errorPair(val, desc)); 88 | if (mUserRecorder) 89 | { 90 | mUserRecorder->reportError(val, desc); 91 | } 92 | else 93 | { 94 | logError(mLogger, desc); 95 | } 96 | } 97 | catch (const std::exception& e) 98 | { 99 | logError(mLogger, e.what()); 100 | } 101 | // All errors are considered fatal. 102 | return true; 103 | } 104 | 105 | nvinfer1::IErrorRecorder::RefCount ONNXParserErrorRecorder::incRefCount() noexcept 106 | { 107 | // Atomically increment or decrement the ref counter. 108 | return ++mRefCount; 109 | } 110 | 111 | nvinfer1::IErrorRecorder::RefCount ONNXParserErrorRecorder::decRefCount() noexcept 112 | { 113 | auto newVal = --mRefCount; 114 | if (newVal == 0) 115 | { 116 | delete this; 117 | } 118 | return newVal; 119 | } 120 | 121 | } // namespace onnx2trt 122 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/onnxErrorRecorder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include "NvInferRuntimeCommon.h" 8 | #include "onnx2trt_utils.hpp" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace onnx2trt 16 | { 17 | 18 | //! 19 | //! A simple implementation of the IErrorRecorder interface for 20 | //! use by ONNX importer. 21 | //! ONNX-importer Error recorder is based on a vector that pairs the error 22 | //! code and the error string into a single element. It also uses 23 | //! standard mutex and atomics in order to make sure that the code 24 | //! 
works in a multi-threaded environment. 25 | //! 26 | class ONNXParserErrorRecorder : public nvinfer1::IErrorRecorder 27 | { 28 | using RefCount = nvinfer1::IErrorRecorder::RefCount; 29 | using ErrorDesc = nvinfer1::IErrorRecorder::ErrorDesc; 30 | using ErrorCode = nvinfer1::ErrorCode; 31 | using IErrorRecorder = nvinfer1::IErrorRecorder; 32 | using ILogger = nvinfer1::ILogger; 33 | 34 | using errorPair = std::pair; 35 | using errorStack = std::vector; 36 | 37 | public: 38 | static ONNXParserErrorRecorder* create( 39 | ILogger* logger, IErrorRecorder* otherRecorder = nullptr); 40 | 41 | static void destroy(ONNXParserErrorRecorder*& recorder); 42 | 43 | void clear() noexcept final; 44 | RefCount incRefCount() noexcept final; 45 | RefCount decRefCount() noexcept final; 46 | bool reportError(ErrorCode val, ErrorDesc desc) noexcept final; 47 | 48 | int32_t getNbErrors() const noexcept final 49 | { 50 | return mErrorStack.size(); 51 | } 52 | 53 | ErrorCode getErrorCode(int32_t errorIdx) const noexcept final 54 | { 55 | return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT : (*this)[errorIdx].first; 56 | } 57 | 58 | ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept final 59 | { 60 | return invalidIndexCheck(errorIdx) ? "errorIdx out of range." : (*this)[errorIdx].second.c_str(); 61 | } 62 | 63 | bool hasOverflowed() const noexcept final 64 | { 65 | // This class can never overflow since we have dynamic resize via std::vector usage. 66 | return false; 67 | } 68 | 69 | protected: 70 | ONNXParserErrorRecorder(ILogger* logger, IErrorRecorder* otherRecorder = nullptr); 71 | 72 | virtual ~ONNXParserErrorRecorder() noexcept; 73 | 74 | static void logError(ILogger* logger, const char* str); 75 | 76 | // Simple helper functions. 
77 | const errorPair& operator[](size_t index) const noexcept 78 | { 79 | return mErrorStack[index]; 80 | } 81 | 82 | bool invalidIndexCheck(int32_t index) const noexcept 83 | { 84 | // By converting signed to unsigned, we only need a single check since 85 | // negative numbers turn into large positive greater than the size. 86 | size_t sIndex = index; 87 | return sIndex >= mErrorStack.size(); 88 | } 89 | // Mutex to hold when locking mErrorStack. 90 | std::mutex mStackLock; 91 | 92 | // Reference count of the class. Destruction of the class when mRefCount 93 | // is not zero causes undefined behavior. 94 | std::atomic mRefCount{0}; 95 | 96 | // The error stack that holds the errors recorded by TensorRT. 97 | errorStack mErrorStack; 98 | 99 | // Original error recorder (set by user) 100 | IErrorRecorder* mUserRecorder{nullptr}; 101 | 102 | // logger 103 | ILogger* mLogger{nullptr}; 104 | }; // class ONNXParserErrorRecorder 105 | 106 | } // namespace onnx2trt 107 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/readme.md: -------------------------------------------------------------------------------- 1 | # ONNX Parser 2 | - 这几个文件提取自官方的onnx-tensorrt,去掉python方面,其他都在 3 | - 另外增加了Plugin节点的支持 4 | - https://github.com/onnx/onnx-tensorrt -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/toposort.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | using std::cout; 12 | using std::cerr; 13 | using std::endl; 14 | 15 | namespace 16 | { 17 | 18 | enum NodeState 19 | { 20 | NODE_UNVISITED, 21 | NODE_ACTIVE, 22 | NODE_VISITED 23 | }; 24 | 25 | template 26 | bool get_post_order(size_t node_idx, Container const& nodes, std::unordered_map const& node_map, 27 | std::vector* node_states, std::vector* order) 
28 | { 29 | NodeState& node_state = node_states->at(node_idx); 30 | if (node_state == NODE_ACTIVE) 31 | { 32 | // Cycle detected! 33 | cerr << "ERROR: Graph contains a cycle" << endl; 34 | return false; 35 | } 36 | else if (node_state == NODE_VISITED) 37 | { 38 | return true; 39 | } 40 | else 41 | { 42 | node_state = NODE_ACTIVE; 43 | // TODO: This .Get().input() is highly specific to protobuf, should 44 | // generalise it somehow. 45 | for (auto const& input : nodes.Get(node_idx).input()) 46 | { 47 | if (!node_map.count(input)) 48 | { 49 | // Input node not found in graph! 50 | // cerr << "ERROR: Input node not found in graph: " 51 | // << input << endl; 52 | // return false; 53 | continue; // Skip missing input edges 54 | } 55 | size_t input_node_idx = node_map.at(input); 56 | if (!get_post_order(input_node_idx, nodes, node_map, node_states, order)) 57 | { 58 | return false; 59 | } 60 | } 61 | node_state = NODE_VISITED; 62 | order->push_back(node_idx); 63 | } 64 | return true; 65 | } 66 | 67 | } // anonymous namespace 68 | 69 | template 70 | bool toposort(Container const& nodes, std::vector* order) 71 | { 72 | std::unordered_map node_map; 73 | for (size_t i = 0; i < (size_t) nodes.size(); ++i) 74 | { 75 | // TODO: This .Get().input() is highly specific to protobuf, should 76 | // generalise it somehow. 77 | for (auto const& output : nodes.Get(i).output()) 78 | { 79 | if (!node_map.emplace(output, i).second) 80 | { 81 | // Output name appears more than once in graph! 
82 | cerr << "ERROR: Output name is not unique: " << output << endl; 83 | return false; 84 | } 85 | } 86 | } 87 | order->reserve(nodes.size()); 88 | std::vector node_states(nodes.size(), NODE_UNVISITED); 89 | for (size_t i = 0; i < (size_t) nodes.size(); ++i) 90 | { 91 | if (!get_post_order(i, nodes, node_map, &node_states, order)) 92 | { 93 | return false; 94 | } 95 | } 96 | return true; 97 | } 98 | -------------------------------------------------------------------------------- /src/tensorRT/onnx_parser/utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: Apache-2.0 3 | */ 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | template 10 | using string_map = std::unordered_map; 11 | -------------------------------------------------------------------------------- /src/tensorRT/onnxplugin/plugin_binary_io.hpp: -------------------------------------------------------------------------------- 1 | #ifndef PLUGIN_BINARY_IO_HPP 2 | #define PLUGIN_BINARY_IO_HPP 3 | 4 | #include 5 | #include 6 | 7 | namespace Plugin{ 8 | 9 | class BinIO { 10 | public: 11 | enum Head { 12 | MemoryRead = 1, 13 | MemoryWrite = 2 14 | }; 15 | 16 | BinIO() { openMemoryWrite(); } 17 | BinIO(const void* ptr, int memoryLength = -1) { openMemoryRead(ptr, memoryLength); } 18 | virtual ~BinIO(); 19 | bool opened(); 20 | bool openMemoryRead(const void* ptr, int memoryLength = -1); 21 | void openMemoryWrite(); 22 | const std::string& writedMemory() { return memoryWrite_; } 23 | void close(); 24 | int write(const void* pdata, size_t length); 25 | int writeData(const std::string& data); 26 | int read(void* pdata, size_t length); 27 | std::string readData(int numBytes); 28 | int readInt(); 29 | float readFloat(); 30 | bool eof(); 31 | 32 | BinIO& operator >> (std::string& value); 33 | BinIO& operator << (const std::string& value); 34 | BinIO& operator << (const char* value); 35 | BinIO& operator << (const std::vector& value); 36 | BinIO& 
operator >> (std::vector& value); 37 | 38 | template 39 | BinIO& operator >> (std::vector<_T>& value) { 40 | int length = 0; 41 | (*this) >> length; 42 | 43 | value.resize(length); 44 | read(value.data(), length * sizeof(_T)); 45 | return *this; 46 | } 47 | 48 | template 49 | BinIO& operator << (const std::vector<_T>& value) { 50 | (*this) << (int)value.size(); 51 | write(value.data(), sizeof(_T) * value.size()); 52 | return *this; 53 | } 54 | 55 | template 56 | BinIO& operator >> (_T& value) { 57 | read(&value, sizeof(_T)); 58 | return *this; 59 | } 60 | 61 | template 62 | BinIO& operator << (const _T& value) { 63 | write(&value, sizeof(_T)); 64 | return *this; 65 | } 66 | 67 | bool opstate() const { 68 | return opstate_; 69 | } 70 | 71 | private: 72 | size_t readModeEndSEEK_ = 0; 73 | std::string memoryWrite_; 74 | const char* memoryRead_ = nullptr; 75 | int memoryCursor_ = 0; 76 | int memoryLength_ = -1; 77 | Head flag_ = MemoryWrite; 78 | bool opstate_ = true; 79 | }; 80 | }; // namespace Plugin 81 | 82 | #endif //PLUGIN_BINARY_IO_HPP -------------------------------------------------------------------------------- /src/tensorRT/onnxplugin/plugins/HSigmoid.cu: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | using namespace ONNXPlugin; 6 | 7 | static __global__ void hsigmoid_kernel_fp32(float* input, float* output, int edge) { 8 | 9 | KernelPositionBlock; 10 | float x = input[position]; 11 | float a = x + 3; 12 | a = a < 0 ? 0 : (a >= 6 ? 6 : a); 13 | output[position] = a / 6; 14 | } 15 | 16 | // static __global__ void hsigmoid_kernel_fp16(__half* input, __half* output, int edge) { 17 | 18 | // KernelPositionBlock; 19 | 20 | // __half _six = 6.0f; 21 | // __half _three = 3.0f; 22 | // __half x = input[position]; 23 | // __half a = x + _three; 24 | // __half _zero = 0.0f; 25 | // a = a < _zero ? _zero : (a >= _six ? 
_six : a); 26 | // output[position] = a / _six; 27 | // } 28 | 29 | class HSigmoid : public TRTPlugin { 30 | public: 31 | SetupPlugin(HSigmoid); 32 | 33 | virtual void config_finish() override{ 34 | 35 | // INFO("init hsigmoid config: %s", config_->info_.c_str()); 36 | // INFO("weights = %d", config_->weights_.size()); 37 | // for(int i = 0; i < config_->weights_.size(); ++i){ 38 | // auto& w = config_->weights_[i]; 39 | // if(w->type() == TRT::DataType::Float16){ 40 | // INFO("Weight[%d] shape is %s, dtype = %s, value[0] = %f", i, w->shape_string(), data_type_string(w->type()), float(w->at<__half>(0))); 41 | // }else{ 42 | // INFO("Weight[%d] shape is %s, dtype = %s, value[0] = %f", i, w->shape_string(), data_type_string(w->type()), w->at(0)); 43 | // } 44 | // } 45 | } 46 | 47 | virtual std::shared_ptr new_config() override{ 48 | auto cfg = TRTPlugin::new_config(); 49 | 50 | // cfg->support_dtype_set_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT}; 51 | cfg->support_dtype_set_ = {nvinfer1::DataType::kFLOAT}; 52 | return cfg; 53 | } 54 | 55 | virtual nvinfer1::DimsExprs getOutputDimensions( 56 | int32_t outputIndex, const nvinfer1::DimsExprs* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept override{ 57 | 58 | return inputs[0]; 59 | } 60 | 61 | int enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) override{ 62 | 63 | int count = inputs[0].count(); 64 | auto grid = CUDATools::grid_dims(count); 65 | auto block = CUDATools::block_dims(count); 66 | 67 | if (config_->usage_dtype_ == TRT::DataType::Float) { 68 | hsigmoid_kernel_fp32 <<>> (inputs[0].ptr(), outputs[0].ptr(), count); 69 | } 70 | else if (config_->usage_dtype_ == TRT::DataType::Float16) { 71 | //hsigmoid_kernel_fp16 <<>> (inputs[0].ptr<__half>(), outputs[0].ptr<__half>(), count); 72 | INFOF("not implement function"); 73 | } 74 | else{ 75 | INFOF("not implement function"); 76 | } 77 | return 0; 78 | } 
79 | }; 80 | 81 | RegisterPlugin(HSigmoid); -------------------------------------------------------------------------------- /src/tensorRT/onnxplugin/plugins/HSwish.cu: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | using namespace ONNXPlugin; 6 | 7 | static __global__ void hswish_kernel_fp32(float* input, float* output, int edge) { 8 | 9 | KernelPositionBlock; 10 | float x = input[position]; 11 | float a = x + 3; 12 | a = a < 0 ? 0 : (a >= 6 ? 6 : a); 13 | output[position] = x * a / 6; 14 | } 15 | 16 | // static __global__ void hswish_kernel_fp16(__half* input, __half* output, int edge) { 17 | 18 | // KernelPositionBlock; 19 | 20 | // __half _six = 6.0f; 21 | // __half _three = 3.0f; 22 | // __half x = input[position]; 23 | // __half a = x + _three; 24 | // __half _zero = 0.0f; 25 | // a = a < _zero ? _zero : (a >= _six ? _six : a); 26 | // output[position] = x * a / _six; 27 | // } 28 | 29 | class HSwish : public TRTPlugin { 30 | public: 31 | SetupPlugin(HSwish); 32 | 33 | virtual void config_finish() override{ 34 | 35 | // INFO("init hswish config: %s", config_->info_.c_str()); 36 | // INFO("weights = %d", config_->weights_.size()); 37 | // for(int i = 0; i < config_->weights_.size(); ++i){ 38 | // auto& w = config_->weights_[i]; 39 | // if(w->type() == TRT::DataType::Float16){ 40 | // INFO("Weight[%d] shape is %s, dtype = %s, value[0] = %f", i, w->shape_string(), data_type_string(w->type()), float(w->at<__half>(0))); 41 | // }else{ 42 | // INFO("Weight[%d] shape is %s, dtype = %s, value[0] = %f", i, w->shape_string(), data_type_string(w->type()), w->at(0)); 43 | // } 44 | // } 45 | } 46 | 47 | virtual std::shared_ptr new_config() override{ 48 | auto cfg = TRTPlugin::new_config(); 49 | 50 | //cfg->support_dtype_set_ = {nvinfer1::DataType::kHALF, nvinfer1::DataType::kFLOAT}; 51 | cfg->support_dtype_set_ = {nvinfer1::DataType::kFLOAT}; 52 | return cfg; 53 | } 54 | 55 | virtual 
nvinfer1::DimsExprs getOutputDimensions( 56 | int32_t outputIndex, const nvinfer1::DimsExprs* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept override{ 57 | 58 | return inputs[0]; 59 | } 60 | 61 | int enqueue(const std::vector& inputs, std::vector& outputs, const std::vector& weights, void* workspace, cudaStream_t stream) override{ 62 | 63 | int count = inputs[0].count(); 64 | auto grid = CUDATools::grid_dims(count); 65 | auto block = CUDATools::block_dims(count); 66 | 67 | if (config_->usage_dtype_ == TRT::DataType::Float) { 68 | hswish_kernel_fp32 <<>> (inputs[0].ptr(), outputs[0].ptr(), count); 69 | } 70 | else if (config_->usage_dtype_ == TRT::DataType::Float16) { 71 | // hswish_kernel_fp16 <<>> (inputs[0].ptr<__half>(), outputs[0].ptr<__half>(), count); 72 | INFOF("not implement function"); 73 | } 74 | else{ 75 | INFOF("not implement function"); 76 | } 77 | return 0; 78 | } 79 | }; 80 | 81 | RegisterPlugin(HSwish); -------------------------------------------------------------------------------- /workspace/exp/face_tracker.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/exp/face_tracker.mp4 -------------------------------------------------------------------------------- /workspace/exp/fall_video.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/exp/fall_video.mp4 -------------------------------------------------------------------------------- /workspace/face/library/2ys2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/face/library/2ys2.jpg 
-------------------------------------------------------------------------------- /workspace/face/library/2ys3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/face/library/2ys3.jpg -------------------------------------------------------------------------------- /workspace/face/library/male.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/face/library/male.jpg -------------------------------------------------------------------------------- /workspace/face/recognize/2ys1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/face/recognize/2ys1.jpg -------------------------------------------------------------------------------- /workspace/face/recognize/2ys3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/face/recognize/2ys3.jpg -------------------------------------------------------------------------------- /workspace/face/recognize/2ys5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/face/recognize/2ys5.jpg -------------------------------------------------------------------------------- /workspace/inference/car.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/inference/car.jpg 
-------------------------------------------------------------------------------- /workspace/inference/gril.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/inference/gril.jpg -------------------------------------------------------------------------------- /workspace/inference/group.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/inference/group.jpg -------------------------------------------------------------------------------- /workspace/inference/yq.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/inference/yq.jpg -------------------------------------------------------------------------------- /workspace/inference/zand.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/inference/zand.jpg -------------------------------------------------------------------------------- /workspace/inference/zgjr.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/inference/zgjr.jpg -------------------------------------------------------------------------------- /workspace/pro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cvdong/TRT_PRO_LEARN/77939ba3a5b3056bdf584672afcf39e30c8ffb8d/workspace/pro -------------------------------------------------------------------------------- /workspace/python/test_dcnv2.py: 
# --------------------------------------------------------------------------------
import torch
import torch.nn.functional as F
import torch.nn as nn
import json


class DCNv2Implementation(torch.autograd.Function):
    """Stand-in autograd function exported to ONNX as a ``Plugin`` node named "DCNv2".

    For a custom op implemented as an ``autograd.Function``, adding a static
    ``symbolic`` method is all that is required for export. Apart from ``g``,
    its parameters must match those of ``forward`` (apart from ``ctx``) exactly.

    The example passes ``input``/``offset_and_mask`` as tensor inputs and
    ``weight``/``bias`` as parameter inputs; per the original author's note the
    two kinds are handled differently inside TensorRT.

    Extra node attributes are declared as ``<name>_<type abbreviation>``. For
    the abbreviations see ``_parse_arg`` in torch/onnx/symbolic_helper.py
    (``from torch.onnx.symbolic_helper import _parse_arg``). Declared
    attributes are emitted on the exported node and handed to TensorRT's ONNX
    parser.
    """

    # backward is intentionally omitted in this example.

    @staticmethod
    def forward(ctx, input, offset_and_mask, weight, bias):
        """Return zeros shaped like ``input``.

        This only exists so the ONNX export can execute the graph; the values
        do not matter, only that the output has the expected dimensions.
        """
        placeholder = torch.zeros_like(input)
        return placeholder

    @staticmethod
    def symbolic(g, input, offset_and_mask, weight, bias):
        """Emit the ``Plugin`` node for this op during ONNX export."""
        # To work with this TensorRT framework the op must be named "Plugin";
        # see the definition at line 160 of
        # tensorRT/src/tensorRT/onnx_parser/builtin_op_importers.cpp.
        # To use a different name, make a matching change there.
        #
        # name_s: attribute `name` of string type, matching the C++ plugin
        #   name (see the name defined at line 82 of
        #   tensorRT/src/tensorRT/onnxplugin/plugins/HSwish.cu).
        # info_s: attribute `info` of string type; typically a json.dumps
        #   payload carrying a structured configuration that the C++ side
        #   decodes (see line 39 of
        #   sxai/tensorRT/src/tensorRT/onnxplugin/plugins/HSwish.cu).
        node_attributes = {"name_s": "DCNv2", "info_s": ""}
        return g.op("Plugin", input, offset_and_mask, weight, bias, **node_attributes)


class DCNv2Module(nn.Module):
    """Module owning the weight and bias parameters fed to ``DCNv2Implementation``."""

    def __init__(self):
        super().__init__()

        # Here we assume weight and bias exist as learnable parameters.
        self.weight = nn.Parameter(torch.ones(3, 3, 3, 3))
        self.bias = nn.Parameter(torch.full((3,), 0.5))

    def forward(self, input, offset_and_mask):
        """Apply the custom op to the inputs together with this module's parameters."""
        op_args = (input, offset_and_mask, self.weight, self.bias)
        return DCNv2Implementation.apply(*op_args)


class Model(torch.nn.Module):
    """Minimal model wrapping a single ``DCNv2Module``, used for the export demo."""

    def __init__(self):
        super().__init__()
        self.dcnv2 = DCNv2Module()

    def forward(self, input, offset_and_mask):
        """Forward both inputs to the wrapped ``DCNv2Module``."""
        result = self.dcnv2(input, offset_and_mask)
        return result

# Local import: only this export script needs it.
import inspect

# Dummy inputs used to trace the model. Named so they do not shadow the
# builtin `input`.
dummy_input = torch.zeros((1, 3, 3, 3))
dummy_offset_and_mask = torch.zeros((1, 27, 3, 3))
model = Model()

# This demonstrates the two-input case; you can define as many inputs as you need.
export_options = {
    "input_names": ["input.0", "input.1"],
    "output_names": ["output.0"],
    "verbose": True,
    "opset_version": 11,
    "dynamic_axes": {"input.0": {0: "batch"}, "input.1": {0: "batch"}, "output.0": {0: "batch"}},
}

# `enable_onnx_checker` is not accepted by newer PyTorch releases, where passing
# it raises a TypeError before anything is exported. Pass it only when the
# installed `torch.onnx.export` still declares it, so older releases keep the
# original behavior (checker disabled).
# NOTE(review): on releases without this argument the checker cannot be
# disabled this way; confirm the custom `Plugin` node exports on the target version.
if "enable_onnx_checker" in inspect.signature(torch.onnx.export).parameters:
    export_options["enable_onnx_checker"] = False

torch.onnx.export(
    model,
    (dummy_input, dummy_offset_and_mask),
    'dcnv2.plugin.onnx',
    **export_options
)
print("Done")
# --------------------------------------------------------------------------------
# /workspace/python/test_hswish.py:
# --------------------------------------------------------------------------------
import torch
import torch.nn.functional as F
import torch.nn as nn
import json


class HSwishImplementation(torch.autograd.Function):
    """Hard-swish autograd function exported to ONNX as a ``Plugin`` node named "HSwish".

    For a custom op implemented as an ``autograd.Function``, adding a static
    ``symbolic`` method is all that is required for export. Apart from ``g``,
    its parameters must match those of ``forward`` (apart from ``ctx``) exactly.

    The example passes ``input`` as a tensor input and ``bias`` as a parameter
    input; per the original author's note the two kinds are handled differently
    inside TensorRT.

    Extra node attributes are declared as ``<name>_<type abbreviation>``. For
    the abbreviations see ``_parse_arg`` in torch/onnx/symbolic_helper.py
    (``from torch.onnx.symbolic_helper import _parse_arg``). Declared
    attributes are emitted on the exported node and handed to TensorRT's ONNX
    parser.
    """

    @staticmethod
    def symbolic(g, input, bias):
        """Emit the ``Plugin`` node for this op during ONNX export."""
        # To work with this TensorRT framework the op must be named "Plugin";
        # see the definition at line 160 of
        # tensorRT/src/tensorRT/onnx_parser/builtin_op_importers.cpp.
        # To use a different name, make a matching change there.
        #
        # name_s: attribute `name` of string type, matching the C++ plugin
        #   name (see the name defined at line 82 of
        #   tensorRT/src/tensorRT/onnxplugin/plugins/HSwish.cu).
        # info_s: attribute `info` of string type; json.dumps lets us pass a
        #   structured configuration that the C++ side decodes (see line 39 of
        #   sxai/tensorRT/src/tensorRT/onnxplugin/plugins/HSwish.cu).
        return g.op("Plugin", input, bias, name_s="HSwish", info_s=json.dumps({"alpha": 3.5, "beta": 2.88}))

    @staticmethod
    def forward(ctx, i, bias):
        """Compute ``i * relu6(i + 3) / 6``; ``bias`` is accepted but not used here.

        This mainly exists so the ONNX export can execute the graph; it only
        needs to return an output with the same dimensions as the input.
        """
        ctx.save_for_backward(i)
        return i * F.relu6(i + 3) / 6

    # backward is intentionally omitted in this example.


class MemoryEfficientHSwish(nn.Module):
    """Module that applies ``HSwishImplementation`` with an owned ``bias`` parameter."""

    def __init__(self):
        super(MemoryEfficientHSwish, self).__init__()

        # Here we assume a bias exists as a weight parameter: a (3, 3, 3, 3)
        # tensor filled with 3.15.
        self.bias = nn.Parameter(torch.zeros((3, 3, 3, 3)))
        self.bias.data.fill_(3.15)

    def forward(self, x):
        """Apply the custom op to ``x``, passing the bias parameter along with it."""
        return HSwishImplementation.apply(x, self.bias)


class FooModel(torch.nn.Module):
    """Two-input demo model: ``relu(input2 * hswish(input1))``."""

    def __init__(self):
        super(FooModel, self).__init__()
        self.hswish = MemoryEfficientHSwish()

    def forward(self, input1, input2):
        """Return ``F.relu(input2 * self.hswish(input1))``."""
        return F.relu(input2 * self.hswish(input1))


# Local import: only this export script needs it.
import inspect

dummy_input1 = torch.zeros((1, 3, 3, 3))
dummy_input2 = torch.zeros((1, 3, 3, 3))
model = FooModel()

# This demonstrates the two-input case; you can define as many inputs as you need.
export_options = {
    "input_names": ["input.0", "input.1"],
    "output_names": ["output.0"],
    "verbose": True,
    "opset_version": 11,
    "dynamic_axes": {"input.0": {0: "batch"}, "input.1": {0: "batch"}, "output.0": {0: "batch"}},
}

# `enable_onnx_checker` is not accepted by newer PyTorch releases, where passing
# it raises a TypeError before anything is exported. Pass it only when the
# installed `torch.onnx.export` still declares it, so older releases keep the
# original behavior (checker disabled).
# NOTE(review): on releases without this argument the checker cannot be
# disabled this way; confirm the custom `Plugin` node exports on the target version.
if "enable_onnx_checker" in inspect.signature(torch.onnx.export).parameters:
    export_options["enable_onnx_checker"] = False

torch.onnx.export(
    model,
    (dummy_input1, dummy_input2),
    'hswish.plugin.onnx',
    **export_options
)
print("Done")
# --------------------------------------------------------------------------------