├── .gitignore ├── D#0001-undefined_reference_to_XXX ├── D#0001.md ├── code │ ├── bar.cpp │ ├── bar.h │ ├── baz.c │ ├── baz.h │ ├── foo.cpp │ ├── foo.h │ ├── main.cpp │ ├── qux.cpp │ └── qux.h └── images │ ├── Selection_175.png │ ├── Selection_176.png │ ├── Selection_177.png │ ├── Selection_178.png │ ├── Selection_179.png │ ├── Selection_180.png │ ├── Selection_181.png │ ├── Selection_182.png │ ├── Selection_184.png │ ├── Selection_185.png │ ├── Selection_186.png │ ├── Selection_187.png │ └── Selection_188.png ├── D#0002-hack_your_printf ├── D#0002.md ├── code │ ├── 3rd_lib.cpp │ ├── 3rd_lib.h │ ├── lib3rd_new.so │ ├── main.cpp │ ├── main2.cpp │ ├── my_hack.cpp │ ├── my_hack.h │ └── replace.cpp └── images │ ├── 10-52-53.png │ ├── 20-30-03.png │ ├── 20-31-42.png │ ├── 20-36-09.png │ ├── 20-39-19.png │ ├── 20-40-08.png │ ├── 20-55-24.png │ ├── 21-18-07.png │ ├── 21-22-58.png │ ├── 21-56-03.png │ └── 22-09-21.png ├── D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set ├── D#0003.md ├── code │ ├── build.sh │ ├── cosine_similarity.h │ ├── main.cpp │ ├── search_best.h │ └── timer.h └── images │ ├── Selection_191.png │ ├── Selection_192.png │ ├── Selection_193.png │ ├── Selection_194.png │ ├── Selection_195.png │ ├── Selection_196.png │ ├── Selection_197.png │ ├── Selection_198.png │ ├── Selection_199.png │ ├── Selection_200.png │ ├── Selection_201.png │ ├── Selection_202.png │ ├── Selection_203.png │ ├── Selection_204.png │ ├── Selection_205.png │ ├── Selection_206.png │ ├── Selection_207.png │ ├── Selection_210.png │ ├── Selection_211.png │ ├── Selection_212.png │ ├── Selection_213.png │ ├── Selection_214.png │ ├── Selection_216.png │ ├── Selection_217.png │ ├── Selection_218.png │ ├── Selection_219.png │ ├── Selection_220.png │ ├── Selection_221.png │ ├── Selection_222.png │ ├── Selection_223.png │ ├── Selection_224.png │ ├── Selection_226.png │ └── Selection_227.png ├── D#0004-depthwise_separable_convolutions_in_mobilenet ├── D#0004.md └── images │ 
└── 436695808.jpg ├── D#0005-separable_convolutions_in_image_processing ├── D#0005.md ├── code │ ├── GaussianFilter.cpp │ ├── GaussianFilter.h │ ├── build.sh │ ├── main.cpp │ ├── separateGaussianFilter.cpp │ ├── separateGaussianFilter.h │ └── timer.h └── images │ ├── 1386915682.jpg │ ├── 1565291630.jpg │ ├── Selection_231.png │ └── lena.jpeg ├── D#0006-protect_my_function ├── D#0006.md ├── code │ ├── auth.cpp │ ├── auth.h │ ├── main.cpp │ ├── sdk.cpp │ └── sdk.h └── images │ ├── Selection_282.png │ ├── Selection_283.png │ ├── Selection_284.png │ ├── Selection_285.png │ ├── Selection_287.png │ ├── Selection_288.png │ ├── Selection_289.png │ ├── Selection_290.png │ ├── Selection_291.png │ ├── Selection_292.png │ ├── Selection_293.png │ ├── Selection_294.png │ ├── Selection_295.png │ ├── Selection_296.png │ ├── Selection_297.png │ └── Selection_298.png ├── D#0007-compile_git_commit_sha1_into_elf ├── D#0007.md ├── code │ ├── Makefile │ ├── main.cpp │ ├── sdk.cpp │ └── sdk.h └── images │ ├── Selection_301.png │ ├── Selection_302.png │ ├── Selection_303.png │ ├── Selection_304.png │ ├── Selection_305.png │ └── Selection_307.png ├── D#0008-拉格朗日乘子法 ├── D#0008.md └── images │ ├── 113536.jpg │ ├── 114729.png │ ├── 131203.png │ ├── 144724.png │ ├── 151353.png │ ├── 154641.png │ ├── 162338.jpg │ ├── dualfunc.png │ └── dualprob.png ├── D#0009-SVM ├── D#0009.md └── images │ ├── .gitkeep │ ├── 110529.png │ ├── 113708.png │ ├── 160121.png │ ├── 203213.jpg │ ├── 212511.jpg │ └── 231537.png ├── D#0010-从线性回归到对率回归到Softmax激活函数 ├── D#0010.md └── images │ ├── .gitkeep │ ├── 104656.png │ ├── 132651.png │ ├── 150909.png │ └── 175045.png ├── D#0011-kNN ├── D#0011.md └── images │ └── .gitkeep ├── D#0012-为什么选交叉熵作为分类问题的损失函数 ├── D#0012.md └── images │ ├── .gitkeep │ ├── 172059.png │ ├── 195031.png │ ├── 223112.png │ ├── 223449.png │ ├── 225907.png │ └── 231448.png ├── D#0013-深度学习调参常用方法总结 ├── D#0013.md └── images │ ├── .gitkeep │ ├── 124926.png │ ├── 180705.png │ ├── 183737.jpg │ ├── bayes1.jpg 
│ ├── bayes2.jpg │ └── bayes3.jpg ├── D#0014-数据降维常用方法总结(LDA,PCA) ├── D#0014.md └── images │ ├── .gitkeep │ ├── 133329.png │ ├── 143203.png │ └── 163536.png ├── D#0015-深度学习常用损失函数 ├── D#0015.md └── images │ ├── .gitkeep │ ├── 154349.png │ ├── 154353.png │ ├── 154408.png │ ├── 154834.png │ ├── 160509.png │ ├── 160647.png │ ├── 161349.png │ ├── 161758.png │ ├── 162310.png │ └── 164439.png ├── D#0016-深度学习中不平衡样本的处理 ├── D#0016.md └── images │ ├── .gitkeep │ ├── 225817.png │ └── 230646.png ├── D#0017-MTCNN和FaceBoxes ├── D#0017.md └── images │ ├── .gitkeep │ ├── 174303.png │ ├── 181230.png │ ├── 183249.png │ ├── 184628.png │ ├── 202239.png │ ├── 203348.png │ ├── 203537.png │ ├── 203658.png │ └── 203924.png ├── D#0018-Mask_TextSpotter ├── D#0018.md └── images │ └── .gitkeep ├── D#0019-DeepID1,DeepID2,DeepID2+和DeepID3 ├── D#0019.md └── images │ ├── .gitkeep │ ├── 211326.png │ ├── 213046.png │ ├── 213312.png │ ├── 213324.png │ ├── 213342.png │ ├── 224342.png │ ├── 224356.png │ ├── 225135.png │ ├── Selection_329.png │ ├── Selection_330.png │ ├── Selection_331.png │ ├── Selection_332.png │ ├── Selection_333.png │ ├── Selection_334.png │ ├── Selection_335.png │ └── Selection_336.png ├── D#0020-Batch-Normalization层原理与分析 ├── D#0020.md └── images │ ├── .gitkeep │ ├── 213118.png │ ├── 213528.png │ ├── 213536.png │ ├── 224019.png │ ├── 233104.png │ ├── 233310.png │ ├── Selection_343.png │ ├── Selection_345.png │ ├── Selection_346.png │ ├── Selection_347.png │ ├── Selection_348.png │ ├── Selection_349.png │ └── Selection_350.png ├── D#0021-机器学习中的过拟合及其解决办法 ├── D#0021.md └── images │ ├── .gitkeep │ ├── 214025.png │ ├── 232539.png │ ├── Selection_351.png │ ├── Selection_352.png │ ├── Selection_353.png │ ├── Selection_354.png │ ├── Selection_355.png │ ├── Selection_356.png │ ├── Selection_357.png │ ├── Selection_358.png │ └── Selection_359.png ├── D#0022-SSD ├── D#0022.md └── images │ └── .gitkeep ├── D#0023-CNN模型计算量估计 ├── D#0023.md └── images │ └── .gitkeep ├── D#0024-CNN模型内存访问估计 ├── 
D#0024.md └── images │ └── .gitkeep ├── D#0025-CNN中使用卷积代替全连接 ├── D#0025.md └── images │ ├── .gitkeep │ └── 225831.png ├── D#0026-深度学习检测小目标常用方法 ├── D#0026.md └── images │ ├── .gitkeep │ ├── 215102.png │ ├── 224212.png │ ├── 224506.png │ ├── 230223.png │ ├── 230407.png │ ├── 230607.png │ ├── 232423.png │ ├── Selection_381.png │ ├── Selection_382.png │ ├── Selection_383.png │ ├── Selection_384.png │ ├── Selection_387.png │ ├── Selection_388.png │ ├── Selection_389.png │ └── Selection_390.png ├── D#0027-聊聊2017 ImageNet夺冠的SENet ├── D#0027.md └── images │ ├── .gitkeep │ ├── SENet.jpg │ ├── Selection_418.png │ ├── Selection_419.png │ ├── Selection_420.png │ ├── Selection_421.png │ ├── Selection_423.png │ ├── Selection_424.png │ ├── Selection_425.png │ ├── Selection_426.png │ ├── Selection_427.png │ └── Selection_428.png ├── D#0028-再聊SENet的孪生兄弟SKNet ├── D#0028.md └── images │ ├── .gitkeep │ ├── Selection_430.png │ ├── Selection_431.png │ ├── Selection_432.png │ ├── Selection_433.png │ ├── Selection_434.png │ └── Selection_435.png ├── D#0029-CV中的注意力机制 ├── D#0029.md └── images │ ├── .gitkeep │ ├── Selection_437.png │ ├── Selection_438.png │ ├── Selection_439.png │ ├── Selection_440.png │ ├── Selection_441.png │ └── Selection_442.png ├── D#0030-类MTCNN的360RIP人脸检测器PCN ├── D#0030.md └── images │ ├── .gitkeep │ ├── Selection_443.png │ ├── Selection_444.png │ ├── Selection_445.png │ ├── Selection_446.png │ ├── Selection_447.png │ ├── Selection_448.png │ ├── Selection_449.png │ ├── Selection_451.png │ ├── Selection_452.png │ ├── Selection_453.png │ ├── Selection_454.png │ ├── Selection_455.png │ ├── Selection_456.png │ ├── Selection_458.png │ ├── Selection_459.png │ ├── Selection_461.png │ └── Selection_462.png ├── D#0031-知识蒸馏Knowledge-Distillation ├── D#0031.md └── images │ ├── .gitkeep │ ├── KD.png │ ├── Selection_478.png │ └── Selection_479.png ├── D#0032-CNN可视化之类激活热力图Grad-CAM ├── D#0032.md └── images │ ├── .gitkeep │ ├── Selection_004.png │ ├── Selection_005.png │ ├── 
Selection_006.png │ ├── Selection_007.png │ └── Selection_008.png ├── D#0033-一些分类网络的训练技巧 ├── D#0033.md └── images │ ├── .gitkeep │ ├── Selection_495.png │ ├── Selection_496.png │ ├── Selection_497.png │ ├── Selection_498.png │ ├── Selection_499.png │ ├── Selection_500.png │ ├── Selection_501.png │ └── Selection_502.png ├── D#0034-火箭发射:阿里巴巴的轻量网络训练方法 ├── D#0034.md └── images │ ├── .gitkeep │ ├── 210322.png │ ├── 211148.png │ └── 213237.png ├── D#0035-2KW短视频打标问题之Activate-Learning ├── D#0035.md └── images │ └── .gitkeep ├── D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning ├── D#0036.md └── images │ ├── .gitkeep │ ├── Selection_508.png │ ├── Selection_509.png │ ├── Selection_510.png │ ├── Selection_511.png │ ├── Selection_512.png │ └── TransferLearning.png ├── D#0037-CentralNet做多模态融合 ├── D#0037.md └── images │ ├── .gitkeep │ ├── Selection_511.png │ ├── Selection_513.png │ ├── Selection_514.png │ ├── Selection_515.png │ ├── Selection_516.png │ ├── Selection_517.png │ ├── Selection_518.png │ ├── Selection_519.png │ ├── Selection_520.png │ ├── Selection_521.png │ ├── Selection_522.png │ └── Selection_523.png ├── D#0038-Multi-View-Active-Learning做视频推荐 ├── D#0038.md └── images │ ├── .gitkeep │ ├── Selection_504.png │ ├── Selection_505.png │ ├── Selection_506.png │ ├── Selection_507.png │ ├── Selection_508.png │ ├── Selection_509.png │ ├── Selection_510.png │ └── Selection_511.png ├── D#0039-用FCN做分割 ├── D#0039.md └── images │ ├── .gitkeep │ ├── 225831.png │ ├── 232018.png │ ├── Selection_513.png │ ├── Selection_514.png │ ├── Selection_515.png │ └── Selection_516.png ├── D#0040-用U-Net做分割 ├── D#0040.md └── images │ ├── .gitkeep │ ├── Selection_518.png │ ├── Selection_519.png │ ├── Selection_520.png │ ├── Selection_521.png │ ├── Selection_522.png │ └── Selection_523.png ├── D#0041-用RefineNet做分割 ├── D#0041.md └── images │ ├── .gitkeep │ ├── Selection_525.png │ ├── Selection_526.png │ ├── Selection_527.png │ ├── Selection_528.png │ ├── Selection_529.png │ └── Selection_530.png ├── 
D#0042-用DeepLabv3+的Encoder-Decoder做分割 ├── D#0042.md └── images │ ├── .gitkeep │ ├── 232018.png │ ├── Selection_518.png │ ├── Selection_531.png │ ├── Selection_532.png │ ├── Selection_533.png │ └── Selection_534.png ├── D#0043-用HRNet做分割 ├── D#0043.md └── images │ ├── .gitkeep │ ├── 154321.png │ ├── 155806.png │ ├── 160639.png │ ├── 161658.png │ └── 232018.png ├── D#0044-用Network-Slimming做模型加速和压缩 ├── D#0044.md └── images │ ├── .gitkeep │ ├── 543.png │ ├── 544.png │ ├── 545.png │ ├── 546.png │ ├── 547.png │ ├── 548.png │ └── 549.png ├── D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation ├── D#0045.md └── images │ ├── .gitkeep │ ├── 556.png │ ├── 557.png │ ├── 558.png │ ├── 560.png │ ├── 561.png │ ├── 562.png │ ├── 563.png │ └── 564.png ├── D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints ├── D#0046.md └── images │ ├── .gitkeep │ ├── 565.png │ ├── 566.png │ ├── 567.png │ ├── 568.png │ ├── 569.png │ ├── 570.png │ ├── 571.png │ ├── 572.png │ ├── 573.png │ ├── 574.png │ ├── 575.png │ ├── 576.png │ └── 577.png ├── D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet ├── D#0047.md └── images │ ├── .gitkeep │ ├── 565.png │ ├── 575.png │ ├── 580.png │ ├── 581.png │ ├── 582.png │ ├── 583.png │ ├── 584.png │ ├── 585.png │ └── 586.png ├── D#0048-Anchor-Free第三篇Objects-as-Points ├── D#0048.md └── images │ ├── .gitkeep │ ├── 122149.png │ ├── 125313.png │ ├── 130128.png │ ├── 130954.png │ ├── 131049.png │ ├── 131135.png │ ├── 131329.png │ ├── 565.png │ └── 580.png ├── D#0049-Anchor-Free第四篇CenterNet-Keypoint-Triplets-for-Object-Detection ├── D#0049.md └── images │ ├── .gitkeep │ ├── 180227.png │ ├── 180345.png │ ├── 181626.png │ ├── 182402.png │ ├── 183054.png │ ├── 183111.png │ └── 184637.png ├── D#0050-C++中浮点值做比较的正确方法 ├── code │ └── main.cpp └── images │ └── .gitkeep ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | a.out 3 | 4 | .DS_Store 5 | 
-------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/code/bar.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "bar.h" 3 | 4 | void bar(int) 5 | { 6 | 7 | std::cout << "这是一行很牛逼的代码!" << std::endl; 8 | 9 | return; 10 | } 11 | -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/code/bar.h: -------------------------------------------------------------------------------- 1 | #ifndef _BAR_H_ 2 | #define _BAR_H_ 3 | 4 | void bar(void); 5 | 6 | #endif //!_BAR_H_ 7 | -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/code/baz.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "baz.h" 3 | 4 | void baz(void) 5 | { 6 | 7 | printf("这是一行很牛逼的C代码!\n"); 8 | 9 | return; 10 | } 11 | -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/code/baz.h: -------------------------------------------------------------------------------- 1 | #ifndef _BAR_H_ 2 | #define _BAR_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void baz(void); 9 | 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | 14 | #endif //!_BAR_H_ 15 | -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/code/foo.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "foo.h" 3 | 4 | void foo(void) 5 | { 6 | 7 | std::cout << "这是一行很牛逼的代码!" 
<< std::endl; 8 | 9 | return; 10 | } 11 | -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/code/foo.h: -------------------------------------------------------------------------------- 1 | #ifndef _FOO_H_ 2 | #define _FOO_H_ 3 | 4 | void foo(void); 5 | 6 | #endif //!_FOO_H_ 7 | -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/code/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "qux.h" 3 | 4 | int main(int argc, char* argv[]) 5 | { 6 | qux(); 7 | 8 | return 0; 9 | } 10 | -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/code/qux.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "qux.h" 3 | 4 | void qux(void) 5 | { 6 | std::string str = "Hello World!"; 7 | std::cout << str << std::endl; 8 | 9 | return; 10 | } 11 | -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/code/qux.h: -------------------------------------------------------------------------------- 1 | #ifndef _QUX_H_ 2 | #define _QUX_H_ 3 | 4 | void qux(void); 5 | 6 | #endif //!_QUX_H_ 7 | -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/images/Selection_175.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0001-undefined_reference_to_XXX/images/Selection_175.png -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/images/Selection_176.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0001-undefined_reference_to_XXX/images/Selection_176.png -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/images/Selection_177.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0001-undefined_reference_to_XXX/images/Selection_177.png -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/images/Selection_178.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0001-undefined_reference_to_XXX/images/Selection_178.png -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/images/Selection_179.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0001-undefined_reference_to_XXX/images/Selection_179.png -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/images/Selection_180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0001-undefined_reference_to_XXX/images/Selection_180.png -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/images/Selection_181.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0001-undefined_reference_to_XXX/images/Selection_181.png -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/images/Selection_182.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0001-undefined_reference_to_XXX/images/Selection_182.png -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/images/Selection_184.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0001-undefined_reference_to_XXX/images/Selection_184.png -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/images/Selection_185.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0001-undefined_reference_to_XXX/images/Selection_185.png -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/images/Selection_186.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0001-undefined_reference_to_XXX/images/Selection_186.png -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/images/Selection_187.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0001-undefined_reference_to_XXX/images/Selection_187.png -------------------------------------------------------------------------------- /D#0001-undefined_reference_to_XXX/images/Selection_188.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0001-undefined_reference_to_XXX/images/Selection_188.png -------------------------------------------------------------------------------- /D#0002-hack_your_printf/code/3rd_lib.cpp: -------------------------------------------------------------------------------- 1 | //3rd_lib.cpp 2 | #include 3 | #include 4 | 5 | void _3rd_lib_func(void) 6 | { 7 | printf("this is a log printed from _3rd_lib_func.\n"); 8 | 9 | return; 10 | } 11 | -------------------------------------------------------------------------------- /D#0002-hack_your_printf/code/3rd_lib.h: -------------------------------------------------------------------------------- 1 | //3rd_lib.h 2 | #ifndef _3RD_LIB_ 3 | #define _3RD_LIB_ 4 | 5 | #include 6 | #include 7 | 8 | void _3rd_lib_func(void); 9 | 10 | #endif // !_3RD_LIB_ 11 | -------------------------------------------------------------------------------- /D#0002-hack_your_printf/code/lib3rd_new.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0002-hack_your_printf/code/lib3rd_new.so -------------------------------------------------------------------------------- /D#0002-hack_your_printf/code/main.cpp: -------------------------------------------------------------------------------- 1 | //main.cpp 2 | #include "my_hack.h" 3 | #include "3rd_lib.h" 4 | #include 5 | #include 6 | #include 7 | 8 | int main(int argc, char* argv[]) 9 | { 10 
| _3rd_lib_func(); 11 | 12 | printf("TAG: log printed from main.\n"); 13 | // std::cout << "SSS" << std::endl; 14 | 15 | return 0; 16 | } 17 | -------------------------------------------------------------------------------- /D#0002-hack_your_printf/code/main2.cpp: -------------------------------------------------------------------------------- 1 | //main2.cpp 2 | #include 3 | #include 4 | #include "3rd_lib.h" 5 | 6 | int main(int argc, char* argv[]) 7 | { 8 | printf("TAG: print to stdout.\n"); 9 | 10 | _3rd_lib_func(); 11 | 12 | FILE *fp = NULL; 13 | fp = freopen("/dev/null", "r", stdout); 14 | if(fp!=NULL){ 15 | fprintf(stderr, "TAG: print to stderr\n"); 16 | printf("TAG: print to stdout\n"); 17 | } 18 | 19 | _3rd_lib_func(); 20 | 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /D#0002-hack_your_printf/code/my_hack.cpp: -------------------------------------------------------------------------------- 1 | //my_hack.cpp 2 | #include 3 | 4 | int puts ( const char * str ) 5 | { 6 | // fputs("HA, hacked printf.\n", stdout); 7 | // fputs(str, stdout); 8 | // fputs("\n", stdout); 9 | 10 | return 0; 11 | } 12 | 13 | -------------------------------------------------------------------------------- /D#0002-hack_your_printf/code/my_hack.h: -------------------------------------------------------------------------------- 1 | //my_hack.h 2 | #ifndef _MY_HACK_ 3 | #define _MY_HACK_ 4 | #include 5 | 6 | int puts ( const char * str ); 7 | 8 | #endif //!_MY_HACK_ 9 | -------------------------------------------------------------------------------- /D#0002-hack_your_printf/code/replace.cpp: -------------------------------------------------------------------------------- 1 | //replace.cpp 2 | #include 3 | #include 4 | #include 5 | //replace.cpp 6 | int main(int argc, char* argv[]) 7 | { 8 | char keyword[] = {"this is a log printed from _3rd_lib_func."}; 9 | int len = sizeof(keyword); 10 | 11 | FILE * fin = fopen(argv[1], "rb"); 
12 | 13 | fseek (fin , 0 , SEEK_END); 14 | long lSize = ftell (fin); 15 | printf("lSize[%ld].\n", lSize); 16 | rewind (fin); 17 | 18 | int num = lSize/sizeof(char); 19 | char * buffer = (char*) malloc (sizeof(char)*num); 20 | 21 | long result = fread(buffer, sizeof(char), num, fin); 22 | 23 | for( int i = 0; i < lSize; i++) { 24 | char curStr[len] = {"\0"}; 25 | memcpy(curStr, buffer + i, len); 26 | if(strstr(curStr, keyword)) { 27 | printf("find curStr[%s].\n", curStr); 28 | char hack_info[] = {"O Captain!My Captain!"}; 29 | memcpy(buffer + i, hack_info, sizeof(hack_info)); 30 | break; 31 | } 32 | } 33 | 34 | FILE * fout = fopen(argv[2], "wb"); 35 | fwrite(buffer, sizeof(char), num, fout); 36 | 37 | free(buffer); buffer = NULL; 38 | fclose(fin); fin = NULL; 39 | fclose(fout); fout = NULL; 40 | 41 | return 0; 42 | } 43 | -------------------------------------------------------------------------------- /D#0002-hack_your_printf/images/10-52-53.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0002-hack_your_printf/images/10-52-53.png -------------------------------------------------------------------------------- /D#0002-hack_your_printf/images/20-30-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0002-hack_your_printf/images/20-30-03.png -------------------------------------------------------------------------------- /D#0002-hack_your_printf/images/20-31-42.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0002-hack_your_printf/images/20-31-42.png -------------------------------------------------------------------------------- 
/D#0002-hack_your_printf/images/20-36-09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0002-hack_your_printf/images/20-36-09.png -------------------------------------------------------------------------------- /D#0002-hack_your_printf/images/20-39-19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0002-hack_your_printf/images/20-39-19.png -------------------------------------------------------------------------------- /D#0002-hack_your_printf/images/20-40-08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0002-hack_your_printf/images/20-40-08.png -------------------------------------------------------------------------------- /D#0002-hack_your_printf/images/20-55-24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0002-hack_your_printf/images/20-55-24.png -------------------------------------------------------------------------------- /D#0002-hack_your_printf/images/21-18-07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0002-hack_your_printf/images/21-18-07.png -------------------------------------------------------------------------------- /D#0002-hack_your_printf/images/21-22-58.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0002-hack_your_printf/images/21-22-58.png -------------------------------------------------------------------------------- /D#0002-hack_your_printf/images/21-56-03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0002-hack_your_printf/images/21-56-03.png -------------------------------------------------------------------------------- /D#0002-hack_your_printf/images/22-09-21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0002-hack_your_printf/images/22-09-21.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/code/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | g++ main.cpp -std=c++11 -mfma -O3 -Ofast -ffast-math -fopenmp -lopenblas 3 | -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/code/cosine_similarity.h: -------------------------------------------------------------------------------- 1 | #ifndef _COSINE_SIMILARITY_ 2 | #define _COSINE_SIMILARITY_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | template 9 | T Cosine_similarity(const T* __restrict__ const vectorA, // 第一个特征向量的首地址 10 | const T* __restrict__ const vectorB, // 第二个特征向量的首地址 11 | const int len) // 特征向量长度(维数) 12 | { 13 | T mult_add = 0.0f; 14 | // Step 10,有了模归一化后,不必计算a_norm2和b_norm2 15 | // T a_norm2 = 0.0f; 16 | // T b_norm2 = 0.0f; 17 | 18 | for(int i = 0; i < len; i++) { 19 | const T ai = vectorA[i]; 20 | const T bi = vectorB[i]; 21 | mult_add 
+= ai * bi; 22 | // a_norm2 += ai * ai; 23 | // b_norm2 += bi * bi; 24 | } 25 | 26 | // 避免除零错误,分母加上FLT_MIN 27 | //const T similarity = mult_add / (sqrt(a_norm2 * b_norm2) + FLT_MIN); 28 | const T similarity = mult_add; 29 | 30 | return similarity; 31 | } 32 | 33 | // Step 7, SIMD 34 | float inline reduceM128(const __m128 r) 35 | { 36 | // 128位操作只需要16字节对齐 37 | __attribute__((aligned(16))) float f[4] = {0.0f}; 38 | // assert(reinterpret_cast(f)%16 == 0); 39 | _mm_store_ps(f, r); // f必须16字节对齐 40 | // _mm_storeu_ps(f, r); // f可以不必对齐 41 | 42 | return (f[0]+f[1]) + (f[2]+f[3]); 43 | } 44 | 45 | float inline reduceM256(const __m256 r) 46 | { 47 | const __m128 hi = _mm256_extractf128_ps(r, 1); 48 | const __m128 lo = _mm256_extractf128_ps(r, 0); 49 | const __m128 sum = _mm_add_ps(hi, lo); 50 | 51 | return reduceM128(sum); 52 | } 53 | 54 | // Step 8, Fast InvSqrt from QUAKE-III 55 | float InvSqrt(float x){ 56 | const float xhalf = 0.5f*x; 57 | int i = *(int*)&x; 58 | // i = 0x5f3759df - (i>>1); 59 | i = 0x5f375a86 - (i>>1); 60 | x = *(float*)&i; 61 | x = x*(1.5f - xhalf*x*x); 62 | return x; 63 | } 64 | 65 | float 66 | Cosine_similarity_avx(const float* const vectorA, const float* const vectorB, const int len) 67 | { 68 | // 一个AVX指令每次可以计算8个32位浮点(float), 69 | // 暂不考虑有不能整除部分的情况 70 | assert(len%8 == 0); 71 | 72 | const int step = len / 8; 73 | const __m256* one = (__m256*) vectorA; 74 | const __m256* two = (__m256*) vectorB; 75 | 76 | __m256 mult_add_m256 = _mm256_setzero_ps(); 77 | __m256 a_norm_m256 = _mm256_setzero_ps(); 78 | // Step 9,考虑b_norm2预计算 79 | __m256 b_norm_m256 = _mm256_setzero_ps(); 80 | for(int i = 0; i < step; i++) { 81 | mult_add_m256 = _mm256_fmadd_ps(one[i], two[i], mult_add_m256); // a * b + c 82 | a_norm_m256 = _mm256_fmadd_ps(one[i], one[i], a_norm_m256); 83 | b_norm_m256 = _mm256_fmadd_ps(two[i], two[i], b_norm_m256); 84 | } 85 | 86 | const float mult_add = reduceM256(mult_add_m256); 87 | const float a_norm2 = reduceM256(a_norm_m256); 88 | const float 
b_norm2 = reduceM256(b_norm_m256); 89 | 90 | const float similarity = mult_add / (sqrt(a_norm2 * b_norm2) + FLT_MIN); 91 | //const float similarity = mult_add * InvSqrt(a_norm2 * b_norm2); 92 | 93 | return similarity; 94 | } 95 | 96 | #endif // !_COSINE_SIMILARITY_ 97 | 98 | -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/code/main.cpp: -------------------------------------------------------------------------------- 1 | //main.cpp 2 | #include 3 | #include 4 | #include "timer.h" 5 | #include "search_best.h" 6 | 7 | #define ALGIN (32) // 使用SIMD需要内存对齐,128bit的指令需要16位对齐,256bit的指令需要32位对齐 8 | #define FACENUM (1000*1000) // 底库中存有100万张人脸特征向量 9 | //#define FEATSIZE (512) // 每个人脸特征向量的维度是512维,每一维是一个DType类型的浮点数 10 | //Step 11,用PCA做特征选择,压缩512维到256维 11 | #define FEATSIZE (256) // 每个人脸特征向量的维度是256维,每一维是一个DType类型的浮点数 12 | 13 | // Step 4, double-->float(在我的电脑上,sizeof(float)==4,sizeof(double)==8, sizeof(short)==2, sizeof(int)==4 14 | //typedef float DType; 15 | // Step 12, float-->unsigned short,定点化 16 | typedef float DType; 17 | 18 | float calcL(const DType * const pVec, const int len) 19 | { 20 | float l = 0.0f; 21 | 22 | for(int i = 0; i < len; i++) { 23 | l += pVec[i] * pVec[i]; 24 | } 25 | 26 | return sqrt(l) + FLT_MIN; 27 | } 28 | 29 | int main(int argc, char* argv[]) 30 | { 31 | // 1.定义当前脸的特征,并初始化 32 | __attribute__((aligned(ALGIN))) DType vectorA[FEATSIZE]; 33 | for(int i = 0; i < FEATSIZE; i++) { 34 | vectorA[i] = static_cast(FACENUM/2*FEATSIZE + i) / (FACENUM * FEATSIZE); 35 | } 36 | 37 | // 模归一化 38 | const float l = calcL(vectorA, FEATSIZE); 39 | for(int i = 0; i < FEATSIZE; i++) { 40 | vectorA[i] /= l; 41 | } 42 | 43 | // 2.定义底库中所有脸的特征向量,并初始化 44 | // 为了使用SIMD优化,使用memalign申请对齐了的内存,牺牲了代码的可移植性 45 | DType* pDB = reinterpret_cast(memalign(ALGIN, sizeof(DType)*FACENUM*FEATSIZE)); 46 | if(!pDB) { 47 | std::cout << "out of memory\n"; 48 | return -1; 49 | } 50 | 51 | // 验证内存是否对齐 
52 | // printf("vectorA[%p], pDB[%p].\n", vectorA, pDB); 53 | 54 | for(int i = 0; i < FACENUM; i++) { 55 | for(int j = 0; j < FEATSIZE; j++) { 56 | pDB[i*FEATSIZE+j] = static_cast(i*FEATSIZE + j) / (FACENUM * FEATSIZE); 57 | } 58 | 59 | // 模归一化 60 | const float norm = calcL(pDB + i*FEATSIZE, FEATSIZE); 61 | for(int j = 0; j < FEATSIZE; j++) { 62 | pDB[i*FEATSIZE+j] /= norm; 63 | } 64 | } 65 | 66 | // 3.定义计数器并开始计时 67 | Timer t; 68 | 69 | int best_index = SearchBest(static_cast(vectorA), FEATSIZE, pDB, FACENUM*FEATSIZE); 70 | 71 | // 4.打印结果 72 | std::cout << "Best face index is: " << best_index << std::endl; 73 | std::cout << "Find the best face index eat: " << t.elapsed_micro() << "us" << std::endl; 74 | std::cout << "PER Cosine_similarity call eat: " << t.elapsed_nano() / FACENUM << "ns" << std::endl; 75 | //printf("double[%d], float[%d], short[%d], int[%d].\n", (int)sizeof(double), (int)sizeof(float), (int)sizeof(short), (int)sizeof(int)); 76 | 77 | // 5.释放分配的内存,防止内存泄露 78 | // memalign分配的内存也可以用free释放 79 | free(pDB); 80 | 81 | return 0; 82 | } 83 | -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/code/search_best.h: -------------------------------------------------------------------------------- 1 | #ifndef _SEARCHBEST_ 2 | #define _SEARCHBEST_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | // use openblas 9 | #include 10 | #include "cosine_similarity.h" 11 | 12 | // Step 1, g++ main.cpp search_best.cpp cosine_similarity.cpp -std=c++11 13 | // Step 2, g++ main.cpp search_best.cpp cosine_similarity.cpp -std=c++11 -O3 14 | // Step 3, g++ main.cpp search_best.cpp cosine_similarity.cpp -std=c++11 -O3 -Ofast -ffast-math 15 | template 16 | int SearchBest(const T* __restrict__ const pVecA, // 待搜索的单个特征向量首地址 17 | const int lenA, // 待搜索特征向量长度(1 x 单个特征维数) 18 | const T* __restrict__ const pVecDB, // 底库首地址 19 | const int lenDB) // 底库长度(特征个数 x 单个特征维数) 20 | { 21 | 
assert(lenDB%lenA == 0); 22 | const int featsize = lenA; 23 | const int facenum = lenDB / lenA; 24 | 25 | int best_index = - INT_MAX; 26 | T best_similarity = - FLT_MAX; 27 | #if 0 28 | // Step 5, 加上OpenMP 29 | //GCC很聪明,OpenMP默认线程数就是多核处理器的核心数量,不必显示指定 30 | //OpenMP起线程,收回线程也是有开销的,所以要合理安排每个线程的任务量大小,不宜放入内层for循环(任务量太小划不来) 31 | //#pragma omp parallel for num_threads(8) 32 | #pragma omp parallel for 33 | for(int i = 0; i < facenum; i++) { 34 | // 普通C++代码实现的余弦相似度计算 35 | T similarity = Cosine_similarity(pVecA, pVecDB + i*featsize, featsize); 36 | // 使用向量化代码实现的余弦相似度计算 37 | //T similarity = Cosine_similarity_avx(pVecA, pVecDB + i*featsize, featsize); 38 | if(similarity > best_similarity) { 39 | best_similarity = similarity; 40 | best_index = i; 41 | } 42 | } 43 | 44 | #else 45 | // Step 12,使用OpenBLAS 46 | T simAll[facenum] = {0.0f}; 47 | cblas_sgemv(CblasRowMajor, CblasNoTrans, facenum, featsize, 1, pVecDB, featsize, pVecA, 1, 0, simAll, 1); 48 | // 寻找simAll里面最大的,它的序号就是要找的id 49 | for(int i = 0; i < facenum; i++) { 50 | if(simAll[i] > best_similarity) { 51 | best_similarity = simAll[i]; 52 | best_index = i; 53 | } 54 | } 55 | #endif 56 | 57 | return best_index; 58 | } 59 | 60 | #endif //!_SEARCHBEST_ 61 | -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/code/timer.h: -------------------------------------------------------------------------------- 1 | /********************************************************* 2 | * C++11 chrono库封装的一个高精度计时器类(使用真实世界挂钟时间,具体时间依赖于系统) 3 | * 使用方式: 4 | * Timer t; // 开始计时 5 | * func(); // 运行待计时的函数 6 | * std::cout << t.elapsed() << std::endl; // 打印计时时间,默认毫秒为单位 7 | * std::cout << t.elapsed_micro() << std::endl; // 微秒 8 | * std::cout << t.elapsed_nano() << std::endl; // 纳秒 9 | *********************************************************/ 10 | 11 | #ifndef _TIMER_ 12 | #define _TIMER_ 13 | 14 | #include 15 | using namespace std; 16 | using namespace 
std::chrono; 17 | 18 | class Timer { 19 | public: 20 | Timer() : m_begin(high_resolution_clock::now()) {} 21 | void reset() { m_begin = high_resolution_clock::now(); } 22 | 23 | // 默认输出毫秒 24 | template 25 | int64_t elapsed() const { 26 | return duration_cast(high_resolution_clock::now() - m_begin).count(); 27 | } 28 | 29 | // 微秒 30 | int64_t elapsed_micro() const { 31 | return elapsed(); 32 | } 33 | 34 | // 纳秒 35 | int64_t elapsed_nano() const { 36 | return elapsed(); 37 | } 38 | 39 | // 秒 40 | int64_t elapsed_seconds() const { 41 | return elapsed(); 42 | } 43 | 44 | // 分 45 | int64_t elapsed_minutes() const { 46 | return elapsed(); 47 | } 48 | 49 | // 时 50 | int64_t elapsed_hours() const { 51 | return elapsed(); 52 | } 53 | 54 | private: 55 | time_point m_begin; 56 | }; 57 | 58 | #endif // !_TIMER_ 59 | 60 | -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_191.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_191.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_192.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_193.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_193.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_194.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_194.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_195.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_195.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_196.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_196.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_197.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_197.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_198.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_198.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_199.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_199.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_200.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_201.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_201.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_202.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_202.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_203.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_203.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_204.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_204.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_205.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_205.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_206.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_206.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_207.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_207.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_210.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_210.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_211.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_211.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_212.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_212.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_213.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_213.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_214.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_214.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_216.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_216.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_217.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_217.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_218.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_218.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_219.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_219.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_220.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_220.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_221.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_221.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_222.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_222.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_223.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_223.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_224.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_224.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_226.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_226.png -------------------------------------------------------------------------------- /D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_227.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0003-optimizing_cosine_distance_searching_in_a_million_feature-set/images/Selection_227.png -------------------------------------------------------------------------------- /D#0004-depthwise_separable_convolutions_in_mobilenet/images/436695808.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0004-depthwise_separable_convolutions_in_mobilenet/images/436695808.jpg -------------------------------------------------------------------------------- /D#0005-separable_convolutions_in_image_processing/D#0005.md: -------------------------------------------------------------------------------- 1 | # separable convolutions in image processing 2 | ## 引言 3 | 在计算机图像处理中,有一种经常进行的操作,就是图像滤波,也叫图像卷积(深度学习中的卷积概念也是衍生于它,只不过深度学习中的卷积核是三维的,图像处理中的卷积核是二维的),比如用Canny卷积提取图像中的边缘信息,用Gaussian卷积构造金字塔等等。在深度学习中,深度可分离卷积(Depth-wise Separable 
Convolution)取代传统卷积,可以起到加速(减少计算量)和减小模型大小(参数数量)的作用;类似地,在图像处理中,往往也可以用两个独立的小的卷积串联,取代一个大的卷积,也可以起到减少计算量和减小参数数量的作用。 4 | 5 | **请注意,本文所用的术语“卷积convolution”并不是很恰当,恰当的称呼应该叫“滤波filter”或者“空间相关”,卷积的话卷积核要旋转180度。**但太多情况下并不区分这两者,这里将错就错称之为“卷积”,但这并不影响我们的结论。 6 | 7 | **欢迎探讨,本文持续维护。** 8 | 9 | ## 实验平台: 10 | 11 | + 操作系统:Ubuntu 16.04 LTS,Ubuntu 18.04 LTS 12 | + 编译器:g++ (Ubuntu 5.4.0-6ubuntu1~16.04.10) 5.4.0 20160609 13 | 14 |     gcc (Ubuntu 5.4.0-6ubuntu1~16.04.10) 5.4.0 20160609 15 | 16 | ### 1,二维Gaussian卷积核的可分离性分析 17 | 18 | + 推导过程: 19 | 20 | ![](images/1565291630.jpg) 21 | 22 | 可以从上图推导过程中看出,一个m行乘以n列的高斯卷积可以分解成一个1行乘以n列的行卷积,之后串联一个m行乘以1列的列卷积的形式,输出保持不变。行卷积的卷积核参数(均值和方差)等于原始m行n列卷积核在列方向(Y方向)的均值和方差,列卷积的卷积核参数等于原始m行n列卷积核在行方向(X方向)上的均值和方差。 23 | 24 | + 计算量分析: 25 | 26 | 原始卷积的mxn的卷积和,卷积一个图像,乘加次数等于图像的像素个数WxH,乘以以单个像素为中心店做卷积时卷积的乘加次数mxn,总次数等于MxHxmxn。 27 | 28 | 在把原始卷积拆分为行卷积和列卷积后,行卷积的乘加次数等于输入图像的像素点个数WxH,乘以单个行卷积的乘加次数n,列卷积的乘加次数等于上一步行卷积输出图像的像素点个数WxH,乘以单个列卷积的乘加次数m,所以,总的次数是WxHxn+WxHxm。 29 | 30 | 由此可见,将卷积分离后的计算量比原始计算量等于(m+n)/(mxn),常见的宽高相等,假设为k,则计算量的比可以简化为2/k。**计算复杂度从原来的O(k^2)将为了O(k)。**是一个很大的提速。 31 | 32 | + 参数数量分析: 33 | 34 | 原始卷积的参数数量为mxn个,卷积拆分后的参数数量为n+m。考虑到高斯核的对称性,这个数还略有冗余。 35 | 36 | ### 2,二维Gaussian卷积核的可分离代码实测 37 | 38 | 代码都放在code/目录下,都是按照高斯函数的定义写的,不过假设了卷积核是11x11的正方形框,x和y方向的方差都一样。 39 | 40 | 速度实测如下: 41 | 42 | ![](images/Selection_231.png) 43 | 44 | 取kernalsize=11, sigma=0.8,处理一副490x490的彩色图像,传统高斯滤波耗时498200us,卷积核分离之后滤波,耗时114679us。加速比为4.3443:1,和理论值(k^2)/(2k)=11:2=5.5:1稍微差一点。**卷积核的size越大,加速效果越明显**。 45 | 46 | ### 3,理论推广,什么样的核才可分离 47 | 48 | 上面已经在理论和实测中证明了高斯核的可分离性和加速效果。那么就有个很自然的问题,除了高斯核,还有哪些核也是可分离的,核的可分离性的充分必要条件是什么,弄清楚了这些问题,我们就可以在碰到新核的时候考虑一下它是否可用可分离性加速,或者我们自己设计新的满足可分离性的核。 49 | 50 | 先说结论,**一个卷积核K,如果可以表达成一个水平核h和一个竖直核v相乘的形式,即K=v*transpose(h)那么K就是可以分离的**;反之,一个卷积核是可分离的,那么必然可以分解成一个水平核h和一个竖直核v相乘的形式。 51 | 52 | 证明如下(这里只以K=v*transpose(h)的形式证明充分性,必要性反推即可): 53 | 54 | ![](images/1386915682.jpg) 55 | 56 | 所以,我们按照这个思路,可以发现,除了高斯卷积可以分离,方框卷积、双线性卷积、高斯二阶导数卷积,也是可以分离的。 57 | 58 | ### 4,理论再推广,把不能分离的核做近似分离 59 | 60 | 
毕竟,不是每个核K都一定是可以分解成**一个**水平核和一个竖直核相乘的形式的。不过我们可以把核看成一个mxn的矩阵,对其进行[奇异值分解](https://en.wikipedia.org/wiki/Singular_value_decomposition),就可以得到**一些**水平核和一些竖直核相乘再累加的形式K=sum(sigma_i\*u_i\*transpose(v_i))。这里面的sqrt(sigma_i)\*u_i和sqrt(sigma_i)\*v_i就是每一组的水平和竖直的卷积核。组数取得越多,计算的结果越和原始K卷积的结果相近,但是计算量也会增加;组数取得越少,计算的结果越和原始卷积结果相远,但是速度也越快。 61 | 62 | 这种分离方式加速的多少和原卷积核效果近似的程度,取决于a. 原始核K的size大小,1节已经证明了卷积核分离算法的加速情况,原始核K的size越大,加速比越大;b. 原始核K的奇异值分解得到的奇异值数量和分布,分解得到的奇异值的绝对值如果都集中在头部,那么可以忽略很多不重要的奇异值,那么加速效果好,质量损失小(**其实可分离性,是奇异值分解的一个特例**,如果奇异值分解后得到的奇异值,除了一个不为0,其他都为0,那就是前面几节讨论到的可分离);c. 加速效果还和具体的实现有关,比如内存的访问,cache等等。 63 | 64 | ## 总结 65 | 66 | 把一个大卷积操作分解为一个水平卷积串联一个竖直卷积的过程叫卷积可分离,卷积可分离是图像处理中一个常见的优化手法,其效果和原始卷积相同,但是计算量是原始卷积计算量的2k/k^2,卷积核越大,计算量下降越明显。 67 | 68 | 卷积核只是一个矩阵,对这个矩阵做奇异值分解,如果有且仅有一个不为零的奇异值,那么它就是可以分离的;如果还有其它的奇异值,那么它就是可近似分离的,分离的效果取决于奇异值的分布。 69 | 70 | 最后值得一提的是,这种将一个大卷积分解成多个小卷积的串联来减小计算量和内存消耗的思路,在[深度学习的研究](https://arxiv.org/pdf/1512.00567.pdf)中,也有很大的影响。 71 | 72 | ## 参考资料 73 | + [《数字图像处理》](https://book.douban.com/subject/6434627/) 74 | + [高斯滤波器详解](https://www.cnblogs.com/wangguchangqing/p/6407717.html) 75 | + [《计算机视觉算法与应用》](https://book.douban.com/subject/10465997/) 76 | + [SVD论文](http://www-users.math.umn.edu/~lerman/math5467/svd.pdf) 77 | + [Inception V2&V3](https://arxiv.org/pdf/1512.00567.pdf) 78 | -------------------------------------------------------------------------------- /D#0005-separable_convolutions_in_image_processing/code/GaussianFilter.cpp: -------------------------------------------------------------------------------- 1 | //GaussianFilter.cpp 2 | #include 3 | #include 4 | #include "GaussianFilter.h" 5 | 6 | using namespace cv; 7 | 8 | void GaussianFilter(const Mat &src, Mat &dst, int ksize, double sigma) 9 | { 10 | CV_Assert(src.channels() == 3); // 只处理三通道图像 11 | const static double pi = 3.1415926; 12 | // 根据窗口大小和sigma生成高斯滤波器模板 13 | // 申请一个二维数组,存放生成的高斯模板矩阵 14 | double **templateMatrix = new double*[ksize]; 15 | for (int i = 0; i < ksize; i++) { 16 | templateMatrix[i] = new double[ksize]; 
17 | } 18 | int origin = ksize / 2; // 以模板的中心为原点 19 | double x2, y2; 20 | double sum = 0; 21 | for (int i = 0; i < ksize; i++) { 22 | x2 = pow(i - origin, 2); 23 | for (int j = 0; j < ksize; j++) { 24 | y2 = pow(j - origin, 2); 25 | // 高斯函数前的常数可以不用计算,会在归一化的过程中给消去 26 | double g = exp(-(x2 + y2) / (2 * sigma * sigma)); 27 | sum += g; 28 | templateMatrix[i][j] = g; 29 | } 30 | } 31 | 32 | for (int i = 0; i < ksize; i++) { 33 | for (int j = 0; j < ksize; j++) { 34 | templateMatrix[i][j] /= sum; 35 | // cout << templateMatrix[i][j] << " "; 36 | } 37 | // cout << endl; 38 | } 39 | 40 | // 将模板应用到图像中 41 | int border = ksize / 2; 42 | copyMakeBorder(src, dst, border, border, border, border, BorderTypes::BORDER_REFLECT); 43 | int channels = dst.channels(); // 3 44 | int rows = dst.rows - border; 45 | int cols = dst.cols - border; 46 | for (int i = border; i < rows; i++) { 47 | for (int j = border; j < cols; j++) { 48 | double sum[3] = { 0 }; 49 | for (int a = -border; a <= border; a++) { 50 | for (int b = -border; b <= border; b++) { 51 | Vec3b rgb = dst.at(i + a, j + b); 52 | auto k = templateMatrix[border + a][border + b]; 53 | sum[0] += k * rgb[0]; 54 | sum[1] += k * rgb[1]; 55 | sum[2] += k * rgb[2]; 56 | } 57 | } 58 | Vec3b rgb = { static_cast(sum[0]), static_cast(sum[1]), static_cast(sum[2]) }; 59 | dst.at(i, j) = rgb; 60 | } 61 | } 62 | 63 | // 释放模板数组 64 | for (int i = 0; i < ksize; i++) 65 | delete[] templateMatrix[i]; 66 | delete[] templateMatrix; 67 | } 68 | 69 | -------------------------------------------------------------------------------- /D#0005-separable_convolutions_in_image_processing/code/GaussianFilter.h: -------------------------------------------------------------------------------- 1 | //GaussianFilter.h 2 | #ifndef _GAUSSIANFILTER_ 3 | #define _GAUSSIANFILTER_ 4 | 5 | #include 6 | #include 7 | 8 | using namespace cv; 9 | 10 | void GaussianFilter(const Mat &src, Mat &dst, int ksize, double sigma); 11 | 12 | #endif // !_GAUSSIANFILTER_ 13 | 
-------------------------------------------------------------------------------- /D#0005-separable_convolutions_in_image_processing/code/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | g++ main.cpp GaussianFilter.cpp separateGaussianFilter.cpp -std=c++11 `pkg-config --libs --cflags opencv` 3 | -------------------------------------------------------------------------------- /D#0005-separable_convolutions_in_image_processing/code/main.cpp: -------------------------------------------------------------------------------- 1 | //main.cpp 2 | #include 3 | #include 4 | #include "timer.h" 5 | #include "GaussianFilter.h" 6 | #include "separateGaussianFilter.h" 7 | 8 | int main(int argc, char* argv[]) 9 | { 10 | cv::Mat src = cv::imread(argv[1]); 11 | cv::imshow("src", src); 12 | 13 | cv::Mat dst; 14 | 15 | int ksize = 11; 16 | double sigma = 0.8; 17 | 18 | // 定义计数器并开始计时 19 | Timer t; 20 | GaussianFilter(src, dst, ksize, sigma); 21 | cv::imshow("GaussianFilter", dst); 22 | 23 | // 打印结果 24 | std::cout << "GaussianFilter eat: " << t.elapsed_micro() << "us" << std::endl; 25 | 26 | t.reset(); 27 | separateGaussianFilter(src, dst, ksize, sigma); 28 | cv::imshow("separateGaussianFilter", dst); 29 | 30 | // 打印结果 31 | std::cout << "separateGaussianFilter eat: " << t.elapsed_micro() << "us" << std::endl; 32 | 33 | cv::waitKey(0); 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /D#0005-separable_convolutions_in_image_processing/code/separateGaussianFilter.cpp: -------------------------------------------------------------------------------- 1 | //separateGaussianFilter.cpp 2 | #include 3 | #include 4 | #include "separateGaussianFilter.h" 5 | 6 | using namespace cv; 7 | 8 | void separateGaussianFilter(const Mat &src, Mat &dst, int ksize, double sigma) 9 | { 10 | CV_Assert(src.channels() == 3); // 只处理三通道图像 11 | // 生成一维的高斯滤波模板 12 | double *matrix = new 
double[ksize]; 13 | double sum = 0; 14 | int origin = ksize / 2; 15 | for (int i = 0; i < ksize; i++) { 16 | // 高斯函数前的常数可以不用计算,会在归一化的过程中给消去 17 | double g = exp(-(i - origin) * (i - origin) / (2 * sigma * sigma)); 18 | sum += g; 19 | matrix[i] = g; 20 | } 21 | // 归一化 22 | for (int i = 0; i < ksize; i++) { 23 | matrix[i] /= sum; 24 | } 25 | // 将模板应用到图像中 26 | int border = ksize / 2; 27 | copyMakeBorder(src, dst, border, border, border, border, BorderTypes::BORDER_REFLECT); 28 | int channels = dst.channels(); 29 | int rows = dst.rows - border; 30 | int cols = dst.cols - border; 31 | // 水平方向 32 | for (int i = border; i < rows; i++) { 33 | for (int j = border; j < cols; j++) { 34 | double sum[3] = { 0 }; 35 | for (int k = -border; k <= border; k++) { 36 | Vec3b rgb = dst.at(i, j + k); 37 | sum[0] += matrix[border + k] * rgb[0]; 38 | sum[1] += matrix[border + k] * rgb[1]; 39 | sum[2] += matrix[border + k] * rgb[2]; 40 | } 41 | Vec3b rgb = { static_cast(sum[0]), static_cast(sum[1]), static_cast(sum[2]) }; 42 | dst.at(i, j) = rgb; 43 | } 44 | } 45 | 46 | // 竖直方向 47 | for (int i = border; i < rows; i++) { 48 | for (int j = border; j < cols; j++) { 49 | double sum[3] = { 0 }; 50 | for (int k = -border; k <= border; k++) { 51 | Vec3b rgb = dst.at(i + k, j); 52 | sum[0] += matrix[border + k] * rgb[0]; 53 | sum[1] += matrix[border + k] * rgb[1]; 54 | sum[2] += matrix[border + k] * rgb[2]; 55 | } 56 | Vec3b rgb = { static_cast(sum[0]), static_cast(sum[1]), static_cast(sum[2]) }; 57 | dst.at(i, j) = rgb; 58 | } 59 | } 60 | 61 | delete[] matrix; 62 | } 63 | -------------------------------------------------------------------------------- /D#0005-separable_convolutions_in_image_processing/code/separateGaussianFilter.h: -------------------------------------------------------------------------------- 1 | //separateGaussianFilter.h 2 | #ifndef _SEPARATEGAUSSIANFILTER_ 3 | #define _SEPARATEGAUSSIANFILTER_ 4 | 5 | #include 6 | #include 7 | 8 | using namespace cv; 9 | 10 | void 
separateGaussianFilter(const Mat &src, Mat &dst, int ksize, double sigma); 11 | 12 | #endif // !_SEPARATEGAUSSIANFILTER_ 13 | -------------------------------------------------------------------------------- /D#0005-separable_convolutions_in_image_processing/code/timer.h: -------------------------------------------------------------------------------- 1 | /********************************************************* 2 | * C++11 chrono库封装的一个高精度计时器类(使用真实世界挂钟时间,具体时间依赖于系统) 3 | * 使用方式: 4 | * Timer t; // 开始计时 5 | * func(); // 运行待计时的函数 6 | * std::cout << t.elapsed() << std::endl; // 打印计时时间,默认毫秒为单位 7 | * std::cout << t.elapsed_micro() << std::endl; // 微秒 8 | * std::cout << t.elapsed_nano() << std::endl; // 纳秒 9 | *********************************************************/ 10 | 11 | #ifndef _TIMER_ 12 | #define _TIMER_ 13 | 14 | #include 15 | using namespace std; 16 | using namespace std::chrono; 17 | 18 | class Timer { 19 | public: 20 | Timer() : m_begin(high_resolution_clock::now()) {} 21 | void reset() { m_begin = high_resolution_clock::now(); } 22 | 23 | // 默认输出毫秒 24 | template 25 | int64_t elapsed() const { 26 | return duration_cast(high_resolution_clock::now() - m_begin).count(); 27 | } 28 | 29 | // 微秒 30 | int64_t elapsed_micro() const { 31 | return elapsed(); 32 | } 33 | 34 | // 纳秒 35 | int64_t elapsed_nano() const { 36 | return elapsed(); 37 | } 38 | 39 | // 秒 40 | int64_t elapsed_seconds() const { 41 | return elapsed(); 42 | } 43 | 44 | // 分 45 | int64_t elapsed_minutes() const { 46 | return elapsed(); 47 | } 48 | 49 | // 时 50 | int64_t elapsed_hours() const { 51 | return elapsed(); 52 | } 53 | 54 | private: 55 | time_point m_begin; 56 | }; 57 | 58 | #endif // !_TIMER_ 59 | 60 | -------------------------------------------------------------------------------- /D#0005-separable_convolutions_in_image_processing/images/1386915682.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0005-separable_convolutions_in_image_processing/images/1386915682.jpg -------------------------------------------------------------------------------- /D#0005-separable_convolutions_in_image_processing/images/1565291630.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0005-separable_convolutions_in_image_processing/images/1565291630.jpg -------------------------------------------------------------------------------- /D#0005-separable_convolutions_in_image_processing/images/Selection_231.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0005-separable_convolutions_in_image_processing/images/Selection_231.png -------------------------------------------------------------------------------- /D#0005-separable_convolutions_in_image_processing/images/lena.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0005-separable_convolutions_in_image_processing/images/lena.jpeg -------------------------------------------------------------------------------- /D#0006-protect_my_function/code/auth.cpp: -------------------------------------------------------------------------------- 1 | //auth.cpp 2 | #include "auth.h" 3 | 4 | AuthRet get_auth_result(const char* key) 5 | { 6 | 7 | AuthRet ret = {.iSuccess = -1, 8 | .magic = -1 9 | }; 10 | 11 | if(!strcmp(key, "a valid key")) { 12 | printf("auth successed.\n"); 13 | ret.iSuccess = 0; 14 | } else { 15 | printf("auth failed.\n"); 16 | ret.iSuccess = -1; 17 | } 18 | 19 | ret.magic = key[0] + key[strlen(key) - 
1]; 20 | 21 | return ret; 22 | } 23 | -------------------------------------------------------------------------------- /D#0006-protect_my_function/code/auth.h: -------------------------------------------------------------------------------- 1 | //auth.h 2 | #ifndef _AUTH_ 3 | #define _AUTH_ 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | typedef struct tagAuthRet { 10 | int iSuccess; 11 | long magic; 12 | } AuthRet; 13 | 14 | AuthRet get_auth_result(const char* key); 15 | 16 | #endif // !_AUTH_ 17 | -------------------------------------------------------------------------------- /D#0006-protect_my_function/code/main.cpp: -------------------------------------------------------------------------------- 1 | //main.cpp 2 | #include 3 | #include 4 | #include 5 | #include "sdk.h" // 导入int sdk_add(const int a, const int b, const char* key)函数的声明 6 | 7 | int main(int argc, char* argv[]) 8 | { 9 | const char* valid_key = "a valid key"; 10 | const char* invalid_key = "a invalid key"; 11 | 12 | int a = 1; 13 | int b = 2; 14 | int sum = sdk_add(a, b, valid_key); 15 | 16 | printf("sum[%d].\n", sum); 17 | 18 | return 0; 19 | } 20 | -------------------------------------------------------------------------------- /D#0006-protect_my_function/code/sdk.cpp: -------------------------------------------------------------------------------- 1 | //sdk.cpp 2 | #include "sdk.h" 3 | #include "auth.h" 4 | 5 | int sdk_add(const int a, const int b, const char* key) 6 | { 7 | int sum = 0; 8 | 9 | AuthRet ret = get_auth_result(key); 10 | 11 | if(ret.magic == key[0] + key[strlen(key) - 1]) { 12 | if(ret.iSuccess == 0) { 13 | printf("sdk think auth ok.\n"); 14 | sum = a + b; 15 | } else { 16 | printf("sdk think auth bad(magic number check ok).\n"); 17 | sum = 0; 18 | } 19 | } else { 20 | printf("sdk think auth bad(magic number check fail).\n"); 21 | sum = 0; 22 | } 23 | 24 | return sum; 25 | } 26 | -------------------------------------------------------------------------------- 
/D#0006-protect_my_function/code/sdk.h: -------------------------------------------------------------------------------- 1 | //sdk.h 2 | #ifndef _SDK_ 3 | #define _SDK_ 4 | 5 | #include 6 | #include 7 | 8 | // sdk实现两个整数a和b相加,返回他们的和 9 | // 必须输入正确的key才能使用 10 | 11 | int sdk_add(const int a, const int b, const char* key); 12 | 13 | #endif // !_SDK_ 14 | -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_282.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_282.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_283.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_283.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_284.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_284.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_285.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_285.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_287.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_287.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_288.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_288.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_289.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_289.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_290.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_290.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_291.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_291.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_292.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_292.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_293.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_293.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_294.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_294.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_295.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_295.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_296.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_296.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_297.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_297.png -------------------------------------------------------------------------------- /D#0006-protect_my_function/images/Selection_298.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0006-protect_my_function/images/Selection_298.png -------------------------------------------------------------------------------- /D#0007-compile_git_commit_sha1_into_elf/D#0007.md: -------------------------------------------------------------------------------- 1 | #       compile git commit sha1 into elf 2 | ## 引言 3 | 我们自己会开发一些sdk以动态库的形式发布出去,为了便于问题回溯,我们会手动加上版本号来管理,或者在代码里面加上诸如\_\_DATE\_\_和\_\_TIME\_\_这样的宏来打印出代码编译时候的时间信息,但是如果commit比较频繁,这样还是很难将时间和源代码的版本精确对应起来,这里介绍一种方法,可以自动得获取git commit的sha1信息,将其自动化地编译进sdk。 4 | 5 | + 目标:自动化打印sdk编译时候的git commit sha1信息 6 | 7 | + 限制条件:1. 自动化;2.精确;3. 
不可以影响系统正常运行。 8 | 9 | **欢迎探讨,本文持续维护。** 10 | 11 | ## 实验平台: 12 | 13 | + 操作系统:Ubuntu 16.04 LTS 14 | 15 | + 编译器:gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.10) 16 | 17 | ## 实验过程 18 | 19 | 这里为了讨论方便,约定sdk库由sdk.cpp编译而成,sdk头文件为sdk.h,应用程序由main.cpp编译而成,中间调用sdk的接口。 20 | 21 | 目录结构如下: 22 | 23 | ![](images/Selection_301.png) 24 | 25 | 代码内容如下: 26 | 27 | ![](images/Selection_302.png) 28 | 29 | ![](images/Selection_303.png) 30 | 31 | ![](images/Selection_304.png) 32 | 33 | ![](images/Selection_305.png) 34 | 35 | 结果分析: 36 | 37 | ![](images/Selection_307.png) 38 | 39 | 首先在Makefile中用git log, head和cut命令获取当前代码的git commit sha1值,然后将其赋给GIT_COMMIT_SHA1宏,最后通过gcc的-D选项将其告诉编译器。这样就可以在代码里面跟自己定义的字符串一样使用这个GIT_COMMIT_SHA1宏了。 40 | 41 | **注意**,一定要在代码commit后再编译,这样才能使当前仓库最新commit sha1值和代码一致。 42 | 43 | 44 | ## 总结 45 | 46 | 本文分享了一种将git commit sha1值自动编译进elf文件的方法,并做了简单的原理介绍,其他版本工应该也可以找到类似的方法。 47 | 48 | ## 参考资料 49 | 50 | + [Linux C/C++ 把Git commit SHA1值编译到程序中来方便查看版本](https://blog.csdn.net/thisinnocence/article/details/79517984) 51 | 52 | + [将git版本号编译进程序](https://www.cnblogs.com/wangqiguo/p/7191352.html) 53 | -------------------------------------------------------------------------------- /D#0007-compile_git_commit_sha1_into_elf/code/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | 3 | GIT_COMMIT_SHA1 = `git log -n 1 | head -n 1 | cut -d ' ' -f 2` 4 | 5 | # 编译参数: Git commit SHA1 6 | CFLAGS = -DGIT_COMMIT_SHA1=\"$(GIT_COMMIT_SHA1)\" 7 | 8 | # 编译参数:如优化等级,调试信息,打开所有告警等等 9 | CFLAGS += -O2 -g -Wall -std=c++11 10 | 11 | a.out : main.cpp sdk.cpp 12 | g++ $^ $(CFLAGS) -o $@ 13 | 14 | .PHONY: clean 15 | 16 | clean: 17 | rm a.out 18 | -------------------------------------------------------------------------------- /D#0007-compile_git_commit_sha1_into_elf/code/main.cpp: -------------------------------------------------------------------------------- 1 | //main.cpp 2 | #include 3 | #include 4 | #include "sdk.h" 5 | 6 | int main(int argc, char* argv[]) 
7 | { 8 | 9 | sdk_func(nullptr); 10 | 11 | return 0; 12 | } 13 | -------------------------------------------------------------------------------- /D#0007-compile_git_commit_sha1_into_elf/code/sdk.cpp: -------------------------------------------------------------------------------- 1 | //sdk.cpp 2 | #include "sdk.h" 3 | 4 | int sdk_func(void* p) 5 | { 6 | 7 | // 打印git commit sha1,和build_time信息,便于问题回溯 8 | printf("build_sha1:\t%s\nbuild_time:\t%s %s\n", 9 | GIT_COMMIT_SHA1, __DATE__, __TIME__); 10 | 11 | // 这里可以做sdk的事情 12 | 13 | return 0; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /D#0007-compile_git_commit_sha1_into_elf/code/sdk.h: -------------------------------------------------------------------------------- 1 | //sdk.h 2 | #ifndef _SDK_ 3 | #define _SDK_ 4 | 5 | #include 6 | #include 7 | 8 | int sdk_func(void* p); 9 | 10 | #endif // !_SDK_ 11 | -------------------------------------------------------------------------------- /D#0007-compile_git_commit_sha1_into_elf/images/Selection_301.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0007-compile_git_commit_sha1_into_elf/images/Selection_301.png -------------------------------------------------------------------------------- /D#0007-compile_git_commit_sha1_into_elf/images/Selection_302.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0007-compile_git_commit_sha1_into_elf/images/Selection_302.png -------------------------------------------------------------------------------- /D#0007-compile_git_commit_sha1_into_elf/images/Selection_303.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0007-compile_git_commit_sha1_into_elf/images/Selection_303.png -------------------------------------------------------------------------------- /D#0007-compile_git_commit_sha1_into_elf/images/Selection_304.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0007-compile_git_commit_sha1_into_elf/images/Selection_304.png -------------------------------------------------------------------------------- /D#0007-compile_git_commit_sha1_into_elf/images/Selection_305.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0007-compile_git_commit_sha1_into_elf/images/Selection_305.png -------------------------------------------------------------------------------- /D#0007-compile_git_commit_sha1_into_elf/images/Selection_307.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0007-compile_git_commit_sha1_into_elf/images/Selection_307.png -------------------------------------------------------------------------------- /D#0008-拉格朗日乘子法/images/113536.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0008-拉格朗日乘子法/images/113536.jpg -------------------------------------------------------------------------------- /D#0008-拉格朗日乘子法/images/114729.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0008-拉格朗日乘子法/images/114729.png -------------------------------------------------------------------------------- /D#0008-拉格朗日乘子法/images/131203.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0008-拉格朗日乘子法/images/131203.png -------------------------------------------------------------------------------- /D#0008-拉格朗日乘子法/images/144724.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0008-拉格朗日乘子法/images/144724.png -------------------------------------------------------------------------------- /D#0008-拉格朗日乘子法/images/151353.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0008-拉格朗日乘子法/images/151353.png -------------------------------------------------------------------------------- /D#0008-拉格朗日乘子法/images/154641.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0008-拉格朗日乘子法/images/154641.png -------------------------------------------------------------------------------- /D#0008-拉格朗日乘子法/images/162338.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0008-拉格朗日乘子法/images/162338.jpg -------------------------------------------------------------------------------- /D#0008-拉格朗日乘子法/images/dualfunc.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0008-拉格朗日乘子法/images/dualfunc.png -------------------------------------------------------------------------------- /D#0008-拉格朗日乘子法/images/dualprob.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0008-拉格朗日乘子法/images/dualprob.png -------------------------------------------------------------------------------- /D#0009-SVM/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0009-SVM/images/.gitkeep -------------------------------------------------------------------------------- /D#0009-SVM/images/110529.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0009-SVM/images/110529.png -------------------------------------------------------------------------------- /D#0009-SVM/images/113708.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0009-SVM/images/113708.png -------------------------------------------------------------------------------- /D#0009-SVM/images/160121.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0009-SVM/images/160121.png -------------------------------------------------------------------------------- /D#0009-SVM/images/203213.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0009-SVM/images/203213.jpg -------------------------------------------------------------------------------- /D#0009-SVM/images/212511.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0009-SVM/images/212511.jpg -------------------------------------------------------------------------------- /D#0009-SVM/images/231537.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0009-SVM/images/231537.png -------------------------------------------------------------------------------- /D#0010-从线性回归到对率回归到Softmax激活函数/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0010-从线性回归到对率回归到Softmax激活函数/images/.gitkeep -------------------------------------------------------------------------------- /D#0010-从线性回归到对率回归到Softmax激活函数/images/104656.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0010-从线性回归到对率回归到Softmax激活函数/images/104656.png -------------------------------------------------------------------------------- /D#0010-从线性回归到对率回归到Softmax激活函数/images/132651.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0010-从线性回归到对率回归到Softmax激活函数/images/132651.png 
-------------------------------------------------------------------------------- /D#0010-从线性回归到对率回归到Softmax激活函数/images/150909.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0010-从线性回归到对率回归到Softmax激活函数/images/150909.png -------------------------------------------------------------------------------- /D#0010-从线性回归到对率回归到Softmax激活函数/images/175045.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0010-从线性回归到对率回归到Softmax激活函数/images/175045.png -------------------------------------------------------------------------------- /D#0011-kNN/D#0011.md: -------------------------------------------------------------------------------- 1 | #       kNN 2 | ## 引言 3 | 4 | k近邻算法是最简单最容易理解和实现的**惰性**机器**分类与回归**算法,它只是将全部的训练样本都记录下来,在预测的时候根据一定的距离度量准则和决策规则选出最合适的类别或则数值输出。和SVM或者CNN相比,它**没有显示的学习过程**,是一种惰性的学习算法。本文以kNN分类算法为例对其做简单介绍。 5 | 6 | **欢迎探讨,本文持续维护。** 7 | 8 | ## 实验平台 9 | 10 | N/A,纯数学公式推导,无代码 11 | 12 | ## 算法描述 13 | 14 | 当给定训练集合D,模型记录下所有的训练集合,在新的测试样本t来的时候,kNN方法在训练集合中寻找k个和t距离最近的样本,根据这k个样本中占主要类别的类作为t的类。 15 | 16 | ## 算法关键因素 17 | 从上面的描述可以知道,在给定了训练集合之后,kNN算法中的关键因素有如下三点: 18 | 19 | 1. k值的选择 20 | 2. 距离的定义 21 | 3. 决策规则 22 | 23 | 下面对这三点做逐一分析 24 | ### k值的选择 25 | 26 | 先讨论特殊的情况,如果k=1,那么就是选训练集合中与测试样本最近的样本的类作为预测的类,此时模型复杂,任何在训练样本中出现过的样本都会被正确分类,但是,每次分类结果仅取决于训练集合中的单一样本,这样也就对训练集合中的噪音非常敏感,类似于过拟合。另一个极端,如果k=训练集合中的样本个数,此时模型简单,对于任意的测试样本,输出都是训练集合中占主导类别的类,较远的(不相似的)点也对分类结果起作用,明显分类效果很差,类似于欠拟合。 27 | 28 | 那么怎么选择合适的k值呢? 
29 | 30 | 首先,从训练集合的数据量来看。在训练集合比较小的情况下,建议先认真清理数据,然后选择比较小的k值;在训练集合比较大的情况下,选择比较大的k值提高对噪音的鲁棒程度。 31 | 32 | 其次,用k折交叉验证来选择合适的k值是一个比较稳妥的方法。另外,可以利用网格搜索技术来加快超参数挑选。 33 | 34 | ### 距离的定义 35 | 36 | 特征空间中两个相似点的距离远近代表它们的相似度。有很多计算空间中点点相似度的方法,比如L1距离,L2距离,L无穷大距离,还有余弦距离等。需要注意的是,**相同的两个点,取不同的距离度量方式,得出的相似度可能是不一样的!** 37 | 38 | 具体的距离度量方式取决于具体的问题(要选距离越小,相似性越大的距离,比如文本分类选余弦距离比欧氏距离更合适),这里很难一概而论。不过,有几点需要注意一下的: 39 | 40 | 1. 预处理,要将不同维度的**取值范围归一化**,避免少数维度在距离度量中占主导地位; 41 | 2. 特征选择,特征空间中各个维度的特征关联性越小越好,最好**正交**。 42 | 43 | ### 决策规则怎么选 44 | 45 | 在选取了k个距离最近的训练样本后,按什么方式决策测试样本的类别呢?一般用多数表决法,即选择这k个最近邻中出现次数做多的类别作为模型的输出类别。 46 | 47 | 可以证明,在损失函数取0-1损失函数时候,**多数表决法等价于经验风险最小化**。 48 | 49 | ## 算法性能分析 50 | 51 | 研究表明,在一定条件下,**最近邻**的分类错误率不会超过两倍的贝叶斯错误率。而且,**一般kNN**方法的错误率会收敛到贝叶斯错误率,所以我们**可以将kNN算法作为贝叶斯的近似**。 52 | 53 | ## 总结 54 | 55 | kNN算法是一种不需要训练过程的惰性算法,可以用于分类任务也可用于回归任务。它思路简单,容易理解,而且性能也不错。本文从算法原理、关键因素、超参数选择和性能分析等方面对其进行了初步介绍和总结。 56 | 57 | ## 参考资料 58 | 59 | + [《统计学习方法》](https://book.douban.com/subject/10590856/) 60 | + [《数据挖掘十大算法》](https://book.douban.com/subject/24735417/) 61 | + [《深度学习》](https://book.douban.com/subject/27087503/) 62 | -------------------------------------------------------------------------------- /D#0011-kNN/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0011-kNN/images/.gitkeep -------------------------------------------------------------------------------- /D#0012-为什么选交叉熵作为分类问题的损失函数/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0012-为什么选交叉熵作为分类问题的损失函数/images/.gitkeep -------------------------------------------------------------------------------- /D#0012-为什么选交叉熵作为分类问题的损失函数/images/172059.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0012-为什么选交叉熵作为分类问题的损失函数/images/172059.png -------------------------------------------------------------------------------- /D#0012-为什么选交叉熵作为分类问题的损失函数/images/195031.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0012-为什么选交叉熵作为分类问题的损失函数/images/195031.png -------------------------------------------------------------------------------- /D#0012-为什么选交叉熵作为分类问题的损失函数/images/223112.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0012-为什么选交叉熵作为分类问题的损失函数/images/223112.png -------------------------------------------------------------------------------- /D#0012-为什么选交叉熵作为分类问题的损失函数/images/223449.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0012-为什么选交叉熵作为分类问题的损失函数/images/223449.png -------------------------------------------------------------------------------- /D#0012-为什么选交叉熵作为分类问题的损失函数/images/225907.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0012-为什么选交叉熵作为分类问题的损失函数/images/225907.png -------------------------------------------------------------------------------- /D#0012-为什么选交叉熵作为分类问题的损失函数/images/231448.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0012-为什么选交叉熵作为分类问题的损失函数/images/231448.png 
-------------------------------------------------------------------------------- /D#0013-深度学习调参常用方法总结/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0013-深度学习调参常用方法总结/images/.gitkeep -------------------------------------------------------------------------------- /D#0013-深度学习调参常用方法总结/images/124926.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0013-深度学习调参常用方法总结/images/124926.png -------------------------------------------------------------------------------- /D#0013-深度学习调参常用方法总结/images/180705.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0013-深度学习调参常用方法总结/images/180705.png -------------------------------------------------------------------------------- /D#0013-深度学习调参常用方法总结/images/183737.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0013-深度学习调参常用方法总结/images/183737.jpg -------------------------------------------------------------------------------- /D#0013-深度学习调参常用方法总结/images/bayes1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0013-深度学习调参常用方法总结/images/bayes1.jpg -------------------------------------------------------------------------------- /D#0013-深度学习调参常用方法总结/images/bayes2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0013-深度学习调参常用方法总结/images/bayes2.jpg -------------------------------------------------------------------------------- /D#0013-深度学习调参常用方法总结/images/bayes3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0013-深度学习调参常用方法总结/images/bayes3.jpg -------------------------------------------------------------------------------- /D#0014-数据降维常用方法总结(LDA,PCA)/D#0014.md: -------------------------------------------------------------------------------- 1 | #       数据降维常用方法总结(LDA,PCA) 2 | ## 引言 3 | 4 | 做机器学习的时候,一般来说特征越多,我们对训练样本的信息越多,越有利于学习。但是特征太多,也会带来两个问题,1.**维数灾难**,为了学习到每个维数的规律,那个维数的样本就不能太少,而样本的数量是跟着特征维数的增大而指数性增大的;2.**资源开销大**,我们提取的特征太多,这些特征中就有可能有相关和**冗余**,给数据存储,学习和优化都带来负担。有鉴于此,在特征维数过大的时候,我们有必要对这些特征进行辨别和合并,有选择性地压缩特征的维度。本文介绍了几种常用的特征选择方法。 5 | 6 | **欢迎探讨,本文持续维护。** 7 | 8 | ## 实验平台 9 | 10 | N/A 11 | 12 | ## 线性判别分析(Linear Discriminant Analysis) 13 | 14 | 什么是好的特征?好的特征有两点才叫好, 15 | 16 | **1. 同类样本在特征空间中应该尽可能靠近;** 17 | 18 | **2. 
不同类样本在特征空间中应该尽可能远离。** 19 | 20 | **同时满足**这两个的要求的特征,在做分类时才好分,线性判别分析正是根据上面两点来做特征降维的。 21 | 22 | **线性判别分析的思想非常朴素:给定训练样例集,设法将样例投影到一条直线上,使得同类样例的投影点尽可能接近、不同类样例的投影点尽可能远离。在对新样本进行分类时,将其投影到同样的这条直线上,再根据新样本投影点的位置来确定它的类别。** 23 | 24 | ![](images/133329.png) 25 | 26 | LDA由二维降维到一维的最简单情况如上图所示,我们有在由x1,x2张成的二维特征空间内有一些正样本+和一些负样本-,LDA就是找一个由二维到一维的投影Y = W_t \* X(这里只做降维,为了分析简单,所以投影直线的截距设置为0,过二维原点),W是直线的法向量,**在这个投影上,这两类新的中心点要尽量的远离,但每类内部的点要尽量地靠拢。** 27 | 28 | 在下面公式推导之前,有一个重要的结论就是这里每个点投影后的值W_t \* x代表的是投影后的点到二维原点o的距离,可以利用向量的内积在几何上去理解,W_t \* x = = |W|\*|x|\*cos(theta) = |x|\*cos(theta),W是直线的法向量,长度为1。 29 | 30 | ![](images/143203.png) 31 | 32 | 这里推导很直观,不再赘述。 33 | 34 | ## 主成分分析(Principal Components Analysis) 35 | 36 | 线性判别分析压缩需要用到不同类的标签,如果同一类要做特征压缩,那么我们可以用主成分分析。 37 | 38 | 主成分分析的目标是通过**旋转坐标轴**(也就是线性变换!)将各个原始特征组合成新的特征,而这些新的特征有的特征所携带的信息量大,有的特征所携带的信息量小,PCA**删除一些携带信息量小的特征**,留下信息量大的特征,最大程度的保留原始特征所能表达的信息。 39 | 40 | ![](images/163536.png) 41 | 42 | 如上图所示,在原始特征X1和X2张成的特征空间,有一群样本点,这些样本点在X1轴方向分布的范围比较大,在X2轴方向分布的范围也比较大。直观地想,分布范围比较大,相当于数据分布比较乱,分布没啥规律,从信息论的角度理解就是数据在X1和X2方向上的熵比较大,携带信息比较多,不能简单地丢掉某一维度来降维。而PCA是先旋转原始X1,X2轴到新的方向F1,F2,在这个F1,F2张成的坐标系中,F1方向携带的信息多,F2方向携带的信息少(因为F2方向数据投影的取值范围很集中),这样就可以省略F2这个特征,取用F1特征达到降维的目的。 43 | 44 | **主成分分析的这种坐标轴变化是通过将原来的坐标轴进行线性组合完成的。** 45 | 46 | 为了形式化找到X1,X2到F1,F2的旋转矩阵,需要先计算原始数据**协方差矩阵**X_t\*X的特征值和单位特征向量,特征值大的特征向量方向所含的信息多(上图中F1),特征值越大,所含信息越多。PCA选取特征值大的特征向量组成旋转矩阵对原始数据降维。 47 | 48 | **如果特征根小于1,说明该主成分的解释力度还不如直接引入一个原变量的平均解释力度大,因此一般可以用特征根大于1作为纳入标准。** 49 | 50 | 更详细的推导过程可见[理解主成分分析 (PCA)](https://zhuanlan.zhihu.com/p/37810506) 51 | 52 | ## 总结 53 | 54 | 本文总结了两种常用的数据降维方法,第一个是用于**多类**的线性判别分析LDA降维,第二个是用于**单类**的主成分分析PCA方法降维。两者都有比价扎实的数学基础和压缩指导。 55 | 56 | ## 参考资料 57 | 58 | + [《深度学习》](https://book.douban.com/subject/27087503/) 59 | + [主成分分析原理介绍-笔记](https://zhuanlan.zhihu.com/p/55981609) 60 | + [理解主成分分析(PCA)](https://zhuanlan.zhihu.com/p/37810506) 61 | + [奇异值分解(SVD)](https://zhuanlan.zhihu.com/p/29846048) 62 | -------------------------------------------------------------------------------- 
/D#0014-数据降维常用方法总结(LDA,PCA)/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0014-数据降维常用方法总结(LDA,PCA)/images/.gitkeep -------------------------------------------------------------------------------- /D#0014-数据降维常用方法总结(LDA,PCA)/images/133329.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0014-数据降维常用方法总结(LDA,PCA)/images/133329.png -------------------------------------------------------------------------------- /D#0014-数据降维常用方法总结(LDA,PCA)/images/143203.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0014-数据降维常用方法总结(LDA,PCA)/images/143203.png -------------------------------------------------------------------------------- /D#0014-数据降维常用方法总结(LDA,PCA)/images/163536.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0014-数据降维常用方法总结(LDA,PCA)/images/163536.png -------------------------------------------------------------------------------- /D#0015-深度学习常用损失函数/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0015-深度学习常用损失函数/images/.gitkeep -------------------------------------------------------------------------------- /D#0015-深度学习常用损失函数/images/154349.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0015-深度学习常用损失函数/images/154349.png -------------------------------------------------------------------------------- /D#0015-深度学习常用损失函数/images/154353.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0015-深度学习常用损失函数/images/154353.png -------------------------------------------------------------------------------- /D#0015-深度学习常用损失函数/images/154408.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0015-深度学习常用损失函数/images/154408.png -------------------------------------------------------------------------------- /D#0015-深度学习常用损失函数/images/154834.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0015-深度学习常用损失函数/images/154834.png -------------------------------------------------------------------------------- /D#0015-深度学习常用损失函数/images/160509.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0015-深度学习常用损失函数/images/160509.png -------------------------------------------------------------------------------- /D#0015-深度学习常用损失函数/images/160647.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0015-深度学习常用损失函数/images/160647.png -------------------------------------------------------------------------------- /D#0015-深度学习常用损失函数/images/161349.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0015-深度学习常用损失函数/images/161349.png -------------------------------------------------------------------------------- /D#0015-深度学习常用损失函数/images/161758.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0015-深度学习常用损失函数/images/161758.png -------------------------------------------------------------------------------- /D#0015-深度学习常用损失函数/images/162310.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0015-深度学习常用损失函数/images/162310.png -------------------------------------------------------------------------------- /D#0015-深度学习常用损失函数/images/164439.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0015-深度学习常用损失函数/images/164439.png -------------------------------------------------------------------------------- /D#0016-深度学习中不平衡样本的处理/D#0016.md: -------------------------------------------------------------------------------- 1 | #       深度学习中不平衡样本的处理 2 | ## 引言 3 | 4 | 在目标检测问题中,负样本更容易采集,所以我们能得到的负样本数量一般会比正样本数量多很多。但是负样本多了,就会引起训练数据类别不平衡问题,这会带来: 5 | 6 | 1. 大量容易负样本(不提供有用的学习信息)会导致训练过程无效。 7 | 8 | 2. 
大量容易负样本产生的loss会压倒少量正样本的loss(即容易负样本的梯度占主导),导致模型性能衰退。 9 | 10 | 样本不平衡问题不是目标检测问题独有的困难,在反欺诈,灾害预测,垃圾邮件预测等等问题中也会有正样本过少导致训练集样本不平衡的问题。要解决这个问题,可以采用本文介绍的从数据层面和算法两个层面要思考解决方案。 11 | 12 | **欢迎探讨,本文持续维护。** 13 | 14 | ## 实验平台 15 | 16 | N/A 17 | 18 | ## 数据层面 19 | 通过对训练集合数据的处理,让正样本的数量和负样本的数量比例趋于平衡(例如1:3)。常见的方式有数据重采样和数据增强。 20 | ### 数据重采样 21 | 数据重采样,是指在训练之前或者训练时候,对样本多的类别采样频率减少,对样本少的类别采样频率增大,从而使在训练的时候各类类别样本数目比较平衡。 22 | #### 多数样本下采样 23 | 拿二分类人脸检测来说,背景是数量样本较多的类别,而人脸是样本较少的类别。对于负样本来说,可以选择**随机抛弃**一部分样本来减少训练时背景样本的数量来达到平衡,但是这样做会降低训练数据的多样性而影响模型泛化能力一般不采用。正确的下采样方式是这样的,比如假设训练时负样本和正样本的比例为3:1(这个比例需要根据实际问题来做出合理的假设),那么**在批训练时候,每批样本随机采集3个负样本(而不是更多)的时候就随机采集1个正样本,使每个批次的训练数据保持负样本比正样本比例大致为3:1。** 24 | 另外,根据训练模型在验证集上测试结果,**观察假阳性的规律**(比如手掌部位的假阳性较多),可以特地选取一些手掌的图片作为负样本进行训练(对负样本进行了约束,自然也就降低了负样本的数量)。 25 | 26 | #### 少数样本上采样 27 | 少数样本的上采样是指:在训练时,对少数样本那一类进行**有放回的抽样**,以用来增加负样本在训练批次里面的数量比例。或者在训练之前,对少数样本那一类进行简单复制,也属于少数样本上采样的一种方式。 28 | 需要注意的是,仅仅采用少数样本上采样,因为本质上没有真正增加少数样本的多样性,没有带来更多的信息,可能会引起模型**过拟合**问题。更保险和有效的方式是采取多数样本下采样和少数样本上采样**相结合使用**。 29 | 30 | ### 数据增强 31 | 32 | 正样本不够,可以采取一些处理方式,增加正样本,这是一种简单易行的方式。 33 | 34 | #### 图像处理增加少类样本 35 | 对少数样本的图片添加噪音(例如高斯噪音),进行直方图均衡化操作,进行裁剪,小角度旋转,翻转等等,这些都可以在不改变样本种类的前提下增加少类样本的数量。 36 | #### SMOTE 37 | SMOTE[Chawla et a., 2002]是通过对少数样本进行**插值**合成新的样本。比如对于每个少数类样本a,从a最邻近的样本中选取样本b,然后在[a,b]区间中随机选择一点作为新样本。 38 | 39 | #### Fancy PCA 40 | 在AlexNet提出的同时,Krizhevsky等人还提出了使用"Fancy PCA"的方法进行数据扩充,这个方法讲Top-1错误率降低了一个百分点(很显著的提升)。 41 | 具体操作如下:首先,对所有数据的R,G,B通道进行主成分分析,得到对应特征向量p_i和特征值lamda_i。再根据特征向量和特征值计算一组新的特征值[p1,p2,p3]\*[alpha_1\*lamda_1,alpha_2\*lamda_2,alpha_3\*lamda_3],其中alpha为均值为0,方差为0.1的高斯分布随机值。最后将其作为扰动加入原来图像的像素值即可。在每一个epoch之后,再次采样新的alpha值进行扰动。 42 | 43 | Krizhevsky等人提到“**Fancy PCA可以近似地捕获自然图像的一个重要特性,即物体特质与光照强度和颜色变化无关**”。 44 | 45 | #### 监督式数据扩充 46 | 47 | 在某些情况下,图像分类所用的是整个场景的高级语义特征,此时如果采用随机裁剪作为数据增强方式,可能会裁剪到部分样本而破坏了原来的高级语义,那样的数据,连标签都错了,完全不能用来做训练。 48 | 49 | ![](images/225817.png) 50 | 51 | 如上图所示的一个场景分类的样本,样本的标签是“海滩”,随机裁剪可能裁剪到了“天空”或者“树”上面去了,就完全不能代表“海滩”这一类场景了。 52 | 53 | 
在2016年ImageNet竞赛场景分类任务中,国内的**海康威视**团队提出了一种利用原始数据标签的监督式数据增强方法:如下图所示,首先,利用已有的数据训练出一个初始模型;然后,利用该模型跑原始训练样本集,对每个样本生成对应的热力图(可直接将分类模型最后一层卷积层特征**按照深度方向加和**得到),这**张热力图可以指示图像区域与场景标记之间的概率关系**,然后对热力图概率高的地方为中心对应原图取裁剪图像块就可以了。 54 | 55 | ![](images/230646.png) 56 | 57 | ## 算法层面 58 | 在算法层面减轻类别不平衡的方法基本上是**改造优化时的目标函数**,使目标函数倾向于减轻多数样本的惩罚力度,加大少数样本的惩罚力度;或者加大难分样本的惩罚力度,减轻容易分样本的惩罚力度。这其中最具有典型性的是**Focal Loss**。关于Focal Loss的介绍,可以参见[《深度学习常用损失函数》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230015-%E6%B7%B1%E5%BA%A6%E5%AD%A6%E4%B9%A0%E5%B8%B8%E7%94%A8%E6%8D%9F%E5%A4%B1%E5%87%BD%E6%95%B0/D%230015.md)一文。 59 | ## 总结 60 | 61 | 本文对于深度学习实践中常遇到的数据集不平衡问题的处理方式,从数据层面和算法层面两个角度总结了一些实际可行的缓解方法。 62 | 63 | ## 参考资料 64 | 65 | + [《深度学习》](https://book.douban.com/subject/27087503/) 66 | + [CNN_book](http://210.28.132.67/weixs/book/CNN_book.pdf) 67 | -------------------------------------------------------------------------------- /D#0016-深度学习中不平衡样本的处理/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0016-深度学习中不平衡样本的处理/images/.gitkeep -------------------------------------------------------------------------------- /D#0016-深度学习中不平衡样本的处理/images/225817.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0016-深度学习中不平衡样本的处理/images/225817.png -------------------------------------------------------------------------------- /D#0016-深度学习中不平衡样本的处理/images/230646.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0016-深度学习中不平衡样本的处理/images/230646.png -------------------------------------------------------------------------------- /D#0017-MTCNN和FaceBoxes/images/.gitkeep: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0017-MTCNN和FaceBoxes/images/.gitkeep -------------------------------------------------------------------------------- /D#0017-MTCNN和FaceBoxes/images/174303.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0017-MTCNN和FaceBoxes/images/174303.png -------------------------------------------------------------------------------- /D#0017-MTCNN和FaceBoxes/images/181230.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0017-MTCNN和FaceBoxes/images/181230.png -------------------------------------------------------------------------------- /D#0017-MTCNN和FaceBoxes/images/183249.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0017-MTCNN和FaceBoxes/images/183249.png -------------------------------------------------------------------------------- /D#0017-MTCNN和FaceBoxes/images/184628.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0017-MTCNN和FaceBoxes/images/184628.png -------------------------------------------------------------------------------- /D#0017-MTCNN和FaceBoxes/images/202239.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0017-MTCNN和FaceBoxes/images/202239.png 
-------------------------------------------------------------------------------- /D#0017-MTCNN和FaceBoxes/images/203348.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0017-MTCNN和FaceBoxes/images/203348.png -------------------------------------------------------------------------------- /D#0017-MTCNN和FaceBoxes/images/203537.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0017-MTCNN和FaceBoxes/images/203537.png -------------------------------------------------------------------------------- /D#0017-MTCNN和FaceBoxes/images/203658.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0017-MTCNN和FaceBoxes/images/203658.png -------------------------------------------------------------------------------- /D#0017-MTCNN和FaceBoxes/images/203924.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0017-MTCNN和FaceBoxes/images/203924.png -------------------------------------------------------------------------------- /D#0018-Mask_TextSpotter/D#0018.md: -------------------------------------------------------------------------------- 1 | #       Mask TextSpotter 2 | ## 引言 3 | 4 | **欢迎探讨,本文持续维护。** 5 | 6 | ## 实验平台 7 | 8 | N/A 9 | 10 | ## 文本检测与识别简要回顾 11 | 12 | 1. [CTPN](https://arxiv.org/abs/1609.03605) 13 | 14 | 2. [TextBoxes](https://arxiv.org/abs/1611.06779v1) 15 | 16 | 3. 
[TextBoxes++](https://arxiv.org/abs/1801.02765) 17 | 18 | ## [Mask TextSpotter](https://arxiv.org/abs/1807.02242) 19 | 20 | ### 简单介绍 21 | 22 | ### 算法流程 23 | 24 | ### 网络结构 25 | 26 | ### 网络训练 27 | 28 | #### 样本准备 29 | 30 | #### 目标函数 31 | 32 | #### 在线难例挖掘 33 | 34 | ### 实践中可能的优化点 35 | 36 | ## 总结 37 | 38 | ## 参考资料 39 | 40 | + [Detecting Text in Natural Image with Connectionist Text Proposal Network](https://arxiv.org/abs/1609.03605) 41 | + [TextBoxes: A Fast Text Detector with a Single Deep Neural Network](https://arxiv.org/abs/1611.06779v1) 42 | + [TextBoxes++: A Single-Shot Oriented Scene Text Detector](https://arxiv.org/abs/1801.02765) 43 | + [Mask TextSpotter: An End-to-End Trainable Neural Network for Spotting Text with Arbitrary Shapes](https://arxiv.org/abs/1807.02242) 44 | + [文本检测之MaskTextSpotter](https://zhuanlan.zhihu.com/p/44491270) 45 | + [旷视科技提出新型端到端神经网络,可检测和识别任意形状的文本](https://zhuanlan.zhihu.com/p/40393967) 46 | -------------------------------------------------------------------------------- /D#0018-Mask_TextSpotter/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0018-Mask_TextSpotter/images/.gitkeep -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/.gitkeep -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/211326.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/211326.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/213046.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/213046.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/213312.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/213312.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/213324.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/213324.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/213342.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/213342.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/224342.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/224342.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/224356.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/224356.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/225135.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/225135.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_329.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_329.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_330.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_330.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_331.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_331.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_332.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_332.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_333.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_333.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_334.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_334.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_335.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_335.png -------------------------------------------------------------------------------- /D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_336.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0019-DeepID1,DeepID2,DeepID2+和DeepID3/images/Selection_336.png -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/.gitkeep -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/213118.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/213118.png -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/213528.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/213528.png -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/213536.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/213536.png -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/224019.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/224019.png -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/233104.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/233104.png -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/233310.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/233310.png -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/Selection_343.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/Selection_343.png -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/Selection_345.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/Selection_345.png -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/Selection_346.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/Selection_346.png -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/Selection_347.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/Selection_347.png -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/Selection_348.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/Selection_348.png -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/Selection_349.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/Selection_349.png -------------------------------------------------------------------------------- /D#0020-Batch-Normalization层原理与分析/images/Selection_350.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0020-Batch-Normalization层原理与分析/images/Selection_350.png -------------------------------------------------------------------------------- /D#0021-机器学习中的过拟合及其解决办法/images/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0021-机器学习中的过拟合及其解决办法/images/.gitkeep -------------------------------------------------------------------------------- /D#0021-机器学习中的过拟合及其解决办法/images/214025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0021-机器学习中的过拟合及其解决办法/images/214025.png -------------------------------------------------------------------------------- /D#0021-机器学习中的过拟合及其解决办法/images/232539.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0021-机器学习中的过拟合及其解决办法/images/232539.png -------------------------------------------------------------------------------- /D#0021-机器学习中的过拟合及其解决办法/images/Selection_351.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0021-机器学习中的过拟合及其解决办法/images/Selection_351.png -------------------------------------------------------------------------------- /D#0021-机器学习中的过拟合及其解决办法/images/Selection_352.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0021-机器学习中的过拟合及其解决办法/images/Selection_352.png -------------------------------------------------------------------------------- /D#0021-机器学习中的过拟合及其解决办法/images/Selection_353.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0021-机器学习中的过拟合及其解决办法/images/Selection_353.png 
-------------------------------------------------------------------------------- /D#0021-机器学习中的过拟合及其解决办法/images/Selection_354.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0021-机器学习中的过拟合及其解决办法/images/Selection_354.png -------------------------------------------------------------------------------- /D#0021-机器学习中的过拟合及其解决办法/images/Selection_355.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0021-机器学习中的过拟合及其解决办法/images/Selection_355.png -------------------------------------------------------------------------------- /D#0021-机器学习中的过拟合及其解决办法/images/Selection_356.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0021-机器学习中的过拟合及其解决办法/images/Selection_356.png -------------------------------------------------------------------------------- /D#0021-机器学习中的过拟合及其解决办法/images/Selection_357.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0021-机器学习中的过拟合及其解决办法/images/Selection_357.png -------------------------------------------------------------------------------- /D#0021-机器学习中的过拟合及其解决办法/images/Selection_358.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0021-机器学习中的过拟合及其解决办法/images/Selection_358.png -------------------------------------------------------------------------------- /D#0021-机器学习中的过拟合及其解决办法/images/Selection_359.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0021-机器学习中的过拟合及其解决办法/images/Selection_359.png -------------------------------------------------------------------------------- /D#0022-SSD/D#0022.md: -------------------------------------------------------------------------------- 1 | #       SSD 2 | ## 引言 3 | 4 | **欢迎探讨,本文持续维护。** 5 | 6 | ## 实验平台 7 | 8 | N/A 9 | 10 | ## 简单介绍 11 | 12 | ## 算法流程 13 | 14 | ## 网络结构 15 | 16 | ## 网络训练 17 | 18 | ### 训练集 19 | 20 | ### 目标函数 21 | 22 | ### 在线难例挖掘 23 | 24 | ## 总结 25 | 26 | ## 参考资料 27 | 28 | + [《机器学习与应用》](https://book.douban.com/subject/30445238/) 29 | -------------------------------------------------------------------------------- /D#0022-SSD/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0022-SSD/images/.gitkeep -------------------------------------------------------------------------------- /D#0023-CNN模型计算量估计/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0023-CNN模型计算量估计/images/.gitkeep -------------------------------------------------------------------------------- /D#0024-CNN模型内存访问估计/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0024-CNN模型内存访问估计/images/.gitkeep -------------------------------------------------------------------------------- /D#0025-CNN中使用卷积代替全连接/D#0025.md: -------------------------------------------------------------------------------- 1 | #           CNN中使用卷积代替全连接 2 | ## 引言 3 | 4 | 
在经典分类网络,比如LeNet、AlexNet中,在前面的卷积层提取特征之后都串联全连接层来做分类。但是近些年来,越来越多的网络,比如SSD,FasterRCNN的RPN,MTCNN中的PNet,都使用卷积层来代替全连接,也一样可以做到目标分类的效果,而且 5 | 6 | 1. 更灵活,不需要限定输入图像的分辨率; 7 | 2. 更高效,只需要做一次前向计算。 8 | 9 | 本文首先对全连接层和卷积层关系做分析,然后比较全连接层和卷积层的优缺点,自然也就搞清楚了为什么用卷积层替代全连接层这个问题了。 10 | 11 | **欢迎探讨,本文持续维护。** 12 | 13 | ## 实验平台 14 | 15 | N/A 16 | 17 | ## 全连接和卷积的关系 18 | 19 | 全连接层和卷积层只要设置好了对应的参数,可以达到相同输入输出的效果,在这个意义上,在数学上可以认为它们是可以相互替换的。 20 | 21 | 下面我们以输入10 x 10 x 3的特征图,输出10 x 10 x 1的特征图来证明。 22 | 23 | ### 全连接层是一种核很大的卷积层 24 | 25 | 全连接层怎么做?全连接层输入10 x 10 x 3的特征图,首先将其reshape成一维的300个输入神经元,然后每个输出神经元的值都是这300个输入神经元的线性组合,最后将100个输出reshape成10 x 10 x 1的形状。 26 | 27 | 卷积层怎么做?卷积层可以直接用100个10 x 10 x 3的滤波器,分别直接贴着输入的10 x 10 x 3的输入特征图做滤波,得到100个一维的输出,然后把这个100个一维输出reshape成10 x 10 x 1的形状。这100个输出,每个也都是由输入特征图上300个像素点线性组合而来,**在数学上和上面的全连接层理论上可以达到一样的效果**。这里用到的卷积和通常我们看到的卷积唯一有点特殊的是,为了达到全连接(采集到所有输入特征图像素的信息)的效果,它的分辨率和输入特征图分辨率一样。 28 | 29 | 所以,由上面的讨论可知,全连接层可以用(**分辨率和输入特征图相同的**)卷积代替。 30 | 31 | ### 卷积层是一种稀疏的全连接层 32 | 33 | 卷积层怎么做?假设卷积核大小为3 x 3的方形核,stride = 1, 为了输出分辨率不变padding用SAME方式。那么卷积核的形状是3 x 3 x 3,输出通道数为1,只用一个这样的卷积核按照常规卷积来做就行,输出10 x 10 x 1的特征图很容易。 34 | 35 | 全连接层怎么做?全连接层输入10 x 10 x 3 = 300个神经元,卷积的时候每次卷积只是连接了其中的一个3 x 3 x 3 = 27的子集,那么可以在做全连接的时候,除了这27个神经元设置连接关系,其余的 300 - 27 = 273个**连接系数直接设置为0**就可以了。做 10 x 10 x 1 = 100次这样的全连接,就可以得到100个输出神经元,再reshape成10 x 10 x 1的形状就可以了。 36 | 37 | 所以,由上面的讨论可以得到,卷积层只是全连接层的一个子集,把**全连接的某些连接系数设置为0**,就可以达到和卷积相同的效果。 38 | 39 | ## 卷积替代全连接的优点 40 | 41 | 由上一节的讨论可以知道,其实卷积层和全连接层在理论上是可以相互替代的,那么,为什么我们看到的趋势是全连接被卷积层替代,而不是卷积层被全连接层替代呢?这要从卷积层相比于全连接层两个工程上的优点来讲: 42 | 43 | ### 对输入分辨率的限制 44 | 45 | 如果网络后面有全连接层,而全连接层的输入神经元个数就是固定的,那么反推上层卷积层的输出是固定的,继续反推可知输入网络的图片的分辨率是固定的。例如,LeNet由于有全连接层,输入就只能是28 x 28的。如果网络中的全连接层都用卷积层替代,网络中只有卷积层,那么网络的输出分辨率是随着输入图片的分辨率而来的,输出图中每一个像素点都对应着输入图片的一个区域(可以用stride,pooling来反算)。 46 | 47 | ### 计算效率比较 48 | 49 | ![](images/225831.png) 50 | 51 | 同样以LeNet来做例子,如果一个图片是280 x 280的分辨率,为了识别图片中所有的数字(为了简单,假设每个数字都是在这个大图划分为10 x 
10的网格中),那么为了识别这100个位置的数字,至少需要做100次前向;而全卷积网络的特点就在于输入和输出都是二维的图像,并且**输入和输出具有相对应的空间结构**,我们可以将网络的输出看作是一张**heat-map**,用热度来代表待检测的原图位置出现目标的概率,只做一次前向就可以得到所有位置的分类概率。 52 | 53 | ## 总结 54 | 55 | 本文首先在理论上论证了卷积层和全连接层的可互换性质,然后详细分析了在实践中用卷积层代替全连接层的两个好处,第一个是去掉全连接层对网络输入图像分辨率的限制;第二个好处是全卷积网络只需要做一次前向运算就可以获得一张目标所在位置的heat-map,节约了计算。 56 | 57 | ## 参考资料 58 | 59 | + [CNN中使用卷积层替代全连接层训练](https://zhuanlan.zhihu.com/p/65150848) 60 | + [Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks](https://kpzhang93.github.io/MTCNN_face_detection_alignment/paper/spl.pdf) 61 | -------------------------------------------------------------------------------- /D#0025-CNN中使用卷积代替全连接/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0025-CNN中使用卷积代替全连接/images/.gitkeep -------------------------------------------------------------------------------- /D#0025-CNN中使用卷积代替全连接/images/225831.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0025-CNN中使用卷积代替全连接/images/225831.png -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/.gitkeep -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/215102.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/215102.png 
-------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/224212.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/224212.png -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/224506.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/224506.png -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/230223.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/230223.png -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/230407.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/230407.png -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/230607.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/230607.png -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/232423.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/232423.png -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/Selection_381.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/Selection_381.png -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/Selection_382.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/Selection_382.png -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/Selection_383.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/Selection_383.png -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/Selection_384.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/Selection_384.png -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/Selection_387.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/Selection_387.png 
-------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/Selection_388.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/Selection_388.png -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/Selection_389.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/Selection_389.png -------------------------------------------------------------------------------- /D#0026-深度学习检测小目标常用方法/images/Selection_390.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0026-深度学习检测小目标常用方法/images/Selection_390.png -------------------------------------------------------------------------------- /D#0027-聊聊2017 ImageNet夺冠的SENet/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0027-聊聊2017 ImageNet夺冠的SENet/images/.gitkeep -------------------------------------------------------------------------------- /D#0027-聊聊2017 ImageNet夺冠的SENet/images/SENet.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0027-聊聊2017 ImageNet夺冠的SENet/images/SENet.jpg -------------------------------------------------------------------------------- /D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_418.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_418.png -------------------------------------------------------------------------------- /D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_419.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_419.png -------------------------------------------------------------------------------- /D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_420.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_420.png -------------------------------------------------------------------------------- /D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_421.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_421.png -------------------------------------------------------------------------------- /D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_423.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_423.png -------------------------------------------------------------------------------- /D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_424.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_424.png -------------------------------------------------------------------------------- /D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_425.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_425.png -------------------------------------------------------------------------------- /D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_426.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_426.png -------------------------------------------------------------------------------- /D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_427.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_427.png -------------------------------------------------------------------------------- /D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_428.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0027-聊聊2017 ImageNet夺冠的SENet/images/Selection_428.png -------------------------------------------------------------------------------- /D#0028-再聊SENet的孪生兄弟SKNet/D#0028.md: -------------------------------------------------------------------------------- 1 | #         再聊SENet的孪生兄弟SKNet 2 | ## 引言 3 | 4 | 上节[《聊聊2017 
ImageNet夺冠的SENet》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230027-%E8%81%8A%E8%81%8A2017%20ImageNet%E5%A4%BA%E5%86%A0%E7%9A%84SENet/D%230027.md)对SENet这种在通道维度(channel-wise)进行显式学习来做信息调整的结构做了介绍。SENet的思想(显式学习不同通道特征图的重要性来标定不同通道)如果用在Kernel Size这个维度会怎样呢?不同尺寸的Kernel学习到的特征图,能不能用一种显式的方式来学习它们的输出权重进行标定融合呢?发表在2019 CVPR上的论文[Selective Kernel Networks](https://arxiv.org/abs/1903.06586)对此做了一些有益的分析,此文也是Momenta出的,可谓和SENet思路一脉相承,就像孪生兄弟一样,这里也简单介绍一下。 5 | 6 | **欢迎探讨,本文持续维护。** 7 | 8 | ## 实验平台 9 | 10 | N/A 11 | 12 | ## 从Inception到SKNet 13 | 14 | SKNet和SENet的渊源关系在前面引言部分已经交代清楚了,这里讲讲SKNet和经典的Inception结构之间的关系。 15 | 16 | ![](images/Selection_430.png) 17 | 18 | 如上图所示的是基本的Inception结构,该结构在上一层的输入上用多个不同Kernel size的卷积核进行卷积加上max pooling操作得到输出的各个特征图,然后各个特征图Concate起来组合成输出的特征图。不同Kernel size的特征图上的像素点具有不同大小的感受野,所表达的信息在空间上大小不同,这样就丰富了所提取的特征,加强了信息的丰富程度和特征的表达能力,进而提升了网络的性能。 19 | 20 | 但是,这样的Concate还是觉得有点粗暴,能不能用类似于SENet的显式学习的方法,根据特征图的内容,去学习一套更精细的组合规则(系数)呢?而且,根据神经学的研究也发现,单个神经元的感受野的大小也不是固定不变的,而是根据输入的刺激的大小来做相应调整的。这就是SKNet要做的事情。 21 | 22 | ## SKNet模块介绍 23 | 24 | ### A Big Picture 25 | 26 | SKNet模块的构造很简单,以最简单的两个不同尺寸卷积核为例,如下图所示: 27 | 28 | ![](images/Selection_431.png) 29 | 30 | 首先给定一个输入特征图X,用两个不同尺寸的卷积核对其进行卷积得到黄色和绿色特征图,然后把这两个特征图加起来得到特征图U,对这个特征图U进行Global average pooling得到特征图S,进行全连接得到Z,然后Softmax Attention得到两组权重向量a,b,用这两组权重向量对前面最先得到的黄色绿色特征图进行加权求和得到SKNet模块的输出V。 31 | 32 | ### 细节分析 33 | 34 | 上面的讨论还是比较抽象,这里分成三个阶段Split, Fuse和Select来一个个分析SKNet结构的实现细节。 35 | 36 | 1. **Split**: 首先是Split操作,Split操作用两个不同尺寸的卷积核对同一个输入X分别做卷积,得到两个特征图(上图中3x3卷积核生成黄色特征图,5x5卷积核生成绿色特征图)。在这里,为了减少计算量,卷积可以采用分组卷积的方式,后面加上BN层和ReLU激活函数;更进一步,可以采用dilation = 2的3x3空洞卷积作为这里的5x5卷积在不增加计算量的前提下得到更大的感受野; 37 | 38 | 2. **Fuse**: 再看中间,是Fuse操作。这里先把前面卷积出来的黄色和绿色特征图加起来,然后和SENet类似地,用一个Global average pooling操作在空间维度压缩特征图S,获取全局感受野,然后对S采用全连接![](images/Selection_432.png)(先做全连接,再做BN,最后ReLU)的方式得到Z,当然,这里Z的长度也是可以用超参数![](images/Selection_433.png)控制的; 39 | 40 | 3. 
**Select**: 前面得到的Z只是得到了一个包含不同感受野特征图和空间上全局信息的编码,接下来需要另外一种变换来抓取各个卷积核的特征图之间的关系。这里作者用了Softmax Attention机制: 41 | 42 | ![](images/Selection_434.png) 43 | 44 | 这里ac和bc是互补关系,如果想在更多的核上做Select,那么就可以扩展到更多的Softmax输出。这里得到的ac和bc作为权重,对开始左边的黄色特征图和绿色特征图做加权求和,可以得到最终SKNet模块的输出特征图V。 45 | 46 | 47 | ## 我的一点困惑 48 | 49 | 在前面的SENet的分析中,对于SENet的Excitation步骤,用的是**Sigmoid**来把1x1xC的特征转换成1x1xC的权重,论文给的理由是各个Channel的特征不一定要互斥,这样允许有更大的灵活性(例如可以允许多个通道同时得到比较大的权值),原文如下图: 50 | 51 | ![](images/Selection_435.png) 52 | 53 | 但是,在SKNet里面,为什么就要用Softmax这种one-hot前提的形式的函数来转换呢?难道作者认为不同Kernel Size得到的特征图是mutually-exclusive的吗?这一点我觉得作者没解释得很清楚,也可能是我理解错了,如果哪位读者有好的理解,欢迎探讨。 54 | 55 | ## 总结 56 | 57 | 本文介绍的SKNet模块通过显式地学习一组权重,可以在一定程度上对不同的输入自适应地融合不同尺寸的卷积核得到的不同感受野的特征图的信息。思路是和SENet差不多,只不过SENet是显式地学习Channel-wise的权重来重标定通道特征图,它把SENet的思想迁移到了Inception上来显式地学习不同Kernel Size的特征图的权重。 58 | 59 | ## 参考资料 60 | 61 | + [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507) 62 | + [SENet GitHub](https://github.com/hujie-frank/SENet) 63 | + [Going Deeper with Convolutions](https://arxiv.org/abs/1409.4842) 64 | + [Selective Kernel Networks](https://arxiv.org/abs/1903.06586) 65 | + [depthwise separable convolutions in mobilenet](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230004-depthwise_separable_convolutions_in_mobilenet/D%230004.md) 66 | + [《聊聊2017 ImageNet夺冠的SENet》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230027-%E8%81%8A%E8%81%8A2017%20ImageNet%E5%A4%BA%E5%86%A0%E7%9A%84SENet/D%230027.md) 67 | 68 | -------------------------------------------------------------------------------- /D#0028-再聊SENet的孪生兄弟SKNet/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0028-再聊SENet的孪生兄弟SKNet/images/.gitkeep -------------------------------------------------------------------------------- /D#0028-再聊SENet的孪生兄弟SKNet/images/Selection_430.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0028-再聊SENet的孪生兄弟SKNet/images/Selection_430.png -------------------------------------------------------------------------------- /D#0028-再聊SENet的孪生兄弟SKNet/images/Selection_431.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0028-再聊SENet的孪生兄弟SKNet/images/Selection_431.png -------------------------------------------------------------------------------- /D#0028-再聊SENet的孪生兄弟SKNet/images/Selection_432.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0028-再聊SENet的孪生兄弟SKNet/images/Selection_432.png -------------------------------------------------------------------------------- /D#0028-再聊SENet的孪生兄弟SKNet/images/Selection_433.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0028-再聊SENet的孪生兄弟SKNet/images/Selection_433.png -------------------------------------------------------------------------------- /D#0028-再聊SENet的孪生兄弟SKNet/images/Selection_434.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0028-再聊SENet的孪生兄弟SKNet/images/Selection_434.png -------------------------------------------------------------------------------- /D#0028-再聊SENet的孪生兄弟SKNet/images/Selection_435.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0028-再聊SENet的孪生兄弟SKNet/images/Selection_435.png -------------------------------------------------------------------------------- /D#0029-CV中的注意力机制/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0029-CV中的注意力机制/images/.gitkeep -------------------------------------------------------------------------------- /D#0029-CV中的注意力机制/images/Selection_437.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0029-CV中的注意力机制/images/Selection_437.png -------------------------------------------------------------------------------- /D#0029-CV中的注意力机制/images/Selection_438.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0029-CV中的注意力机制/images/Selection_438.png -------------------------------------------------------------------------------- /D#0029-CV中的注意力机制/images/Selection_439.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0029-CV中的注意力机制/images/Selection_439.png -------------------------------------------------------------------------------- /D#0029-CV中的注意力机制/images/Selection_440.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0029-CV中的注意力机制/images/Selection_440.png -------------------------------------------------------------------------------- 
/D#0029-CV中的注意力机制/images/Selection_441.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0029-CV中的注意力机制/images/Selection_441.png -------------------------------------------------------------------------------- /D#0029-CV中的注意力机制/images/Selection_442.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0029-CV中的注意力机制/images/Selection_442.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/.gitkeep -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_443.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_443.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_444.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_444.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_445.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_445.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_446.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_446.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_447.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_447.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_448.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_448.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_449.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_449.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_451.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_451.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_452.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_452.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_453.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_453.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_454.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_454.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_455.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_455.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_456.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_456.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_458.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_458.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_459.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_459.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_461.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_461.png -------------------------------------------------------------------------------- /D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_462.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0030-类MTCNN的360RIP人脸检测器PCN/images/Selection_462.png -------------------------------------------------------------------------------- /D#0031-知识蒸馏Knowledge-Distillation/images/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0031-知识蒸馏Knowledge-Distillation/images/.gitkeep -------------------------------------------------------------------------------- /D#0031-知识蒸馏Knowledge-Distillation/images/KD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0031-知识蒸馏Knowledge-Distillation/images/KD.png -------------------------------------------------------------------------------- /D#0031-知识蒸馏Knowledge-Distillation/images/Selection_478.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0031-知识蒸馏Knowledge-Distillation/images/Selection_478.png -------------------------------------------------------------------------------- /D#0031-知识蒸馏Knowledge-Distillation/images/Selection_479.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0031-知识蒸馏Knowledge-Distillation/images/Selection_479.png -------------------------------------------------------------------------------- /D#0032-CNN可视化之类激活热力图Grad-CAM/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0032-CNN可视化之类激活热力图Grad-CAM/images/.gitkeep -------------------------------------------------------------------------------- /D#0032-CNN可视化之类激活热力图Grad-CAM/images/Selection_004.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0032-CNN可视化之类激活热力图Grad-CAM/images/Selection_004.png -------------------------------------------------------------------------------- /D#0032-CNN可视化之类激活热力图Grad-CAM/images/Selection_005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0032-CNN可视化之类激活热力图Grad-CAM/images/Selection_005.png -------------------------------------------------------------------------------- /D#0032-CNN可视化之类激活热力图Grad-CAM/images/Selection_006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0032-CNN可视化之类激活热力图Grad-CAM/images/Selection_006.png -------------------------------------------------------------------------------- /D#0032-CNN可视化之类激活热力图Grad-CAM/images/Selection_007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0032-CNN可视化之类激活热力图Grad-CAM/images/Selection_007.png -------------------------------------------------------------------------------- /D#0032-CNN可视化之类激活热力图Grad-CAM/images/Selection_008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0032-CNN可视化之类激活热力图Grad-CAM/images/Selection_008.png -------------------------------------------------------------------------------- /D#0033-一些分类网络的训练技巧/images/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0033-一些分类网络的训练技巧/images/.gitkeep -------------------------------------------------------------------------------- /D#0033-一些分类网络的训练技巧/images/Selection_495.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0033-一些分类网络的训练技巧/images/Selection_495.png -------------------------------------------------------------------------------- /D#0033-一些分类网络的训练技巧/images/Selection_496.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0033-一些分类网络的训练技巧/images/Selection_496.png -------------------------------------------------------------------------------- /D#0033-一些分类网络的训练技巧/images/Selection_497.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0033-一些分类网络的训练技巧/images/Selection_497.png -------------------------------------------------------------------------------- /D#0033-一些分类网络的训练技巧/images/Selection_498.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0033-一些分类网络的训练技巧/images/Selection_498.png -------------------------------------------------------------------------------- /D#0033-一些分类网络的训练技巧/images/Selection_499.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0033-一些分类网络的训练技巧/images/Selection_499.png 
-------------------------------------------------------------------------------- /D#0033-一些分类网络的训练技巧/images/Selection_500.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0033-一些分类网络的训练技巧/images/Selection_500.png -------------------------------------------------------------------------------- /D#0033-一些分类网络的训练技巧/images/Selection_501.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0033-一些分类网络的训练技巧/images/Selection_501.png -------------------------------------------------------------------------------- /D#0033-一些分类网络的训练技巧/images/Selection_502.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0033-一些分类网络的训练技巧/images/Selection_502.png -------------------------------------------------------------------------------- /D#0034-火箭发射:阿里巴巴的轻量网络训练方法/D#0034.md: -------------------------------------------------------------------------------- 1 | # 火箭发射:阿里巴巴的轻量网络训练方法 2 | 3 | ## 引言 4 | 5 | 一般而言,简单的网络速度快,但性能不如复杂的网络好;负责的网络性能好,但是计算量大,也快不了。如何能结合小网络的速度和大网络的性能是一个很难也很重要的问题。主流的思路有这么几种: 6 | 7 | 从模型上来讲, 8 | 9 | 1. 模型的**剪枝和压缩**,先训练一个性能好的大的模型,然后进行裁剪减小计算量; 10 | 2. **轻量级网络**,比如MobileNet, ShuffeNet等; 11 | 3. [**知识蒸馏**](https://arxiv.org/abs/1503.02531),用性能好的大模型作为“教师”指导小模型训练,让小模型模仿大模型的泛化行为([《**D#0031-知识蒸馏Knowledge-Distillation**》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230031-%E7%9F%A5%E8%AF%86%E8%92%B8%E9%A6%8FKnowledge-Distillation/D%230031.md)已有介绍); 12 | 13 | 从工程上讲, 14 | 15 | 4. 模型**量化**,例如在某些平台上,用int8代替float32来做预测; 16 | 17 | 5. 
**算子融合**,比如BN融合([《**D#0020-Batch-Normalization层原理与分析**》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230020-Batch-Normalization%E5%B1%82%E5%8E%9F%E7%90%86%E4%B8%8E%E5%88%86%E6%9E%90/D%230020.md)中也有介绍), Scale融合等; 18 | 19 | 这些只是简单列举了几种,而且他们也并不是互斥的,比如用了剪枝和压缩之后也可以继续做算子融合。 20 | 21 | 这里介绍一种**训练方法**,出自于**阿里巴巴**在2018 AAAI上的论文《Rocket Launching: A Universal and Efficient Framework for Training Well-performing Light Net》,思路上像知识蒸馏,实际上是借鉴了知识蒸馏思路的一种通用的高效训练方法。但与传统的知识蒸馏相比,它的创新点和优势在于:能同时训练复杂和简单两个网络,而不是先训练一个复杂网络,然后用其训练一个简单网络,**缩短了总的训练时间**。 22 | 23 | **欢迎探讨,本文持续维护。** 24 | 25 | ## 实验平台 26 | 27 | N/A 28 | 29 | ## 火箭发射思路简介 30 | 31 | 火箭发射升空过程分为多个阶段,最开始的一个阶段,多级助推器朝着一个目标共同飞行,在后期稳定阶段,第一级助推器脱离,只用第二级助推器飞行。在“火箭发射”训练方法中,训练时,同时训练**共享底层特征**的复杂和简单两个网络,让他们朝着同样的目标优化的同时,用复杂网络**实时**的输出结果作为hint去指导简单网络来提升小网络的性能;在预测时,**砍掉**复杂网络,只用简单网络做预测来减小预测时间。 32 | 33 | ## 火箭发射训练细节 34 | 35 | ![](images/210322.png) 36 | 37 | 如上图所示为训练时候大的框架:粉色虚线框住的是复杂网络,蓝色虚线框住的是简单网络;底下黄色的层是复杂网络和简单网络共享的层,粉色的那些层是复杂网络独有的层,蓝色的层是简单网络独有的层;z(x)是复杂网络训练时候输出的logits score,q(x)是z(x)通过Softmax激活得到的概率,l(x)和p(x)同理。 38 | 39 | ![](images/211148.png) 40 | 41 | 上图是训练时候的前向预测的Loss,前两项很好理解,是分类常用的**多分类交叉熵**,最后黄色那一项也很好理解,是复杂网络的score和简单网络的**score的均方差**,称之为**Hint Loss**,这两项的差应该尽量小,代表复杂网络在教简单网络。 42 | 43 | 值得注意的几点: 44 | 45 | 1. **参数共享**:在网络的底层,黄色的那几层是大网络和小网络共享的。很好理解,浅层的特征表达的都是一些很基本的信息,比如线和边,这些信息是后面层做更高层抽象表达的原材料,如果小网络共用了大网络的这些层,无疑是**复用了大网络学习到的优良的原材料**,是有好处的; 46 | 47 | 2. **同步训练**:在最初的知识蒸馏中,是先有了大的复杂网络,然后用复杂网络的输出去监督小的简单网络训练。而这除了训练过程时间变长(训练复杂网络的时间加训练简单网络的时间大于同步两者一起训练的时间)的问题之外,还有一个潜在的缺点,那就是大网络学习的**过程中的信息**丢掉了,如果同时训练,用大网络每一个mini-batch的结果去监督小网络,那么小网络学习的知识可能更多; 48 | 49 | 3. **Hint Loss**:在最初的知识蒸馏中,大网络给小网络的监督信号是q(x),然后加入温度超参数T来调节,取交叉熵作为loss,而这里**训练用的监督信号是z(x),取MSE作为loss**; 50 | 51 | ![](images/213237.png) 52 | 53 | 4. 
**梯度拦截Gradient Block**:大网络有更强的表达能力,要给大网络更大的自由去学习,**不能让小网络的结果去干扰了大网络的训练**,所以,在参数更新,损失梯度回传的时候更新Wb的信号应该**只来自于大网络的输出和真值的交叉熵**,小网络更新Wl的时候,损失应该来自于他自己输出和真值的交叉熵还有他的输出l(x)和大网络的z(x)之间的Hint Loss。 54 | 55 | ## 总结 56 | 57 | 火箭网络的训练框架思路清晰,实现也比较简单,从论文中给出的效果来看,比传统的知识蒸馏效果要好一点,可以作为在网络提速项目中知识蒸馏方向的一个有益的思路拓展和另外一个选择,此记。 58 | 59 | ## 参考资料 60 | 61 | + [Distilling the Knowledge in a Neural Network](https://arxiv.org/abs/1503.02531) 62 | + [Rocket Launching: A Universal and Efficient Framework for Training Well-performing Light Net](https://arxiv.org/abs/1708.04106) 63 | + [《D#0020-Batch-Normalization层原理与分析》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230020-Batch-Normalization%E5%B1%82%E5%8E%9F%E7%90%86%E4%B8%8E%E5%88%86%E6%9E%90/D%230020.md) 64 | + [《D#0031-知识蒸馏Knowledge-Distillation》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230031-%E7%9F%A5%E8%AF%86%E8%92%B8%E9%A6%8FKnowledge-Distillation/D%230031.md) 65 | -------------------------------------------------------------------------------- /D#0034-火箭发射:阿里巴巴的轻量网络训练方法/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0034-火箭发射:阿里巴巴的轻量网络训练方法/images/.gitkeep -------------------------------------------------------------------------------- /D#0034-火箭发射:阿里巴巴的轻量网络训练方法/images/210322.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0034-火箭发射:阿里巴巴的轻量网络训练方法/images/210322.png -------------------------------------------------------------------------------- /D#0034-火箭发射:阿里巴巴的轻量网络训练方法/images/211148.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0034-火箭发射:阿里巴巴的轻量网络训练方法/images/211148.png -------------------------------------------------------------------------------- /D#0034-火箭发射:阿里巴巴的轻量网络训练方法/images/213237.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0034-火箭发射:阿里巴巴的轻量网络训练方法/images/213237.png -------------------------------------------------------------------------------- /D#0035-2KW短视频打标问题之Activate-Learning/D#0035.md: -------------------------------------------------------------------------------- 1 | # 海量短视频打标问题之Active-Learning 2 | 3 | ## 引言 4 | 5 | 在网络中,每时每刻都会产生很多**无标签**数据信息,比如最近很火的一些短视频APP,每天都有很多用户发布自己生产的短视频(UGC)内容,这些内容一般是部分打了标签或者标签中有很多噪音的,为了做推荐或者做分类、识别的训练,需要给这些短视频**自动生成高质量的标签**;而另外,随着人工智能的发展,许多以前积累的数据,需要自动做标注,比如很多医疗领域的核磁共振片子。这只是CV领域(许多数据集已经打标或者很容易打标),在NLP和推荐领域,这样的问题更加重要和常见。 6 | 7 | 这些给**海量视频数据打标签**的问题是很常见也很重要的问题,而且这些问题牵扯到机器学习中的“主动学习”(Active Learning),“多模态机器学习”(MultiModal Machine Learning),“多标签(Multi-label)分类”,“增量学习”(Incremental Learning),“在线学习”(Online Learning),“少样本学习”(Few Shot/Zero Shot Learning)等等领域。 8 | 9 | 为此,船长打算以海量短视频打标这个具体的问题写几篇(具体写几篇,要看我有多忙lan)文章专门捋一捋这方面的常用算法,这个系列的文章将是第一篇,专注于主动学习领域。 10 | 11 | **欢迎探讨,本文持续维护。** 12 | 13 | ## 实验平台 14 | 15 | N/A 16 | 17 | ## 主动学习基本思路 18 | 19 | 假设现在有两千万短视频池,需要给他们打上一万个标签。如果手工一个个检查一个个打标签,成本太高,肯定是不现实的。但是我们可以少量打一些标签,比如为1000个视频打标签,这个成本还是可以接受的。然后用这1000个视频和**手动打的标签**去训练一个自动打标签的模型alpha。用模型alpha去给视频池**剩余的样本去预测标签**,根据预测出的标签的信息,根据**一定规则**挑选出某些更有意义的样本,比如2000个,把这些挑选出来的2000个样本**再找人工打标签**(因为经过挑选的,所以数量可以控制少一点)。把这些打标签的数据和训练模型alpha的数据合在一起,形成一个3000个样本的**更大的带标签数据集**训练一个**更好的模型beta**,再用模型beta重复由模型alpha得到模型beta的过程,可以继续生成更更好的模型gamma,把这个过程迭代下去,就可以在有限的标注成本下,得到不错的自动打标模型了。 20 | 21 | ### 主动学习为什么有用?
22 | 23 | 我们知道,一般而言,样本量越多,训练的模型越好。但是不是所有的样本对最终模型性能的贡献都是一样的(比如我们要描述一个正方体,并不需要穷举列出正方体内所有的点,只需要列举这个正方体的某些顶点上的点就行了),如果能找出一些**关键样本**,这些样本对模型训练更加重要,那么自然我们可以只标注那些关键样本来训练模型啦。 24 | 25 | 那么,很显然,关键样本的**挑选规则是主动学习成功的关键**,这也是主动学习领域研究比较多的一个问题,后文会有简单介绍两种比较经典的样本挑选规则。 26 | 27 | ## 主动学习实施 28 | 29 | ### 算法流程 30 | 31 | 在前面一节,已经大概介绍了主动学习的一般思路和过程,这里写一下流程吧: 32 | 33 | 1. 将两千万短视频初始化为未标注样本池P; 34 | 2. 在样本池中随机选出1000个样本,对其人工做标注,形成训练集合T; 35 | 3. 用训练集合T训练模型M; 36 | 4. 用上一步训练出来的模型M预测样本池P中不属于T的样本,得到预测信息Pred; 37 | 5. 根据Pred用**挑选规则挑选**出一些样本,给**人工**进行标注,并把新标注的样本和原来训练集合T合并成新的训练集合T; 38 | 6. 如果模型M满足性能要求,则终止,否则转到步骤3; 39 | 40 | ### 挑选规则 41 | 42 | 主动学习中样本挑选规则是很重要的,主流有如下几种方法: 43 | 44 | 1. 基于不确定度缩减的方法。比如分类问题中,一般会出现一个概率向量,这个概率向量的信息熵可以认为是模型对分类的不确定度;挑选信息熵最大的那些样本送去给人工进行标注。从几何角度看,这种方法优先选择靠近分类边界的样例。 45 | 2. 基于最大两个类别概率差距最小的方法。和上面差不多,只不过选择的标注不是概率向量的信息熵,而是选择那些top1和top2分量差距最小的概率向量所对应的未标注样本送去给人工标注。很好理解,如果模型预测出某个样本有很高的概率属于1类,也有很高的概率属于2类,那么就说明模型对这个样本不是很确定,就需要人工标注给他更多的信息去学习。 46 | 3. 预先聚类的方法:预先运行聚类算法预处理,选择样例时优先选择最靠近分类边界的样例和最能代表聚类的样例(即聚类中心)。 47 | 48 | ## 主动学习和难例挖掘的比较 49 | 50 | 在CV中常见的提升性能的方法有难例挖掘(在线,离线),也是挑选一些少量关键样本来提升性能,这里做个简单的比较。 51 | 52 | ### 相同点 53 | 54 | 主动学习和难例挖掘(Hard Example Mining)很像,都是用训练了的模型去做预测,找出少量可能对模型性能改进有帮助的关键样本,然后用关键样本去帮助模型改进。 55 | 56 | ### 不同点 57 | 58 | 但是不同的地方也很明显,主动学习需要人工参与标注,样本挑选规则挑选出来的样本,要送到人那里去做手动标注;而难例挖掘是在所有样本都有标签的前提下,找出那些特别难的样本。 59 | 60 | ## 总结 61 | 62 | 本文以为海量短视频打标签为例子,简单介绍了一下主动学习这种实用的学习方法。但是单单靠这一种方法来做海量短视频打标还是**远远不够**的,后续我会再介绍这个问题上用得上的其他技术点。 63 | 64 | ## 参考资料 65 | 66 | + [Active Learning Tutorial](https://towardsdatascience.com/active-learning-tutorial-57c3398e34d) 67 | + [Active Learning wiki](https://en.wikipedia.org/wiki/Active_learning_(machine_learning)) 68 | + [爱奇艺短视频分类技术解析](https://mp.weixin.qq.com/s/t801Q3OO_DBrgI60fKSJxQ) 69 | + [PRCV2018 美图短视频实时分类挑战赛第一名解决方案介绍](https://www.leiphone.com/news/201811/yhkoD7Ty8WRaCBqe.html) 70 | -------------------------------------------------------------------------------- /D#0035-2KW短视频打标问题之Activate-Learning/images/.gitkeep: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0035-2KW短视频打标问题之Activate-Learning/images/.gitkeep -------------------------------------------------------------------------------- /D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/.gitkeep -------------------------------------------------------------------------------- /D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/Selection_508.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/Selection_508.png -------------------------------------------------------------------------------- /D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/Selection_509.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/Selection_509.png -------------------------------------------------------------------------------- /D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/Selection_510.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/Selection_510.png -------------------------------------------------------------------------------- 
/D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/Selection_511.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/Selection_511.png -------------------------------------------------------------------------------- /D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/Selection_512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/Selection_512.png -------------------------------------------------------------------------------- /D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/TransferLearning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning/images/TransferLearning.png -------------------------------------------------------------------------------- /D#0037-CentralNet做多模态融合/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0037-CentralNet做多模态融合/images/.gitkeep -------------------------------------------------------------------------------- /D#0037-CentralNet做多模态融合/images/Selection_511.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0037-CentralNet做多模态融合/images/Selection_511.png -------------------------------------------------------------------------------- 
/D#0037-CentralNet做多模态融合/images/Selection_513.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0037-CentralNet做多模态融合/images/Selection_513.png -------------------------------------------------------------------------------- /D#0037-CentralNet做多模态融合/images/Selection_514.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0037-CentralNet做多模态融合/images/Selection_514.png -------------------------------------------------------------------------------- /D#0037-CentralNet做多模态融合/images/Selection_515.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0037-CentralNet做多模态融合/images/Selection_515.png -------------------------------------------------------------------------------- /D#0037-CentralNet做多模态融合/images/Selection_516.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0037-CentralNet做多模态融合/images/Selection_516.png -------------------------------------------------------------------------------- /D#0037-CentralNet做多模态融合/images/Selection_517.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0037-CentralNet做多模态融合/images/Selection_517.png -------------------------------------------------------------------------------- /D#0037-CentralNet做多模态融合/images/Selection_518.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0037-CentralNet做多模态融合/images/Selection_518.png -------------------------------------------------------------------------------- /D#0037-CentralNet做多模态融合/images/Selection_519.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0037-CentralNet做多模态融合/images/Selection_519.png -------------------------------------------------------------------------------- /D#0037-CentralNet做多模态融合/images/Selection_520.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0037-CentralNet做多模态融合/images/Selection_520.png -------------------------------------------------------------------------------- /D#0037-CentralNet做多模态融合/images/Selection_521.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0037-CentralNet做多模态融合/images/Selection_521.png -------------------------------------------------------------------------------- /D#0037-CentralNet做多模态融合/images/Selection_522.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0037-CentralNet做多模态融合/images/Selection_522.png -------------------------------------------------------------------------------- /D#0037-CentralNet做多模态融合/images/Selection_523.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0037-CentralNet做多模态融合/images/Selection_523.png 
-------------------------------------------------------------------------------- /D#0038-Multi-View-Active-Learning做视频推荐/D#0038.md: -------------------------------------------------------------------------------- 1 | # Multi-View Active Learning做视频推荐 2 | 3 | ## 引言 4 | 5 | 今天介绍的是阿里优酷Cognitive and Intelligent Lab做的用Multi-View Active Learning做视频推荐的方法,[论文](https://www.ijcai.org/proceedings/2019/0284.pdf)发表于2019年的IJCAI上。与[D#0035](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230035-2KW%E7%9F%AD%E8%A7%86%E9%A2%91%E6%89%93%E6%A0%87%E9%97%AE%E9%A2%98%E4%B9%8BActivate-Learning/D%230035.md)和[D#0036](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230036-2KW%E7%9F%AD%E8%A7%86%E9%A2%91%E6%89%93%E6%A0%87%E9%97%AE%E9%A2%98%E4%B9%8BMulti-Modal-Machine-Learning/D%230036.md)做海量视频打标不一样,这里是把主动学习和多模态(转换)的方法运用到了**视频推荐**这个问题上。 6 | 7 | **欢迎探讨,本文持续维护。** 8 | 9 | ## 实验平台 10 | 11 | N/A 12 | 13 | ## 问题提出和整体思路 14 | 15 | 视频推荐系统要做的事情就是把你喜欢(或者可能喜欢)的视频喂给你。推荐规则的学习方法分成两种:1. 是从你以往视频浏览痕迹或者是2. 
和你类似的人的浏览痕迹来学习到的。后者就是有名的[协同过滤](https://en.wikipedia.org/wiki/Collaborative_filtering)方法,前者是基于内容的方法。协同过滤方法在早期推荐系统中运用比较多,但是面临着**冷启动**问题,后面更多流行的是基于内容的推荐方法或者两者的混合方法了。 16 | 17 | ![](images/Selection_504.png) 18 | 19 | 但是,基于内容的方法得要有丰富的内容才行。而带标签的数据都是很珍贵的。 20 | 21 | 比如,现在主流做基于内容的推荐方法,都是用和视频相伴文本信息(靠视频的信息会有semantic gap的问题,不如文本信息可靠)来做预测,预测用户喜欢还是不喜欢这个视频。但是这些文本信息不一定每个视频都有,特别是UGC视频。不过,视频信息是每个视频都有的,我们可以用深度学习方法理解这个视频,为这个视频生成相应的文本信息。有了这些视频生成的文本信息后,再用分类器去分类用户是不是喜欢,根据预测的结果和真值比对,根据比对的结果来挑选一些信息量大的未标记样本主动送给专家去做文本标记,如此迭代来提升推荐系统的性能。 22 | 23 | 根据上面的描述,下面分别介绍这两个比较关键的点,一个是由视频生成文本信息怎么生成(即Video to Text映射),第二个是在学习到Video to Text映射之后,根据映射的结果怎么去主动选择一些未标记(文本信息)的样本去做手工标记。 24 | 25 | ## Video to Text映射 26 | 27 | ![](images/Selection_505.png) 28 | 29 | 上图是学习V2T映射的大框架,看起来有点复杂,其实思路很通顺,让我们来一个个拆解。 30 | 31 | 首先我们要学习一个从视频到文本的映射,而有的样本已经有已知的文本信息了,那么最直观的想法就是**看V2T这个映射能不能把视频信息映射到已知的文本信息,或者它映射的差距有多大**。换句话说就是以文本信息监督V2T函数的学习,损失函数如下: 32 | 33 | ![](images/Selection_506.png) 34 | 35 | 式子中e就是V2T映射函数,A是带标签的样本集合。 36 | 37 | 另外,我们有用户行为的记录y(用户点了视频还是没有点视频),这个记录的信息每个样本都有。由视频文本信息v和用户自身信息u到用户点没点视频y之间的关系可以用函数f建模(就是图2中的classifier),假设f是可靠的,那么可以根据V2T映射出来的文本信息输入到f中,看f的预测,是不是和y的真值一致的。也就是,**加上y的信息来监督V2T函数的学习**。 38 | 39 | ![](images/Selection_507.png) 40 | 41 | 式子(2)的前部分是在带完整文本信息的数据集上学习f,后部分是在不带文本信息的数据集上学习V2T,这是一个**联合学习**的过程。 42 | 43 | 最后,综合式子(1)和(2),得到最终联合学习e和f的损失函数 44 | 45 | ![](images/Selection_508.png) 46 | 47 | 在作者的实现中,把V2T和f的学习都融合到一个神经网络中去优化,损失都用的是均方差。 48 | 49 | ## 未标记样本的主动选择 50 | 51 | 由上面学习到的V2T和f,是不是事情就这么完了呢?不是的,学习到的V2T和f,毕竟是从很少的信息里面学到的,不一定可靠,为了增加V2T和f的性能,需要更多的带真值文本信息的样本,那么就涉及到**主动学习**,怎么挑选样本去给人工做标注了。 52 | 53 | 首先容易想到的是对在未标记集合U中的每个视频样本v,用V2T处理得到它的文本信息,再集合用户信息用f得到点击没点击的预测值,根据预测值和真值的偏离程度来选择是不是把v送去人工标注其文本信息,选择标准如下: 54 | 55 | ![](images/Selection_509.png) 56 | 57 | ni是在未标记集合中视频i出现的次数。 58 | 59 | 再进一步,如果视频在总的记录中出现的次数多,送入做人工标记肯定对模型的性能帮助更大,那么,用它在总记录中出现的次数来做一个加权,也就是顺理成章的事情了: 60 | 61 | ![](images/Selection_510.png) 62 | 63 | 再再进一步,搞得更复杂一点,考虑到不同长短的视频标记成本是不一样的,为了平衡标记成本和对模型性能帮助大小这两个因素,可以把视频长短当做一个因子去除Si得到考虑到标记成本的新的Si。 64 | 65 | 最后,综合上面"Video 
to Text映射"和"未标记样本的主动选择"两节介绍的内容,Multi-View Active Learning的算法流程图如下: 66 | 67 | ![](images/Selection_511.png) 68 | 69 | ## 总结 70 | 71 | 本文介绍了一种综合利用了多目标学习,多模态学习和主动学习等技术来做视频推荐系统的一种方法。 72 | 73 | ## 参考资料 74 | 75 | + [Multi-View Active Learning for Video Recommendation](https://www.ijcai.org/proceedings/2019/0284.pdf) 76 | + [D#0035-2KW短视频打标问题之Activate-Learning](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230035-2KW%E7%9F%AD%E8%A7%86%E9%A2%91%E6%89%93%E6%A0%87%E9%97%AE%E9%A2%98%E4%B9%8BActivate-Learning/D%230035.md) 77 | + [D#0036-2KW短视频打标问题之Multi-Modal-Machine-Learning](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230036-2KW%E7%9F%AD%E8%A7%86%E9%A2%91%E6%89%93%E6%A0%87%E9%97%AE%E9%A2%98%E4%B9%8BMulti-Modal-Machine-Learning/D%230036.md) 78 | -------------------------------------------------------------------------------- /D#0038-Multi-View-Active-Learning做视频推荐/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0038-Multi-View-Active-Learning做视频推荐/images/.gitkeep -------------------------------------------------------------------------------- /D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_504.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_504.png -------------------------------------------------------------------------------- /D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_505.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_505.png 
-------------------------------------------------------------------------------- /D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_506.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_506.png -------------------------------------------------------------------------------- /D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_507.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_507.png -------------------------------------------------------------------------------- /D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_508.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_508.png -------------------------------------------------------------------------------- /D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_509.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_509.png -------------------------------------------------------------------------------- /D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_510.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_510.png 
-------------------------------------------------------------------------------- /D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_511.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0038-Multi-View-Active-Learning做视频推荐/images/Selection_511.png -------------------------------------------------------------------------------- /D#0039-用FCN做分割/D#0039.md: -------------------------------------------------------------------------------- 1 | # 用FCN做分割 2 | 3 | 图像分割是计算机视觉中比较常见的技术,广泛应用于智能交通、自动驾驶等领域。恰好船长最近正在做图像分割的项目,也调研了一些图像分割的经典方法,准备把用CNN做图像分割的方法都做个记录,这里是第一篇FCN,原始论文发表于15年的CVPR,属于用深度学习做图像分割的挖坑之作(褒义)。 4 | 5 | **欢迎探讨,本文持续维护。** 6 | 7 | ## 实验平台 8 | 9 | N/A 10 | 11 | ## 语义分割:从整体图片分类到像素级分类 12 | 13 | 在FCN提出之前,CNN(AlexNet,VGG,GoogLeNet,RCNN和SPPNet)都已经在图像分类和目标检测领域攻城拔寨,既然CNN可以在整图分类和部分图片分类上取得成功,那么把CNN来做pixel-to-pixel的分类进而解决图像分割问题也就是可以想象的了。 14 | 15 | 以AlexNet为例,它的开始的几层是卷积层,最后卷积层后面连接全连接Softmax层输出一个长度为1000的向量代表分类的类别概率。它的最后输出是针对整个图的分类结果,此时空间信息消失了。如果在去掉全连接层,换上卷积层,那么一样也可以输出分类的结果,还保存了空间维度上的信息,示意图如下图所示: 16 | 17 | ![](images/225831.png) 18 | 19 | 这里就不再赘述,在[《D#0025-CNN中使用卷积代替全连接》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230025-CNN%E4%B8%AD%E4%BD%BF%E7%94%A8%E5%8D%B7%E7%A7%AF%E4%BB%A3%E6%9B%BF%E5%85%A8%E8%BF%9E%E6%8E%A5/D%230025.md)已有比较详细的介绍,感兴趣的朋友可以去看。 20 | 21 | 然后,既然FCN可以生成整张图大小的热力图,那么如果这个图的大小和输入图像大小一致,且有了输入图中每个像素的真值标签(Ground Truth),那么就可以用这个标签来监督FCN的训练,让热力图和标签趋向一致,原理如下图所示。有了每个像素的分类信息,那么做分割就是个太简单的事情了。 22 | 23 | ![](images/232018.png) 24 | 25 | ## FCN做分割的网络架构 26 | 27 | ### 基础网络 28 | 29 | ![](images/Selection_513.png) 30 | 31 | 作者试用了AlexNet,VGG-16,VGG-19和GoogLeNet等在分类任务上表现较好的网络,首先把它们的Softmax砍掉,第二步把全连接层改成卷积层,第三步附加一个1x1的卷积层生成21个PASCAL分类的热力图,最后添加一个双线性采样的upsample层作为Deconvolution层来把热力图扩充到输入图像的分辨率。在做了这些改造之后,如上图所示,实验发现复用了VGG-16的卷积部分的效果最好,VGG-16可以作为特征提取的主干网络。 32 | 33 | ### Skip连接Combining what and where 34 | 35 | 
像VGG这样的网络是一层一层的层次性结构,不同的层感受野不同,特征图所能表达的含义也不同。浅层的特征感受野小,可以表达一些精细的特征,能回答**在哪里**的问题;而深层的特征感受野大,适合表达一些整体的语义,适合回答**是什么**的问题。如果把不同层次的特征图融合起来,整个网络形成一个有向无环图DAG,那么应该适合于分割这类既要回答是什么,又要(精确到像素级地)回答在哪里的问题。 36 | 37 | ![](images/Selection_514.png) 38 | 39 | 上图是FCN的网络结构图,VGG不同层的特征在upsample之后会进行(sum或者concate)融合,最终融合到pool3这一层,得出来一个和原图大小一样的图。 40 | 41 | ![](images/Selection_515.png) 42 | 43 | 上图可以看出,不做不同层特征图的融合会怎么样。可以看到,**融合的浅层特征越多,分割得越精细**。 44 | 45 | ![](images/Selection_516.png) 46 | 47 | 上表是作者给出的实验数据,具体地证明了上上图的结论。 48 | 49 | ### UpSample 50 | 51 | 在类似VGG这样的分类网络中,因为卷积Stride和Pooling层的作用,特征图的空间分辨率是随着层数越来越深递进地越来越小的,但是分割要做pixel-to-pixel的分类任务,真值标签是原图分辨率地像素级的图,那么分割网络的输出也要是同样的分辨率,即输入图片大小的分辨率。 52 | 53 | FCN作者这里为了放大特征图分辨率采用了很简单地x2 Upsample层,具体来说就是添加了一个**类似于双线性插值**的层,来根据位置坐标计算输出图的像素级标签。当然,这里说是类似于双线性插值的层,是因为这一层它的插值系数是可以学习的,不是定死了的。 54 | 55 | ## 总结 56 | 57 | 本文介绍了一种早期较为经典的分割网络FCN,可端对端地进行全图训练。该网络特征提取部分还是基于VGG-16,改造成全卷积网络。添加了Skip连接融合浅层细节和深层语义特征,采用了可学习的UpSample层使网络输出分辨率和输入一致。 58 | 59 | ## 参考资料 60 | 61 | + [《D#0025-CNN中使用卷积代替全连接》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230025-CNN%E4%B8%AD%E4%BD%BF%E7%94%A8%E5%8D%B7%E7%A7%AF%E4%BB%A3%E6%9B%BF%E5%85%A8%E8%BF%9E%E6%8E%A5/D%230025.md) 62 | + [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/pdf/1411.4038.pdf) 63 | + [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/pdf/1605.06211.pdf) 64 | -------------------------------------------------------------------------------- /D#0039-用FCN做分割/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0039-用FCN做分割/images/.gitkeep -------------------------------------------------------------------------------- /D#0039-用FCN做分割/images/225831.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0039-用FCN做分割/images/225831.png -------------------------------------------------------------------------------- /D#0039-用FCN做分割/images/232018.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0039-用FCN做分割/images/232018.png -------------------------------------------------------------------------------- /D#0039-用FCN做分割/images/Selection_513.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0039-用FCN做分割/images/Selection_513.png -------------------------------------------------------------------------------- /D#0039-用FCN做分割/images/Selection_514.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0039-用FCN做分割/images/Selection_514.png -------------------------------------------------------------------------------- /D#0039-用FCN做分割/images/Selection_515.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0039-用FCN做分割/images/Selection_515.png -------------------------------------------------------------------------------- /D#0039-用FCN做分割/images/Selection_516.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0039-用FCN做分割/images/Selection_516.png -------------------------------------------------------------------------------- /D#0040-用U-Net做分割/D#0040.md: 
-------------------------------------------------------------------------------- 1 | # 用U-Net做分割 2 | 3 | 这里继续介绍第二篇著名的图像分割模型,U-Net。论文由德国弗莱堡大学的研究人员发表于15年MICCAI,初始是应用于医学图像分割上的。论文思路清晰,和[《D#0039-用FCN做分割》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230039-%E7%94%A8FCN%E5%81%9A%E5%88%86%E5%89%B2/D%230039.md)一样也是一个**全卷积网络**,网络结构简单,而且U-Net具有速度快,需要的**训练样本少**的优势。 4 | 5 | **欢迎探讨,本文持续维护。** 6 | 7 | ## 实验平台 8 | 9 | N/A 10 | 11 | ## 网络结构 12 | 13 | ![](images/Selection_518.png) 14 | 15 | 如上图所示,U-Net的结构非常清晰和**U形对称**,整体呈一种U型结构。在左半边,是它的编码结构,论文中成为**收缩路径**(contracting path),分辨率逐渐由输入的572x572减小到最小的28x28;右边是**对称的扩张路径**(expanding path),分辨率由28x28逐渐扩张到半个输入分辨率的大小388x388。只做灰度图上的细胞轮廓分割,所以输入是1通道,输出是2通道的,由1x1的卷积产生。 16 | 17 | 另外还可以看到,在扩张路径的过程中,也crop了前面收缩路径产生的特征图进行融合,形成了一种不同感受野特征的融合,也是希望结合局部的精细的信息和整体的分类的信息以求更好的分割效果,比较常见的手段。 18 | 19 | ## 外围边界处理策略 20 | 21 | ![](images/Selection_519.png) 22 | 23 | 因为是全卷积模型,所以理论上可以输入任意分辨率的图片。但是,在边缘处的像素,由于缺少部分(至少缺少上、下、左、右其中之一)上下文信息,得到的Segmentation map会不准确。为了解决这个问题,如上图所示,作者简单地对边缘处的像素做了个镜像弥补边缘的上下文信息,以让输出的Segmentation map的每个像素反算到输入都有有效像素点(而不是null)。 24 | 25 | ## 训练方法 26 | 27 | ### 损失函数 28 | 29 | 作者使用的是pixel-wise Softmax![](images/Selection_520.png)输出最后的Segmentation map。损失函数用的是交叉熵![](images/Selection_521.png)。不过,为了让各个相接触的细胞与细胞之间的小边缘得到更好的分割效果,在预先算出了一个weight map![](images/Selection_522.png)对每个pixel的损失进行加权。 30 | 31 | ### 初始化 32 | 33 | 对于有许多卷积层而且不同path的模型,好的参数初始化策略对于模型的优化是不可轻视的。在最佳情况下,每个层的参数应该初始化到输出的特征图分布拥有**单位方差**最好。对于U-Net这样的只有卷积层和ReLU激活层的模型,用**He初始化**根据输入节点的个数做初始化可以达到上面最佳情况的要求。 34 | 35 | ### 数据增强 36 | 37 | 医疗数据的图像很少,作者用的原始带标签的训练数据集分别只有20张,30张和35张。为了生成更多的训练数据,作者使用随机弹性形变Random elastic deformations进行了数据增强。 38 | 39 | ## 实验结果 40 | 41 | ![](images/Selection_523.png) 42 | 43 | 上图是部分实验结果的展示,带颜色的部分是Segmentation map,黄色细线是真值,可以看出来效果还是不错的,有些很困难的微弱的边缘,都切割得八九不离十。 44 | 45 | ## 总结 46 | 47 | 本文介绍的是分割早期比较有名的U-Net模型,模型结构简单,思路清晰,**编码解码结构**划分,加上不同层相似分辨率的特征图融合,都提升了分割的性能。虽说论文中主要介绍了医学图像的分割,但是U-Net也很容易迁移到别的任务的分割上。 48 | 49 | ## 参考资料 50 | 51 | + 
[《D#0025-CNN中使用卷积代替全连接》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230025-CNN%E4%B8%AD%E4%BD%BF%E7%94%A8%E5%8D%B7%E7%A7%AF%E4%BB%A3%E6%9B%BF%E5%85%A8%E8%BF%9E%E6%8E%A5/D%230025.md) 52 | + [《D#0039-用FCN做分割》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230039-%E7%94%A8FCN%E5%81%9A%E5%88%86%E5%89%B2/D%230039.md) 53 | + [Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/pdf/1411.4038.pdf) 54 | + [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) 55 | + [U-Net Implementation](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/) 56 | + [Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification](https://arxiv.org/abs/1502.01852) 57 | -------------------------------------------------------------------------------- /D#0040-用U-Net做分割/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0040-用U-Net做分割/images/.gitkeep -------------------------------------------------------------------------------- /D#0040-用U-Net做分割/images/Selection_518.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0040-用U-Net做分割/images/Selection_518.png -------------------------------------------------------------------------------- /D#0040-用U-Net做分割/images/Selection_519.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0040-用U-Net做分割/images/Selection_519.png -------------------------------------------------------------------------------- /D#0040-用U-Net做分割/images/Selection_520.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0040-用U-Net做分割/images/Selection_520.png -------------------------------------------------------------------------------- /D#0040-用U-Net做分割/images/Selection_521.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0040-用U-Net做分割/images/Selection_521.png -------------------------------------------------------------------------------- /D#0040-用U-Net做分割/images/Selection_522.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0040-用U-Net做分割/images/Selection_522.png -------------------------------------------------------------------------------- /D#0040-用U-Net做分割/images/Selection_523.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0040-用U-Net做分割/images/Selection_523.png -------------------------------------------------------------------------------- /D#0041-用RefineNet做分割/D#0041.md: -------------------------------------------------------------------------------- 1 | # 用RefineNet做分割 2 | 3 | 这里继续介绍第三篇著名的图像分割模型,RefineNet。论文发表于2017年的CVPR上。RefineNet提出了一种通用的多路级联精修结构的网络,与FCN相比,它更充分利用了主干网不同层次分辨率的特征图里面的信息,与Deeplab的Dilated Convolution方法相比,它要求更少的内存。最重要的是,它的效果很好,在七个benchmark上都做出了更好的效果,属于当年的SOTA。 4 | 5 | **欢迎探讨,本文持续维护。** 6 | 7 | ## 实验平台 8 | 9 | N/A 10 | 11 | ## RefineNet网络结构 12 | 13 | ![](images/Selection_525.png) 14 | 15 | 上图基本上表示了RefineNet的思路起源和它比之前方法的优点。如左上角的a所示是一个基于ResNet的全卷积分割模型,但是它只是利用了最后一层的低分辨率的特征图,很多空间信息都在不断地卷积stride和pooling层中丢失了,很难恢复出高精度的Segmentation 
map;左下的b模型,是Deeplab利用Dilated Convolution的方法来做特征提取,Dilated Convolution优点在于可以在不增大计算量和参数量的情况下保持比较大的感受野,也可以保证网络中每层特征图的分辨率不至于太小,但是缺点也是很显然的,由于中间结果都是分辨率很大的特征图,那么训练和推理过程中都对内存/显存提出了很大的要求。 16 | 17 | 而右边表示的是RefineNet的示意图,**主干网络是ResNet**,但是在ResNet的4个不同分辨率阶段都会抽出来给一个RefineNet块做处理,而且也有identity mapping的连接,即丰富了不同分辨率的特征混合(和U-Net类似),也让大网络的训练更加容易。 18 | 19 | 另外值得注意的是,这个网络的**Cascade Multi-Path Refinement**。从ResNet出来的低分辨率特征,不断的结合上一个Stage的较高分辨率的特征,进行混合;而且这是一个级联(Cascade)的过程,从而不断不断地提升分割精度。 20 | 21 | ## RefineNet块结构 22 | 23 | ![](images/Selection_526.png) 24 | 25 | 上图所示就是RefineNet网络中基础的RefineNet块的结构,RefineNet结构是输入一个或者多个不同分辨率的特征图,进行混合和提升,输出一个较大特征图的块。它由三个级联的子块组成,下面分别介绍。 26 | 27 | ### 残差卷积单元Residual Conv Unit 28 | 29 | 每个input path的后面都会接两个串联的RCU。这个RCU块的作用就是**fine-tune主干网ResNet的输出**使他更适应分割这个任务。 30 | 31 | ### 多分辨率融合Multi-resolution Fusion 32 | 33 | 在RCU输出的特征图每个path的空间分辨率是不一样的,这个多分辨率融合块的作用就是把输入的各种**不同分辨率的特征提升并对齐**到最大的输入path的分辨率,然后将它们通过Sum操作融合起来。 34 | 35 | ### 链式残差池化Chained Residual Pooling 36 | 37 | 这个层的作用是通过pooling操作让不同的特征图有不同的感受野以便于**提取不同尺度的背景上下文**的信息。用不同的残差连接一个作用是便于训练,第二个作用是混合复用不同分辨率的特征。每个pooling层后面添加的卷积层的作用相当于在sum操作前学习一个**自适应的权重**。 38 | 39 | ### 输出卷积层Output Conv 40 | 41 | RefineNet块最后的输出层其实就是一个前面介绍的Residual Conv Unit(这样每个RefineNet有三个RCN,两个在最前面,一个在最后,RCU进,RCU出)。这一层的作用就是给前面层输出的特征**增加一些非线性**。 42 | 43 | ## RefineNet网络中的恒等映射Identity Mapping 44 | 45 | 受到ResNet的启发,在RefineNet的设计中,作者大量使用了Identity Maping这种结构。ResNet的shortcut连接形成一条干净的通道使信息的流通更加顺畅,而在主路上,添加了非线性来学习有效特征。这种结构使很深的网络也可以很好的训练出来。 46 | 47 | 在RefineNet中,有两种Identity Mapping,Long-term的和Short-term的。在RefineNet块的RCU和CRP里面的是Short-term的,在各个RefineNet和主干网ResNet各个Stage输出之间的是Long-term的Identity Mapping。 48 | 49 | ## 实验结果 50 | 51 | RefineNet不但可以用在语义分割任务上,也可以用在Object Parsing的任务上,而且都取得了不错的效果。下面的三幅图分别是在Person-Part 数据集上做Object Parsing和在VOC2012,Cityspace数据集上做语义分割的效果。 52 | 53 | ![](images/Selection_527.png) 54 | 55 | ![](images/Selection_528.png) 56 | 57 | ![](images/Selection_529.png) 58 | 59 | ## Cascade RefineNet网络的变种 60 | 61 | ![](images/Selection_530.png) 62 | 63 
| RefineNet网络做很少的修改就可以变化到不同的结构(论文主要介绍的是4个Stage的RefineNet),比如如上图,把网络中的RefineNet块的个数修改一下就得到了变种a和b,把输入图片的个数和分辨率修改一下就可以得到变种c。 64 | 65 | ## 总结 66 | 67 | RefineNet采用多路,多分辨率,Cascade Refine和广泛使用残差结构的网络做语义分割任务,取得了很好的效果。其提出的RefineNet块,也可以以一个基础块的方式嵌入到别的网络中去。另外,RefineNet这个网络还可以做很多不同的泛化和拓展。 68 | 69 | ## 参考资料 70 | 71 | + [《D#0025-CNN中使用卷积代替全连接》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230025-CNN%E4%B8%AD%E4%BD%BF%E7%94%A8%E5%8D%B7%E7%A7%AF%E4%BB%A3%E6%9B%BF%E5%85%A8%E8%BF%9E%E6%8E%A5/D%230025.md) 72 | + [RefineNet: Multi-Path Refinement Networks for High-Resolution Semantic Segmentation](https://arxiv.org/abs/1611.06612) 73 | -------------------------------------------------------------------------------- /D#0041-用RefineNet做分割/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0041-用RefineNet做分割/images/.gitkeep -------------------------------------------------------------------------------- /D#0041-用RefineNet做分割/images/Selection_525.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0041-用RefineNet做分割/images/Selection_525.png -------------------------------------------------------------------------------- /D#0041-用RefineNet做分割/images/Selection_526.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0041-用RefineNet做分割/images/Selection_526.png -------------------------------------------------------------------------------- /D#0041-用RefineNet做分割/images/Selection_527.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0041-用RefineNet做分割/images/Selection_527.png -------------------------------------------------------------------------------- /D#0041-用RefineNet做分割/images/Selection_528.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0041-用RefineNet做分割/images/Selection_528.png -------------------------------------------------------------------------------- /D#0041-用RefineNet做分割/images/Selection_529.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0041-用RefineNet做分割/images/Selection_529.png -------------------------------------------------------------------------------- /D#0041-用RefineNet做分割/images/Selection_530.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0041-用RefineNet做分割/images/Selection_530.png -------------------------------------------------------------------------------- /D#0042-用DeepLabv3+的Encoder-Decoder做分割/D#0042.md: -------------------------------------------------------------------------------- 1 | # 用DeepLabv3+的Encoder-Decoder做分割 2 | 3 | 这里继续介绍第四篇著名的图像分割模型,DeepLabv3+。论文发表于2018年的ECCV上。DeepLabv3+在DepLabv3的基础上加了一个精细的Decoder模块得到了一个**Encoder-Decoder**的分割模型,以快速的**Xception为主干网络**,还吸收了MobileNet的**深度可分离卷积**进一步加速,最终在PASCAL VOC 2012和Cityspace两个benchmark上分别得到了89.0%和82.1%的mIOU成绩。 4 | 5 | **欢迎探讨,本文持续维护。** 6 | 7 | ## 实验平台 8 | 9 | N/A 10 | 11 | ## 分割的常用特征提取思路 12 | 13 | 图像分割,其实就是在整图大小的分辨率上做每个像素的分类,实际上属于一个**稠密分类问题**,分类的是每一个像素。既然要做到每个像素这么精细的级别,那么就需要很精细的浅层特征,既然要做分类,那么就要有抽象的上层特征。所以,基本的分割方法都是在考虑**怎么样提取浅层和深层的特征**,和**怎么样把这两种特征联合利用**。 14 | 15 | 
![](images/Selection_531.png) 16 | 17 | ### 用SPP来提取特征 18 | 19 | 一般利用不同大小的卷积核或者Pooling在主干网的最后一层来得到不同分辨率的特征图,形成一个空间特征金字塔SPP,也就是如上图所示的a,然后再这个SPP上恢复出每个像素的label的预测值。但是这个思路有个明显的弊病,他是在主干网的最后一层来做SPP操作的,而**主干网最后一层虽然有很丰富的语义信息,但是由于一路上的卷积和Pooling操作,分辨率不断压缩,许多对分割细节至关重要的浅层细节信息还是流失了**。 20 | 21 | ![](images/Selection_533.png) 22 | 23 | 本文介绍的DeepLabv3+前的DeepLabv3(也叫ASPP)就是这么一个思路,如上图所示,只不过在做SPP这一步的时候把普通卷积推广到了空洞卷积。 24 | 25 | ### 用空洞卷积来提取特征 26 | 27 | ![](images/Selection_532.png) 28 | 29 | 尽管还可以用如上图所示的空洞卷积来做,这样就可以一路保持比较大的特征图的分辨率,保持丰富的细节信息,但是这么做,又太消耗GPU显存资源了,也不太好。 30 | 31 | ### 用Encoder的方式来提取特征,Decoder再来解码 32 | 33 | ![](images/232018.png) 34 | 35 | ![](images/Selection_518.png) 36 | 37 | 还有第三种方式,就是本文第一幅图中的b,用一个Encoder模块抽取特征,网络的特征图是逐渐变小的,这样就节约了内存,而在网络的不同深度有不同抽象程度的特征,然后再用一个Decoder模块来利用这些不同层的特征去解码出一个全图大小的pixel-label map。以前介绍的[FCN](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230039-%E7%94%A8FCN%E5%81%9A%E5%88%86%E5%89%B2/D%230039.md)和[U-Net](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230040-%E7%94%A8U-Net%E5%81%9A%E5%88%86%E5%89%B2/D%230040.md)基本上就是属于这么一个路子。 38 | 39 | ## DeepLabv3+的Encoder-Decoder网络结构 40 | 41 | ### 融合SPP,空洞卷积和Encoder-Decoder结构来得到DeepLabv3+ 42 | 43 | ![](images/Selection_534.png) 44 | 45 | 本文提出的DeepLabv3+,综合吸收了上面几种思路的有点,主体设计如第一幅图所示,细节设计如上图所示,它主体上是一个**Encoder-Decoder结构**:Encoder部分,用主干网DCNN(可以使Xception,也可以是VGG或者ResNet等)提取基本特征,再用**空洞卷积**提取不同感受野的特征图,最后用1x1的卷积混合它们。Decoder部分,抽取主干网前面的特征(**这里是细节信息**),然后对Encoder混合出来的小分辨率特征(**这里是抽象特征**)进行上采样,Concate的方式混合两者,再经过3x3的卷积和上采样之后回复出输入图分辨率的pixel-wise预测结果。 46 | 47 | 这么一路分析下来,思路就很明显了,**就是在DeepLabv3(ASPP结合了空洞卷积和SPP)的基础上连一个比较复杂的Decoder模块改造成一个Encoder-Decoder结构**,用主干网中间的细节信息,和更多的非线性来解码出来预测图。 48 | 49 | ## 总结 50 | 51 | 本文比较简介的介绍常用于分割网络设计的特征提取思路,包括,多分辨率特征,SPP,空洞卷积和Encoder-Decoder结构,基本上所有的分割网络、甚至目标检测和别的任务的网络,都大量的采用了这些设计思路。DeepLabv3+是结合了多种设计思路的模型,也取得了不错的结果,值得我们去学习和借鉴。 52 | 53 | ## 参考资料 54 | 55 | + 
[《D#0039-用FCN做分割》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230039-%E7%94%A8FCN%E5%81%9A%E5%88%86%E5%89%B2/D%230039.md) 56 | 57 | + [《D#0040-用U-Net做分割》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230040-用U-Net做分割/D%230040.md) 58 | 59 | + [《D#0041-用RefineNet做分割》](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230041-用RefineNet做分割/D%230041.md) 60 | 61 | + [Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611) 62 | 63 | + [RefineNet: Multi-Path Refinement Networks for High-Resolution Semantic Segmentation](https://arxiv.org/abs/1611.06612) 64 | -------------------------------------------------------------------------------- /D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/.gitkeep -------------------------------------------------------------------------------- /D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/232018.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/232018.png -------------------------------------------------------------------------------- /D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/Selection_518.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/Selection_518.png -------------------------------------------------------------------------------- /D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/Selection_531.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/Selection_531.png -------------------------------------------------------------------------------- /D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/Selection_532.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/Selection_532.png -------------------------------------------------------------------------------- /D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/Selection_533.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/Selection_533.png -------------------------------------------------------------------------------- /D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/Selection_534.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0042-用DeepLabv3+的Encoder-Decoder做分割/images/Selection_534.png -------------------------------------------------------------------------------- /D#0043-用HRNet做分割/D#0043.md: -------------------------------------------------------------------------------- 1 | # 用HRNet做分割 2 | 3 | 这里继续介绍第五篇著名的图像分割模型,HRNet[v2]。最开始的HRNet的论文发表于2019年的CVPR上,是做Pose检测的,而HRNetv2是在原来HRNet的基础上把它稍作改造使其成为用于分割的网络。不过,由于HRNet提取的特征丰富,各种分辨率的都有,而且在网络一路都保持着高分辨率特征,所以也很容易类似于改造VGG,GoogLeNet和ResNet那样**根据需要将其改造别的任务**(比如图像识别,目标检测,人脸特征点检测,语义分割的等)的主干网络。本文仅以HRNetv2做分割为例,来介绍HRNet。 4 | 5 | **欢迎探讨,本文持续维护。** 6 | 7 | ## 实验平台 8 | 9 | N/A 10 | 11 | ## HRNet思路来源 12 | 13 | 
做语义分割,现在主流的网络设计可以按照最终特征图的生成方式分为两大流派: 14 | 15 | ### 以FCN,U-Net的代表的特征图先缩小后恢复的方法 16 | 17 | ![](images/232018.png) 18 | 19 | 像以前介绍过的[FCN](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230039-%E7%94%A8FCN%E5%81%9A%E5%88%86%E5%89%B2/D%230039.md)和[U-Net](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230040-用U-Net做分割/D%230040.md)的网络,如上图所示,在网络的传播过程中,**逐步减小特征图的空间分辨率**,可以视为一个Encoder的过程,然后再加上一个尾巴Decoder把特征图编码了的信息解码出来,一次性或者**逐步地放大特征图分辨率**直到原始输入分辨率,得到Segmentation map。这其中可能也穿插着不同分辨率信息的融合。 20 | 21 | ### 以DeepLab为代表的一路保持较大分辨率特征图的方法 22 | 23 | ![](images/154321.png) 24 | 25 | 上面的FCN和U-Net那样的思路,会在特征图分辨率缩小的过程中有**信息的流失**,就算加上了跳跃连接和多分辨率融合也不一定能很好的补偿这种信息流失。所以就有另外一种思路,在**特征提取的过程中一路保持较大的分辨率**,如上图b,在最后的大分辨率特征图上预测Segmentation map。这种思路比较有代表性的就是DeepLab系列的空洞卷积了,[以前也有介绍](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230042-%E7%94%A8DeepLabv3%2B%E7%9A%84Encoder-Decoder%E5%81%9A%E5%88%86%E5%89%B2/D%230042.md)。 26 | 27 | ## HRNet网络结构 28 | 29 | HRNet的设计思路延续了一路保持较大分辨率特征图的方法,在网络前进的过程中,都**保持较大的特征图**,但是在网路前进过程中,也会**平行地**做一些下采样缩小特征图,如此**迭代**下去。最后生成**多组有不同分辨率的特征图**,**再融合**这些特征图做Segmentation map的预测。 30 | 31 | ### 主干网络结构 32 | 33 | ![](images/155806.png) 34 | 35 | 上图是HRNet简单地示意图,生成多种不同分辨率的特征。这里需要注意的细节是,它在网络的前,中,后三段都做了特征融合,而不是仅仅在最后的特征图上做融合。别的好像也没什么了,结构和思路都比较简单,没有[前面的RefineNet](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230041-用RefineNet做分割/D%230041.md)那么复杂,就不多做介绍了。 36 | 37 | ### 多分辨率融合Multi-resolution Fusion 38 | 39 | ![](images/160639.png) 40 | 41 | HRNet作为主干网络提取了特征,这些特征有不同的分辨率,需要根据不同的任务来选择融合的方式。 42 | 43 | 在HRNet的最初CVPR做人体姿态检测的版本中,用的是上图a的融合方式,也就是丢掉低分辨率的特征,只用最大分辨率的特征。 44 | 45 | 如果做语义分割或者人脸特征点定位,那么就是如上图b中所示,把不同分辨率的特征通过upsample操作后得到一致的大分辨率特征图,然后concate起来做融合。 46 | 47 | 如果做目标检测,那么如上图c所示,在b的基础上构造一个多分辨率的特征金字塔。 48 | 49 | ### 计算量 50 | 51 | ![](images/161658.png) 52 | 53 | HRNet虽然有许多分辨率,而且一路都有保持大分辨率,但是根据论文的Table 1来看,貌似参数数量和计算量与前面的UNet和DeepLab相比并没有增加多少,特别是与DeepLab相比,计算量少了一大半。 54 | 55 | ## 总结 56 | 57 | 
HRNet构造思路很简单,一路保持较大的分辨率,而且并行地下采样、融合,最终生成多个分辨率的特征图,可以根据不同任务的具体需要进行选择性地融合使用。HRNet能在图像分类,目标检测,语义分割,人脸特征点定位等应用上取得不错的效果,有望像VGG,GoogLeNet和ResNet那样,成为各个任务主干网络新的选择。 58 | 59 | ## 参考资料 60 | 61 | + [Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1902.09212) 62 | + [High-Resolution Representations for Labeling Pixels and Regions](https://arxiv.org/abs/1904.04514) 63 | + [D#0039-用FCN做分割](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230039-%E7%94%A8FCN%E5%81%9A%E5%88%86%E5%89%B2/D%230039.md) 64 | + [D#0040-用U-Net做分割](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230040-用U-Net做分割/D%230040.md) 65 | + [RefineNet: Multi-Path Refinement Networks for High-Resolution Semantic Segmentation](https://arxiv.org/abs/1611.06612) 66 | -------------------------------------------------------------------------------- /D#0043-用HRNet做分割/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0043-用HRNet做分割/images/.gitkeep -------------------------------------------------------------------------------- /D#0043-用HRNet做分割/images/154321.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0043-用HRNet做分割/images/154321.png -------------------------------------------------------------------------------- /D#0043-用HRNet做分割/images/155806.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0043-用HRNet做分割/images/155806.png -------------------------------------------------------------------------------- /D#0043-用HRNet做分割/images/160639.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0043-用HRNet做分割/images/160639.png -------------------------------------------------------------------------------- /D#0043-用HRNet做分割/images/161658.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0043-用HRNet做分割/images/161658.png -------------------------------------------------------------------------------- /D#0043-用HRNet做分割/images/232018.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0043-用HRNet做分割/images/232018.png -------------------------------------------------------------------------------- /D#0044-用Network-Slimming做模型加速和压缩/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0044-用Network-Slimming做模型加速和压缩/images/.gitkeep -------------------------------------------------------------------------------- /D#0044-用Network-Slimming做模型加速和压缩/images/543.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0044-用Network-Slimming做模型加速和压缩/images/543.png -------------------------------------------------------------------------------- /D#0044-用Network-Slimming做模型加速和压缩/images/544.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0044-用Network-Slimming做模型加速和压缩/images/544.png 
-------------------------------------------------------------------------------- /D#0044-用Network-Slimming做模型加速和压缩/images/545.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0044-用Network-Slimming做模型加速和压缩/images/545.png -------------------------------------------------------------------------------- /D#0044-用Network-Slimming做模型加速和压缩/images/546.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0044-用Network-Slimming做模型加速和压缩/images/546.png -------------------------------------------------------------------------------- /D#0044-用Network-Slimming做模型加速和压缩/images/547.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0044-用Network-Slimming做模型加速和压缩/images/547.png -------------------------------------------------------------------------------- /D#0044-用Network-Slimming做模型加速和压缩/images/548.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0044-用Network-Slimming做模型加速和压缩/images/548.png -------------------------------------------------------------------------------- /D#0044-用Network-Slimming做模型加速和压缩/images/549.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0044-用Network-Slimming做模型加速和压缩/images/549.png -------------------------------------------------------------------------------- /D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/D#0045.md: 
-------------------------------------------------------------------------------- 1 | # Stacked Hourglass Network for Human Pose Estimation 2 | 3 | 本文介绍一篇2016 ECCV上的老文章[《Stacked Hourglass Network for Human Pose Estimation》](https://arxiv.org/abs/1603.06937),文章提出了一种用于做人体姿态估计的Hourglass网络,形状和前面介绍的用于分割的[U-Net](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230040-%E7%94%A8U-Net%E5%81%9A%E5%88%86%E5%89%B2/D%230040.md)很像,设计思路也很像,是人体姿态检测领域一篇比较重要的文章。而且,Hourglass的结构也作为Backbone网络应用于除人体姿态估计之外的领域中,比如今年大热的目标检测Anchor-Free的[CornetNet](https://arxiv.org/abs/1808.01244)中。 4 | 5 | **欢迎探讨,本文持续维护。** 6 | 7 | ## 实验平台 8 | 9 | N/A 10 | 11 | ## 网络结构 12 | 13 | 对于人体姿态估计这样的高级别的视觉任务来说,既需要模型能捕获低级别小尺度的信息(比如定位大概某个点是一个手腕),有需要模型能捕获高级别大尺度的信息(比如判断这个点是左手的手腕还是右手的手腕)。对不多尺度信息的捕获,有许多方法,比如简单的多分辨率预测,不同深度的层做特征融合,或者复杂一点的特征金字塔网络FPN,和前面介绍过的用于做语义分割的U-Net那样的。**这里说的Hourglass就属于和U-Net比较像的思路,先不断地缩小特征图空间分辨率,再不断提升和回复原来输入的分辨率,然后再在前后加一些Skip Layer,从而得到涵盖了不同尺度信息的输出特征图**。 14 | 15 | ### Hourglass模块 16 | 17 | #### 大致结构 18 | 19 | ![](images/556.png) 20 | 21 | 上图就是一个Hourglass模块,可以直观地看到分辨率是先降低再升高,两头大中间细,很像一个沙漏形状,所以取名Hourglass。 22 | 23 | 在Hourglass模块中,使用max pooling来降低特征图分辨率,在每次**max pooling**之后,牵出来一个Skip Layer来处理原来分辨率的信息(后面跟升起来分辨率的相对应的特征图做融合)。在模块达到最低分辨率的时候,后面又用**最近邻插值**的方法(不是unpooling或者deconv)进行上采样得到捕获全局信息的不同分辨率的特征图,和前面Skip Connection对应的特征图进行**Element-wise相加**做融合。上图中所有的max pooling操作都对应着一个最近邻插值操作,降低分辨率的过程和提升分辨率的过程是**完全对称**的结构。 24 | 25 | ![](images/557.png) 26 | 27 | 在输出的时候,通过1x1的卷积调整得到一个heatmap,heatmap的通道个数等于人体Pose的关键点类别数。 28 | 29 | #### Residual Module 30 | 31 | ![](images/558.png) 32 | 33 | 上图是一个Hourglass模块中使用的残差模块的示意图(在图3中就是一个小方框),这个没什么说的。 34 | 35 | ### Stacked Hourglass网络 36 | 37 | ![](images/560.png) 38 | 39 | 把多个(论文中是8个)Hourglass模块堆叠(Stacked)起来就组成了Stack Hourglass模型。需要注意的是,输入模型的图片分辨率是256x256,为了节约内存,一开始就用大卷积和一系列的max pooling操作把分辨率降到了64x64(这也是网络中Hourglass模块的输入和输出分辨率)。在网络中间各个Hourglass模块串联的时候,添加了一些卷积层。整个网络输出的时候,又通过1x1的卷积调整得到各个关节点的heatmap,heatmap通过最大值激活可以得到准确的关节点坐标,通道对应着关节点的种类。 40 | 41 | ## 训练方法 42 | 43 | ### 
Intermediate Supervision 44 | 45 | ![](images/561.png) 46 | 47 | 由于Stacked Hourglass网络使用的是多个Hourglass模块堆叠而成,每个Hourglass模块都应该捕获了全局的和局部的信息,所以从这些信息中生成GT信息也应该是合理的。而且这个堆叠的过程,作者假设它是一个不断bottom-up,top-down的信息处理过程,在这个过程中输出的都是关于人体Pose的整体和局部信息,只不过后面的实在前面的基础上做further evaluate和reassess。基于这个假设,作者在每个Hourglass的输出上加了生成GT的模块,用GT进行监督训练来降低网络的训练难度。 48 | 49 | ### 训练细节 50 | 51 | 训练数据集采用FLIC和MPII,首先通过标注信息扣出人体,再resize到256x256的分辨率,做了一些旋转(正负30度)和缩放的数据增强(x0.75~x1.25)。优化方法使用rmsprop,初始学习率2.5e-4(后面每到达平台期就除以5),使用Titan X训练了3天。单次预测耗时75ms。在测试的时候,做两次预测,一次原图,一次翻转后的图,把两次的heatmap平均一下可以得到大约1%的性能提升。 52 | 53 | 损失函数用的是MSE,真值用2D正态分布做了软化处理。为了提升定位精度,在从heatmap坐标反推到原图坐标的时候,把heatmap坐标向第二高的激活值方向移动1/4个像素(其实就是heatmap的最大值和第二大值进行了一个调和)。 54 | 55 | 对于严重遮挡和扭曲的关节点,在把真值heatmap上都设置为0。 56 | 57 | ## 实验结果 58 | 59 | ### 数据集上测试结果 60 | 61 | 作者使用关键点正确分类的百分比(PCK)指标和一些在人体姿态估计上流行的方法做了比较,比较结果如下: 62 | 63 | FLIC的结果 64 | 65 | ![](images/562.png) 66 | 67 | MPII的结果 68 | 69 | ![](images/563.png) 70 | 71 | ### 剪枝实验 72 | 73 | 另外,作者为了证明Intermediate Supervision的效果确实对性能提升帮助很大,性能提升不是由于网络参数增多达到的,做了如下的剪枝实验。 74 | 75 | ![](images/564.png) 76 | 77 | ## 总结 78 | 79 | Hourglass网络在人体姿态领域算是比较早也比较经典的一个模型,其结构和U-Net类似,都是分辨率先下降再提升的过程。Hourglass不但应用于人体姿态检测,也作为Backbone被应用于目标检测模型CornerNet中。 80 | 81 | ## 参考资料 82 | 83 | + [D#0040-用U-Net做分割](https://github.com/Captain1986/CaptainBlackboard/blob/master/D%230040-%E7%94%A8U-Net%E5%81%9A%E5%88%86%E5%89%B2/D%230040.md) 84 | + [Stacked Hourglass Network for Human Pose Estimation](https://arxiv.org/abs/1603.06937) 85 | + [CornerNet: Detecting Objects as Paired Keypoints](https://arxiv.org/abs/1808.01244) 86 | -------------------------------------------------------------------------------- /D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/.gitkeep 
-------------------------------------------------------------------------------- /D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/556.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/556.png -------------------------------------------------------------------------------- /D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/557.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/557.png -------------------------------------------------------------------------------- /D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/558.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/558.png -------------------------------------------------------------------------------- /D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/560.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/560.png -------------------------------------------------------------------------------- /D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/561.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/561.png -------------------------------------------------------------------------------- /D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/562.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/562.png -------------------------------------------------------------------------------- /D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/563.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/563.png -------------------------------------------------------------------------------- /D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/564.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0045-Stacked-Hourglass-Network-for-Human-Pose-Estimation/images/564.png -------------------------------------------------------------------------------- /D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/.gitkeep -------------------------------------------------------------------------------- 
/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/565.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/565.png -------------------------------------------------------------------------------- /D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/566.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/566.png -------------------------------------------------------------------------------- /D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/567.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/567.png -------------------------------------------------------------------------------- /D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/568.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/568.png -------------------------------------------------------------------------------- /D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/569.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/569.png -------------------------------------------------------------------------------- /D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/570.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/570.png -------------------------------------------------------------------------------- /D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/571.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/571.png -------------------------------------------------------------------------------- /D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/572.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/572.png -------------------------------------------------------------------------------- /D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/573.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/573.png 
-------------------------------------------------------------------------------- /D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/574.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/574.png -------------------------------------------------------------------------------- /D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/575.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/575.png -------------------------------------------------------------------------------- /D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/576.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/576.png -------------------------------------------------------------------------------- /D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/577.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0046-Anchor-Free第一篇CornerNet-Detecting-Objects-as-Paired-Keypoints/images/577.png -------------------------------------------------------------------------------- /D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/.gitkeep -------------------------------------------------------------------------------- /D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/565.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/565.png -------------------------------------------------------------------------------- /D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/575.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/575.png -------------------------------------------------------------------------------- /D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/580.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/580.png -------------------------------------------------------------------------------- /D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/581.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/581.png -------------------------------------------------------------------------------- /D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/582.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/582.png -------------------------------------------------------------------------------- /D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/583.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/583.png -------------------------------------------------------------------------------- /D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/584.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/584.png -------------------------------------------------------------------------------- /D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/585.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/585.png -------------------------------------------------------------------------------- /D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/586.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0047-Anchor-Free第二篇CornerNet的变种ExtremeNet/images/586.png -------------------------------------------------------------------------------- /D#0048-Anchor-Free第三篇Objects-as-Points/images/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0048-Anchor-Free第三篇Objects-as-Points/images/.gitkeep -------------------------------------------------------------------------------- /D#0048-Anchor-Free第三篇Objects-as-Points/images/122149.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0048-Anchor-Free第三篇Objects-as-Points/images/122149.png -------------------------------------------------------------------------------- /D#0048-Anchor-Free第三篇Objects-as-Points/images/125313.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0048-Anchor-Free第三篇Objects-as-Points/images/125313.png -------------------------------------------------------------------------------- /D#0048-Anchor-Free第三篇Objects-as-Points/images/130128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0048-Anchor-Free第三篇Objects-as-Points/images/130128.png -------------------------------------------------------------------------------- /D#0048-Anchor-Free第三篇Objects-as-Points/images/130954.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0048-Anchor-Free第三篇Objects-as-Points/images/130954.png -------------------------------------------------------------------------------- /D#0048-Anchor-Free第三篇Objects-as-Points/images/131049.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0048-Anchor-Free第三篇Objects-as-Points/images/131049.png -------------------------------------------------------------------------------- /D#0048-Anchor-Free第三篇Objects-as-Points/images/131135.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0048-Anchor-Free第三篇Objects-as-Points/images/131135.png -------------------------------------------------------------------------------- /D#0048-Anchor-Free第三篇Objects-as-Points/images/131329.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0048-Anchor-Free第三篇Objects-as-Points/images/131329.png -------------------------------------------------------------------------------- /D#0048-Anchor-Free第三篇Objects-as-Points/images/565.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0048-Anchor-Free第三篇Objects-as-Points/images/565.png -------------------------------------------------------------------------------- /D#0048-Anchor-Free第三篇Objects-as-Points/images/580.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0048-Anchor-Free第三篇Objects-as-Points/images/580.png -------------------------------------------------------------------------------- /D#0049-Anchor-Free第四篇CenterNet-Keypoint-Triplets-for-Object-Detection/images/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0049-Anchor-Free第四篇CenterNet-Keypoint-Triplets-for-Object-Detection/images/.gitkeep -------------------------------------------------------------------------------- /D#0049-Anchor-Free第四篇CenterNet-Keypoint-Triplets-for-Object-Detection/images/180227.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0049-Anchor-Free第四篇CenterNet-Keypoint-Triplets-for-Object-Detection/images/180227.png -------------------------------------------------------------------------------- /D#0049-Anchor-Free第四篇CenterNet-Keypoint-Triplets-for-Object-Detection/images/180345.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0049-Anchor-Free第四篇CenterNet-Keypoint-Triplets-for-Object-Detection/images/180345.png -------------------------------------------------------------------------------- /D#0049-Anchor-Free第四篇CenterNet-Keypoint-Triplets-for-Object-Detection/images/181626.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0049-Anchor-Free第四篇CenterNet-Keypoint-Triplets-for-Object-Detection/images/181626.png -------------------------------------------------------------------------------- /D#0049-Anchor-Free第四篇CenterNet-Keypoint-Triplets-for-Object-Detection/images/182402.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Captain1986/CaptainBlackboard/39c0078394eb14ebd5ed58cab5d41717912a92b6/D#0049-Anchor-Free第四篇CenterNet-Keypoint-Triplets-for-Object-Detection/images/182402.png 
#include <iostream>
#include <cmath>

// Never try to check two floating point variables for equality with ==.
// The important rule to remember is that powers of two and integer multiples
// thereof can be perfectly represented; everything else is an approximation.

/// @brief Approximate-equality test for floating point values.
/// @param first   left-hand value
/// @param second  right-hand value
/// @param epsilon maximum absolute difference still considered equal (default 1e-6)
/// @return true when |first - second| < epsilon
bool isEqual(const double first, const double second, const double epsilon = 1e-6)
{
    // Fix: the original called unqualified abs(). Without <cmath> in scope that
    // can resolve to int abs(int) from <cstdlib>, silently truncating the
    // difference to an integer (e.g. 0.4 vs 1.3 -> abs(0) -> "equal").
    // std::fabs is the unambiguous double overload and keeps full precision.
    return std::fabs(first - second) < epsilon;
}

int main(int argc, char *argv[])
{
    float a = 1.0;
    double b = 1.0;

    // float vs double comparison: the float operand is promoted to double.
    // 1.0 is a power of two, both representations are exact, so == holds.
    if (a == b) {
        std::cout << "float 1.0 and doube 1.0 can be compared by == " << std::endl;
    } else {
        std::cerr << "float 1.0 and doube 1.0 CAN NOT be compared by == " << std::endl;
    }

    a = 3.14;
    b = 3.14;

    // 3.14 has no exact binary representation; float and double round it to
    // different values, so after promotion the comparison fails.
    if (a == b) {
        std::cout << "float 3.14 and doube 3.14 can be compared by == " << std::endl;
    } else {
        std::cerr << "float 3.14 and doube 3.14 CAN NOT be compared by == " << std::endl;
    }

    // Casting the double back down to float makes both sides the same rounding.
    if (a == (float)b) {
        std::cout << "float 3.14 and float 3.14 can be compared by == " << std::endl;
    } else {
        std::cerr << "float 3.14 and float 3.14 CAN NOT be compared by == " << std::endl;
    }

    // float vs int comparison: the int operand is converted to float.
    // Small integers convert exactly, so these comparisons hold.
    a = 0.0;

    if (a == 0) {
        std::cout << "float 0.0 and literal 0 can be compared by == " << std::endl;
    } else {
        std::cerr << "float 0.0 and literal 0 CAN NOT be compared by == " << std::endl;
    }

    a = 1.0;

    if (a == 1) {
        std::cout << "float 1.0 and literal 1 can be compared by == " << std::endl;
    } else {
        std::cerr << "float 1.0 and literal 1 CAN NOT be compared by == " << std::endl;
    }

    // float vs double literal: 3.14 is a double literal, so `a` is promoted
    // and the two distinct roundings of 3.14 compare unequal.
    a = 3.14;

    if (a == 3.14) {
        std::cout << "float 3.14 and literal 3.14 can be compared by == " << std::endl;
    } else {
        std::cerr << "float 3.14 and literal 3.14 CAN NOT be compared by == " << std::endl;
    }

    // The safe way to compare floating point values: allow a small tolerance.
    // |float(3.14) - 3.14| is about 1e-7, well under epsilon.
    double epsilon = 1e-6;
    if ( isEqual(a, 3.14, epsilon) ) {
        std::cout << "float 3.14 and literal 3.14 can be compared by isEqual(...) " << std::endl;
    } else {
        std::cerr << "float 3.14 and literal 3.14 CAN NOT be compared by isEqual(...) " << std::endl;
    }

    return 0;
}