├── .gitignore
├── img
│   ├── v1.mp4
│   ├── v2.mp4
│   ├── v3.mp4
│   ├── 10-1.png
│   ├── 2-1.png
│   ├── 2-10.png
│   ├── 2-11.png
│   ├── 2-12.png
│   ├── 2-13.png
│   ├── 2-14.png
│   ├── 2-15.png
│   ├── 2-2.png
│   ├── 2-3.png
│   ├── 2-4.png
│   ├── 2-5.png
│   ├── 2-6.png
│   ├── 2-7.png
│   ├── 2-8.png
│   ├── 2-9.png
│   ├── 3-1.png
│   ├── 3-2.png
│   ├── 3-3.png
│   ├── 3-4.png
│   ├── 3-5.png
│   ├── 4-1.png
│   ├── 4-10.png
│   ├── 4-11.png
│   ├── 4-12.png
│   ├── 4-13.png
│   ├── 4-14.png
│   ├── 4-15.png
│   ├── 4-16.png
│   ├── 4-17.png
│   ├── 4-18.png
│   ├── 4-19.png
│   ├── 4-2.png
│   ├── 4-20.png
│   ├── 4-21.png
│   ├── 4-22.png
│   ├── 4-3.png
│   ├── 4-4.png
│   ├── 4-5.png
│   ├── 4-6.png
│   ├── 4-7.png
│   ├── 4-8.png
│   ├── 4-9.png
│   ├── 5-1.png
│   ├── 5-2.png
│   ├── 6-1.png
│   ├── 6-10.png
│   ├── 6-11.png
│   ├── 6-12.png
│   ├── 6-13.png
│   ├── 6-14.png
│   ├── 6-15.png
│   ├── 6-16.png
│   ├── 6-17.png
│   ├── 6-18.png
│   ├── 6-19.png
│   ├── 6-2.png
│   ├── 6-20.png
│   ├── 6-21.png
│   ├── 6-22.png
│   ├── 6-23.png
│   ├── 6-24.png
│   ├── 6-25.png
│   ├── 6-26.png
│   ├── 6-27.png
│   ├── 6-3.png
│   ├── 6-4.png
│   ├── 6-5.png
│   ├── 6-6.png
│   ├── 6-7.png
│   ├── 6-8.png
│   ├── 6-9.png
│   ├── 8-1.png
│   ├── cnn1.png
│   ├── cnn2.png
│   ├── cnn3.png
│   ├── ct_1.png
│   ├── fpa.png
│   ├── shop.png
│   ├── xray.png
│   ├── L1vsL2.jpg
│   ├── arnoldi.jpg
│   ├── digit.gif
│   ├── full_qr.JPG
│   ├── graph.png
│   ├── lesson4.png
│   ├── nimfa.png
│   ├── nmf_doc.png
│   ├── sparse.png
│   ├── svd_fb.png
│   ├── tex1-1.gif
│   ├── tex1-2.gif
│   ├── tex1-3.gif
│   ├── tex1-4.gif
│   ├── tex1-5.gif
│   ├── tex1-6.gif
│   ├── tex1-7.gif
│   ├── tex4-1.gif
│   ├── tex4-2.gif
│   ├── tex4-3.gif
│   ├── tex4-4.gif
│   ├── tex4-5.gif
│   ├── top10.png
│   ├── L1vsL2_2.png
│   ├── data_xray.png
│   ├── face_nmf.png
│   ├── face_pca.png
│   ├── full_svd.JPG
│   ├── qr_alipay.png
│   ├── block_matrix.png
│   ├── bloom_filter.png
│   ├── face_outputs.png
│   ├── faces_rpca.png
│   ├── fltscale-wh.png
│   ├── projection.png
│   ├── reduced_qr.JPG
│   ├── reduced_svd.JPG
│   ├── sklearn_ct.png
│   ├── InsideImagenet.png
│   ├── document_term.png
│   ├── markov_health.jpg
│   ├── sportspredict.jpeg
│   ├── surveillance3.png
│   ├── adjaceny_matrix.png
│   ├── cincinnati_reds.png
│   ├── cincinnati_reds2.png
│   ├── cython_vs_numba.png
│   ├── hermitian_eigen.JPG
│   ├── page_rank_graph.png
│   ├── pentium_nytimes.png
│   ├── projection_line.png
│   ├── what_is_pytorch.png
│   ├── nonhermitian_eigen.JPG
│   ├── word2vec_analogies.png
│   ├── randomizedSVDbenchmarks.png
│   ├── Finite_element_sparse_matrix.png
│   ├── cholesky_factorization_speed.png
│   ├── tex-0668689340173b7ece8ed05b1fef41f2.gif
│   ├── tex-0efe901699c0a4a6b90bbe9de36c82bc.gif
│   ├── tex-1bc9193e34bc6ecc3df206e9fee737a1.gif
│   ├── tex-1f3bd911ed0fe9a852495e314d71cc6f.gif
│   ├── tex-1fa928cb8ffdc3becc35c3597a6be0cf.gif
│   ├── tex-1ff4e7c4ea49e4f89fcea2a90968d87f.gif
│   ├── tex-20b146a05599f25ed98e8d46822b8cfa.gif
│   ├── tex-212090d41dd7136dd288934d2aa0addd.gif
│   ├── tex-216fbe1d99dcd0f6c93ce475e7cfde71.gif
│   ├── tex-246c34c550a7977ad4c582df9b57f0b6.gif
│   ├── tex-25a2e7b9046096493cb32e8c28332cf6.gif
│   ├── tex-25ebb7d03839869698867bbbf0a9932a.gif
│   ├── tex-2a237e54504442e3d483a39f75df7bfa.gif
│   ├── tex-2a95aaaf954c2187999c6357b04a58dd.gif
│   ├── tex-2c38aefee9439828606c09fe64a540fc.gif
│   ├── tex-3522f8b8d6b2b912a41177e213f96dd2.gif
│   ├── tex-36b7a5a0150dd4e04b4078a7f3aeeac7.gif
│   ├── tex-370268bfaa109db6f22e1b1dbb175b03.gif
│   ├── tex-372c8c0974114a670f97d71577e77b98.gif
│   ├── tex-37775992f2ac92fd81ac90641d7bd911.gif
│   ├── tex-39d9d43173d632ad7babc040b062f89c.gif
│   ├── tex-3a5b7a905599865f84eb22d0f7559171.gif
│   ├── tex-3ce96232d2f5b6a5a0ad8606da843542.gif
│   ├── tex-3fee5b26f9c6ce2571715fb9b282f431.gif
│   ├── tex-40bac7e3198c7355424b1ee8dbe98f60.gif
│   ├── tex-41f6cdbb5eab68455940bb18f5d2eb38.gif
│   ├── tex-4481186643f99234ebdfe2fef5baa9b8.gif
│   ├── tex-49f9c1ce16068918f80263cd2aa8ce82.gif
│   ├── tex-4b62075d74bf4d4e462ede239a6dea12.gif
│   ├── tex-4bf5e6e7f38061ce5bb452465c966eed.gif
│   ├── tex-4c3150fb54600e58070931bf27225aa8.gif
│   ├── tex-521a9daa8c1cd6293c6e22e8e8386c42.gif
│   ├── tex-5581e026c980f5465035922f6a3bd2b1.gif
│   ├── tex-569030d1a75c430d7fb7eae39ba2681c.gif
│   ├── tex-58b1e9f4b9a86690cb106e65a265a34f.gif
│   ├── tex-598e86c6b430c46c0f07aa5cd4ff6254.gif
│   ├── tex-5ca635990dbca0e677a639ea78c11b33.gif
│   ├── tex-5fe8f15151faec7684f7142cafc667c4.gif
│   ├── tex-646ab88a198f4afeb2c72b8e851b35ed.gif
│   ├── tex-6595d679e306a127a3fe53268bcaddb2.gif
│   ├── tex-69d4b6b19f75f7e5b87932c6e2d651ae.gif
│   ├── tex-6bfddf2b3b60a5b9c782652427a72c2a.gif
│   ├── tex-6ce3eb7d4cd7e67415afc2cb2cfad5ff.gif
│   ├── tex-6dabaeb72eb89ffd11a70a83032db2c6.gif
│   ├── tex-6f2e0e91ebd226538ef465b081f4468f.gif
│   ├── tex-706a79d0152a49a31acbe57543deb878.gif
│   ├── tex-75db0522195fefad89d39b56cad146ee.gif
│   ├── tex-7e08feffe0b36227e1f55eef469f5b74.gif
│   ├── tex-7f2a13fc30f16b860d8d8b3e2ba033cd.gif
│   ├── tex-7f57ce5c29b329529f4e3f9a3765b114.gif
│   ├── tex-7f584f910a157bc06c98a7bc04c2f6c2.gif
│   ├── tex-8104a2c0754d95be6fc08c624a869f51.gif
│   ├── tex-81dc1e682f76d75883f68881a975919a.gif
│   ├── tex-82513c6fe20ef419fc103891436353ab.gif
│   ├── tex-82e0e3e93e5381229b1fbf5342650527.gif
│   ├── tex-83fa37f17de1ace1b15afc44510ba1de.gif
│   ├── tex-85089c6af5bd4da24c6ef9006b891700.gif
│   ├── tex-86b461c679e90cc24dc854910f5709b7.gif
│   ├── tex-89dcd0d2e9c8c729b0475c5691ec117e.gif
│   ├── tex-8b7f1b39b8e0258e80aea39ca1c24265.gif
│   ├── tex-93184d704e544153075ad66ee50a11e6.gif
│   ├── tex-94699469cb13f47d93ea4c76de5f21d0.gif
│   ├── tex-9638f37af1fca41b055dce638d17c7c9.gif
│   ├── tex-97b82a4744248541f07907ce55b655e7.gif
│   ├── tex-984b1a7c514d0745420c25c796f068a9.gif
│   ├── tex-9856f83106c82c34de1010bf61297500.gif
│   ├── tex-988b9ac3871364bd5322846c8cf0d884.gif
│   ├── tex-9a9e4519d91963584b808251dd695f2a.gif
│   ├── tex-9afec84def59080f0649f3c4e6bcb102.gif
│   ├── tex-9cf4c5aa6f07fd0a07f20bf280caf968.gif
│   ├── tex-9eee2c1c197de7cf3b522d1cf624846f.gif
│   ├── tex-a32cff3b4bdd4f46771f4cc817a60308.gif
│   ├── tex-a37a721b9f00ed456388db50b0a45ac5.gif
│   ├── tex-a5648d8510dad3cd30913d6d01478c6c.gif
│   ├── tex-a599f5fe0ec2014228b548291962279c.gif
│   ├── tex-a73f577d94ab9875f2d8f378f0c206e6.gif
│   ├── tex-a76ce928e80fa17258b8eb54afe5fee4.gif
│   ├── tex-aa38f107289d4d73d516190581397349.gif
│   ├── tex-ac84cb0b37265aec0386afc0b759d555.gif
│   ├── tex-af9ef2d9d428087a16ff027ddd2a254f.gif
│   ├── tex-b047f24e2fa0d6c8825b03766e27b0b5.gif
│   ├── tex-b065a07a541b090fed1f2557e84d21d8.gif
│   ├── tex-b21c98c8f055b92996a80407da83b1ca.gif
│   ├── tex-b49bd0fb74c5566485bc64026d7b25d0.gif
│   ├── tex-b6515bc4c1610897c8eb15e9a7c41f8a.gif
│   ├── tex-bb4e955e77268f56bb0aa2b892c69ea5.gif
│   ├── tex-bcdc457be3528d6871c31858dc0389d6.gif
│   ├── tex-bf57e363507aefe351c79d666c9dec7e.gif
│   ├── tex-c243886a288804343eee2af0ad8dcebc.gif
│   ├── tex-c26279a88e721f793673a0eab58e3f32.gif
│   ├── tex-cb277091062ae1b5af8d8d6b6844c804.gif
│   ├── tex-d45f0a6200f855ffbed7dbbf8dc6094b.gif
│   ├── tex-d5d4999631fcb52cad0f18f87e9bd219.gif
│   ├── tex-d6f55635dc5c3f389c28c1d0bf62fbf8.gif
│   ├── tex-d7f295cb8499408dbee77e8cd68ae9af.gif
│   ├── tex-da0d0ed6422853602b6d13b51c9ff509.gif
│   ├── tex-dfb3126ab90267fa6df10126a2151119.gif
│   ├── tex-e188ba54d741c47cccdec77b8ef7c5e5.gif
│   ├── tex-e1b212ddfb441c50f8fe3d58dd87eb37.gif
│   ├── tex-e2c7c210aaacf3c6067aab0b96f6caa6.gif
│   ├── tex-e2d55fffd53a113312172c3c24cc951c.gif
│   ├── tex-e30b5d17dfcd09314f65b066e89a9f85.gif
│   ├── tex-e8407a56ca47caa4fe9602f66ff53f38.gif
│   ├── tex-e847c49ecde16aabe6054b63a48387e0.gif
│   ├── tex-eba57389f737cede6f2839b89155acfc.gif
│   ├── tex-ed968a8a7ef1411c816a603227f1044d.gif
│   ├── tex-efd8ffa0fdc4e994808665f543b0fc78.gif
│   ├── tex-f2f5a286e58badf8f6e65cd86de754a2.gif
│   ├── tex-f44ed84fd6f63e2fb0e7dd2a90a2c3a1.gif
│   ├── tex-f7d89fc326b255fea73d7fcf23dea705.gif
│   ├── tex-fc34f66ec4dc2a0fc6ad00807b6f87c1.gif
│   └── tex-fd654a7c1aa786061c06e450074793e0.gif
├── SUMMARY.md
├── src
│   └── process_tex.js
├── README.md
├── styles
│   └── ebook.css
├── 7.md
├── 5.md
├── 2.md
├── 10.md
├── 1.md
├── LICENSE
├── 6.md
├── 8.md
└── 3.md

/.gitignore:
--------------------------------------------------------------------------------
_book
Thumbs.db
node_modules
--------------------------------------------------------------------------------

/img/ (binary assets):
--------------------------------------------------------------------------------
The figures, GIF-rendered formulas, and MP4 videos listed in the tree above are
binary files, so their contents are not reproduced in this dump. Each one is
served from the repository at
https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/<filename>
--------------------------------------------------------------------------------
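If you need these assets locally (for example, to rebuild the ebook offline), the raw URL pattern above is all that is required. The following is a minimal sketch in Python using only the standard library; the helper name fetch_asset and the sample file 2-1.png are illustrative choices made here, not part of the repository's own tooling (which ships only src/process_tex.js).

# Minimal sketch (not part of the repo): fetch one img/ asset via the raw URL
# pattern noted above. fetch_asset and the sample name "2-1.png" are illustrative.
import os
from urllib.request import urlretrieve

BASE = "https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/"

def fetch_asset(name, dest_dir="img"):
    """Download img/<name> into dest_dir and return the local path."""
    os.makedirs(dest_dir, exist_ok=True)   # create the target folder if missing
    local_path = os.path.join(dest_dir, name)
    urlretrieve(BASE + name, local_path)   # raises urllib.error.URLError on failure
    return local_path

if __name__ == "__main__":
    print(fetch_asset("2-1.png"))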
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-9a9e4519d91963584b808251dd695f2a.gif -------------------------------------------------------------------------------- /img/tex-9afec84def59080f0649f3c4e6bcb102.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-9afec84def59080f0649f3c4e6bcb102.gif -------------------------------------------------------------------------------- /img/tex-9cf4c5aa6f07fd0a07f20bf280caf968.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-9cf4c5aa6f07fd0a07f20bf280caf968.gif -------------------------------------------------------------------------------- /img/tex-9eee2c1c197de7cf3b522d1cf624846f.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-9eee2c1c197de7cf3b522d1cf624846f.gif -------------------------------------------------------------------------------- /img/tex-a32cff3b4bdd4f46771f4cc817a60308.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-a32cff3b4bdd4f46771f4cc817a60308.gif -------------------------------------------------------------------------------- /img/tex-a37a721b9f00ed456388db50b0a45ac5.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-a37a721b9f00ed456388db50b0a45ac5.gif -------------------------------------------------------------------------------- /img/tex-a5648d8510dad3cd30913d6d01478c6c.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-a5648d8510dad3cd30913d6d01478c6c.gif -------------------------------------------------------------------------------- /img/tex-a599f5fe0ec2014228b548291962279c.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-a599f5fe0ec2014228b548291962279c.gif -------------------------------------------------------------------------------- /img/tex-a73f577d94ab9875f2d8f378f0c206e6.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-a73f577d94ab9875f2d8f378f0c206e6.gif -------------------------------------------------------------------------------- /img/tex-a76ce928e80fa17258b8eb54afe5fee4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-a76ce928e80fa17258b8eb54afe5fee4.gif -------------------------------------------------------------------------------- /img/tex-aa38f107289d4d73d516190581397349.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-aa38f107289d4d73d516190581397349.gif -------------------------------------------------------------------------------- 
/img/tex-ac84cb0b37265aec0386afc0b759d555.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-ac84cb0b37265aec0386afc0b759d555.gif -------------------------------------------------------------------------------- /img/tex-af9ef2d9d428087a16ff027ddd2a254f.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-af9ef2d9d428087a16ff027ddd2a254f.gif -------------------------------------------------------------------------------- /img/tex-b047f24e2fa0d6c8825b03766e27b0b5.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-b047f24e2fa0d6c8825b03766e27b0b5.gif -------------------------------------------------------------------------------- /img/tex-b065a07a541b090fed1f2557e84d21d8.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-b065a07a541b090fed1f2557e84d21d8.gif -------------------------------------------------------------------------------- /img/tex-b21c98c8f055b92996a80407da83b1ca.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-b21c98c8f055b92996a80407da83b1ca.gif -------------------------------------------------------------------------------- /img/tex-b49bd0fb74c5566485bc64026d7b25d0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-b49bd0fb74c5566485bc64026d7b25d0.gif -------------------------------------------------------------------------------- /img/tex-b6515bc4c1610897c8eb15e9a7c41f8a.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-b6515bc4c1610897c8eb15e9a7c41f8a.gif -------------------------------------------------------------------------------- /img/tex-bb4e955e77268f56bb0aa2b892c69ea5.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-bb4e955e77268f56bb0aa2b892c69ea5.gif -------------------------------------------------------------------------------- /img/tex-bcdc457be3528d6871c31858dc0389d6.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-bcdc457be3528d6871c31858dc0389d6.gif -------------------------------------------------------------------------------- /img/tex-bf57e363507aefe351c79d666c9dec7e.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-bf57e363507aefe351c79d666c9dec7e.gif -------------------------------------------------------------------------------- /img/tex-c243886a288804343eee2af0ad8dcebc.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-c243886a288804343eee2af0ad8dcebc.gif 
-------------------------------------------------------------------------------- /img/tex-c26279a88e721f793673a0eab58e3f32.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-c26279a88e721f793673a0eab58e3f32.gif -------------------------------------------------------------------------------- /img/tex-cb277091062ae1b5af8d8d6b6844c804.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-cb277091062ae1b5af8d8d6b6844c804.gif -------------------------------------------------------------------------------- /img/tex-d45f0a6200f855ffbed7dbbf8dc6094b.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-d45f0a6200f855ffbed7dbbf8dc6094b.gif -------------------------------------------------------------------------------- /img/tex-d5d4999631fcb52cad0f18f87e9bd219.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-d5d4999631fcb52cad0f18f87e9bd219.gif -------------------------------------------------------------------------------- /img/tex-d6f55635dc5c3f389c28c1d0bf62fbf8.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-d6f55635dc5c3f389c28c1d0bf62fbf8.gif -------------------------------------------------------------------------------- /img/tex-d7f295cb8499408dbee77e8cd68ae9af.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-d7f295cb8499408dbee77e8cd68ae9af.gif -------------------------------------------------------------------------------- /img/tex-da0d0ed6422853602b6d13b51c9ff509.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-da0d0ed6422853602b6d13b51c9ff509.gif -------------------------------------------------------------------------------- /img/tex-dfb3126ab90267fa6df10126a2151119.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-dfb3126ab90267fa6df10126a2151119.gif -------------------------------------------------------------------------------- /img/tex-e188ba54d741c47cccdec77b8ef7c5e5.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-e188ba54d741c47cccdec77b8ef7c5e5.gif -------------------------------------------------------------------------------- /img/tex-e1b212ddfb441c50f8fe3d58dd87eb37.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-e1b212ddfb441c50f8fe3d58dd87eb37.gif -------------------------------------------------------------------------------- /img/tex-e2c7c210aaacf3c6067aab0b96f6caa6.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-e2c7c210aaacf3c6067aab0b96f6caa6.gif -------------------------------------------------------------------------------- /img/tex-e2d55fffd53a113312172c3c24cc951c.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-e2d55fffd53a113312172c3c24cc951c.gif -------------------------------------------------------------------------------- /img/tex-e30b5d17dfcd09314f65b066e89a9f85.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-e30b5d17dfcd09314f65b066e89a9f85.gif -------------------------------------------------------------------------------- /img/tex-e8407a56ca47caa4fe9602f66ff53f38.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-e8407a56ca47caa4fe9602f66ff53f38.gif -------------------------------------------------------------------------------- /img/tex-e847c49ecde16aabe6054b63a48387e0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-e847c49ecde16aabe6054b63a48387e0.gif -------------------------------------------------------------------------------- /img/tex-eba57389f737cede6f2839b89155acfc.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-eba57389f737cede6f2839b89155acfc.gif -------------------------------------------------------------------------------- /img/tex-ed968a8a7ef1411c816a603227f1044d.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-ed968a8a7ef1411c816a603227f1044d.gif -------------------------------------------------------------------------------- /img/tex-efd8ffa0fdc4e994808665f543b0fc78.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-efd8ffa0fdc4e994808665f543b0fc78.gif -------------------------------------------------------------------------------- /img/tex-f2f5a286e58badf8f6e65cd86de754a2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-f2f5a286e58badf8f6e65cd86de754a2.gif -------------------------------------------------------------------------------- /img/tex-f44ed84fd6f63e2fb0e7dd2a90a2c3a1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-f44ed84fd6f63e2fb0e7dd2a90a2c3a1.gif -------------------------------------------------------------------------------- /img/tex-f7d89fc326b255fea73d7fcf23dea705.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-f7d89fc326b255fea73d7fcf23dea705.gif -------------------------------------------------------------------------------- /img/tex-fc34f66ec4dc2a0fc6ad00807b6f87c1.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-fc34f66ec4dc2a0fc6ad00807b6f87c1.gif -------------------------------------------------------------------------------- /img/tex-fd654a7c1aa786061c06e450074793e0.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apachecn/fastai-num-linalg-v2-zh/HEAD/img/tex-fd654a7c1aa786061c06e450074793e0.gif -------------------------------------------------------------------------------- /SUMMARY.md: -------------------------------------------------------------------------------- 1 | + [fast.ai 数值线性代数讲义中文版 v2](README.md) 2 | + [一、我们为什么在这里](1.md) 3 | + [二、SVD 背景消除](2.md) 4 | + [三、使用 NMF 和 SVD 的主题建模](3.md) 5 | + [四、随机化 SVD](4.md) 6 | + [五、LU 分解](5.md) 7 | + [六、使用鲁棒回归的 CT 扫描的压缩感知](6.md) 8 | + [七、线性回归和健康结果](7.md) 9 | + [八、如何实现线性回归](8.md) 10 | + [九、PageRank 和特征值分解](9.md) 11 | + [十、实现 QR 分解](10.md) -------------------------------------------------------------------------------- /src/process_tex.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs') 2 | var crypto = require('crypto') 3 | var request = require('sync-request') 4 | 5 | var doc_dir = '..' 6 | var img_dir = '../img' 7 | 8 | function processTex(md) { 9 | 10 | var rm; 11 | while(rm = /\$(.+?)\$/g.exec(md)){ 12 | var tex = rm[1] 13 | var url = 'http://latex.codecogs.com/gif.latex?' 14 | + encodeURIComponent(tex) 15 | var tex_md5 = crypto.createHash("md5").update(tex).digest('hex') 16 | var img = request('get', url).getBody() 17 | 18 | // replace_all 19 | md = md.split(rm[0]).join(`![${tex}](img/tex-${tex_md5}.gif)`) 20 | fs.writeFileSync(`${img_dir}/tex-${tex_md5}.gif`, img) 21 | 22 | console.log(tex_md5) 23 | } 24 | 25 | return md 26 | 27 | } 28 | 29 | function main() { 30 | 31 | var flist = fs.readdirSync(doc_dir).filter(s => s.endsWith('.md')) 32 | 33 | for(var fname of flist) { 34 | 35 | fname = doc_dir + '/' + fname 36 | console.log(fname) 37 | 38 | var md = fs.readFileSync(fname, 'utf-8') 39 | md = processTex(md) 40 | fs.writeFileSync(fname, md) 41 | } 42 | 43 | console.log('done') 44 | 45 | } 46 | 47 | main() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fast.ai 数值线性代数讲义中文版 v2 2 | 3 | > 原文:[fastai/numerical-linear-algebra-v2](https://nbviewer.jupyter.org/github/fastai/numerical-linear-algebra-v2/tree/master/nbs/) 4 | > 5 | > 译者:[飞龙](https://github.com/wizardforcel) 6 | > 7 | > 协议:[CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/) 8 | > 9 | > 自豪地采用[谷歌翻译](https://translate.google.cn/) 10 | > 11 | > 欢迎任何人参与和完善:一个人可以走的很快,但是一群人却可以走的更远。 12 | 13 | + [ApacheCN 机器学习交流群 629470233](http://shang.qq.com/wpa/qunwpa?idkey=30e5f1123a79867570f665aa3a483ca404b1c3f77737bc01ec520ed5f078ddef) 14 | + [ApacheCN 学习资源](http://www.apachecn.org/) 15 | + [fast.ai 机器学习和深度学习中文笔记](https://github.com/apachecn/fastai-ml-dl-notes-zh) 16 | 17 | ‍ 18 | 19 | + [在线阅读](https://www.gitbook.com/book/wizardforcel/fastai-num-linalg-v2/details) 20 | + [PDF格式](https://www.gitbook.com/download/pdf/book/wizardforcel/fastai-num-linalg-v2) 21 | + [EPUB格式](https://www.gitbook.com/download/epub/book/wizardforcel/fastai-num-linalg-v2) 22 | + [MOBI格式](https://www.gitbook.com/download/mobi/book/wizardforcel/fastai-num-linalg-v2) 23 | + 
[代码仓库](https://github.com/apachecn/fastai-num-linalg-v2-zh) 24 | 25 | ## 赞助我 26 | 27 | ![](img/qr_alipay.png) 28 | 29 | ## 协议 30 | 31 | [CC BY-NC-SA 4.0](http://creativecommons.org/licenses/by-nc-sa/4.0/) 32 | -------------------------------------------------------------------------------- /styles/ebook.css: -------------------------------------------------------------------------------- 1 | /* GitHub stylesheet for MarkdownPad (http://markdownpad.com) */ 2 | /* Author: Nicolas Hery - http://nicolashery.com */ 3 | /* Version: b13fe65ca28d2e568c6ed5d7f06581183df8f2ff */ 4 | /* Source: https://github.com/nicolahery/markdownpad-github */ 5 | 6 | /* RESET 7 | =============================================================================*/ 8 | 9 | html, body, div, span, applet, object, iframe, h1, h2, h3, h4, h5, h6, p, blockquote, pre, a, abbr, acronym, address, big, cite, code, del, dfn, em, img, ins, kbd, q, s, samp, small, strike, strong, sub, sup, tt, var, b, u, i, center, dl, dt, dd, ol, ul, li, fieldset, form, label, legend, table, caption, tbody, tfoot, thead, tr, th, td, article, aside, canvas, details, embed, figure, figcaption, footer, header, hgroup, menu, nav, output, ruby, section, summary, time, mark, audio, video { 10 | margin: 0; 11 | padding: 0; 12 | border: 0; 13 | } 14 | 15 | /* BODY 16 | =============================================================================*/ 17 | 18 | body { 19 | font-family: Helvetica, arial, freesans, clean, sans-serif; 20 | font-size: 14px; 21 | line-height: 1.6; 22 | color: #333; 23 | background-color: #fff; 24 | padding: 20px; 25 | max-width: 960px; 26 | margin: 0 auto; 27 | } 28 | 29 | body>*:first-child { 30 | margin-top: 0 !important; 31 | } 32 | 33 | body>*:last-child { 34 | margin-bottom: 0 !important; 35 | } 36 | 37 | /* BLOCKS 38 | =============================================================================*/ 39 | 40 | p, blockquote, ul, ol, dl, table, pre { 41 | margin: 15px 0; 42 | } 43 | 44 | /* HEADERS 45 | =============================================================================*/ 46 | 47 | h1, h2, h3, h4, h5, h6 { 48 | margin: 20px 0 10px; 49 | padding: 0; 50 | font-weight: bold; 51 | -webkit-font-smoothing: antialiased; 52 | } 53 | 54 | h1 tt, h1 code, h2 tt, h2 code, h3 tt, h3 code, h4 tt, h4 code, h5 tt, h5 code, h6 tt, h6 code { 55 | font-size: inherit; 56 | } 57 | 58 | h1 { 59 | font-size: 24px; 60 | border-bottom: 1px solid #ccc; 61 | color: #000; 62 | } 63 | 64 | h2 { 65 | font-size: 18px; 66 | color: #000; 67 | } 68 | 69 | h3 { 70 | font-size: 14px; 71 | } 72 | 73 | h4 { 74 | font-size: 14px; 75 | } 76 | 77 | h5 { 78 | font-size: 14px; 79 | } 80 | 81 | h6 { 82 | color: #777; 83 | font-size: 14px; 84 | } 85 | 86 | body>h2:first-child, body>h1:first-child, body>h1:first-child+h2, body>h3:first-child, body>h4:first-child, body>h5:first-child, body>h6:first-child { 87 | margin-top: 0; 88 | padding-top: 0; 89 | } 90 | 91 | a:first-child h1, a:first-child h2, a:first-child h3, a:first-child h4, a:first-child h5, a:first-child h6 { 92 | margin-top: 0; 93 | padding-top: 0; 94 | } 95 | 96 | h1+p, h2+p, h3+p, h4+p, h5+p, h6+p { 97 | margin-top: 10px; 98 | } 99 | 100 | /* LINKS 101 | =============================================================================*/ 102 | 103 | a { 104 | color: #4183C4; 105 | text-decoration: none; 106 | } 107 | 108 | a:hover { 109 | text-decoration: underline; 110 | } 111 | 112 | /* LISTS 113 | =============================================================================*/ 114 | 115 | ul, ol { 116 | 
padding-left: 30px; 117 | } 118 | 119 | ul li > :first-child, 120 | ol li > :first-child, 121 | ul li ul:first-of-type, 122 | ol li ol:first-of-type, 123 | ul li ol:first-of-type, 124 | ol li ul:first-of-type { 125 | margin-top: 0px; 126 | } 127 | 128 | ul ul, ul ol, ol ol, ol ul { 129 | margin-bottom: 0; 130 | } 131 | 132 | dl { 133 | padding: 0; 134 | } 135 | 136 | dl dt { 137 | font-size: 14px; 138 | font-weight: bold; 139 | font-style: italic; 140 | padding: 0; 141 | margin: 15px 0 5px; 142 | } 143 | 144 | dl dt:first-child { 145 | padding: 0; 146 | } 147 | 148 | dl dt>:first-child { 149 | margin-top: 0px; 150 | } 151 | 152 | dl dt>:last-child { 153 | margin-bottom: 0px; 154 | } 155 | 156 | dl dd { 157 | margin: 0 0 15px; 158 | padding: 0 15px; 159 | } 160 | 161 | dl dd>:first-child { 162 | margin-top: 0px; 163 | } 164 | 165 | dl dd>:last-child { 166 | margin-bottom: 0px; 167 | } 168 | 169 | /* CODE 170 | =============================================================================*/ 171 | 172 | pre, code, tt { 173 | font-size: 12px; 174 | font-family: Consolas, "Liberation Mono", Courier, monospace; 175 | } 176 | 177 | code, tt { 178 | margin: 0 0px; 179 | padding: 0px 0px; 180 | white-space: nowrap; 181 | border: 1px solid #eaeaea; 182 | background-color: #f8f8f8; 183 | border-radius: 3px; 184 | } 185 | 186 | pre>code { 187 | margin: 0; 188 | padding: 0; 189 | white-space: pre; 190 | border: none; 191 | background: transparent; 192 | } 193 | 194 | pre { 195 | background-color: #f8f8f8; 196 | border: 1px solid #ccc; 197 | font-size: 13px; 198 | line-height: 19px; 199 | overflow: auto; 200 | padding: 6px 10px; 201 | border-radius: 3px; 202 | } 203 | 204 | pre code, pre tt { 205 | background-color: transparent; 206 | border: none; 207 | } 208 | 209 | kbd { 210 | -moz-border-bottom-colors: none; 211 | -moz-border-left-colors: none; 212 | -moz-border-right-colors: none; 213 | -moz-border-top-colors: none; 214 | background-color: #DDDDDD; 215 | background-image: linear-gradient(#F1F1F1, #DDDDDD); 216 | background-repeat: repeat-x; 217 | border-color: #DDDDDD #CCCCCC #CCCCCC #DDDDDD; 218 | border-image: none; 219 | border-radius: 2px 2px 2px 2px; 220 | border-style: solid; 221 | border-width: 1px; 222 | font-family: "Helvetica Neue",Helvetica,Arial,sans-serif; 223 | line-height: 10px; 224 | padding: 1px 4px; 225 | } 226 | 227 | /* QUOTES 228 | =============================================================================*/ 229 | 230 | blockquote { 231 | border-left: 4px solid #DDD; 232 | padding: 0 15px; 233 | color: #777; 234 | } 235 | 236 | blockquote>:first-child { 237 | margin-top: 0px; 238 | } 239 | 240 | blockquote>:last-child { 241 | margin-bottom: 0px; 242 | } 243 | 244 | /* HORIZONTAL RULES 245 | =============================================================================*/ 246 | 247 | hr { 248 | clear: both; 249 | margin: 15px 0; 250 | height: 0px; 251 | overflow: hidden; 252 | border: none; 253 | background: transparent; 254 | border-bottom: 4px solid #ddd; 255 | padding: 0; 256 | } 257 | 258 | /* TABLES 259 | =============================================================================*/ 260 | 261 | table th { 262 | font-weight: bold; 263 | } 264 | 265 | table th, table td { 266 | border: 1px solid #ccc; 267 | padding: 6px 13px; 268 | } 269 | 270 | table tr { 271 | border-top: 1px solid #ccc; 272 | background-color: #fff; 273 | } 274 | 275 | table tr:nth-child(2n) { 276 | background-color: #f8f8f8; 277 | } 278 | 279 | /* IMAGES 280 | 
=============================================================================*/ 281 | 282 | img { 283 | max-width: 100% 284 | } -------------------------------------------------------------------------------- /7.md: -------------------------------------------------------------------------------- 1 | # 七、线性回归和健康结果 2 | 3 | ## 糖尿病数据集 4 | 5 | 我们将使用来自糖尿病患者的数据集。 数据由 442 个样本和 10 个变量(都是生理特征)组成,因此它很高而且很窄。 因变量是基线后一年疾病进展的定量测量。 6 | 7 | 这是一个经典的数据集,由 Efron,Hastie,Johnstone 和 Tibshirani 在他们的最小角度回归的论文中使用,也是 scikit-learn 中包含的众多数据集之一。 8 | 9 | ```py 10 | data = datasets.load_diabetes() 11 | 12 | feature_names=['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'] 13 | 14 | trn,test,y_trn,y_test = train_test_split(data.data, data.target, test_size=0.2) 15 | 16 | trn.shape, test.shape 17 | 18 | # ((353, 10), (89, 10)) 19 | ``` 20 | 21 | ## Sklearn 中的线性回归 22 | 23 | 考虑系统`Xβ=y`,其中`X`的行比列更多。 当你有比变量更多的数据样本时会发生这种情况。 我们想要找到 ![\hat \beta](img/tex-37775992f2ac92fd81ac90641d7bd911.gif) 来最小化: 24 | 25 | ![\big\vert\big\vert X\beta - y \big\vert\big\vert_2](img/tex-75db0522195fefad89d39b56cad146ee.gif) 26 | 27 | 让我们从使用 sklearn 实现开始: 28 | 29 | ```py 30 | regr = linear_model.LinearRegression() 31 | %timeit regr.fit(trn, y_trn) 32 | 33 | # 458 µs ± 62.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) 34 | 35 | pred = regr.predict(test) 36 | ``` 37 | 38 | 有一些指标来表示我们的预测有多好,会很有帮助。 我们将研究均方范数(L2)和平均绝对误差(L1)。 39 | 40 | 41 | ```py 42 | def regr_metrics(act, pred): 43 | return (math.sqrt(metrics.mean_squared_error(act, pred)), 44 | metrics.mean_absolute_error(act, pred)) 45 | 46 | regr_metrics(y_test, regr.predict(test)) 47 | 48 | # (75.36166834955054, 60.629082113104403) 49 | ``` 50 | 51 | ## 多项式特征 52 | 53 | 线性回归找到最佳系数`βi`: 54 | 55 | ![x_0\beta_0 + x_1\beta_1 + x_2\beta_2 = y](img/tex-41f6cdbb5eab68455940bb18f5d2eb38.gif) 56 | 57 | 添加多项式特征仍然是线性回归问题,只需更多项: 58 | 59 | ![x_0\beta_0 + x_1\beta_1 + x_2\beta_2 + x_0^2\beta_3 + x_0 x_1\beta_4 + x_0 x_2\beta_5 + x_1^2\beta_6 + x_1 x_2\beta_7 + x_2^2\beta_8 = y](img/tex-569030d1a75c430d7fb7eae39ba2681c.gif) 60 | 61 | 我们需要使用原始数据`X`来计算其他多项式特征。 62 | 63 | ```py 64 | trn.shape 65 | 66 | # (353, 10) 67 | ``` 68 | 69 | 现在,我们想通过添加更多功能,来尝试提高模型的表现。 目前,我们的模型在每个变量中都是线性的,但我们可以添加多项式特征来改变它。 70 | 71 | ```py 72 | poly = PolynomialFeatures(include_bias=False) 73 | 74 | trn_feat = poly.fit_transform(trn) 75 | 76 | ', '.join(poly.get_feature_names(feature_names)) 77 | 78 | # 'age, sex, bmi, bp, s1, s2, s3, s4, s5, s6, age^2, age sex, age bmi, age bp, age s1, age s2, age s3, age s4, age s5, age s6, sex^2, sex bmi, sex bp, sex s1, sex s2, sex s3, sex s4, sex s5, sex s6, bmi^2, bmi bp, bmi s1, bmi s2, bmi s3, bmi s4, bmi s5, bmi s6, bp^2, bp s1, bp s2, bp s3, bp s4, bp s5, bp s6, s1^2, s1 s2, s1 s3, s1 s4, s1 s5, s1 s6, s2^2, s2 s3, s2 s4, s2 s5, s2 s6, s3^2, s3 s4, s3 s5, s3 s6, s4^2, s4 s5, s4 s6, s5^2, s5 s6, s6^2' 79 | 80 | trn_feat.shape 81 | 82 | # (353, 65) 83 | 84 | regr.fit(trn_feat, y_trn) 85 | 86 | # LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False) 87 | 88 | regr_metrics(y_test, regr.predict(poly.fit_transform(test))) 89 | 90 | # (55.747345922929185, 42.836164292252235) 91 | ``` 92 | 93 | 时间对于特征数是平方的,对于样本数是线性的,所以这将变得非常慢! 94 | 95 | ```py 96 | %timeit poly.fit_transform(trn) 97 | 98 | # 635 µs ± 9.25 µs per loop (mean ± std. dev. 
of 7 runs, 1000 loops each) 99 | ``` 100 | 101 | ## 加速特征生成 102 | 103 | 我们想加快速度。 我们将使用 Numba,一个直接将代码编译为 C 的 Python 库。 104 | 105 | Numba 是一个编译器。 106 | 107 | ### 资源 108 | 109 | Jake VanderPlas 的[这个教程](https://jakevdp.github.io/blog/2012/08/24/numba-vs-cython/)是一个很好的介绍。 在这里,Jake 使用 Numba 实现了一个[非平凡的算法](https://jakevdp.github.io/blog/2015/02/24/optimizing-python-with-numpy-and-numba/)(非均匀快速傅里叶变换)。 110 | 111 | Cython 是另一种选择。 我发现 Cython 主要比 Numba 更多的知识(它更接近 C),但提供类似 Numba 的加速。 112 | 113 | ![](img/cython_vs_numba.png) 114 | 115 | 这里是预先编译(AOT)编译器,即时编译(JIT)编译器和解释器之间差异的[全面回答](https://softwareengineering.stackexchange.com/questions/246094/understanding-the-differences-traditional-interpreter-jit-compiler-jit-interp)。 116 | 117 | ### 使用向量化和原生代码进行实验 118 | 119 | 120 | 让我们先了解一下 Numba,然后我们将回到我们的糖尿病数据集回归的多项式特征问题。 121 | 122 | ```py 123 | %matplotlib inline 124 | 125 | import math, numpy as np, matplotlib.pyplot as plt 126 | from pandas_summary import DataFrameSummary 127 | from scipy import ndimage 128 | 129 | from numba import jit, vectorize, guvectorize, cuda, float32, void, float64 130 | ``` 131 | 132 | 我们将展示以下方面的影响: 133 | 134 | + 避免内存分配和副本(比 CPU 计算慢) 135 | + 更好的局部性 136 | + 向量化 137 | 138 | 如果我们一次在整个数组上使用 numpy,它会创建大量的临时值,并且不能使用缓存。 如果我们一次使用 numba 循环遍历数组项,那么我们就不必分配大型临时数组,并且可以复用缓存数据,因为我们正在对每个数组项进行多次计算。 139 | 140 | ```py 141 | # 无类型和没有向量化 142 | def proc_python(xx,yy): 143 | zz = np.zeros(nobs, dtype='float32') 144 | for j in range(nobs): 145 | x, y = xx[j], yy[j] 146 | x = x*2 - ( y * 55 ) 147 | y = x + y*2 148 | z = x + y + 99 149 | z = z * ( z - .88 ) 150 | zz[j] = z 151 | return zz 152 | 153 | nobs = 10000 154 | x = np.random.randn(nobs).astype('float32') 155 | y = np.random.randn(nobs).astype('float32') 156 | 157 | %timeit proc_python(x,y) 158 | 159 | # 49.8 ms ± 1.19 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) 160 | ``` 161 | 162 | ### NumPy 163 | 164 | Numpy 让我们对其向量化: 165 | 166 | ```py 167 | # 有类型和向量化 168 | def proc_numpy(x,y): 169 | z = np.zeros(nobs, dtype='float32') 170 | x = x*2 - ( y * 55 ) 171 | y = x + y*2 172 | z = x + y + 99 173 | z = z * ( z - .88 ) 174 | return z 175 | 176 | np.allclose( proc_numpy(x,y), proc_python(x,y), atol=1e-4 ) 177 | 178 | # True 179 | 180 | %timeit proc_numpy(x,y) # Typed and vectorized 181 | 182 | # 35.9 µs ± 166 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each) 183 | ``` 184 | 185 | ### Numba 186 | 187 | Numba 提供几种不同的装饰器。 我们将尝试两种不同的方法: 188 | 189 | + `@jit`:非常一般 190 | + `@vectorize`:不需要编写for循环。操作相同大小的向量时很有用 191 | 192 | 首先,我们将使用 Numba 的`jit`(即时)编译器装饰器,而无需显式向量化。 这避免了大量内存分配,因此我们有更好的局部性: 193 | 194 | ```py 195 | @jit() 196 | def proc_numba(xx,yy,zz): 197 | for j in range(nobs): 198 | x, y = xx[j], yy[j] 199 | x = x*2 - ( y * 55 ) 200 | y = x + y*2 201 | z = x + y + 99 202 | z = z * ( z - .88 ) 203 | zz[j] = z 204 | return zz 205 | 206 | z = np.zeros(nobs).astype('float32') 207 | np.allclose( proc_numpy(x,y), proc_numba(x,y,z), atol=1e-4 ) 208 | 209 | # True 210 | 211 | %timeit proc_numba(x,y,z) 212 | 213 | # 6.4 µs ± 17.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each) 214 | ``` 215 | 216 | 现在我们将使用 Numba 的`vectorize`装饰器。 Numba 的编译器以比普通 Python 和 Numpy 更聪明的方式优化它。 它为你写了一个 Numpy `ufunc`,传统上它涉及编写 C 并且不那么简单。 217 | 218 | ```py 219 | @vectorize 220 | def vec_numba(x,y): 221 | x = x*2 - ( y * 55 ) 222 | y = x + y*2 223 | z = x + y + 99 224 | return z * ( z - .88 ) 225 | 226 | np.allclose(vec_numba(x,y), proc_numba(x,y,z), atol=1e-4 ) 227 | 228 | # True 229 | 230 | %timeit vec_numba(x,y) 231 | 232 | # 5.82 µs ± 14.4 ns per loop (mean ± std. dev. 
of 7 runs, 100000 loops each) 233 | ``` 234 | 235 | Numba 很棒。 看看这有多快! 236 | 237 | ### Numba 多项式特征 238 | 239 | ```py 240 | @jit(nopython=True) 241 | def vec_poly(x, res): 242 | m,n=x.shape 243 | feat_idx=0 244 | for i in range(n): 245 | v1=x[:,i] 246 | for k in range(m): res[k,feat_idx] = v1[k] 247 | feat_idx+=1 248 | for j in range(i,n): 249 | for k in range(m): res[k,feat_idx] = v1[k]*x[k,j] 250 | feat_idx+=1 251 | ``` 252 | 253 | ### 行序和列序存储 254 | 255 | 来自 [Eli Bendersky 的博客文章](http://eli.thegreenplace.net/2015/memory-layout-of-multi-dimensional-arrays/): 256 | 257 | “矩阵的行序布局将第一行放在连续的内存中,然后是第二行放在它后面,然后是第三行,依此类推。列序布局将第一列放在连续内存中,然后放入第二列,等等....虽然知道特定数据集使用哪种布局对于良好的性能至关重要,但对于哪种布局“更好”的问题,没有单一的答案。” 258 | 259 | “事实证明,匹配算法与数据布局的工作方式,可以决定应用程序的性能。” 260 | 261 | “简短的说法是:始终按照布局顺序遍历数据。” 262 | 263 | 列序布局:Fortran,Matlab,R 和 Julia 264 | 265 | 行序布局:C,C ++,Python,Pascal,Mathematica 266 | 267 | ```py 268 | trn = np.asfortranarray(trn) 269 | test = np.asfortranarray(test) 270 | 271 | m,n=trn.shape 272 | n_feat = n*(n+1)//2 + n 273 | trn_feat = np.zeros((m,n_feat), order='F') 274 | test_feat = np.zeros((len(y_test), n_feat), order='F') 275 | 276 | vec_poly(trn, trn_feat) 277 | vec_poly(test, test_feat) 278 | 279 | regr.fit(trn_feat, y_trn) 280 | 281 | # LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False) 282 | 283 | regr_metrics(y_test, regr.predict(test_feat)) 284 | 285 | # (55.74734592292935, 42.836164292252306) 286 | 287 | %timeit vec_poly(trn, trn_feat) 288 | 289 | # 7.33 µs ± 19.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each) 290 | ``` 291 | 292 | 回想一下,这是 sklearn `PolynomialFeatures`实现的时间,它是由专家创建的: 293 | 294 | ```py 295 | %timeit poly.fit_transform(trn) 296 | 297 | # 635 µs ± 9.25 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) 298 | 299 | 605/7.7 300 | 301 | # 78.57142857142857 302 | ``` 303 | 304 | 这是一个大问题! Numba 太神奇了! 只需一行代码,我们就可以获得比 scikit 学习快 78 倍的速度(由专家优化)。 305 | 306 | ### 正则化和噪声 307 | 308 | 正则化是一种减少过拟合,并创建更好地泛化到新数据的模型的方法。 309 | 310 | ### 正则化 311 | 312 | Lasso 回归使用 L1 惩罚,产生稀疏系数。 参数`α`用于加权惩罚项。 Scikit Learn 的`LassoCV`使用许多不同的`α`值进行交叉验证。 313 | 314 | 观看 [Lasso 回归的 Coursera 视频](https://www.coursera.org/learn/machine-learning-data-analysis/lecture/0KIy7/what-is-lasso-regression),了解更多信息。 315 | 316 | ```py 317 | reg_regr = linear_model.LassoCV(n_alphas=10) 318 | 319 | reg_regr.fit(trn_feat, y_trn) 320 | 321 | ''' 322 | /home/jhoward/anaconda3/lib/python3.6/site-packages/sklearn/linear_model/coordinate_descent.py:484: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Fitting data with very small alpha may cause precision problems. 
323 | ConvergenceWarning) 324 | 325 | LassoCV(alphas=None, copy_X=True, cv=None, eps=0.001, fit_intercept=True, 326 | max_iter=1000, n_alphas=10, n_jobs=1, normalize=False, positive=False, 327 | precompute='auto', random_state=None, selection='cyclic', tol=0.0001, 328 | verbose=False) 329 | ''' 330 | 331 | reg_regr.alpha_ 332 | 333 | # 0.0098199431661591518 334 | 335 | regr_metrics(y_test, reg_regr.predict(test_feat)) 336 | 337 | # (50.0982471642817, 40.065199085003101) 338 | ``` 339 | 340 | ### 噪声 341 | 342 | 现在我们将为数据添加一些噪音。 343 | 344 | ```py 345 | idxs = np.random.randint(0, len(trn), 10) 346 | 347 | y_trn2 = np.copy(y_trn) 348 | y_trn2[idxs] *= 10 # label noise 349 | 350 | regr = linear_model.LinearRegression() 351 | regr.fit(trn, y_trn) 352 | regr_metrics(y_test, regr.predict(test)) 353 | 354 | # (51.1766253181518, 41.415992803872754) 355 | 356 | regr.fit(trn, y_trn2) 357 | regr_metrics(y_test, regr.predict(test)) 358 | 359 | # (62.66110319520415, 53.21914420254862) 360 | ``` 361 | 362 | Huber 损失是一种损失函数,对异常值的敏感度低于平方误差损失。 对于小的误差值,它是二次的,对于大的值,它是线性的。 363 | 364 | ![L(x)= \begin{cases} \frac{1}{2}x^2, & \text{for } \lvert x\rvert\leq \delta \\ \delta(\lvert x \rvert - \frac{1}{2}\delta), & \text{otherwise} \end{cases}](img/tex-6dabaeb72eb89ffd11a70a83032db2c6.gif) 365 | 366 | ```py 367 | hregr = linear_model.HuberRegressor() 368 | hregr.fit(trn, y_trn2) 369 | regr_metrics(y_test, hregr.predict(test)) 370 | 371 | # (51.24055602541746, 41.670840571376822) 372 | ``` 373 | -------------------------------------------------------------------------------- /5.md: -------------------------------------------------------------------------------- 1 | # 五、LU 分解 2 | 3 | `fbpca`和我们自己的`randomized_range_finder`方法都使用 LU 分解,它将矩阵分解为下三角矩阵和上三角矩阵的乘积。 4 | 5 | ### 高斯消元 6 | 7 | 本节基于 Trefethen 的 20-22 讲座。 8 | 9 | 如果你不熟悉高斯消元或需要复习,请观看[此可汗学院视频](https://www.khanacademy.org/math/precalculus/precalc-matrices/row-echelon-and-gaussian-elimination/v/matrices-reduced-row-echelon-form-2)。 10 | 11 | 让我们手动使用高斯消元来回顾: 12 | 13 | ![A=\begin{pmatrix} 1 & -2 & -2 & -3 \\ 3 & -9 & 0 & -9 \\ -1 & 2 & 4 & 7 \\ -3 & -6 & 26 & 2 \end{pmatrix}](img/tex-b49bd0fb74c5566485bc64026d7b25d0.gif) 14 | 15 | 答案: 16 | 17 | ![LU = \begin{bmatrix} 1 & 0 & 0 & 0\\ 3 & 1 & 0 & 0 \\ -1 & 0 & 1 & 0 \\ -3 & 4 & -2 & 1\end{bmatrix} \cdot \begin{bmatrix} 1 & -2 & -2 & -3 \\ 0 & -3 & 6 & 0 \\ 0 & 0 & 2 & 4 \\ 0 & 0 & 0 & 1 \end{bmatrix}](img/tex-5581e026c980f5465035922f6a3bd2b1.gif) 18 | 19 | 以上示例来自 Trefethen 的讲座 20,21。 20 | 21 | 高斯消元通过在左侧应用线性变换,将线性方程组变换为上三角形方程组。 它是三角形三角化。 22 | 23 | ![L_{m-1} \dots L_2 L_1 A = U](img/tex-7f584f910a157bc06c98a7bc04c2f6c2.gif) 24 | 25 | `L`是单位下三角形:所有对角线元素都是 1。 26 | 27 | ```py 28 | def LU(A): 29 | U = np.copy(A) 30 | m, n = A.shape 31 | L = np.eye(n) 32 | for k in range(n-1): 33 | for j in range(k+1,n): 34 | L[j,k] = U[j,k]/U[k,k] 35 | U[j,k:n] -= L[j,k] * U[k,k:n] 36 | return L, U 37 | 38 | A = np.array([[2,1,1,0],[4,3,3,1],[8,7,9,5],[6,7,9,8]]).astype(np.float) 39 | 40 | L, U = LU(A) 41 | 42 | np.allclose(A, L @ U) 43 | 44 | # True 45 | ``` 46 | 47 | LU分解很有用! 
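下面补充一个小示例(非原文内容),演示怎样利用刚算出的 `L` 和 `U`,按下文列出的两步三角求解来解线性方程组。这里假设 `A`、`L`、`U` 来自上面的 `LU` 函数,`b` 是任取的右端向量:

```py
# 补充示例:用 L、U 做两步三角求解(假设 A、L、U 来自上面的 LU 函数)
import numpy as np
from scipy.linalg import solve_triangular

b = np.array([1., 2., 3., 4.])

y = solve_triangular(L, b, lower=True)    # 第一步:前代,解 Ly = b
x = solve_triangular(U, y, lower=False)   # 第二步:回代,解 Ux = y

np.allclose(A @ x, b)

# True
```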
48 | 49 | 求解`Ax = b`变为`LUx = b`: 50 | 51 | + 找到`A = LU` 52 | + 解`Ly = b` 53 | + 解`Ux = y` 54 | + 完事 55 | 56 | ### 工作量 57 | 58 | 高斯消元的工作量:![2\cdot\frac{1}{3} n^3](img/tex-a73f577d94ab9875f2d8f378f0c206e6.gif) 59 | 60 | ### 内存 61 | 62 | 在上面,我们创建了两个新的矩阵,`L`和`U`。但是,我们可以将`L`和`U`的值存储在矩阵`A`中(覆盖原始矩阵)。 由于`L`的对角线都是 1,因此不需要存储。 在原地进行因式分解或计算,是数值线性代数中用于节省内存的常用技术。 注意:如果你将来需要再次使用原始矩阵`A`,则不希望这样做。 其中一个作业问题是重写 LU 方法来原地操作。 63 | 64 | 考虑矩阵: 65 | 66 | ![A = \begin{bmatrix} 10^{-20} & 1 \\ 1 & 1 \end{bmatrix}](img/tex-eba57389f737cede6f2839b89155acfc.gif) 67 | 68 | ```py 69 | A = np.array([[1e-20, 1], [1,1]]) 70 | ``` 71 | 72 | 手动使用高斯消元法计算`L`和`U`: 73 | 74 | ```py 75 | # 练习: 76 | 77 | np.set_printoptions(suppress=True) 78 | 79 | # 练习: 80 | 81 | L2, U2 = LU(A) 82 | 83 | ''' 84 | [[ 1.00000000e-20 1.00000000e+00] 85 | [ 0.00000000e+00 -1.00000000e+20]] 86 | ''' 87 | 88 | L2, U2 89 | 90 | ''' 91 | (array([[ 1.00000000e+00, 0.00000000e+00], 92 | [ 1.00000000e+20, 1.00000000e+00]]), 93 | array([[ 1.00000000e-20, 1.00000000e+00], 94 | [ 0.00000000e+00, -1.00000000e+20]])) 95 | ''' 96 | 97 | np.allclose(L1, L2) 98 | 99 | # True 100 | 101 | np.allclose(U1, U2) 102 | 103 | # True 104 | 105 | np.allclose(A, L2 @ U2) 106 | 107 | # False 108 | ``` 109 | 110 | 这是使用交换主元进行 LU 分解的动机。 111 | 112 | 这也说明 LU 分解是稳定的,但不是向后稳定的。 (剧透:即使部分交换主元,LU 对某些矩阵来说“爆炸性不稳定”,但在实践中稳定) 113 | 114 | ### 稳定性 115 | 116 | 问题`f`的算法 ![\hat f](img/tex-93184d704e544153075ad66ee50a11e6.gif) 是稳定的,如果对于每个`x`: 117 | 118 | ![\frac{\lVert \hat{f}(x) - f(y) \rVert}{ \lVert f(y) \rVert } = \mathcal{O}(\varepsilon_{machine})](img/tex-dfb3126ab90267fa6df10126a2151119.gif) 119 | 120 | 对于一些`y`: 121 | 122 | ![\frac{\lVert y - x \rVert }{\lVert x \rVert} = \mathcal{O}(\varepsilon_{machine})](img/tex-9638f37af1fca41b055dce638d17c7c9.gif) 123 | 124 | 一个稳定的算法几乎可以为几乎正确的问题提供正确的答案(Trefethen,第 104 页)。 125 | 126 | 翻译: 127 | 128 | + 正确的问题:`x` 129 | + 几乎正确的问题:`y` 130 | + 正确答案:`f` 131 | + 几乎正确的问题的正确答案:`f(y)` 132 | 133 | ### 向后稳定 134 | 135 | 向后稳定性比稳定性更强大,更简单。 136 | 137 | 问题`f`的算法 ![\hat f](img/tex-93184d704e544153075ad66ee50a11e6.gif) 是向后稳定的,如果对于每个`x`, 138 | 139 | ![\hat{f}(x) = f(y)](img/tex-988b9ac3871364bd5322846c8cf0d884.gif) 140 | 141 | 对于一些`y`: 142 | 143 | ![\frac{\lVert y - x \rVert }{\lVert x \rVert} = \mathcal{O}(\varepsilon_{machine})](img/tex-9638f37af1fca41b055dce638d17c7c9.gif) 144 | 145 | 向后稳定的算法为几乎正确的问题提供了正确的答案(Trefethen,第 104 页)。 146 | 147 | 翻译: 148 | 149 | + 正确的问题:`x` 150 | + 几乎正确的问题:`y` 151 | + 正确答案:`f` 152 | + 几乎正确的问题的正确答案:`f(y)` 153 | 154 | ### 带有交换主元的 LU 分解 155 | 156 | 让我们看看矩阵: 157 | 158 | ![\hat{A} = \begin{bmatrix} 1 & 1 \\ 10^{-20} & 1 \end{bmatrix}](img/tex-6ce3eb7d4cd7e67415afc2cb2cfad5ff.gif) 159 | 160 | ```py 161 | A = np.array([[1,1], [1e-20, 1]]) 162 | ``` 163 | 164 | 手动使用高斯消元法计算`L`和`U`: 165 | 166 | ![\hat{L} = \begin{bmatrix} 1 & 0 \\ 10^{-20} & 1 \end{bmatrix}](img/tex-984b1a7c514d0745420c25c796f068a9.gif) 167 | 168 | ![\hat{U} = \begin{bmatrix} 1 & 1 \\ 0 & 1 - 10^{-20} \end{bmatrix}](img/tex-83fa37f17de1ace1b15afc44510ba1de.gif) 169 | 170 | ```py 171 | L, U = LU(A) 172 | 173 | np.allclose(A, L @ U) 174 | 175 | # True 176 | ``` 177 | 178 | 想法:我们可以切换行的顺序,来获得更稳定的答案! 
这相当于乘以置换矩阵`P`。例如, 179 | 180 | ![\begin{bmatrix} 0 & 1 \\ 1 & 0 \end{bmatrix} \cdot \begin{bmatrix} 10^{-20} & 1 \\ 1 & 1 \end{bmatrix} = \begin{bmatrix} 1 & 1 \\ 10^{-20} & 1 \end{bmatrix}](img/tex-a37a721b9f00ed456388db50b0a45ac5.gif) 181 | 182 | ![PA = \hat{A}](img/tex-82e0e3e93e5381229b1fbf5342650527.gif) 183 | 184 | 对`PA`应用高斯消元。 185 | 186 | 在每个步骤中,选择列`k`中的最大值,并将该行移动到行`k`。 187 | 188 | ### 作业 189 | 190 | ```py 191 | def swap(a,b): 192 | temp = np.copy(a) 193 | a[:] = b 194 | b[:] = temp 195 | 196 | a=np.array([1,2,3]) 197 | b=np.array([3,2,1]) 198 | swap(a,b) 199 | a,b 200 | 201 | # 练习:重新编写上面的 LU 分解以使用交换主元 202 | ``` 203 | 204 | ### 示例 205 | 206 | ```py 207 | A = np.array([[2,1,1,0],[4,3,3,1],[8,7,9,5],[6,7,9,8]]).astype(np.float) 208 | 209 | L, U, P = LU_pivot(A) 210 | ``` 211 | 212 | 可以比较下面 Trefethen,第 159 页的答案: 213 | 214 | ```py 215 | A 216 | 217 | ''' 218 | array([[ 2., 1., 1., 0.], 219 | [ 4., 3., 3., 1.], 220 | [ 8., 7., 9., 5.], 221 | [ 6., 7., 9., 8.]]) 222 | ''' 223 | 224 | U 225 | 226 | ''' 227 | array([[ 8. , 7. , 9. , 5. ], 228 | [ 0. , 1.75 , 2.25 , 4.25 ], 229 | [ 0. , 0. , -0.28571429, 0.57142857], 230 | [ 0. , 0. , 0. , -2. ]]) 231 | ''' 232 | 233 | P 234 | 235 | ''' 236 | array([[ 0., 0., 1., 0.], 237 | [ 0., 0., 0., 1.], 238 | [ 1., 0., 0., 0.], 239 | [ 0., 1., 0., 0.]]) 240 | ''' 241 | ``` 242 | 243 | 部分交换主元可以置换行。 这是一种普遍的做法,这通常是 LU 分解的意思。 244 | 245 | 完全交换主元可以置换行和列。 完全交换主元非常耗时,很少在实践中使用。 246 | 247 | ### 示例 248 | 249 | 考虑方程组: 250 | 251 | ![\begin{bmatrix} 1 & 0 & 0 & 0 & 0 & 1 \\ -1 & 1 & 0 & 0 & 0 & 1 \\ -1 & -1 & 1 & 0 & 0 & 1 \\ -1 & -1 & -1 & 1 & 0 & 1 \\ -1 & -1 & -1 & -1 & 1 & 1 \\ -1 & -1 & -1 & -1 & -1 & 1 \end{bmatrix} \mathbf{x} = \begin{bmatrix} 1 \\ 1 \\ 1 \\ 1 \\ 2 \\ 1 \end{bmatrix}](img/tex-370268bfaa109db6f22e1b1dbb175b03.gif) 252 | 253 | ```py 254 | def make_matrix(n): 255 | A = np.eye(n) 256 | for i in range(n): 257 | A[i,-1] = 1 258 | for j in range(i): 259 | A[i,j] = -1 260 | return A 261 | 262 | def make_vector(n): 263 | b = np.ones(n) 264 | b[-2] = 2 265 | return b 266 | 267 | make_vector(7) 268 | 269 | # array([ 1., 1., 1., 1., 1., 2., 1.]) 270 | ``` 271 | 272 | ### 练习 273 | 274 | 练习:让我们在`5×5`方程组上使用高斯消元法。 275 | 276 | Scipy 也有这种功能。 让我们看看最后 5 个方程的解,其中`n = 10,20,30,40,50,60`。 277 | 278 | ```py 279 | np.set_printoptions(precision=3, suppress=True) 280 | 281 | ?scipy.linalg.solve 282 | 283 | for n, ls in zip(range(10, 70, 10), ['--', ':', '-', '-.', '--', ':']): 284 | soln = scipy.linalg.lu_solve(scipy.linalg.lu_factor(make_matrix(n)), make_vector(n)) 285 | plt.plot(soln[-5:], ls) 286 | print(soln[-5:]) 287 | 288 | ''' 289 | [-0.062 -0.125 -0.25 0.5 1.002] 290 | [-0.062 -0.125 -0.25 0.5 1. ] 291 | [-0.062 -0.125 -0.25 0.5 1. ] 292 | [-0.062 -0.125 -0.25 0.5 1. ] 293 | [-0.062 -0.125 -0.25 0.5 1. ] 294 | [ 0. 0. 0. 0. 1.] 295 | ''' 296 | ``` 297 | 298 | ![](img/5-1.png) 299 | 300 | 当`n = 60`时会发生什么? 
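作为补充(非原文内容),可以直接用数值实验观察这里发生了什么:下面的草稿计算该矩阵在不同 `n` 下的增长因子(正式定义见下面的定理),它随 `n` 指数增长;`n = 60` 时约为 `5.8e17`,放大后的误差与解本身同量级甚至更大,所以上面最后一行的解完全不对。这里假设沿用前面定义的 `make_matrix`:

```py
# 补充实验:增长因子 rho = max|u_ij| / max|a_ij|(定义见下文的定理)
import numpy as np
import scipy.linalg

for n in [10, 20, 40, 60]:
    A = make_matrix(n)                       # 假设使用上面定义的 make_matrix
    P, L, U = scipy.linalg.lu(A)             # 部分交换主元的 LU 分解
    rho = np.abs(U).max() / np.abs(A).max()
    print(n, rho, 2.0 ** (n - 1))

# 可以看到 rho 与 2^(n-1) 吻合;n = 60 时约为 5.8e17,
# 乘上机器精度(约 1e-16)后,误差已经达到 O(100) 量级
```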
301 | 302 | 定理:让矩阵`A`的因式分解`PA = LU`通过高斯消元和部分交换主元来计算。 所得矩阵(由计算机使用浮点算术) ![\hat P](img/tex-a599f5fe0ec2014228b548291962279c.gif),![\hat L](img/tex-94699469cb13f47d93ea4c76de5f21d0.gif) 和 ![\hat U](img/tex-646ab88a198f4afeb2c72b8e851b35ed.gif) 满足: 303 | 304 | ![\hat{L}\hat{U} = \hat{P} A + \delta A, \quad \frac{\delta A}{A} = \mathcal{O}(\rho \varepsilon_{machine})](img/tex-216fbe1d99dcd0f6c93ce475e7cfde71.gif) 305 | 306 | 其中`ρ`是增长因子。 307 | 308 | ![\rho = \frac{max_{i,j} \lvert u_{ij} \rvert }{max_{i,j} \lvert a_{ij} \rvert }](img/tex-49f9c1ce16068918f80263cd2aa8ce82.gif) 309 | 310 | 对于我们上面的矩阵,![\rho = 2^{m-1}](img/tex-e847c49ecde16aabe6054b63a48387e0.gif)。 311 | 312 | ### 理论上不稳定,实际上稳定 313 | 314 | 大多数算法(例如 QR)的稳定性很简单。 具有部分交换主元的高斯消元不是这种情况。 只有当`L`和/或`U`相对于`A`的大小较大时,才会出现高斯消元(有或没有交换主元)的不稳定性。 315 | 316 | Trefethen:“尽管有(22.4)这样的例子,部分交换主元的高斯消元在实践中是完全稳定的......在计算的五十年中,在自然环境下不会出现产生爆炸性不稳定性的矩阵问题。”【虽然人为的例子很容易构造】 317 | 318 | 虽然有些矩阵会导致不稳定,但由于统计原因,占所有矩阵的比例非常小,因此“从不”出现。 “如果你随机挑选十亿个矩阵,你几乎肯定找不到高斯消元不稳定的矩阵。” 319 | 320 | ### 扩展阅读 321 | 322 | + 高斯消元/ LU 分解 - Trefethn 讲座 20 323 | + 交换主元 - Trefethn 讲座 21 324 | + 高斯消除的稳定性 - Trefethn 讲座 22 325 | 326 | ## 随机投影发生了什么? 327 | 328 | 我们在下面的矩阵中采用线性组合(带有随机权重): 329 | 330 | ```py 331 | plt.figure(figsize=(12, 12)) 332 | plt.imshow(M, cmap='gray') 333 | 334 | # 335 | ``` 336 | 337 | ![](img/5-2.png) 338 | 339 | 这就像一个随机加权平均值。 如果你选取其中的几个,你最终会得到彼此不高度相关的列(大致正交)。 340 | 341 | Johnson-Lindenstrauss 引理:(来自维基百科)高维空间中的一小组点可以嵌入到更低维度的空间中,使得点之间的距离几乎保持不变。 342 | 343 | 我们期望,能够以保留相关结构的方式,减少数据的维度。 Johnson-Lindenstrauss 引理是这种类型的经典结果。 344 | 345 | ### 高斯消元的历史 346 | 347 | > [有趣的事实:高斯并没有发明高斯消元,但可能在 Cholesky 之前发现了 Cholesky 因子分解](https://t.co/CGPJqIWR7H) 348 | > 349 | > — Rachel Thomas (@math_rachel) [2017 年 6 月 6 日](https://twitter.com/math_rachel/status/872229937771495424?ref_src=twsrc%5Etfw) 350 | 351 | 根据维基百科,[Stigler 的 Eponymy 定律](https://en.m.wikipedia.org/wiki/Stigler%27s_law_of_eponymy):“没有任何科学发现以它的原始发现者命名。例子包括哈勃定律,它是由 Georges Lemaître 在 Edwin Hubble 两年之前得到的,毕达哥拉斯定理在毕达哥拉斯之前为巴比伦数学家所知,哈雷彗星是自公元前 240 年以来天文学家观察到的彗星。Stigler 本人将社会学家 Robert K. Merton 命名为 Stigler 定律的发现者,表明它遵循自己的法令,尽管这一现象之前曾被其他人注意到。” 352 | 353 | [迷人的高斯消元的历史](http://meyer.math.ncsu.edu/Meyer/PS_Files/GaussianEliminationHistory.pdf)。一些亮点: 354 | 355 | + 公元前 20 0年左右,高斯消元的第一个书面记录在中文书籍“九章算术”中。 356 | + 古代中国人使用彩色竹棒放在“计数板”的列中。 357 | + 日本数学家 Seki Kowa(1643-1708)在 1683 年之前推进了中国的淘汰消元,并发明了行列式。大约在同一时间,莱布尼兹独立地发现了相似的发现,但是 Kowa 和莱布尼兹都没有因为他们的发现而受到赞扬。 358 | + 高斯称消元方法是“众所周知的”并且从未声称已经发明了它,尽管他可能已经发明了 Cholesky 分解。 359 | 360 | [这里有更多历史](http://www.sciencedirect.com/science/article/pii/S0315086010000376) 361 | 362 | ### 加速高斯消元 363 | 364 | [并行 LU 分解](https://courses.engr.illinois.edu/cs554/fa2013/notes/06_lu_8up.pdf):LU 分解可以完全并行化 365 | 366 | [随机化 LU 分解](http://www.sciencedirect.com/science/article/pii/S1063520316300069)(2016 年文章):随机 LU 完全为在标准 GPU 上运行而实现,无需任何 GPU-CPU 数据传输。 367 | 368 | ### `scipy.linalg` vs `lu_solve` 369 | 370 | ```py 371 | n = 60 372 | A = make_matrix(n) 373 | b = make_vector(n) 374 | ``` 375 | 376 | 这个问题有很大的增长因子`= 259`。 我们使用`scipy.linalg.lu_solve`获得了错误的答案,但使用`scipy.linalg.solve`得到了正确的答案。什么是`scipy.linalg.solve`呢? 377 | 378 | ```py 379 | print(scipy.linalg.lu_solve(scipy.linalg.lu_factor(A), b)[-5:]) 380 | print(scipy.linalg.solve(A, b)[-5:]) 381 | 382 | ''' 383 | [ 0. 0. 0. 0. 1.] 384 | [-0.062 -0.125 -0.25 0.5 1. ] 385 | ''' 386 | 387 | %%timeit 388 | soln = scipy.linalg.lu_solve(scipy.linalg.lu_factor(A), b) 389 | soln[-5:] 390 | 391 | # 91.2 µs ± 192 ns per loop (mean ± std. dev. 
of 7 runs, 10000 loops each) 392 | 393 | %%timeit 394 | soln = scipy.linalg.solve(A, b) 395 | soln[-5:] 396 | 397 | # 153 µs ± 5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) 398 | ``` 399 | 400 | 查看`scipy`的源代码,我们看到它正在调用`LAPACK`例程`gesvx`。 这是`sgesvx`的 Fortran 源代码(`s`指的是单个,也有用于浮点的`dgesvx`和复数的`cgesvx`)。 在注释中,我们看到它正在计算 reciprocal 主元增长因子,因此它考虑了这个增长因子,并做了一些比普通的部分主元 LU 分解更复杂的事情。 401 | 402 | ## 分块矩阵 403 | 404 | ### 经典的矩阵乘法 405 | 406 | 问题:计算两个`n×n`的矩阵`A×B = C`的矩阵乘法的计算复杂度(大`O`)是多少? 407 | 408 | 你可以在 Codecademy 学习(或复习)大`O`。 409 | 410 | 它的样子是: 411 | 412 | ```py 413 | for i=1 to n 414 | {read row i of A into fast memory} 415 | for j=1 to n 416 | {read col j of B into fast memory} 417 | for k=1 to n 418 | C[i,j] += A[i,k] x B[k,j] 419 | {write C[i,j] back to slow memory} 420 | ``` 421 | 422 | 问题:进行了多少次读写操作? 423 | 424 | ### 分块矩阵相乘 425 | 426 | 将`A`,`B`,`C`分成大小为`N/n × N/n`的`N×N`个块。 427 | 428 | ![](img/block_matrix.png) 429 | 430 | > [来源](http://avishek.net/blog/?p=804) 431 | 432 | 它的样子是: 433 | 434 | ```py 435 | for i=1 to N 436 | for j=1 to N 437 | for k=1 to N 438 | {read block (i,k) of A} 439 | {read block (k,j) of B} 440 | block (i,j) of C += block of A times block of B 441 | {write block (i,j) of C back to slow memory} 442 | ``` 443 | 444 | 问题 1:这个的大`O`是什么? 445 | 446 | 问题 2:进行了多少次读写操作? 447 | -------------------------------------------------------------------------------- /2.md: -------------------------------------------------------------------------------- 1 | # 二、SVD 背景消除 2 | 3 | 我们今天的目标: 4 | 5 | ![](img/surveillance3.png) 6 | 7 | ## 加载和格式化数据 8 | 9 | 让我们使用 BMC 2012 背景建模挑战数据集中的[真实视频 003](http://bmc.iut-auvergne.com/?page_id=24)。 10 | 11 | 导入所需的库: 12 | 13 | ```py 14 | import imageio 15 | imageio.plugins.ffmpeg.download() 16 | 17 | ''' 18 | Imageio: 'ffmpeg.linux64' was not found on your computer; downloading it now. 19 | Try 1. Download from https://github.com/imageio/imageio-binaries/raw/master/ffmpeg/ffmpeg.linux64 (27.2 MB) 20 | Downloading: 8192/28549024 bytes (0.02260992/28549024 bytes (7.9%5455872/28549024 bytes (19.18790016/28549024 bytes (30.812189696/28549024 bytes (42.7%15687680/28549024 bytes (54.9%18898944/28549024 bytes (66.2%22134784/28549024 bytes (77.5%25518080/28549024 bytes (89.4%28549024/28549024 bytes (100.0%) 21 | Done 22 | File saved as /home/racheltho/.imageio/ffmpeg/ffmpeg.linux64. 23 | ''' 24 | 25 | import moviepy.editor as mpe 26 | import numpy as np 27 | import scipy 28 | 29 | %matplotlib inline 30 | import matplotlib.pyplot as plt 31 | 32 | np.set_printoptions(precision=4, suppress=True) 33 | 34 | video = mpe.VideoFileClip("movie/Video_003.avi") 35 | 36 | video.subclip(0,50).ipython_display(width=300) 37 | 38 | ''' 39 | 100%|█████████▉| 350/351 [00:00<00:00, 914.11it/s] 40 | ''' 41 | ``` 42 | 43 | 44 | 45 | ```py 46 | video.duration 47 | 48 | # 113.57 49 | ``` 50 | 51 | ### 辅助方法 52 | 53 | ```py 54 | def create_data_matrix_from_video(clip, fps=5, scale=50): 55 | return np.vstack([scipy.misc.imresize(rgb2gray(clip.get_frame(i/float(fps))).astype(int), 56 | scale).flatten() for i in range(fps * int(clip.duration))]).T 57 | 58 | def rgb2gray(rgb): 59 | return np.dot(rgb[...,:3], [0.299, 0.587, 0.114]) 60 | ``` 61 | 62 | ### 格式化数据 63 | 64 | 一个时刻的图像是120像素乘160像素(缩放时)。 我们可以将该图片展开为一个很高的列。 因此,我们不是拥有`120×160`的 2D 图像,而是拥有`1×19,200`的一列。 65 | 66 | 这不是人类可读的,但它很方便,因为它可以让我们将来自不同时间的图像叠加在一起,将视频全部放入 1 个矩阵中。 如果我们拍摄视频 100 秒,每隔百分之一秒一张图像(所以有 10,000 个不同的图像,每个图像来自不同的时间点),我们将拥有`10,000×19,200`的矩阵,代表视频! 
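在继续之前补充一点说明(非原文内容):注意矩阵的方向——下面的代码在 `vstack` 之后做了转置,所以每一列对应一帧展平后的像素,`M` 的形状是(像素数 × 帧数),与后面打印出的 `(19200, 6000)` 一致。下面用一个缩小版的假想例子演示这种“展平成列、再还原成图像”的做法:

```py
# 缩小版示例(非原文内容):把若干帧展平成列,再从某一列还原回二维图像
import numpy as np

frames = [np.random.rand(120, 160) for _ in range(5)]   # 用随机数代替 5 帧 120×160 的灰度图
M_small = np.vstack([f.flatten() for f in frames]).T    # 每帧展平成 19200 维向量,作为一列

M_small.shape

# (19200, 5)

np.allclose(np.reshape(M_small[:, 0], (120, 160)), frames[0])   # 第 0 列还原后就是第 0 帧

# True
```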
67 | 68 | ```py 69 | scale = 0.50 # Adjust scale to change resolution of image 70 | dims = (int(240 * scale), int(320 * scale)) 71 | fps = 60 # frames per second 72 | 73 | M = create_data_matrix_from_video(video.subclip(0,100), fps, scale) 74 | # M = np.load("movie/med_res_surveillance_matrix_60fps.npy") 75 | 76 | print(dims, M.shape) 77 | 78 | # (120, 160) (19200, 6000) 79 | 80 | plt.imshow(np.reshape(M[:,140], dims), cmap='gray'); 81 | ``` 82 | 83 | ![](img/2-1.png) 84 | 85 | 由于`create_data_from_matrix`有点慢,我们将保存矩阵。 通常,只要你的预处理步骤较慢,最好保存结果以备将来使用。 86 | 87 | ```py 88 | np.save("movie/med_res_surveillance_matrix_60fps.npy", M) 89 | 90 | plt.figure(figsize=(12, 12)) 91 | plt.imshow(M[::3,:], cmap='gray') 92 | 93 | # 94 | ``` 95 | 96 | ![](img/2-2.png) 97 | 98 | 问题:那些黑波浪线是什么? 水平线是什么? 99 | 100 | ## 奇异值分解 101 | 102 | ### SVD介绍 103 | 104 | “将矩阵分解为我们关心的更简单,更有意义的片段的便捷方式” - 大卫奥斯汀 105 | 106 | “我忘了学习的最重要的线性代数概念” - Daniel Lemire 107 | 108 | SVD的应用: 109 | 110 | + 主成分分析 111 | + 数据压缩 112 | + 伪逆 113 | + 协同过滤 114 | + 主题建模 115 | + 背景消除 116 | + 删除损坏的数据 117 | 118 | ```py 119 | U, s, V = np.linalg.svd(M, full_matrices=False) 120 | ``` 121 | 122 | 这非常慢,因此你可能希望保存结果以便将来使用。 123 | 124 | ```py 125 | np.save("movie/U.npy", U) 126 | np.save("movie/s.npy", s) 127 | np.save("move/V.npy", V) 128 | ``` 129 | 130 | 将来,你只需加载已保存的内容: 131 | 132 | ```py 133 | U = np.load("movie/U.npy") 134 | s = np.load("movie/s.npy") 135 | V = np.load("movie/V.npy") 136 | ``` 137 | 138 | `U, s, V`是什么样子? 139 | 140 | ```py 141 | U.shape, s.shape, V.shape 142 | 143 | # ((19200, 6000), (6000,), (6000, 6000)) 144 | ``` 145 | 146 | ### 练习 147 | 148 | 检查它们是否是`M`的分解。 149 | 150 | ```py 151 | # Exercise: 152 | 153 | # True 154 | ``` 155 | 156 | 他们正是。 157 | 158 | ```py 159 | np.allclose(M, reconstructed_matrix) 160 | 161 | # True 162 | 163 | np.set_printoptions(suppress=True, precision=0) 164 | ``` 165 | 166 | ### `s`的属性 167 | 168 | ```py 169 | np.diag(s[:6]) 170 | ``` 171 | 172 | 你看到`s`的顺序了吗? 173 | 174 | ```py 175 | s[0:2000:50] 176 | 177 | ''' 178 | array([ 1341720., 10528., 6162., 4235., 3174., 2548., 179 | 2138., 1813., 1558., 1346., 1163., 1001., 180 | 841., 666., 0., 0., 0., 0., 181 | 0., 0., 0., 0., 0., 0., 182 | 0., 0., 0., 0., 0., 0., 183 | 0., 0., 0., 0., 0., 0., 184 | 0., 0., 0., 0.]) 185 | ''' 186 | 187 | len(s) 188 | 189 | # 6000 190 | 191 | s[700] 192 | 193 | # 3.2309523518534773e-10 194 | 195 | np.set_printoptions(suppress=True, precision=4) 196 | ``` 197 | 198 | `U`是个巨大的矩阵,所以仅仅查看一小部分: 199 | 200 | ```py 201 | U[:5,:5] 202 | 203 | ''' 204 | array([[-0.0083, -0.0009, -0.0007, 0.003 , -0.0002], 205 | [-0.0083, -0.0013, -0.0005, 0.0034, -0.0001], 206 | [-0.0084, -0.0012, 0.0002, 0.0045, -0.0003], 207 | [-0.0085, -0.0011, 0.0001, 0.0044, -0. ], 208 | [-0.0086, -0.0013, -0.0002, 0.004 , 0.0001]]) 209 | ''' 210 | ``` 211 | 212 | ### 寻找背景 213 | 214 | ```py 215 | U.shape, s.shape, V.shape 216 | 217 | # ((19200, 6000), (6000,), (6000, 6000)) 218 | 219 | low_rank = np.expand_dims(U[:,0], 1) * s[0] * np.expand_dims(V[0,:], 0) 220 | 221 | plt.figure(figsize=(12, 12)) 222 | plt.imshow(low_rank, cmap='gray') 223 | 224 | # 225 | ``` 226 | 227 | ![](img/2-3.png) 228 | 229 | ```py 230 | plt.imshow(np.reshape(low_rank[:,0], dims), cmap='gray'); 231 | ``` 232 | 233 | ![](img/2-4.png) 234 | 235 | 如何得到里面的人? 
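补充说明(非原文):背景几乎不随时间变化,对应矩阵的低秩(这里是秩 1)部分;运动的人物则留在残差 `M - low_rank` 里。下面先用一个微型例子验证这个直觉,其中的尺寸和数值都是为演示假设的;随后的代码再对真实的 `M` 做同样的减法。

```py
import numpy as np

n_pix, n_frames = 6, 8
background = np.linspace(10, 20, n_pix)[:, None]       # 固定不变的“背景”(一列)
toy = np.tile(background, (1, n_frames))               # 每一帧(每一列)都是同一个背景
for t in range(n_frames):
    toy[t % n_pix, t] += 5                             # 一个逐帧移动的“亮点”,扮演人物

U_toy, s_toy, V_toy = np.linalg.svd(toy, full_matrices=False)
rank1 = s_toy[0] * np.outer(U_toy[:, 0], V_toy[0, :])  # 秩 1 近似,大致就是背景
residual = toy - rank1                                 # 残差里主要剩下移动的亮点
print(np.round(residual, 1))
```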
236 | 237 | ```py 238 | plt.imshow(np.reshape(M[:,0] - low_rank[:,0], dims), cmap='gray'); 239 | ``` 240 | 241 | ![](img/2-5.png) 242 | 243 | 高分辨率版本。 244 | 245 | ```py 246 | plt.imshow(np.reshape(M[:,140] - low_rank[:,140], dims), cmap='gray'); 247 | ``` 248 | 249 | ![](img/2-6.png) 250 | 251 | ### 制作视频 252 | 253 | 我受到了 fast.ai 学生萨米尔·穆萨(Samir Moussa)的启发来制作人物视频。 254 | 255 | ```py 256 | from moviepy.video.io.bindings import mplfig_to_npimage 257 | 258 | def make_video(matrix, dims, filename): 259 | mat_reshaped = np.reshape(matrix, (dims[0], dims[1], -1)) 260 | 261 | fig, ax = plt.subplots() 262 | def make_frame(t): 263 | ax.clear() 264 | ax.imshow(mat_reshaped[...,int(t*fps)]) 265 | return mplfig_to_npimage(fig) 266 | 267 | animation = mpe.VideoClip(make_frame, duration=int(10)) 268 | animation.write_videofile('videos/' + filename + '.mp4', fps=fps) 269 | 270 | make_video(M - low_rank, dims, "figures2") 271 | 272 | ''' 273 | [MoviePy] >>>> Building video videos/figures2.mp4 274 | [MoviePy] Writing video videos/figures2.mp4 275 | 276 | 100%|█████████▉| 600/601 [00:39<00:00, 15.22it/s] 277 | 278 | [MoviePy] Done. 279 | [MoviePy] >>>> Video ready: videos/figures2.mp4 280 | ''' 281 | ``` 282 | 283 | ![](img/2-7.png) 284 | 285 | ```py 286 | mpe.VideoFileClip("videos/figures2.mp4").subclip(0,10).ipython_display(width=300) 287 | 288 | # 100%|█████████▉| 600/601 [00:00<00:00, 858.48it/s] 289 | ``` 290 | 291 | 292 | 293 | ### SVD 分解不同尺寸矩阵的速度 294 | 295 | ```py 296 | import timeit 297 | import pandas as pd 298 | 299 | m_array = np.array([100, int(1e3), int(1e4)]) 300 | n_array = np.array([100, int(1e3), int(1e4)]) 301 | 302 | index = 303 | pd.MultiIndex.from_product([m_array, n_array], names=['# rows', '# cols']) 304 | 305 | pd.options.display.float_format = '{:,.3f}'.format 306 | df = pd.DataFrame(index=m_array, columns=n_array) 307 | 308 | # %%prun 309 | for m in m_array: 310 | for n in n_array: 311 | A = np.random.uniform(-40,40,[m,n]) 312 | t = timeit.timeit('np.linalg.svd(A, full_matrices=False)', number=3, globals=globals()) 313 | df.set_value(m, n, t) 314 | 315 | df/3 316 | ``` 317 | 318 | | | 100 | 1000 | 10000 | 319 | | --- | --- | --- | --- | 320 | | 100 | 0.006 | 0.009 | 0.043 | 321 | | 1000 | 0.004 | 0.259 | 0.992 | 322 | | 10000 | 0.019 | 0.984 | 218.726 | 323 | 324 | ### 一个视频中的两个背景 325 | 326 | 我们现在使用 BMC 2012 背景建模挑战数据集中的真实视频 008,以及上面使用的 003。 327 | 328 | ```py 329 | from moviepy.editor import concatenate_videoclips 330 | 331 | video2 = mpe.VideoFileClip("movie/Video_008.avi") 332 | 333 | concat_video = concatenate_videoclips([video2.subclip(0,20), video.subclip(0,10)]) 334 | concat_video.write_videofile("movie/concatenated_video.mp4") 335 | 336 | ''' 337 | [MoviePy] >>>> Building video movie/concatenated_video.mp4 338 | [MoviePy] Writing video movie/concatenated_video.mp4 339 | 340 | 100%|█████████▉| 300/301 [00:00<00:00, 481.76it/s] 341 | 342 | [MoviePy] Done. 
343 | [MoviePy] >>>> Video ready: movie/concatenated_video.mp4 344 | ''' 345 | 346 | concat_video = mpe.VideoFileClip("movie/concatenated_video.mp4") 347 | ``` 348 | 349 | ### 现在回到我们的背景消除问题 350 | 351 | ```py 352 | concat_video.ipython_display(width=300, maxduration=160) 353 | 354 | # 100%|█████████▉| 300/301 [00:00<00:00, 533.88it/s] 355 | ``` 356 | 357 | 358 | 359 | ```py 360 | scale = 0.5 # 调整比例来更改图像的分辨率 361 | dims = (int(240 * scale), int(320 * scale)) 362 | 363 | N = create_data_matrix_from_video(concat_video, fps, scale) 364 | # N = np.load("low_res_traffic_matrix.npy") 365 | np.save("med_res_concat_video.npy", N) 366 | 367 | N.shape 368 | 369 | # (19200, 1800) 370 | 371 | plt.imshow(np.reshape(N[:,200], dims), cmap='gray'); 372 | ``` 373 | 374 | ![](img/2-8.png) 375 | 376 | ```py 377 | U_concat, s_concat, V_concat = np.linalg.svd(N, full_matrices=False) 378 | ``` 379 | 380 | 这很慢,因此你可能希望保存结果以便将来使用。 381 | 382 | ```py 383 | np.save("movie/U_concat.npy", U_concat) 384 | np.save("movie/s_concat.npy", s_concat) 385 | np.save("movie/V_concat.npy", V_concat) 386 | ``` 387 | 388 | 将来,你只需加载已保存的内容: 389 | 390 | ```py 391 | U_concat = np.load("movie/U_concat.npy") 392 | s_concat = np.load("movie/s_concat.npy") 393 | V_concat = np.load("movie/V_concat.npy") 394 | 395 | low_rank = U_concat[:,:10] @ np.diag(s_concat[:10]) @ V_concat[:10,:] 396 | ``` 397 | 398 | `U`的最小主成分: 399 | 400 | ```py 401 | plt.imshow(np.reshape(U_concat[:, 1], dims), cmap='gray') 402 | 403 | # 404 | ``` 405 | 406 | ![](img/2-9.png) 407 | 408 | ```py 409 | plt.imshow(np.reshape(U_concat[:, 2], dims), cmap='gray') 410 | 411 | # 412 | ``` 413 | 414 | ![](img/2-10.png) 415 | 416 | ```py 417 | plt.imshow(np.reshape(U_concat[:, 3], dims), cmap='gray') 418 | 419 | # 420 | ``` 421 | 422 | ![](img/2-11.png) 423 | 424 | 425 | 背景移除: 426 | 427 | ```py 428 | plt.imshow(np.reshape((N - low_rank)[:, -40], dims), cmap='gray') 429 | 430 | # 431 | ``` 432 | 433 | ![](img/2-12.png) 434 | 435 | ```py 436 | plt.imshow(np.reshape((N - low_rank)[:, 240], dims), cmap='gray') 437 | 438 | # 439 | ``` 440 | 441 | ![](img/2-13.png) 442 | 443 | ### 数据压缩旁注 444 | 445 | 假设我们采用 700 个奇异值(记住,总共有 10000 个奇异值)。 446 | 447 | ```py 448 | s[0:1000:50] 449 | 450 | ''' 451 | array([ 1341719.6552, 10527.5148, 6162.0638, 4234.9367, 452 | 3174.0389, 2548.4273, 2138.1887, 1812.9873, 453 | 1557.7163, 1345.805 , 1163.2866, 1000.5186, 454 | 841.4604, 665.7271, 0. , 0. , 455 | 0. , 0. , 0. , 0. ]) 456 | ''' 457 | 458 | k = 700 459 | compressed_M = U[:,:k] @ np.diag(s[:k]) @ V[:k,:] 460 | 461 | plt.figure(figsize=(12, 12)) 462 | plt.imshow(compressed_M, cmap='gray') 463 | 464 | # 465 | ``` 466 | 467 | ![](img/2-14.png) 468 | 469 | ```py 470 | plt.imshow(np.reshape(compressed_M[:,140], dims), cmap='gray'); 471 | ``` 472 | 473 | ![](img/2-15.png) 474 | 475 | ```py 476 | np.allclose(compressed_M, M) 477 | 478 | # True 479 | 480 | np.linalg.norm(M - compressed_M) 481 | 482 | # 2.864899899979104e-09 483 | 484 | U[:,:k].shape, s[:k].shape, V[:k,:].shape 485 | 486 | # ((19200, 700), (700,), (700, 6000)) 487 | ``` 488 | 489 | 节省的空间为对于 700 个奇异值的`U, s, V`中的数据比原始矩阵。 490 | 491 | ```py 492 | ((19200 + 1 + 6000) * 700) / (19200 * 6000) 493 | 494 | # 0.1531310763888889 495 | ``` 496 | 497 | 我们只需要存储 15.3% 的数据,并且可以将精度保持在`1e-5`! 很棒! 498 | 499 | 很漂亮!!!但... 
500 | SVD 的运行时复杂度为`O(min(m^2 n, m n^2))` 501 | 缺点:这真的很慢(同样,我们摒弃了很多计算)。 502 | 503 | ```py 504 | %time u, s, v = np.linalg.svd(M, full_matrices=False) 505 | 506 | ''' 507 | CPU times: user 5min 38s, sys: 1.53 s, total: 5min 40s 508 | Wall time: 57.1 s 509 | ''' 510 | 511 | M.shape 512 | 513 | # (19200, 6000) 514 | ``` 515 | -------------------------------------------------------------------------------- /10.md: -------------------------------------------------------------------------------- 1 | # 十、实现 QR 分解 2 | 3 | 我们在计算特征值时使用 QR 分解并计算最小二乘回归。 它是数值线性代数中的重要组成部分。 4 | 5 | “数值线性代数中的一种算法比其他算法更重要:QR 分解。” --Trefethen,第 48 页 6 | 7 | 回想一下,对于任何矩阵`A`,`A = QR`,其中`Q`是正交的,`R`是上三角。 8 | 9 | 提醒:我们在上一课中看到的 QR 算法使用 QR 分解,但不要混淆二者。 10 | 11 | ### NumPy 中 12 | 13 | ```py 14 | import numpy as np 15 | 16 | np.set_printoptions(suppress=True, precision=4) 17 | 18 | n = 5 19 | A = np.random.rand(n,n) 20 | npQ, npR = np.linalg.qr(A) 21 | ``` 22 | 23 | 检查`Q`是正交的: 24 | 25 | ```py 26 | np.allclose(np.eye(n), npQ @ npQ.T), np.allclose(np.eye(n), npQ.T @ npQ) 27 | 28 | # (True, True) 29 | ``` 30 | 31 | 检查`R`是三角。 32 | 33 | ```py 34 | npR 35 | 36 | ''' 37 | array([[-0.8524, -0.7872, -1.1163, -1.2248, -0.7587], 38 | [ 0. , -0.9363, -0.2958, -0.7666, -0.632 ], 39 | [ 0. , 0. , 0.4645, -0.1744, -0.3542], 40 | [ 0. , 0. , 0. , 0.4328, -0.2567], 41 | [ 0. , 0. , 0. , 0. , 0.1111]]) 42 | ''' 43 | ``` 44 | 45 | 当向量`b`投影到直线`a`上时,其投影`p`是`b`沿着直线`a`的一部分。 46 | 47 | 让我们看看 [沉浸式线性代数在线版](http://immersivemath.com/ila/index.html)的[第 3.2.2 节:投影](http://immersivemath.com/ila/ch03_dotproduct/ch03.html)的交互图。 48 | 49 | ![](img/projection_line.png) 50 | 51 | > 来源:[沉浸式数学](http://immersivemath.com/ila/ch03_dotproduct/ch03.html) 52 | 53 | 以下是将向量投影到平面上的样子: 54 | 55 | ![](img/projection.png) 56 | 57 | > 来源:[最小二乘回归的线性代数视角](https://medium.com/@andrew.chamberlain/the-linear-algebra-view-of-least-squares-regression-f67044b7f39b) 58 | 59 | 当向量`b`投影到直线`a`上时,其投影`p`是`b`沿着直线`a`的一部分。 所以`p`是`a`的一些倍数。 设 ![\mathbf{p} = \hat{x}\mathbf{a}](img/tex-a32cff3b4bdd4f46771f4cc817a60308.gif) 其中 ![\hat{x}](img/tex-2a95aaaf954c2187999c6357b04a58dd.gif) 是标量。 60 | 61 | ### 正交性 62 | 63 | 投影的关键是正交性:从`b`到`p`的直线(可以写成 ![\mathbf{b} - \hat{x}\mathbf{a}](img/tex-bb4e955e77268f56bb0aa2b892c69ea5.gif))垂直于`a`。 64 | 65 | 这意味着: 66 | 67 | ![\mathbf{a} \cdot (\mathbf{b} - \hat{x}\mathbf{a}) = 0](img/tex-69d4b6b19f75f7e5b87932c6e2d651ae.gif) 68 | 69 | 所以: 70 | 71 | ![\hat{x} = \frac{\mathbf{a} \cdot \mathbf{b}}{\mathbf{a} \cdot \mathbf{a}}](img/tex-e188ba54d741c47cccdec77b8ef7c5e5.gif) 72 | 73 | ## Gram-Schmidt 74 | 75 | ### 经典的 Gram-Schmidt(不稳定) 76 | 77 | 对于每列`j`,计算单一投影: 78 | 79 | ![v_j = P_ja_j](img/tex-efd8ffa0fdc4e994808665f543b0fc78.gif) 80 | 81 | 其中 ![P_j](img/tex-7f57ce5c29b329529f4e3f9a3765b114.gif) 与 ![q_1, ..., q_{j-1}](img/tex-e2d55fffd53a113312172c3c24cc951c.gif) 的跨度正交的空间。 82 | 83 | ```py 84 | def cgs(A): 85 | m, n = A.shape 86 | Q = np.zeros([m,n], dtype=np.float64) 87 | R = np.zeros([n,n], dtype=np.float64) 88 | for j in range(n): 89 | v = A[:,j] 90 | for i in range(j): 91 | R[i,j] = np.dot(Q[:,i], A[:,j]) 92 | v = v - (R[i,j] * Q[:,i]) 93 | R[j,j] = np.linalg.norm(v) 94 | Q[:, j] = v / R[j,j] 95 | return Q, R 96 | 97 | Q, R = cgs(A) 98 | 99 | np.allclose(A, Q @ R) 100 | 101 | # True 102 | ``` 103 | 104 | 检查`Q`是酉矩阵。 105 | 106 | ```py 107 | np.allclose(np.eye(len(Q)), Q.dot(Q.T)) 108 | 109 | # True 110 | 111 | np.allclose(npQ, -Q) 112 | 113 | # True 114 | 115 | R 116 | 117 | ''' 118 | array([[ 0.02771, 0.02006, -0.0164 , ..., 0.00351, 0.00198, 0.00639], 119 | [ 0. 
, 0.10006, -0.00501, ..., 0.07689, -0.0379 , -0.03095], 120 | [ 0. , 0. , 0.01229, ..., 0.01635, 0.02988, 0.01442], 121 | ..., 122 | [ 0. , 0. , 0. , ..., 0. , -0. , -0. ], 123 | [ 0. , 0. , 0. , ..., 0. , 0. , -0. ], 124 | [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]]) 125 | ''' 126 | ``` 127 | 128 | Gram-Schmidt 应该让你想起来一点 Arnoldi 迭代(用于将矩阵转换为海森堡形式),因为它也是一个结构化的正交化。 129 | 130 | ### 改进版 Gram-Schmidt 131 | 132 | 经典(不稳定的)Gram-Schmidt:对于每列`j`,计算单一投影: 133 | 134 | ![v_j = P_ja_j](img/tex-efd8ffa0fdc4e994808665f543b0fc78.gif) 135 | 136 | 其中 ![P_j](img/tex-7f57ce5c29b329529f4e3f9a3765b114.gif) 与 ![q_1, ..., q_{j-1}](img/tex-e2d55fffd53a113312172c3c24cc951c.gif) 的跨度正交的空间。 137 | 138 | 改进版 Gram-Schmidt:对于每列`j`,计算`n - 1`个投影: 139 | 140 | ![P_j = P_{\perp q_{j-1}\cdots\perp q_{2}\perp q_{1}}](img/tex-6f2e0e91ebd226538ef465b081f4468f.gif) 141 | 142 | ```py 143 | import numpy as np 144 | n = 3 145 | A = np.random.rand(n,n).astype(np.float64) 146 | 147 | def cgs(A): 148 | m, n = A.shape 149 | Q = np.zeros([m,n], dtype=np.float64) 150 | R = np.zeros([n,n], dtype=np.float64) 151 | for j in range(n): 152 | v = A[:,j] 153 | for i in range(j): 154 | R[i,j] = np.dot(Q[:,i], A[:,j]) 155 | v = v - (R[i,j] * Q[:,i]) 156 | R[j,j] = np.linalg.norm(v) 157 | Q[:, j] = v / R[j,j] 158 | return Q, R 159 | 160 | def mgs(A): 161 | V = A.copy() 162 | m, n = A.shape 163 | Q = np.zeros([m,n], dtype=np.float64) 164 | R = np.zeros([n,n], dtype=np.float64) 165 | for i in range(n): 166 | R[i,i] = np.linalg.norm(V[:,i]) 167 | Q[:,i] = V[:,i] / R[i,i] 168 | for j in range(i, n): 169 | R[i,j] = np.dot(Q[:,i],V[:,j]) 170 | V[:,j] = V[:,j] - R[i,j]*Q[:,i] 171 | return Q, R 172 | 173 | Q, R = mgs(A) 174 | 175 | np.allclose(np.eye(len(Q)), Q.dot(Q.T.conj())) 176 | 177 | # True 178 | 179 | np.allclose(A, np.matmul(Q,R)) 180 | 181 | # True 182 | ``` 183 | 184 | ## Householder 185 | 186 | ### 引言 187 | 188 | ![\begin{array}{ l | l | c } \hline Gram-Schmidt & Triangular\, Orthogonalization & A R_1 R_2 \cdots R_n = Q \\ Householder & Orthogonal\, Triangularization & Q_n \cdots Q_2 Q_1 A = R \\ \hline \end{array}](img/tex-3fee5b26f9c6ce2571715fb9b282f431.gif) 189 | 190 | Householder 反射产生更接近正交的矩阵`Q`,具有舍入误差 191 | 192 | Gram-Schmidt 可以部分停止,留下`A`的前`n`列的简化 QR。 193 | 194 | ### 初始化 195 | 196 | ```py 197 | import numpy as np 198 | n = 4 199 | A = np.random.rand(n,n).astype(np.float64) 200 | 201 | Q = np.zeros([n,n], dtype=np.float64) 202 | R = np.zeros([n,n], dtype=np.float64) 203 | 204 | A 205 | 206 | ''' 207 | array([[ 0.5435, 0.6379, 0.4011, 0.5773], 208 | [ 0.0054, 0.8049, 0.6804, 0.0821], 209 | [ 0.2832, 0.2416, 0.8656, 0.8099], 210 | [ 0.1139, 0.9621, 0.7623, 0.5648]]) 211 | ''' 212 | 213 | from scipy.linalg import block_diag 214 | 215 | np.set_printoptions(5) 216 | ``` 217 | 218 | ### 算法 219 | 220 | 我添加了更多的计算和更多的信息,因为它说明了算法的工作原理。 此版本也返回 Householder 反射。 221 | 222 | ```py 223 | def householder_lots(A): 224 | m, n = A.shape 225 | R = np.copy(A) 226 | V = [] 227 | Fs = [] 228 | for k in range(n): 229 | v = np.copy(R[k:,k]) 230 | v = np.reshape(v, (n-k, 1)) 231 | v[0] += np.sign(v[0]) * np.linalg.norm(v) 232 | v /= np.linalg.norm(v) 233 | R[k:,k:] = R[k:,k:] - 2*np.matmul(v, np.matmul(v.T, R[k:,k:])) 234 | V.append(v) 235 | F = np.eye(n-k) - 2 * np.matmul(v, v.T)/np.matmul(v.T, v) 236 | Fs.append(F) 237 | return R, V, Fs 238 | ``` 239 | 240 | 检查`R`是上三角。 241 | 242 | ```py 243 | R 244 | 245 | ''' 246 | array([[-0.62337, -0.84873, -0.88817, -0.97516], 247 | [ 0. , -1.14818, -0.86417, -0.30109], 248 | [ 0. , 0. , -0.64691, -0.45234], 249 | [-0. , 0. , 0. 
, -0.26191]]) 250 | ''' 251 | ``` 252 | 253 | 作为检查,我们将使用分块矩阵`F`计算 ![Q^T](img/tex-8b7f1b39b8e0258e80aea39ca1c24265.gif) 和`R`。矩阵`F`是 householder 反射。 254 | 255 | 请注意,这不是一种处理`Q`的有效计算方式。在大多数情况下,你实际上并不需要`Q`。例如,如果你使用 QR 来求解最小二乘,则只需要`Q * b`。 256 | 257 | + 对于隐式计算乘积`Q * b`或`Qx`的技巧,请参阅 Trefethen 第 74 页。 258 | + 请参阅[这些讲义](http://www.cs.cornell.edu/~bindel/class/cs6210-f09/lec18.pdf),了解 Householder 的不同实现,它同时计算`Q`,作为`R`的一部分。 259 | 260 | ```py 261 | QT = np.matmul(block_diag(np.eye(3), F[3]), 262 | np.matmul(block_diag(np.eye(2), F[2]), 263 | np.matmul(block_diag(np.eye(1), F[1]), F[0]))) 264 | 265 | F[1] 266 | 267 | ''' 268 | array([[-0.69502, 0.10379, -0.71146], 269 | [ 0.10379, 0.99364, 0.04356], 270 | [-0.71146, 0.04356, 0.70138]]) 271 | ''' 272 | 273 | block_diag(np.eye(1), F[1]) 274 | 275 | ''' 276 | array([[ 1. , 0. , 0. , 0. ], 277 | [ 0. , -0.69502, 0.10379, -0.71146], 278 | [ 0. , 0.10379, 0.99364, 0.04356], 279 | [ 0. , -0.71146, 0.04356, 0.70138]]) 280 | ''' 281 | 282 | block_diag(np.eye(2), F[2]) 283 | 284 | ''' 285 | array([[ 1. , 0. , 0. , 0. ], 286 | [ 0. , 1. , 0. , 0. ], 287 | [ 0. , 0. , -0.99989, 0.01452], 288 | [ 0. , 0. , 0.01452, 0.99989]]) 289 | ''' 290 | 291 | block_diag(np.eye(3), F[3]) 292 | 293 | ''' 294 | array([[ 1., 0., 0., 0.], 295 | [ 0., 1., 0., 0.], 296 | [ 0., 0., 1., 0.], 297 | [ 0., 0., 0., -1.]]) 298 | ''' 299 | 300 | np.matmul(block_diag(np.eye(1), F[1]), F[0]) 301 | 302 | ''' 303 | array([[-0.87185, -0.00861, -0.45431, -0.18279], 304 | [ 0.08888, -0.69462, 0.12536, -0.70278], 305 | [-0.46028, 0.10167, 0.88193, -0.00138], 306 | [-0.14187, -0.71211, 0.00913, 0.68753]]) 307 | ''' 308 | 309 | QT 310 | 311 | ''' 312 | array([[-0.87185, -0.00861, -0.45431, -0.18279], 313 | [ 0.08888, -0.69462, 0.12536, -0.70278], 314 | [ 0.45817, -0.112 , -0.88171, 0.01136], 315 | [ 0.14854, 0.71056, -0.02193, -0.68743]]) 316 | ''' 317 | 318 | R2 = np.matmul(block_diag(np.eye(3), F[3]), 319 | np.matmul(block_diag(np.eye(2), F[2]), 320 | np.matmul(block_diag(np.eye(1), F[1]), 321 | np.matmul(F[0], A)))) 322 | 323 | np.allclose(A, np.matmul(np.transpose(QT), R2)) 324 | 325 | # True 326 | 327 | np.allclose(R, R2) 328 | 329 | # True 330 | ``` 331 | 332 | 这是 Householder 的简洁版本(尽管我创建了一个新的`R`,而不是覆盖`A`,并原地计算它)。 333 | 334 | ```py 335 | def householder(A): 336 | m, n = A.shape 337 | R = np.copy(A) 338 | Q = np.eye(m) 339 | V = [] 340 | for k in range(n): 341 | v = np.copy(R[k:,k]) 342 | v = np.reshape(v, (n-k, 1)) 343 | v[0] += np.sign(v[0]) * np.linalg.norm(v) 344 | v /= np.linalg.norm(v) 345 | R[k:,k:] = R[k:,k:] - 2 * v @ v.T @ R[k:,k:] 346 | V.append(v) 347 | return R, V 348 | 349 | RH, VH = householder(A) 350 | ``` 351 | 352 | 检查`R`是对角的。 353 | 354 | ```py 355 | RH 356 | 357 | ''' 358 | array([[-0.62337, -0.84873, -0.88817, -0.97516], 359 | [-0. , -1.14818, -0.86417, -0.30109], 360 | [-0. , -0. , -0.64691, -0.45234], 361 | [-0. , 0. , 0. 
, -0.26191]]) 362 | ''' 363 | 364 | VH 365 | 366 | ''' 367 | [array([[ 0.96743], 368 | [ 0.00445], 369 | [ 0.2348 ], 370 | [ 0.09447]]), array([[ 0.9206 ], 371 | [-0.05637], 372 | [ 0.38641]]), array([[ 0.99997], 373 | [-0.00726]]), array([[ 1.]])] 374 | ''' 375 | 376 | np.allclose(R, RH) 377 | 378 | # True 379 | 380 | def implicit_Qx(V,x): 381 | n = len(x) 382 | for k in range(n-1,-1,-1): 383 | x[k:n] -= 2*np.matmul(v[-k], np.matmul(v[-k], x[k:n])) 384 | 385 | A 386 | 387 | ''' 388 | array([[ 0.54348, 0.63791, 0.40114, 0.57728], 389 | [ 0.00537, 0.80485, 0.68037, 0.0821 ], 390 | [ 0.2832 , 0.24164, 0.86556, 0.80986], 391 | [ 0.11395, 0.96205, 0.76232, 0.56475]]) 392 | ''' 393 | ``` 394 | 395 | 经典和改良的 Gram-Schmidt 都需要`2mn^2`个浮点运算。 396 | 397 | ### 陷阱 398 | 399 | 有些事情需要注意: 400 | 401 | + 当你复制值时 VS 当你有两个指向同一内存位置的变量时 402 | + 长度为`n`的向量与`1 x n`矩阵之间的差异(`np.matmul`以不同方式处理它们) 403 | 404 | ## 类比 405 | 406 | 407 | | | `A=QR` | `A=QHQ*` | 408 | | --- | --- | --- | 409 | | 正交结构化 | Householder | Householder | 410 | | 结构化正交 | Gram-Schmidt | Arnoldi | 411 | 412 | Gram-Schmidt 和 Arnoldi:连续的三角运算,可以部分停止,前`n`列是正确的。 413 | 414 | Householder:连续的正交运算。 在存在舍入误差的情况下产生更接近正交的`A`。 415 | 416 | 请注意,要计算海森堡化简`A = QHQ *`,将 Householder 反射应用于`A`的两侧,而不是仅应用于一侧。 417 | 418 | ## 示例 419 | 420 | 以下示例来自 Trefethen 和 Bau 的第 9 讲,尽管从 MATLAB 翻译成 Python。 421 | 422 | ### 示例:经典与改进的 Gram-Schmidt 423 | 424 | 这个例子是 Trefethen 第 9 节的实验 2。 我们想要构造一个方阵`A`,它具有随机奇异向量和广泛变化的奇异值,间隔为 ![2^{-1}](img/tex-3522f8b8d6b2b912a41177e213f96dd2.gif) 和 ![2^{-(n + 1)}](img/tex-9eee2c1c197de7cf3b522d1cf624846f.gif) 之间的 2 的倍数。 425 | 426 | ```py 427 | import matplotlib.pyplot as plt 428 | from matplotlib import rcParams 429 | %matplotlib inline 430 | 431 | n = 100 432 | U, X = np.linalg.qr(np.random.randn(n,n)) # 将 U 设为随机正交矩阵 433 | V, X = np.linalg.qr(np.random.randn(n,n)) # 将 V 设为随机正交矩阵 434 | S = np.diag(np.power(2,np.arange(-1,-(n+1),-1), dtype=float)) # 将 S 设为对角矩阵 w/ exp 435 | # 值在 2^-1 和 2^-(n+1) 之间 436 | 437 | A = np.matmul(U,np.matmul(S,V)) 438 | 439 | QC, RC = cgs(A) 440 | QM, RM = mgs(A) 441 | 442 | plt.figure(figsize=(10,10)) 443 | plt.semilogy(np.diag(S), 'r.', basey=2, label="True Singular Values") 444 | plt.semilogy(np.diag(RM), 'go', basey=2, label="Modified Gram-Shmidt") 445 | plt.semilogy(np.diag(RC), 'bx', basey=2, label="Classic Gram-Shmidt") 446 | plt.legend() 447 | rcParams.update({'font.size': 18}) 448 | ``` 449 | 450 | ![](img/10-1.png) 451 | 452 | ```py 453 | type(A[0,0]), type(RC[0,0]), type(S[0,0]) 454 | 455 | # (numpy.float64, numpy.float64, numpy.float64) 456 | 457 | eps = np.finfo(np.float64).eps; eps 458 | 459 | # 2.2204460492503131e-16 460 | 461 | np.log2(eps), np.log2(np.sqrt(eps)) 462 | 463 | # (-52.0, -26.0) 464 | ``` 465 | 466 | ### 示例:正交性的数值损失 467 | 468 | 这个例子是 Trefethen 第 9 节的实验 3。 469 | 470 | ```py 471 | A = np.array([[0.70000, 0.70711], [0.70001, 0.70711]]) 472 | 473 | A 474 | 475 | ''' 476 | array([[ 0.7 , 0.70711], 477 | [ 0.70001, 0.70711]]) 478 | ''' 479 | ``` 480 | 481 | Gram-Schmidt: 482 | 483 | ```py 484 | Q1, R1 = mgs(A) 485 | ``` 486 | 487 | Householder: 488 | 489 | ```py 490 | R2, V, F = householder_lots(A) 491 | Q2T = np.matmul(block_diag(np.eye(1), F[1]), F[0]) 492 | ``` 493 | 494 | NumPy 的 Householder: 495 | 496 | ```py 497 | Q3, R3 = np.linalg.qr(A) 498 | ``` 499 | 500 | 检查 QR 分解是否能用: 501 | 502 | ```py 503 | np.matmul(Q1, R1) 504 | 505 | ''' 506 | array([[ 0.7 , 0.7071], 507 | [ 0.7 , 0.7071]]) 508 | ''' 509 | 510 | np.matmul(Q2T.T, R2) 511 | 512 | ''' 513 | array([[ 0.7 , 0.7071], 514 | [ 0.7 , 0.7071]]) 515 | ''' 516 | 
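# (补充,非原文)三种分解的重建误差 ‖A - QR‖ 都非常小(机器精度量级),
# 单看“能否还原 A”区分不出它们;真正的差别要看下面对 Q 正交性的检查。
print(np.linalg.norm(A - Q1 @ R1),
      np.linalg.norm(A - Q2T.T @ R2),
      np.linalg.norm(A - Q3 @ R3))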
517 | np.matmul(Q3, R3) 518 | 519 | ''' 520 | array([[ 0.7 , 0.7071], 521 | [ 0.7 , 0.7071]]) 522 | ''' 523 | ``` 524 | 525 | 检查`Q`多么接近完美正交。 526 | 527 | ```py 528 | np.linalg.norm(np.matmul(Q1.T, Q1) - np.eye(2)) # 改进的 Gram-Schmidt 529 | 530 | # 3.2547268868202263e-11 531 | 532 | np.linalg.norm(np.matmul(Q2T.T, Q2T) - np.eye(2)) # 我们的 Householder 实现 533 | 534 | # 1.1110522984689321e-16 535 | 536 | np.linalg.norm(np.matmul(Q3.T, Q3) - np.eye(2)) # Numpy(它使用 Householder) 537 | 538 | # 2.5020189909116529e-16 539 | ``` 540 | 541 | GS(Q1)不如 Householder(Q2T,Q3)稳定。 542 | -------------------------------------------------------------------------------- /1.md: -------------------------------------------------------------------------------- 1 | # 一、我们为什么在这里 2 | 3 | 你可以在[此博客文章](http://www.fast.ai/2017/07/17/num-lin-alg/)中阅读数值线性代数课程的概述。 该课程最初在[旧金山大学数据科学](https://www.usfca.edu/arts-sciences/graduate-programs/analytics)的研究生项目中讲授。[YouTube](https://www.youtube.com/playlist?list=PLtmWHNX-gukIc92m1K0P6bIOnZb-mg0hY) 上提供了课程视频(请注意,笔记本的编码和视频编号不对应,因为有些笔记本需要超过 1 个视频才能覆盖)。 4 | 5 | 你可以在我们的 [fast.ai 论坛](http://forums.fast.ai/c/lin-alg)上提出有关该课程的问题。 6 | 7 | 注意:未来的课程代码比这个更多。 8 | 9 | ## 为什么学习数值线性代数? 10 | 11 | 本课程的关键问题:我们如何以可接受的速度和可接受的准确度进行矩阵计算? 12 | 13 | 20 世纪的[十大科学与工程算法](http://www.cs.fsu.edu/~lacher/courses/COT4401/notes/cise_v2_i1/index.html)列表包括线性代数的矩阵分解方法。 它还包括 QR 算法,我们将介绍,以及 Krylov 迭代方法,我们将看到它的一个例子([见这里](https://nickhigham.wordpress.com/2016/03/29/the-top-10-algorithms-in-applied-mathematics/))。 14 | 15 | ![](img/top10.png) 16 | 17 | > 来源:[十大算法](http://www.cs.fsu.edu/~lacher/courses/COT4401/notes/cise_v2_i1/guest.pdf) 18 | 19 | 在选择或设计矩阵计算算法时,要记住以下四点: 20 | 21 | + 内存使用 22 | + 速度 23 | + 准确度 24 | + 可扩展性/并行 25 | 26 | 通常会在这些类别之间进行权衡。 27 | 28 | ### 动机 29 | 30 | 矩阵无处不在 - 任何可以放在 Excel 电子表格中的东西都是矩阵,语言和图片也可以表示为矩阵。了解矩阵算法的选项以及如何做出权衡,可能为你的解决方案带来巨大差异。例如,近似矩阵计算通常比精确矩阵计算快数千倍。 31 | 32 | 这不仅仅是了解现有库的内容,而是了解它们的工作原理。这是因为你通常可以制作算法的变体,库不支持它们,从而为你提供所需的性能或准确度。此外,此领域目前正在快速发展,特别是在与深度学习,推荐系统,近似算法和图形分析相关的领域,因此你经常会发现最近的结果可能会对你的项目产生重大影响,但是不在你的库中。 33 | 34 | 了解算法的真正工作原理。有助于调试和加速解决方案。 35 | 36 | ## 矩阵计算 37 | 38 | 矩阵计算有两种关键类型,它们以多种不同的方式组合在一起。 这些是: 39 | 40 | + 矩阵和张量积 41 | + 矩阵分解 42 | 43 | 所以基本上我们将组合矩阵,并将它们再次分开! 44 | 45 | ### 矩阵和张量积 46 | 47 | ### 矩阵和向量的积: 48 | 49 | 下面的矩阵给出了 1 年内从 1 个健康状态转移到另一个健康状态的概率。 如果一组的当前健康状况是: 50 | 51 | + 85% 无症状 52 | + 10% 有症状 53 | + 5% 的艾滋病 54 | + 0% 死亡 55 | 56 | 1 年内每个健康状况的百分比是多少? 57 | 58 | ![](img/markov_health.jpg) 59 | 60 | > 来源:[马尔科夫链的概念](https://www.youtube.com/watch?v=0Il-y_WLTo4) 61 | 62 | ### 答案 63 | 64 | ```py 65 | import numpy as np 66 | 67 | # Exercise: Use Numpy to compute the answer to the above 68 | 69 | # array([[ 0.765 , 0.1525, 0.0645, 0.018 ]]) 70 | ``` 71 | 72 | ### 矩阵和矩阵的积 73 | 74 | ![](img/shop.png) 75 | 76 | > 来源:[几种线性代数工具的简单的现实世界的应用](https://www.mff.cuni.cz/veda/konference/wds/proc/pdf06/WDS06_106_m8_Ulrychova.pdf) 77 | 78 | ### 答案 79 | 80 | ```py 81 | # Exercise: Use Numpy to compute the answer to the above 82 | 83 | ''' 84 | array([[ 50. , 49. ], 85 | [ 58.5, 61. 
], 86 | [ 43.5, 43.5]]) 87 | ''' 88 | ``` 89 | 90 | ### 图像数据 91 | 92 | 图像可以表示为矩阵。 93 | 94 | ![](img/digit.gif) 95 | 96 | > 来源:[Adam Geitgey](https://medium.com/@ageitgey/machine-learning-is-fun-part-3-deep-learning-and-convolutional-neural-networks-f40359318721) 97 | 98 | ### 卷积 99 | 100 | 卷积是卷积神经网络(CNN)的核心,它是一种深度学习,产生了过去几年图像识别的巨大进步。 它们现在也越来越多地用于语音,例如 Facebook AI 的语音翻译结果比 RNN 快 9 倍(目前最流行的语音翻译方法)。 101 | 102 | 现在,在图像分类方面,计算机比人更准确。 103 | 104 | ![](img/sportspredict.jpeg) 105 | 106 | > 来源:[Andrej Karpathy](http://karpathy.github.io/2014/07/03/feature-learning-escapades/) 107 | 108 | ![](img/InsideImagenet.png) 109 | 110 | > 来源:[Nvidia](https://blogs.nvidia.com/blog/2014/09/07/imagenet/) 111 | 112 | 你可以将卷积视为一种特殊的矩阵积。 113 | 114 | 以下3张图片均来自优秀博客文章“[不同观点下的 CNN](https://medium.com/impactai/cnns-from-different-viewpoints-fab7f52d159c)”,由 fast.ai 学生撰写: 115 | 116 | 卷积将过滤器应用于图像的每个部分: 117 | 118 | ![](img/cnn1.png) 119 | 120 | 神经网络观点: 121 | 122 | ![](img/cnn2.png) 123 | 124 | 矩阵乘法观点: 125 | 126 | ![](img/cnn3.png) 127 | 128 | 让我们在[这个](https://nbviewer.jupyter.org/github/fastai/numerical-linear-algebra-v2/blob/master/nbs/convolution-intro.ipynb)笔记本中看看,如何使用卷积进行边缘检测(最初来自 [fast.ai 深度学习课程](http://course.fast.ai/))。 129 | 130 | ### 矩阵分解 131 | 132 | 我们将在本课程的每一天讨论矩阵分解,并将在以后的课程中介绍以下示例: 133 | 134 | 主题建模(NMF 和 SVD,SVD 使用 QR)一组文档可以由术语 - 文档矩阵表示。 135 | 136 | ![](img/document_term.png) 137 | 138 | > 来源:[信息检索导论](http://player.slideplayer.com/15/4528582/#) 139 | 140 | ![](img/nmf_doc.png) 141 | 142 | > 来源:[NMF 教程](http://perso.telecom-paristech.fr/~essid/teach/NMF_tutorial_ICME-2014.pdf) 143 | 144 | 背景移除(截断 SVD): 145 | 146 | ![](img/surveillance3.png) 147 | 148 | 噪声移除(鲁棒 PCA,使用 SVD): 149 | 150 | ![](img/faces_rpca.png) 151 | 152 | > 这个示例来自 [Jean Kossaifi 的博客](http://jeankossaifi.com/blog/rpca.html)。 153 | 154 | 谷歌的 PageRank 算法(特征值分解): 155 | 156 | ![](img/page_rank_graph.png) 157 | 158 | > 来源:[什么是 PageRank?](http://computationalculture.net/article/what_is_in_pagerank) 159 | 160 | [其它分解和应用的列表](https://sites.google.com/site/igorcarron2/matrixfactorizations)。 161 | 162 | ## 准确度 163 | 164 | ### 浮点数算术 165 | 166 | 为了理解准确性,我们首先需要了解计算机(有限和离散)如何存储数字(无限且连续) 167 | 168 | ### 练习 169 | 170 | 花点时间看看下面的函数`f`。 在尝试运行它之前,在纸上写下`x1 = f(110)`的输出。 现在,(仍在纸上)将其代入`f`并计算`x2 = f(x1)`。 继续进行 10 次迭代。 171 | 172 | 这个例子来自 Greenbaum 和 Chartier 的 Numerical Methods 的第 107 页。 173 | 174 | ```py 175 | def f(x): 176 | if x <= 1/2: 177 | return 2 * x 178 | if x > 1/2: 179 | return 2*x - 1 180 | ``` 181 | 182 | 仅仅在你写下你认为答案应该是什么之后,运行下面的代码: 183 | 184 | ```py 185 | x = 1/10 186 | for i in range(80): 187 | print(x) 188 | x = f(x) 189 | 190 | ''' 191 | 0.1 192 | 0.2 193 | 0.4 194 | 0.8 195 | 0.6000000000000001 196 | 0.20000000000000018 197 | 0.40000000000000036 198 | 0.8000000000000007 199 | 0.6000000000000014 200 | 0.20000000000000284 201 | 0.4000000000000057 202 | 0.8000000000000114 203 | 0.6000000000000227 204 | 0.20000000000004547 205 | 0.40000000000009095 206 | 0.8000000000001819 207 | 0.6000000000003638 208 | 0.2000000000007276 209 | 0.4000000000014552 210 | 0.8000000000029104 211 | 0.6000000000058208 212 | 0.20000000001164153 213 | 0.40000000002328306 214 | 0.8000000000465661 215 | 0.6000000000931323 216 | 0.20000000018626451 217 | 0.40000000037252903 218 | 0.8000000007450581 219 | 0.6000000014901161 220 | 0.20000000298023224 221 | 0.4000000059604645 222 | 0.800000011920929 223 | 0.6000000238418579 224 | 0.20000004768371582 225 | 0.40000009536743164 226 | 0.8000001907348633 227 | 0.6000003814697266 228 | 0.20000076293945312 229 | 0.40000152587890625 230 | 
0.8000030517578125 231 | 0.600006103515625 232 | 0.20001220703125 233 | 0.4000244140625 234 | 0.800048828125 235 | 0.60009765625 236 | 0.2001953125 237 | 0.400390625 238 | 0.80078125 239 | 0.6015625 240 | 0.203125 241 | 0.40625 242 | 0.8125 243 | 0.625 244 | 0.25 245 | 0.5 246 | 1.0 247 | 1.0 248 | 1.0 249 | 1.0 250 | 1.0 251 | 1.0 252 | 1.0 253 | 1.0 254 | 1.0 255 | 1.0 256 | 1.0 257 | 1.0 258 | 1.0 259 | 1.0 260 | 1.0 261 | 1.0 262 | 1.0 263 | 1.0 264 | 1.0 265 | 1.0 266 | 1.0 267 | 1.0 268 | 1.0 269 | 1.0 270 | 1.0 271 | ''' 272 | ``` 273 | 274 | 哪里不对? 275 | 276 | ### 问题:数学是连续的或无限的,但计算机是离散的和有限的 277 | 278 | 计算机的数字表示的两个局限: 279 | 280 | + 它们不能是随意大小 281 | + 它们之间肯定存在差距 282 | 283 | 我们需要关心准确性的原因是,计算机无法存储无限准确的数字。 可以创建给出非常错误答案的计算(特别是在多次重复操作时,因为每个操作可能会使错误成倍增加)。 284 | 285 | 计算机如何存储数字: 286 | 287 | ![](img/fpa.png) 288 | 289 | 尾数也可以称为有效数。 290 | 291 | IEEE双精度算术: 292 | 293 | 数字可以大到`1.79×10308`,小到`2.23×10-308`。 294 | 295 | 区间`[1,2]`由离散子集表示: 296 | 297 | ![](img/tex1-1.gif) 298 | 299 | 区间`[2,4]`表示为: 300 | 301 | ![](img/tex1-2.gif) 302 | 303 | 浮点并不是等间距的。 304 | 305 | ![](img/fltscale-wh.png) 306 | 307 | > 来源:[你从未想过要了解浮点数,但将被迫了解](http://www.volkerschatz.com/science/float.html) 308 | 309 | ### 机器`ε` 310 | 311 | 1 和下一个较大数字之间的距离的一半。 这可能因计算机而异。IEEE双精度标准规定: 312 | 313 | ![](img/tex1-3.gif) 314 | 315 | ### 浮点运算的两个重要属性 316 | 317 | 实数`x`与其最接近的浮点近似值`fl(x)`之间的差值,总是小于机器`ε`,相对而言。对于某些`ε`,其中 ![](img/tex1-4.gif): 318 | 319 | ![](img/tex1-5.gif) 320 | 321 | `*`为任何操作(`+`,`-`,`×`,`÷`),`⊛`是它的浮点模拟,对于一些`ε`,其中 ![](img/tex1-4.gif): 322 | 323 | ![](img/tex1-6.gif) 324 | 325 | 也就是说,浮点运算的每个操作都精确到最大为机器`ε`的相对误差。 326 | 327 | ### 历史 328 | 329 | 事后看来,浮点运算似乎是一个明确的选择,但存在许多存储数字的方法: 330 | 331 | + 定点算术 332 | + 对数和半对数数系统 333 | + 连续分数 334 | + 有理数 335 | + 有理数的可能无限字符串 336 | + 级别索引数字系统 337 | + 固定斜杠和浮动斜杠数字系统 338 | + 2-adic 数字 339 | 340 | 对于参考,请参阅“[浮点运算手册](http://www.springer.com/gp/book/9780817647049)”的[第 1 章](https://perso.ens-lyon.fr/jean-michel.muller/chapitre1.pdf)(免费)。 是的,有一本关于浮点的完整的 16 章书! 341 | 342 | ### 浮点运算的时间线历史 343 | 344 | + 公元前 1600:巴比伦基数 60 的系统是最早的浮点系统(Donald Knuth)。 使用基数 60 的浮点表示来表示尾数(如果两个数字的比率是 60 的幂,则表示相同) 345 | + 1630:滑动规则。仅操纵有效数字(基数 10) 346 | + 1914:Leonardo Torres 和 Quevedo 使用浮点运算描述了 Babbage 的分析引擎的的机电实现。 347 | + 1941:第一个真正的现代实现。 Konrad Zuse 的Z3电脑。 使用基数 2,具有 14 位有效数字,7 位指数和 1 个符号位。 348 | + 1985:IEEE 754-1985 二进制浮点运算标准发布。 提高了准确性,可靠性和便携性。 William Kahan 主导。 349 | 350 | “已经引入了许多不同的方法来在计算机上近似实数。然而,浮点运算是迄今为止在现代计算机中最常用的表示实数的方法。使用有限集合(“机器数字”)模拟无限连续集(实数) 并不是一项简单的任务:必须在速度,准确性,动态范围,易用性和实现以及内存之间找到明智的妥协。如果选择得当的参数(基数,精度,极值指数等),浮点运算似乎对于大多数数值应用来说是一个非常好的折衷方案。” 351 | 352 | 尽管基数 2(二进制)似乎是计算机中非常明显的赢家,但在各个方面使用了各种其他基数值: 353 | 354 | + 早期机器 PDP-10,Burroughs 570 和 6700 使用 基数 8 355 | + 基数 16 的 IBM 360 356 | + 基数 10 的财务计算,袖珍计算器,Maple 357 | + 基数 3 的俄罗斯 SETUN 计算机(1958 年)。 优点:最小化`beta x p`(符号乘位数),对于固定的最大可表示数字`beta^p - 1`。舍入等于截断 358 | + 基数 2 最常见。 理由:易于实现。 研究表明(带有隐式前导位),这比其他所有基数都具有更好的最坏情况或平均精度。 359 | 360 | ### 条件作用和稳定性 361 | 362 | 由于我们无法在计算机上准确表示数字(由于存储的有限性以及浮点结构中数字之间的间隔),因此了解输入中的小扰动如何影响输出变得非常重要。 363 | 364 | “稳定的算法几乎可以为几乎正确的问题提供正确的答案。”--Trefethen 365 | 366 | 条件作用:数学问题的扰动行为(例如最小二乘) 367 | 368 | 稳定性:用于在计算机上解决该问题的算法的扰动行为(例如,最小二乘算法,householder,回代,高斯消除) 369 | 370 | 示例:矩阵的特征值: 371 | 372 | ```py 373 | import scipy.linalg as la 374 | 375 | A = np.array([[1., 1000], [0, 1]]) 376 | B = np.array([[1, 1000], [0.001, 1]]) 377 | 378 | print(A) 379 | 380 | print(B) 381 | 382 | ''' 383 | [[ 1. 1000.] 384 | [ 0. 
1.]] 385 | [[ 1.00000000e+00 1.00000000e+03] 386 | [ 1.00000000e-03 1.00000000e+00]] 387 | ''' 388 | 389 | np.set_printoptions(suppress=True, precision=4) 390 | 391 | wA, vrA = la.eig(A) 392 | wB, vrB = la.eig(B) 393 | 394 | wA, wB 395 | ''' 396 | (array([ 1.+0.j, 1.+0.j]), array([ 2.+0.j, 0.+0.j])) 397 | ''' 398 | ``` 399 | 400 | 提醒:浮点运算的两个属性 401 | 402 | 实数`x`与其最接近的浮点近似值`fl(x)`之间的差值总是小于机器`ε`,相对而言。 403 | 404 | 浮点运算的每个运算`+`,`-`,`×`,`÷`都精确到最大为机器`ε`的相对误差。 405 | 406 | 我们将看到的例子: 407 | 408 | + 经典与修正的 Gram-Schmidt 精度 409 | + Gram-Schmidt 与 Householder(计算 QR 分解的两种不同方式),答案的正交性如何 410 | + 方程组的条件 411 | 412 | ### 近似的准确度 413 | 414 | 我们很少需要大规模地进行高精度的矩阵计算。 事实上,我们经常进行某种机器学习,而不太准确的方法可以防止过度拟合。 415 | 416 | 在许多情况下,输入数据可能不那么精确,因此使用输入数据和计算来寻求更高的准确度是没有意义的。 417 | 418 | + 随机数据结构 419 | + 布隆过滤器 420 | + HyperLogLog 421 | + 局部敏感哈希 422 | + 跳过列表 423 | + Count-min 草图 424 | + 最小哈希 425 | 426 | 示例:布隆过滤器能够使用每个元素的`<10`位,搜索集合的成员性,具有 1% 的假阳性。 这通常表示数千次的内存使用减少。 427 | 428 | ![](img/bloom_filter.png) 429 | 430 | 通过对所有退回的项目进行第二阶段(确切)检查,可以轻松处理假阳性 - 对于稀有项目,这可能非常有效。 例如,许多网络浏览器使用布隆过滤器来创建一组被阻止的页面(例如,带有病毒的页面),因为被阻止的网页只是整个网络的一小部分。 可以通过获取布隆过滤器返回的任何内容并使用完整的精确列表检查 Web 服务来处理假阳性。 (详细信息请参阅[布隆过滤器教程](https://llimllib.github.io/bloomfilter-tutorial/))。 431 | 432 | ### 随机算法 433 | 434 | + Karger 算法(图的最小切割) 435 | + 随机回归 436 | + 蒙特卡罗模拟 437 | + 随机 LU 分解(高斯消元) 438 | + 随机 SVD 439 | 440 | 如果我们接受一些精度降低,那么我们通常可以通过使用近似算法将速度提高几个数量级(和/或减少内存使用)。 这些算法通常以一定的概率给出正确的答案。 通过多次重新运行算法,你通常可以加倍增加该概率! 441 | 442 | ### 昂贵的错误 443 | 444 | 以下示例来自 Greenbaum 和 Chartier。 445 | 446 | 欧洲航天局在阿丽亚娜 5 火箭上 10 年花费了 70 亿美元。 447 | 448 | 当你尝试将 64 位数放入 16 位空间(整数溢出)时会发生什么: 449 | 450 | ```py 451 | from IPython.display import YouTubeVideo 452 | YouTubeVideo("PK_yguLapgA") 453 | ``` 454 | 455 | 456 | 457 | 这是一个浮点错误,耗资 4.75 亿英镑: 458 | 459 | > [1994 纽约时报关于英特尔奔腾错误的文章](http://www.nytimes.com/1994/11/24/business/company-news-flaw-undermines-accuracy-of-pentium-chips.html) 460 | 461 | ![](img/pentium_nytimes.png) 462 | 463 | 资源:浮点运算的更多信息,请参阅 Trefethen & Bau 的第 13 讲和 Greenbaum & Chartier 的第 5 章。 464 | 465 | ## 内存使用 466 | 467 | ### 稀疏 VS 密集 468 | 469 | 上面我们介绍了如何存储数字,现在让我们来谈谈如何存储矩阵。 节省内存(和计算)的关键方法不是存储所有矩阵。 相反,只需存储非零元素。 这称为稀疏存储,它非常适合稀疏矩阵,即大多数元素为零的矩阵。 470 | 471 | ![](img/sparse.png) 472 | 473 | 以下是有限元问题的矩阵示例,该问题出现在工程中(例如,在对平面周围的气流进行建模时)。 在此示例中,非零元素为黑色,零元素为白色: 474 | 475 | ![](img/Finite_element_sparse_matrix.png) 476 | 477 | > [来源](https://commons.wikimedia.org/w/index.php?curid=2245335) 478 | 479 | 还有特殊类型的结构化矩阵,例如对角线,三对角线,hessenberg 和三角,它们都表现稀疏性的特定模式,可以利用它们来减少内存和计算。 480 | 481 | 与稀疏矩阵相反的是密集矩阵,以及密集存储,其仅仅指代主要包含非零的矩阵,其中每个元素被显式存储。 由于稀疏矩阵是有用且常见的,因此数值线性代数侧重于通过尽可能多的操作来保持稀疏性。 482 | 483 | ## 速度 484 | 485 | 速度差异来自许多方面,特别是: 486 | 487 | + 计算复杂性 488 | + 向量化 489 | + 扩展到多个核心和节点 490 | + 局部性 491 | 492 | ### 计算复杂性 493 | 494 | 算法通常使用关于矩阵中的行数和列数的计算复杂度来表示。 例如。 你可能会发现一个描述为`O(n^2 m)`的算法。 495 | 496 | 计算复杂性和`O`符号经常出现在技术评论中,所以实践它们是个好主意。 你可以在这些网站中了解概念和实践: 497 | 498 | * [Interview Cake](https://www.interviewcake.com/article/java/big-o-notation-time-and-space-complexity) 499 | * [Codecademy](https://www.codecademy.com/courses/big-o/0/3) 500 | * [HackerRank](https://www.hackerrank.com/contests/30-days-of-code/challenges/day-25-running-time) 501 | 502 | ### 向量化 503 | 504 | 现代 CPU 和 GPU 可以在单个核心上同时对多个元素应用操作。 例如,在一个步骤中选取向量中的 4 个浮点数的指数。 这称为 SIMD。 你不会显式编写 SIMD 代码(它往往需要汇编语言或特殊的 C “内在函数”),而是在像 numpy 这样的库中使用向量化操作,而后者又依赖于专门调整的向量化底层线性代数 API(特别是 BLAS 和 LAPACK)。 505 | 506 | ### 矩阵计算包:BLAS 和 LAPACK 507 | 508 | [BLAS(基本线性代数子程序)](http://www.netlib.org/blas/):底层矩阵和向量算术运算的规范。这些是用于执行基本向量和矩阵运算的标准积木。 BLAS 起源于 1979 年的 Fortran 库。BLAS 库的示例包括:AMD 
核心数学库(ACML),ATLAS,英特尔数学核心库(MKL)和 OpenBLAS。 509 | 510 | [LAPACK](http://www.netlib.org/lapack/) 是用 Fortran 编写的,提供了解决线性方程组,特征值问题和奇异值问题的例程。矩阵因式分解(LU,Cholesky,QR,SVD,Schur)。处理密集和带状矩阵,但不处理一般稀疏矩阵。实数和复数,单精度和双精度。 511 | 512 | 20 世纪 70 年代和 80 年代:EISPACK(特征值例程)和 LINPACK(线性方程和线性最小二乘例程)库。 513 | 514 | LAPACK 最初的目标:使 LINAPCK 和 EISPACK 在共享内存的向量和并行处理器上高效运行,并在现代基于缓存的体系结构上利用缓存(最初于 1992 年发布)。 EISPACK 和 LINPACK 忽略了多层内存架构,并且花费了太多时间来移动数据。 515 | 516 | LAPACK 使用高度优化的块操作实现(在每台机器上大量实现),以便尽可能多的计算由 BLAS 执行。 517 | 518 | ### 局部性 519 | 520 | 使用较慢的方式来访问数据(例如,通过互联网)可以比快速方式(例如来自寄存器)慢十亿倍。 但慢速存储器远多于快速存储器。 因此,一旦我们在快速存储中拥有数据,我们就希望在那里进行所需的任何计算,而不是每次需要时都需要多次加载。 此外,对于大多数类型的存储,访问彼此相邻的数据项要快得多,因此我们应该尽量使用附近存储的任何数据,我们知道我们很快就需要它。 这两个问题被称为局部性。 521 | 522 | ### 不同类型内存的速度 523 | 524 | 以下是每个人都应该知道的一些数字(来自传奇人物 [Jeff Dean](http://static.googleusercontent.com/media/research.google.com/en/us/people/jeff/stanford-295-talk.pdf)): 525 | 526 | + L1 高速缓存引用:0.5 ns 527 | + L2 缓存引用:7 ns 528 | + 主存储器引用 / RAM:100 ns 529 | + 通过 1 Gbps 网络发送 2K 字节:20,000 ns 530 | + 从内存顺序读取 1 MB:250,000 ns 531 | + 在同一数据中心内往返:500,000 ns 532 | + 磁盘搜索:10,000,000 ns 533 | + 从网络顺序读取 1 MB:10,000,000 ns 534 | + 从磁盘顺序读取 1 MB:30,000,000 ns 535 | + 发送数据包 CA-> Netherlands-> CA:150,000,000 ns 536 | 537 | 这是一个更新的交互式[版本](https://people.eecs.berkeley.edu/~rcs/research/interactive_latency.html),其中包含这些数字如何变化的时间表。 538 | 539 | 关键结论:每个连续的存储器类型(至少)比之前的存储器类型差一个数量级。磁盘搜索非常慢。 540 | 541 | 这个视频有一个很好的例子,展示了几种计算照片模糊的方法,并进行了各种权衡。不要担心出现的 C 代码,只关注矩阵计算的红色和绿色运动图像。 542 | 543 | 虽然视频是关于一种名为 Halide 的新语言,但它很好地说明了它所引发的问题是普遍存在的。观看 1-13 分钟: 544 | 545 | ```py 546 | from IPython.display import YouTubeVideo 547 | YouTubeVideo("3uiEyEKji0M") 548 | ``` 549 | 550 | 551 | 552 | 局部性很难。 潜在的权衡: 553 | 554 | + 用于节省内存带宽的冗余计算 555 | + 牺牲并行性来获得更好的复用 556 | 557 | ### 临时性 558 | 559 | 当计算结果存储在 RAM 中的临时变量中,然后加载该变量来对其进行另一次计算时,就会出现“临时性”问题。 这比将数据保存在缓存或寄存器中,并在将最终结果存储到 RAM 之前,进行所有必要的计算要慢很多个数量级。 这对我们来说尤其是一个问题,因为 numpy 通常为每一个操作或功能创造临时性。 例如。`a = b * c^2 + ln(d)`将创建四个临时值(因为有四个操作和函数)。 560 | 561 | ### 扩展到多个核心和节点 562 | 563 | 我们有一个单独的可扩展性章节,但值得注意的是,这对速度也很重要 - 如果我们无法扩展我们拥有的所有计算资源,我们将会遇到计算速度较慢的问题。 564 | 565 | ## 可扩展性/并行化 566 | 567 | 通常我们会发现我们拥有的数据比用于处理内存或计算时间要多。 在这种情况下,我们希望能够在多个核(在一个计算机内)或节点(即网络上的多个计算机)上扩展我们的算法。 虽然我们将研究跨[多个核](http://www.makeuseof.com/tag/processor-core-makeuseof-explains-2/)的扩展(称为并行化),但我们不会在本课程中处理多节点扩展。 通常,可扩展算法是指,输入可以分解成较小的部分,每个部分由不同的核心/计算机处理,然后在最后重新组合在一起。 568 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License (CC BY-NC-SA 4.0) 2 | 3 | Copyright © 2020 ApacheCN(apachecn@163.com) 4 | 5 | By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. 6 | 7 | Section 1 – Definitions. 8 | 9 | a. 
Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. 10 | b. Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. 11 | c. BY-NC-SA Compatible License means a license listed at creativecommons.org/compatiblelicenses, approved by Creative Commons as essentially the equivalent of this Public License. 12 | d. Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. 13 | e. Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. 14 | f. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. 15 | g. License Elements means the license attributes listed in the name of a Creative Commons Public License. The License Elements of this Public License are Attribution, NonCommercial, and ShareAlike. 16 | h. Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License. 17 | i. Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. 18 | j. Licensor means the individual(s) or entity(ies) granting rights under this Public License. 19 | k. NonCommercial means not primarily intended for or directed towards commercial advantage or monetary compensation. For purposes of this Public License, the exchange of the Licensed Material for other material subject to Copyright and Similar Rights by digital file-sharing or similar means is NonCommercial provided there is no payment of monetary compensation in connection with the exchange. 20 | l. Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. 21 | m. 
Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. 22 | n. You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. 23 | 24 | Section 2 – Scope. 25 | 26 | a. License grant. 27 | 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: 28 | A. reproduce and Share the Licensed Material, in whole or in part, for NonCommercial purposes only; and 29 | B. produce, reproduce, and Share Adapted Material for NonCommercial purposes only. 30 | 2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. 31 | 3. Term. The term of this Public License is specified in Section 6(a). 32 | 4. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material. 33 | 5. Downstream recipients. 34 | A. Offer from the Licensor – Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. 35 | B. Additional offer from the Licensor – Adapted Material. Every recipient of Adapted Material from You automatically receives an offer from the Licensor to exercise the Licensed Rights in the Adapted Material under the conditions of the Adapter’s License You apply. 36 | C. No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. 37 | 6. No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). 38 | b. Other rights. 39 | 1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. 40 | 2. Patent and trademark rights are not licensed under this Public License. 41 | 3. 
To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties, including when the Licensed Material is used other than for NonCommercial purposes. 42 | 43 | Section 3 – License Conditions. 44 | 45 | Your exercise of the Licensed Rights is expressly made subject to the following conditions. 46 | 47 | a. Attribution. 48 | 1. If You Share the Licensed Material (including in modified form), You must: 49 | A. retain the following if it is supplied by the Licensor with the Licensed Material: 50 | i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); 51 | ii. a copyright notice; 52 | iii. a notice that refers to this Public License; 53 | iv. a notice that refers to the disclaimer of warranties; 54 | v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; 55 | B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and 56 | C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. 57 | 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. 58 | 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. 59 | b. ShareAlike. 60 | In addition to the conditions in Section 3(a), if You Share Adapted Material You produce, the following conditions also apply. 61 | 1. The Adapter’s License You apply must be a Creative Commons license with the same License Elements, this version or later, or a BY-NC-SA Compatible License. 62 | 2. You must include the text of, or the URI or hyperlink to, the Adapter's License You apply. You may satisfy this condition in any reasonable manner based on the medium, means, and context in which You Share Adapted Material. 63 | 3. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, Adapted Material that restrict exercise of the rights granted under the Adapter's License You apply. 64 | 65 | Section 4 – Sui Generis Database Rights. 66 | 67 | Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: 68 | 69 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database for NonCommercial purposes only; 70 | b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material, including for purposes of Section 3(b); and 71 | c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. 
72 | 73 | For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. 74 | 75 | Section 5 – Disclaimer of Warranties and Limitation of Liability. 76 | 77 | a. Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You. 78 | b. To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You. 79 | c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. 80 | 81 | Section 6 – Term and Termination. 82 | 83 | a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. 84 | b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: 85 | 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or 86 | 2. upon express reinstatement by the Licensor. 87 | For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. 88 | c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. 89 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. 90 | 91 | Section 7 – Other Terms and Conditions. 92 | 93 | a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. 94 | b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. 95 | 96 | Section 8 – Interpretation. 97 | 98 | a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. 99 | b. 
To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. 100 | c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. 101 | d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. -------------------------------------------------------------------------------- /6.md: -------------------------------------------------------------------------------- 1 | # 六、使用鲁棒回归的 CT 扫描的压缩感知 2 | 3 | ## 广播 4 | 5 | 术语广播描述了在算术运算期间,如何处理不同形状的数组。 Numpy 首先使用广播一词,但现在用于其他库,如 Tensorflow 和 Matlab;规则因库而异。 6 | 7 | 来自 Numpy 文档: 8 | 9 | > 广播提供了一种向量化数组操作的方法,使循环在 C 而不是 Python 中出现。 它可以不制作不必要的数据副本而实现,并且通常可以产生高效实现。最简单的广播示例在数组乘以标量时发生。 10 | 11 | ```py 12 | a = np.array([1.0, 2.0, 3.0]) 13 | b = 2.0 14 | a * b 15 | 16 | # array([ 2., 4., 6.]) 17 | 18 | v=np.array([1,2,3]) 19 | print(v, v.shape) 20 | 21 | # [1 2 3] (3,) 22 | 23 | m=np.array([v,v*2,v*3]); m, m.shape 24 | 25 | ''' 26 | (array([[1, 2, 3], 27 | [2, 4, 6], 28 | [3, 6, 9]]), (3, 3)) 29 | ''' 30 | 31 | n = np.array([m*1, m*5]) 32 | 33 | n 34 | 35 | ''' 36 | array([[[ 1, 2, 3], 37 | [ 2, 4, 6], 38 | [ 3, 6, 9]], 39 | 40 | [[ 5, 10, 15], 41 | [10, 20, 30], 42 | [15, 30, 45]]]) 43 | ''' 44 | 45 | n.shape, m.shape 46 | 47 | # ((2, 3, 3), (3, 3)) 48 | ``` 49 | 50 | 我们可以使用广播来将矩阵和数组相加: 51 | 52 | ```py 53 | m+v 54 | 55 | ''' 56 | array([[ 2, 4, 6], 57 | [ 3, 6, 9], 58 | [ 4, 8, 12]]) 59 | ''' 60 | ``` 61 | 62 | 注意如果我们转置数组会发生什么: 63 | 64 | ```py 65 | v1=np.expand_dims(v,-1); v1, v1.shape 66 | 67 | ''' 68 | (array([[1], 69 | [2], 70 | [3]]), (3, 1)) 71 | ''' 72 | 73 | m+v1 74 | 75 | ''' 76 | array([[ 2, 3, 4], 77 | [ 4, 6, 8], 78 | [ 6, 9, 12]]) 79 | ''' 80 | ``` 81 | 82 | ### 通用的 NumPy 广播规则 83 | 84 | 操作两个数组时,NumPy 会逐元素地比较它们的形状。 它从最后的维度开始,并向前移动。 如果满足: 85 | 86 | + 他们是相等的,或者 87 | + 其中一个是 1 88 | 89 | 两个维度兼容。 90 | 91 | 数组不需要具有相同数量的维度。 例如,如果你有一个`256×256×3`的 RGB 值数组,并且你希望将图像中的每种颜色缩放不同的值,则可以将图像乘以具有 3 个值的一维数组。 根据广播规则排列这些数组的尾部轴的大小,表明它们是兼容的: 92 | 93 | ``` 94 | Image (3d array): 256 x 256 x 3 95 | Scale (1d array): 3 96 | Result (3d array): 256 x 256 x 3 97 | ``` 98 | 99 | ### 回顾 100 | 101 | ```py 102 | v = np.array([1,2,3,4]) 103 | m = np.array([v,v*2,v*3]) 104 | A = np.array([5*m, -1*m]) 105 | 106 | v.shape, m.shape, A.shape 107 | 108 | # ((4,), (3, 4), (2, 3, 4)) 109 | ``` 110 | 111 | 下列操作有效嘛? 
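在动手运行之前,可以按上面的规则从尾部维度逐一比对来预测结果。下面是一个补充的小例子(非原文;`broadcastable` 是为演示而写的假设性辅助函数),用前面“图像按通道缩放”的场景验证这一规则;同样的推理也可以用来判断接下来这几个运算是否有效。

```py
import numpy as np

def broadcastable(shape_a, shape_b):
    """按广播规则:从最后的维度向前比较,相等或其中一个为 1 才兼容。"""
    for da, db in zip(reversed(shape_a), reversed(shape_b)):
        if da != db and da != 1 and db != 1:
            return False
    return True

image = np.random.rand(256, 256, 3)        # 上文规则部分的例子:RGB 图像
scale = np.array([0.5, 1.0, 2.0])          # 对三个通道分别缩放
print(broadcastable(image.shape, scale.shape))   # True:尾部维度 3 与 3 相等
print((image * scale).shape)                     # (256, 256, 3)
```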
112 | 113 | ```py 114 | A 115 | 116 | A + v 117 | 118 | A.T + v 119 | 120 | A.T.shape 121 | ``` 122 | 123 | ### (SciPy 中的)稀疏矩阵 124 | 125 | 具有大量零的矩阵称为稀疏(稀疏是密集的反义)。 对于稀疏矩阵,仅仅存储非零值,可以节省大量内存。 126 | 127 | ![](img/sparse.png) 128 | 129 | 另一个大型稀疏矩阵的例子: 130 | 131 | ![](img/Finite_element_sparse_matrix.png) 132 | 133 | > [来源](https://commons.wikimedia.org/w/index.php?curid=2245335S) 134 | 135 | 这是最常见的稀疏存储格式: 136 | 137 | + 逐坐标(scipy 称 COO) 138 | + 压缩稀疏行(CSR) 139 | + 压缩稀疏列(CSC) 140 | 141 | 让我们来看看[这些例子](http://www.mathcs.emory.edu/~cheung/Courses/561/Syllabus/3-C/sparse.html)。 142 | 143 | 实际上还有[更多格式](http://www.cs.colostate.edu/~mcrob/toolbox/c++/sparseMatrix/sparse_matrix_compression.html)。 144 | 145 | 如果非零元素的数量与行(或列)的数量成比例而不是与行列的乘积成比例,则通常将一类矩阵(例如,对角)称为稀疏。 146 | 147 | ### Scipy 实现 148 | 149 | 来自 [Scipy 稀疏矩阵文档](https://docs.scipy.org/doc/scipy-0.18.1/reference/sparse.html) 150 | 151 | + 为了有效地构造矩阵,请使用`dok_matrix`或`lil_matrix`。 `lil_matrix`类支持基本切片和花式索引,其语法与 NumPy 数组类似。 如下所示,COO 格式也可用于有效地构造矩阵 152 | + 要执行乘法或求逆等操作,首先要将矩阵转换为 CSC 或 CSR 格式。 153 | + CSR,CSC 和 COO 格式之间的所有转换都是高效的线性时间操作。 154 | 155 | ### 今天:CT 扫描 156 | 157 | ### 引言 158 | 159 | “[数学真的可以拯救你的生命吗?当然可以!!](https://plus.maths.org/content/saving-lives-mathematics-tomography)” (可爱的文章) 160 | 161 | ![](img/xray.png) 162 | 163 | (CAT 和 CT 扫描指代相同的过程。CT 扫描是更现代的术语) 164 | 165 | 本课程基于 Scikit-Learn 示例[压缩感知:使用 L1 先验的层析成像重建(Lasso)](http://scikit-learn.org/stable/auto_examples/applications/plot_tomography_l1_reconstruction.html)。 166 | 167 | ### 我们今天的目标 168 | 169 | 读取 CT 扫描的结果并构建原始图像。 170 | 171 | ![](img/lesson4.png) 172 | 173 | 对于(特定位置和特定角度的)每个 X 射线,我们进行单次测量。 我们需要从这些测量中构建原始图像。 此外,我们不希望患者经历大量辐射,因此我们收集的数据少于图片区域。 174 | 175 | ![](img/data_xray.png) 176 | 177 | 我们会看到: 178 | 179 | ![](img/sklearn_ct.png) 180 | 181 | > 来源:[压缩感知](https://people.csail.mit.edu/indyk/princeton.pdf) 182 | 183 | ![](img/ct_1.png) 184 | 185 | > [来源](https://www.fields.utoronto.ca/programs/scientific/10-11/medimaging/presentations/Plenary_Sidky.pdf) 186 | 187 | ### 导入 188 | 189 | ```py 190 | %matplotlib inline 191 | import numpy as np, matplotlib.pyplot as plt, math 192 | from scipy import ndimage, sparse 193 | 194 | np.set_printoptions(suppress=True) 195 | ``` 196 | 197 | ## 生成数据 198 | 199 | ### 引言 200 | 201 | 我们将使用生成的数据(不是真正的 CT 扫描)。 生成数据涉及一些有趣的 numpy 和线性代数,我们稍后会再回过头来看。 202 | 203 | 代码来自 Scikit-Learn 示例[压缩感知:使用 L1 先验的层析成像重建(Lasso)](http://scikit-learn.org/stable/auto_examples/applications/plot_tomography_l1_reconstruction.html)。 204 | 205 | ### 生成图像 206 | 207 | ```py 208 | def generate_synthetic_data(): 209 | rs = np.random.RandomState(0) 210 | n_pts = 36 211 | x, y = np.ogrid[0:l, 0:l] 212 | mask_outer = (x - l / 2) ** 2 + (y - l / 2) ** 2 < (l / 2) ** 2 213 | mx,my = rs.randint(0, l, (2,n_pts)) 214 | mask = np.zeros((l, l)) 215 | mask[mx,my] = 1 216 | mask = ndimage.gaussian_filter(mask, sigma=l / n_pts) 217 | res = (mask > mask.mean()) & mask_outer 218 | return res ^ ndimage.binary_erosion(res) 219 | 220 | l = 128 221 | data = generate_synthetic_data() 222 | 223 | plt.figure(figsize=(5,5)) 224 | plt.imshow(data, cmap=plt.cm.gray); 225 | ``` 226 | 227 | ![](img/6-1.png) 228 | 229 | ### `generate_synthetic_data`在做什么 230 | 231 | ```py 232 | l=8; n_pts=5 233 | rs = np.random.RandomState(0) 234 | 235 | x, y = np.ogrid[0:l, 0:l]; x,y 236 | 237 | ''' 238 | (array([[0], 239 | [1], 240 | [2], 241 | [3], 242 | [4], 243 | [5], 244 | [6], 245 | [7]]), array([[0, 1, 2, 3, 4, 5, 6, 7]])) 246 | ''' 247 | 248 | x + y 249 | 250 | ''' 251 | array([[ 0, 1, 2, 3, 4, 5, 6, 7], 252 | [ 1, 2, 3, 4, 5, 6, 7, 8], 253 | 
[ 2, 3, 4, 5, 6, 7, 8, 9], 254 | [ 3, 4, 5, 6, 7, 8, 9, 10], 255 | [ 4, 5, 6, 7, 8, 9, 10, 11], 256 | [ 5, 6, 7, 8, 9, 10, 11, 12], 257 | [ 6, 7, 8, 9, 10, 11, 12, 13], 258 | [ 7, 8, 9, 10, 11, 12, 13, 14]]) 259 | ''' 260 | 261 | (x - l/2) ** 2 262 | 263 | ''' 264 | array([[ 16.], 265 | [ 9.], 266 | [ 4.], 267 | [ 1.], 268 | [ 0.], 269 | [ 1.], 270 | [ 4.], 271 | [ 9.]]) 272 | ''' 273 | 274 | (x - l/2) ** 2 + (y - l/2) ** 2 275 | 276 | ''' 277 | array([[ 32., 25., 20., 17., 16., 17., 20., 25.], 278 | [ 25., 18., 13., 10., 9., 10., 13., 18.], 279 | [ 20., 13., 8., 5., 4., 5., 8., 13.], 280 | [ 17., 10., 5., 2., 1., 2., 5., 10.], 281 | [ 16., 9., 4., 1., 0., 1., 4., 9.], 282 | [ 17., 10., 5., 2., 1., 2., 5., 10.], 283 | [ 20., 13., 8., 5., 4., 5., 8., 13.], 284 | [ 25., 18., 13., 10., 9., 10., 13., 18.]]) 285 | ''' 286 | 287 | mask_outer = (x - l/2) ** 2 + (y - l/2) ** 2 < (l/2) ** 2; mask_outer 288 | 289 | ''' 290 | array([[False, False, False, False, False, False, False, False], 291 | [False, False, True, True, True, True, True, False], 292 | [False, True, True, True, True, True, True, True], 293 | [False, True, True, True, True, True, True, True], 294 | [False, True, True, True, True, True, True, True], 295 | [False, True, True, True, True, True, True, True], 296 | [False, True, True, True, True, True, True, True], 297 | [False, False, True, True, True, True, True, False]], dtype=bool) 298 | ''' 299 | 300 | plt.imshow(mask_outer, cmap='gray') 301 | 302 | # 303 | ``` 304 | 305 | ![](img/6-2.png) 306 | 307 | ```py 308 | mask = np.zeros((l, l)) 309 | mx,my = rs.randint(0, l, (2,n_pts)) 310 | mask[mx,my] = 1; mask 311 | 312 | ''' 313 | array([[ 0., 1., 0., 0., 0., 0., 0., 0.], 314 | [ 0., 0., 0., 0., 0., 0., 0., 0.], 315 | [ 0., 0., 0., 0., 0., 0., 0., 0.], 316 | [ 0., 0., 0., 1., 0., 0., 0., 0.], 317 | [ 0., 0., 0., 1., 0., 0., 0., 0.], 318 | [ 0., 0., 0., 0., 0., 0., 0., 1.], 319 | [ 0., 0., 0., 0., 0., 0., 0., 0.], 320 | [ 0., 0., 0., 1., 0., 0., 0., 0.]]) 321 | ''' 322 | 323 | plt.imshow(mask, cmap='gray') 324 | 325 | # 326 | ``` 327 | 328 | ![](img/6-3.png) 329 | 330 | ```py 331 | mask = ndimage.gaussian_filter(mask, sigma=l / n_pts) 332 | 333 | plt.imshow(mask, cmap='gray') 334 | 335 | # 336 | ``` 337 | 338 | ![](img/6-4.png) 339 | 340 | ```py 341 | res = np.logical_and(mask > mask.mean(), mask_outer) 342 | plt.imshow(res, cmap='gray'); 343 | ``` 344 | 345 | ![](img/6-5.png) 346 | 347 | ```py 348 | plt.imshow(ndimage.binary_erosion(res), cmap='gray'); 349 | ``` 350 | 351 | ![](img/6-6.png) 352 | 353 | ```py 354 | plt.imshow(res ^ ndimage.binary_erosion(res), cmap='gray'); 355 | ``` 356 | 357 | ![](img/6-7.png) 358 | 359 | ## 生成投影 360 | 361 | ### 代码 362 | 363 | ```py 364 | def _weights(x, dx=1, orig=0): 365 | x = np.ravel(x) 366 | floor_x = np.floor((x - orig) / dx) 367 | alpha = (x - orig - floor_x * dx) / dx 368 | return np.hstack((floor_x, floor_x + 1)), np.hstack((1 - alpha, alpha)) 369 | 370 | 371 | def _generate_center_coordinates(l_x): 372 | X, Y = np.mgrid[:l_x, :l_x].astype(np.float64) 373 | center = l_x / 2. 
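    # 下面两行将网格坐标平移,使原点位于图像中心(0.5 为像素中心的偏移)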
374 | X += 0.5 - center 375 | Y += 0.5 - center 376 | return X, Y 377 | 378 | def build_projection_operator(l_x, n_dir): 379 | X, Y = _generate_center_coordinates(l_x) 380 | angles = np.linspace(0, np.pi, n_dir, endpoint=False) 381 | data_inds, weights, camera_inds = [], [], [] 382 | data_unravel_indices = np.arange(l_x ** 2) 383 | data_unravel_indices = np.hstack((data_unravel_indices, 384 | data_unravel_indices)) 385 | for i, angle in enumerate(angles): 386 | Xrot = np.cos(angle) * X - np.sin(angle) * Y 387 | inds, w = _weights(Xrot, dx=1, orig=X.min()) 388 | mask = (inds >= 0) & (inds < l_x) 389 | weights += list(w[mask]) 390 | camera_inds += list(inds[mask] + i * l_x) 391 | data_inds += list(data_unravel_indices[mask]) 392 | proj_operator = sparse.coo_matrix((weights, (camera_inds, data_inds))) 393 | return proj_operator 394 | ``` 395 | 396 | ### 投影运算符 397 | 398 | ```py 399 | l = 128 400 | 401 | proj_operator = build_projection_operator(l, l // 7) 402 | 403 | proj_operator 404 | 405 | ''' 406 | <2304x16384 sparse matrix of type '' 407 | with 555378 stored elements in COOrdinate format> 408 | ''' 409 | ``` 410 | 411 | 维度:角度(`l // 7`),位置(`l`),每个图像(`l x l`) 412 | 413 | ```py 414 | proj_t = np.reshape(proj_operator.todense().A, (l//7,l,l,l)) 415 | ``` 416 | 417 | 第一个坐标指的是线的角度,第二个坐标指代线的位置。 418 | 419 | 索引为 3 的角度的直线: 420 | 421 | ```py 422 | plt.imshow(proj_t[3,0], cmap='gray'); 423 | ``` 424 | 425 | ![](img/6-8.png) 426 | 427 | ```py 428 | plt.imshow(proj_t[3,1], cmap='gray'); 429 | ``` 430 | 431 | ![](img/6-9.png) 432 | 433 | ```py 434 | plt.imshow(proj_t[3,2], cmap='gray'); 435 | ``` 436 | 437 | ![](img/6-10.png) 438 | 439 | ```py 440 | plt.imshow(proj_t[3,40], cmap='gray'); 441 | ``` 442 | 443 | ![](img/6-11.png) 444 | 445 | 垂直位置 40 处的其他直线: 446 | 447 | ```py 448 | plt.imshow(proj_t[4,40], cmap='gray'); 449 | ``` 450 | 451 | ![](img/6-12.png) 452 | 453 | ```py 454 | plt.imshow(proj_t[15,40], cmap='gray'); 455 | ``` 456 | 457 | ![](img/6-13.png) 458 | 459 | ```py 460 | plt.imshow(proj_t[17,40], cmap='gray'); 461 | ``` 462 | 463 | ![](img/6-14.png) 464 | 465 | ### X 射线和数据之间的交点 466 | 467 | 接下来,我们想看看直线如何与我们的数据相交。 请记住,这就是数据的样子: 468 | 469 | ```py 470 | plt.figure(figsize=(5,5)) 471 | plt.imshow(data, cmap=plt.cm.gray) 472 | plt.axis('off') 473 | plt.savefig("images/data.png") 474 | ``` 475 | 476 | ![](img/6-15.png) 477 | 478 | ```py 479 | proj = proj_operator @ data.ravel()[:, np.newaxis] 480 | ``` 481 | 482 | 角度为 17,位置为 40 的穿过数据的 X 射线: 483 | 484 | ```py 485 | plt.figure(figsize=(5,5)) 486 | plt.imshow(data + proj_t[17,40], cmap=plt.cm.gray) 487 | plt.axis('off') 488 | plt.savefig("images/data_xray.png") 489 | ``` 490 | 491 | ![](img/6-16.png) 492 | 493 | 它们相交的地方。 494 | 495 | ```py 496 | both = data + proj_t[17,40] 497 | plt.imshow((both > 1.1).astype(int), cmap=plt.cm.gray); 498 | ``` 499 | 500 | ![](img/6-17.png) 501 | 502 | 那条 X 射线的强度: 503 | 504 | ```py 505 | np.resize(proj, (l//7,l))[17,40] 506 | 507 | # 6.4384498372605989 508 | ``` 509 | 510 | 角度为 3,位置为 14 的穿过数据的 X 射线: 511 | 512 | ```py 513 | plt.imshow(data + proj_t[3,14], cmap=plt.cm.gray); 514 | ``` 515 | 516 | ![](img/6-18.png) 517 | 518 | 它们相交的地方。 519 | 520 | ```py 521 | both = data + proj_t[3,14] 522 | plt.imshow((both > 1.1).astype(int), cmap=plt.cm.gray); 523 | ``` 524 | 525 | ![](img/6-19.png) 526 | 527 | CT 扫描的测量结果在这里是一个小数字: 528 | 529 | ```py 530 | np.resize(proj, (l//7,l))[3,14] 531 | 532 | # 2.1374953737965541 533 | 534 | proj += 0.15 * np.random.randn(*proj.shape) 535 | ``` 536 | 537 | ### 关于`*args` 538 | 539 | ```py 540 | a = 
[1,2,3] 541 | b = [4,5,6] 542 | 543 | c = list(zip(a, b)) 544 | 545 | c 546 | 547 | # [(1, 4), (2, 5), (3, 6)] 548 | 549 | list(zip(*c)) 550 | 551 | # [(1, 2, 3), (4, 5, 6)] 552 | ``` 553 | 554 | ### 投影(CT 读取) 555 | 556 | ```py 557 | plt.figure(figsize=(7,7)) 558 | plt.imshow(np.resize(proj, (l//7,l)), cmap='gray') 559 | plt.axis('off') 560 | plt.savefig("images/proj.png") 561 | ``` 562 | 563 | ![](img/6-20.png) 564 | 565 | ## 回归 566 | 567 | 现在我们将尝试仅从投影中恢复数据(CT 扫描的测量值)。 568 | 569 | ### 线性回归:`Xβ=y` 570 | 571 | 我们的矩阵`A`是投影算子。 这是我们不同 X 射线上方的 4d 矩阵(角度,位置,`x`,`y`): 572 | 573 | ```py 574 | plt.figure(figsize=(12,12)) 575 | plt.title("X: Projection Operator") 576 | plt.imshow(proj_operator.todense().A, cmap='gray') 577 | 578 | # 579 | ``` 580 | 581 | ![](img/6-21.png) 582 | 583 | 我们正在求解原始数据`x`。 我们将 2D 数据展开为单个列。 584 | 585 | ```py 586 | plt.figure(figsize=(5,5)) 587 | plt.title("beta: Image") 588 | plt.imshow(data, cmap='gray') 589 | 590 | plt.figure(figsize=(4,12)) 591 | # 我正在平铺列,使其更容易看到 592 | plt.imshow(np.tile(data.ravel(), (80,1)).T, cmap='gray') 593 | 594 | # 595 | ``` 596 | 597 | ![](img/6-22.png) 598 | 599 | ![](img/6-23.png) 600 | 601 | 我们的向量`y`是展开的测量值矩阵: 602 | 603 | ```py 604 | plt.figure(figsize=(8,8)) 605 | plt.imshow(np.resize(proj, (l//7,l)), cmap='gray') 606 | 607 | plt.figure(figsize=(10,10)) 608 | plt.imshow(np.tile(proj.ravel(), (20,1)).T, cmap='gray') 609 | 610 | # 611 | ``` 612 | 613 | ![](img/6-24.png) 614 | 615 | ![](img/6-25.png) 616 | 617 | # 使用 Sklearn 线性回归重构图像 618 | 619 | ```py 620 | from sklearn.linear_model import Lasso 621 | from sklearn.linear_model import Ridge 622 | 623 | # 用 L2(岭)惩罚重建 624 | rgr_ridge = Ridge(alpha=0.2) 625 | rgr_ridge.fit(proj_operator, proj.ravel()) 626 | rec_l2 = rgr_ridge.coef_.reshape(l, l) 627 | plt.imshow(rec_l2, cmap='gray') 628 | 629 | # 630 | ``` 631 | 632 | ![](img/6-26.png) 633 | 634 | ```py 635 | 18*128 636 | 637 | # 2304 638 | 639 | 18 x 128 x 128 x 128 640 | ``` 641 | 642 | ### L1 范数产生稀疏性 643 | 644 | 单位球 ![\lVert x \rVert_1 = 1](img/tex-ed968a8a7ef1411c816a603227f1044d.gif) 在 L1 范数中是菱形。 它的极值是角: 645 | 646 | ![](img/L1vsL2.jpg) 647 | 648 | > [来源](https://www.quora.com/Why-is-L1-regularization-supposed-to-lead-to-sparsity-than-L2) 649 | 650 | 类似的视角是看损失函数的轮廓: 651 | 652 | ![](img/L1vsL2_2.png) 653 | 654 | > [来源](https://www.quora.com/Why-is-L1-regularization-better-than-L2-regularization-provided-that-all-Norms-are-equivalent) 655 | 656 | ![\lVert \cdot \rVert_1](img/tex-97b82a4744248541f07907ce55b655e7.gif) 是 L1 范数。 最小化 L1 范数会产生稀疏值。 对于矩阵,L1 范数等于最大绝对列范数。 657 | 658 | ![\lVert \cdot \rVert_*](img/tex-25a2e7b9046096493cb32e8c28332cf6.gif) 是核范数,它是奇异值的 L1 范数。 试图最小化它会产生稀疏的奇异值 -> 低秩。 659 | 660 | ```py 661 | proj_operator.shape 662 | 663 | # (2304, 16384) 664 | 665 | # 使用 L1(Lasso)惩罚重建 α 的最佳值 666 | # 使用 LassoCV 交叉验证来确定 667 | rgr_lasso = Lasso(alpha=0.001) 668 | rgr_lasso.fit(proj_operator, proj.ravel()) 669 | rec_l1 = rgr_lasso.coef_.reshape(l, l) 670 | plt.imshow(rec_l1, cmap='gray') 671 | 672 | # 673 | ``` 674 | 675 | ![](img/6-27.png) 676 | 677 | 这里的 L1 惩罚明显优于 L2 惩罚! 
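除了肉眼比较,我们还可以用一个简单的数值指标来量化两种重建与原始图像的差距。下面是一个最小化的示意草稿,假设沿用上文已经定义的 `data`、`rec_l1` 和 `rec_l2`:

```py
# 示意:用 RMSE 量化 L1 / L2 重建与原始图像 data 的差距
# 假设 data、rec_l1、rec_l2 已按上文定义(data 是布尔图像,计算时会自动转换为 0/1)
rmse_l2 = np.sqrt(np.mean((data - rec_l2) ** 2))
rmse_l1 = np.sqrt(np.mean((data - rec_l1) ** 2))
print(f"Ridge (L2) 重建 RMSE: {rmse_l2:.4f}")
print(f"Lasso (L1) 重建 RMSE: {rmse_l1:.4f}")
```

如果结果与上面的图像一致,L1 重建的 RMSE 应当明显更小。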
678 | -------------------------------------------------------------------------------- /8.md: -------------------------------------------------------------------------------- 1 | # 八、如何实现线性回归 2 | 3 | 在上一课中,我们使用 scikit-learn 的实现计算了糖尿病数据集的最小二乘线性回归。 今天,我们来看看如何编写自己的实现。 4 | 5 | ## 起步 6 | 7 | ```py 8 | from sklearn import datasets, linear_model, metrics 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.preprocessing import PolynomialFeatures 11 | import math, scipy, numpy as np 12 | from scipy import linalg 13 | 14 | np.set_printoptions(precision=6) 15 | 16 | data = datasets.load_diabetes() 17 | 18 | feature_names=['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'] 19 | 20 | trn,test,y_trn,y_test = train_test_split(data.data, data.target, test_size=0.2) 21 | 22 | trn.shape, test.shape 23 | 24 | # ((353, 10), (89, 10)) 25 | 26 | def regr_metrics(act, pred): 27 | return (math.sqrt(metrics.mean_squared_error(act, pred)), 28 | metrics.mean_absolute_error(act, pred)) 29 | ``` 30 | 31 | ### sklearn 如何实现它 32 | 33 | sklearn 是如何做到这一点的? 通过检查[源代码](https://github.com/scikit-learn/scikit-learn/blob/14031f6/sklearn/linear_model/base.py#L417),你可以看到在密集的情况下,它调用[`scipy.linalg.lstsq`](https://github.com/scipy/scipy/blob/v0.19.0/scipy/linalg/basic.py#L892-L1058),后者再调用 LAPACK 方法: 34 | 35 | > 选项是`'gelsd'`,`'gelsy'`,`'gelss'`。默认值`'gelsd'`是个不错的选择,不过`'gelsy'`在许多问题上更快一些。`'gelss'`保留下来是出于历史原因,它通常更慢,但使用更少内存。 36 | 37 | + [gelsd](https://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_lapack_examples/_gelsd.htm):使用 SVD 和分治方法 38 | + [gelsy](https://software.intel.com/en-us/node/521113):使用 QR 分解 39 | + [gelss](https://software.intel.com/en-us/node/521114):使用 SVD 40 | 41 | ### Scipy 稀疏最小二乘 42 | 43 | 我们不会详细介绍稀疏版本的最小二乘法。如果你有兴趣,请参考以下信息: 44 | 45 | Scipy 稀疏最小二乘使用称为 [Golub 和 Kahan 双对角化](https://web.stanford.edu/class/cme324/paige-saunders2.pdf)的迭代方法。 46 | 47 | 来自 Scipy 稀疏最小二乘的源代码:预条件处理(preconditioning)是减少迭代次数的另一种方法。如果能够高效地求解相关系统`M*x = b`,其中`M`以某种有用的方式近似`A`(例如`M - A`具有低秩,或者其元素相对于`A`的元素较小),则 LSQR 在系统`A*M(inverse)*z = b`上收敛得更快,之后可以通过求解`M*x = z`来恢复`x`。 48 | 49 | 如果`A`是对称的,则不应使用 LSQR!替代方案是对称共轭梯度法(cg)和/或 SYMMLQ。SYMMLQ 是对称 cg 的一种实现,适用于任何对称的`A`,并且比 LSQR 收敛更快。如果`A`是正定的,则存在对称 cg 的其他实现,每次迭代需要的工作量比 SYMMLQ 少一些(但需要相同的迭代次数)。 50 | 51 | ### `linalg.lstsq` 52 | 53 | sklearn 的实现会为我们添加一个常数项(因为对于我们正在学习的直线,`y`截距可能不是 0)。 我们现在需要手工完成这一步: 54 | 55 | ```py 56 | trn_int = np.c_[trn, np.ones(trn.shape[0])] 57 | test_int = np.c_[test, np.ones(test.shape[0])] 58 | ``` 59 | 60 | 由于`linalg.lstsq`允许我们指定要使用哪个 LAPACK 例程,让我们逐一尝试并做一些时间比较: 61 | 62 | ```py 63 | %timeit coef, _,_,_ = linalg.lstsq(trn_int, y_trn, lapack_driver="gelsd") 64 | 65 | # 290 µs ± 9.24 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) 66 | 67 | %timeit coef, _,_,_ = linalg.lstsq(trn_int, y_trn, lapack_driver="gelsy") 68 | 69 | # 140 µs ± 91.7 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each) 70 | 71 | %timeit coef, _,_,_ = linalg.lstsq(trn_int, y_trn, lapack_driver="gelss") 72 | 73 | # 199 µs ± 228 ns per loop (mean ± std. dev. 
of 7 runs, 1000 loops each) 74 | ``` 75 | 76 | ## 朴素解法 77 | 78 | 回想一下,我们想找到 ![\hat x](img/tex-c243886a288804343eee2af0ad8dcebc.gif),来最小化: 79 | 80 | ![\big\vert\big\vert Ax - b \big\vert\big\vert_2](img/tex-4481186643f99234ebdfe2fef5baa9b8.gif) 81 | 82 | 另一种思考方式是,我们对向量`b`最接近`A`的子空间(称为`A`的范围)的地方感兴趣。 这是`b`在`A`上的投影。由于 ![b - A \hat x](img/tex-b21c98c8f055b92996a80407da83b1ca.gif) 必须垂直于`A`的子空间,我们可以看到: 83 | 84 | ![A^T (b - A\hat{x}) = 0](img/tex-1bc9193e34bc6ecc3df206e9fee737a1.gif) 85 | 86 | 使用了 ![A^T](img/tex-521a9daa8c1cd6293c6e22e8e8386c42.gif) 因为要相乘`A`和 ![b - A\hat{x}](img/tex-cb277091062ae1b5af8d8d6b6844c804.gif) 的每列。 87 | 88 | 这让我们得到正规方程: 89 | 90 | ![x = (A^TA)^{-1}A^T b](img/tex-39d9d43173d632ad7babc040b062f89c.gif) 91 | 92 | ```py 93 | def ls_naive(A, b): 94 | return np.linalg.inv(A.T @ A) @ A.T @ b 95 | 96 | %timeit coeffs_naive = ls_naive(trn_int, y_trn) 97 | 98 | # 45.8 µs ± 4.65 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each) 99 | 100 | coeffs_naive = ls_naive(trn_int, y_trn) 101 | regr_metrics(y_test, test_int @ coeffs_naive) 102 | 103 | # (57.94102134545707, 48.053565198516438) 104 | ``` 105 | 106 | ## 正规方程(Cholesky) 107 | 108 | 正规方程: 109 | 110 | ![A^TA x = A^T b](img/tex-0668689340173b7ece8ed05b1fef41f2.gif) 111 | 112 | 如果`A`具有满秩,则伪逆 ![(A^TA)^{-1}A^T](img/tex-9a9e4519d91963584b808251dd695f2a.gif) 是正方形,埃尔米特正定矩阵。 解决这种系统的标准方法是 Cholesky 分解,它找到上三角形`R`,满足 ![A^TA = R^TR](img/tex-4c3150fb54600e58070931bf27225aa8.gif)。 113 | 114 | 以下步骤基于 Trefethen 的算法 11.1: 115 | 116 | ```py 117 | A = trn_int 118 | 119 | b = y_trn 120 | 121 | AtA = A.T @ A 122 | Atb = A.T @ b 123 | ``` 124 | 125 | 警告:对于Cholesky,Numpy 和 Scipy 默认为不同的上/下三角。 126 | 127 | ```py 128 | R = scipy.linalg.cholesky(AtA) 129 | 130 | np.set_printoptions(suppress=True, precision=4) 131 | R 132 | 133 | ''' 134 | array([[ 0.9124, 0.1438, 0.1511, 0.3002, 0.2228, 0.188 , 135 | -0.051 , 0.1746, 0.22 , 0.2768, -0.2583], 136 | [ 0. , 0.8832, 0.0507, 0.1826, -0.0251, 0.0928, 137 | -0.3842, 0.2999, 0.0911, 0.15 , 0.4393], 138 | [ 0. , 0. , 0.8672, 0.2845, 0.2096, 0.2153, 139 | -0.2695, 0.3181, 0.3387, 0.2894, -0.005 ], 140 | [ 0. , 0. , 0. , 0.7678, 0.0762, -0.0077, 141 | 0.0383, 0.0014, 0.165 , 0.166 , 0.0234], 142 | [ 0. , 0. , 0. , 0. , 0.8288, 0.7381, 143 | 0.1145, 0.4067, 0.3494, 0.158 , -0.2826], 144 | [ 0. , 0. , 0. , 0. , 0. , 0.3735, 145 | -0.3891, 0.2492, -0.3245, -0.0323, -0.1137], 146 | [ 0. , 0. , 0. , 0. , 0. , 0. , 147 | 0.6406, -0.511 , -0.5234, -0.172 , -0.9392], 148 | [ 0. , 0. , 0. , 0. , 0. , 0. , 149 | 0. , 0.2887, -0.0267, -0.0062, 0.0643], 150 | [ 0. , 0. , 0. , 0. , 0. , 0. , 151 | 0. , 0. , 0.2823, 0.0636, 0.9355], 152 | [ 0. , 0. , 0. , 0. , 0. , 0. , 153 | 0. , 0. , 0. , 0.7238, 0.0202], 154 | [ 0. , 0. , 0. , 0. , 0. , 0. , 155 | 0. , 0. , 0. , 0. 
, 18.7319]]) 156 | ''' 157 | ``` 158 | 159 | 检查我们的分解: 160 | 161 | ```py 162 | np.linalg.norm(AtA - R.T @ R) 163 | 164 | # 4.5140158187158533e-16 165 | ``` 166 | 167 | ![A^T A x = A^T b \\ R^T R x = A^T b \\R^T w = A^T b \\R x = w](img/tex-82513c6fe20ef419fc103891436353ab.gif) 168 | 169 | ```py 170 | w = scipy.linalg.solve_triangular(R, Atb, lower=False, trans='T') 171 | ``` 172 | 173 | 检查我们的结果是否符合预期总是好的:(以防我们输入错误的参数,函数没有返回我们想要的东西,或者有时文档甚至过时)。 174 | 175 | ```py 176 | np.linalg.norm(R.T @ w - Atb) 177 | 178 | # 1.1368683772161603e-13 179 | 180 | coeffs_chol = scipy.linalg.solve_triangular(R, w, lower=False) 181 | 182 | np.linalg.norm(R @ coeffs_chol - w) 183 | 184 | # 6.861429794408013e-14 185 | 186 | def ls_chol(A, b): 187 | R = scipy.linalg.cholesky(A.T @ A) 188 | w = scipy.linalg.solve_triangular(R, A.T @ b, trans='T') 189 | return scipy.linalg.solve_triangular(R, w) 190 | 191 | %timeit coeffs_chol = ls_chol(trn_int, y_trn) 192 | 193 | # 111 µs ± 272 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each) 194 | 195 | coeffs_chol = ls_chol(trn_int, y_trn) 196 | regr_metrics(y_test, test_int @ coeffs_chol) 197 | 198 | # (57.9410213454571, 48.053565198516438) 199 | ``` 200 | 201 | ## QR 分解 202 | 203 | ![A x = b \\ A = Q R \\ Q R x = b \\ R x = Q^T b](img/tex-d45f0a6200f855ffbed7dbbf8dc6094b.gif) 204 | 205 | ```py 206 | def ls_qr(A,b): 207 | Q, R = scipy.linalg.qr(A, mode='economic') 208 | return scipy.linalg.solve_triangular(R, Q.T @ b) 209 | 210 | %timeit coeffs_qr = ls_qr(trn_int, y_trn) 211 | 212 | # 205 µs ± 264 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each) 213 | 214 | coeffs_qr = ls_qr(trn_int, y_trn) 215 | regr_metrics(y_test, test_int @ coeffs_qr) 216 | 217 | # (57.94102134545711, 48.053565198516452) 218 | ``` 219 | 220 | ### SVD 221 | 222 | ![A x = b \\ A = U \Sigma V \\ \Sigma V x = U^T b \\ \Sigma w = U^T b \\ x = V^T w](img/tex-4b62075d74bf4d4e462ede239a6dea12.gif) 223 | 224 | SVD 给出伪逆。 225 | 226 | ```py 227 | def ls_svd(A,b): 228 | m, n = A.shape 229 | U, sigma, Vh = scipy.linalg.svd(A, full_matrices=False) 230 | w = (U.T @ b)/ sigma 231 | return Vh.T @ w 232 | 233 | %timeit coeffs_svd = ls_svd(trn_int, y_trn) 234 | 235 | # 1.11 ms ± 320 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each) 236 | 237 | %timeit coeffs_svd = ls_svd(trn_int, y_trn) 238 | 239 | # 266 µs ± 8.49 µs per loop (mean ± std. dev. 
of 7 runs, 1000 loops each) 240 | 241 | coeffs_svd = ls_svd(trn_int, y_trn) 242 | regr_metrics(y_test, test_int @ coeffs_svd) 243 | 244 | # (57.941021345457244, 48.053565198516687) 245 | ``` 246 | 247 | ### 最小二乘回归的随机 Sketching 技巧 248 | 249 | [线性 Sketching](http://researcher.watson.ibm.com/researcher/files/us-dpwoodru/journal.pdf)(Woodruff) 250 | 251 | + 抽取`r×n`随机矩阵`S`,`r << n` 252 | + 计算`S A`和`S b` 253 | + 找到回归`SA x = Sb`的精确解`x` 254 | 255 | ### 时间比较 256 | 257 | ```py 258 | import timeit 259 | import pandas as pd 260 | 261 | def scipylstq(A, b): 262 | return scipy.linalg.lstsq(A,b)[0] 263 | 264 | row_names = ['Normal Eqns- Naive', 265 | 'Normal Eqns- Cholesky', 266 | 'QR Factorization', 267 | 'SVD', 268 | 'Scipy lstsq'] 269 | 270 | name2func = {'Normal Eqns- Naive': 'ls_naive', 271 | 'Normal Eqns- Cholesky': 'ls_chol', 272 | 'QR Factorization': 'ls_qr', 273 | 'SVD': 'ls_svd', 274 | 'Scipy lstsq': 'scipylstq'} 275 | 276 | m_array = np.array([100, 1000, 10000]) 277 | n_array = np.array([20, 100, 1000]) 278 | 279 | index = pd.MultiIndex.from_product([m_array, n_array], names=['# rows', '# cols']) 280 | 281 | pd.options.display.float_format = '{:,.6f}'.format 282 | df = pd.DataFrame(index=row_names, columns=index) 283 | df_error = pd.DataFrame(index=row_names, columns=index) 284 | 285 | # %%prun 286 | for m in m_array: 287 | for n in n_array: 288 | if m >= n: 289 | x = np.random.uniform(-10,10,n) 290 | A = np.random.uniform(-40,40,[m,n]) # removed np.asfortranarray 291 | b = np.matmul(A, x) + np.random.normal(0,2,m) 292 | for name in row_names: 293 | fcn = name2func[name] 294 | t = timeit.timeit(fcn + '(A,b)', number=5, globals=globals()) 295 | df.set_value(name, (m,n), t) 296 | coeffs = locals()[fcn](A, b) 297 | reg_met = regr_metrics(b, A @ coeffs) 298 | df_error.set_value(name, (m,n), reg_met[0]) 299 | 300 | df 301 | ``` 302 | 303 | | # rows | 100 | | | 1000 | | | 10000 | | | 304 | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | 305 | | # cols | 20 | 100 | 1000 | 20 | 100 | 1000 | 20 | 100 | 1000 | 306 | | Normal Eqns- Naive | 0.001276 | 0.003634 | NaN | 0.000960 | 0.005172 | 0.293126 | 0.002226 | 0.021248 | 1.164655 | 307 | | Normal Eqns- Cholesky | 0.001660 | 0.003958 | NaN | 0.001665 | 0.004007 | 0.093696 | 0.001928 | 0.010456 | 0.399464 | 308 | | QR Factorization | 0.002174 | 0.006486 | NaN | 0.004235 | 0.017773 | 0.213232 | 0.019229 | 0.116122 | 2.208129 | 309 | | SVD | 0.003880 | 0.021737 | NaN | 0.004672 | 0.026950 | 1.280490 | 0.018138 | 0.130652 | 3.433003 | 310 | | Scipy lstsq | 0.004338 | 0.020198 | NaN | 0.004320 | 0.021199 | 1.083804 | 0.012200 | 0.088467 | 2.134780 | 311 | 312 | ```py 313 | df_error 314 | ``` 315 | 316 | 317 | | # rows | 100 | | | 1000 | | | 10000 | | | 318 | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | 319 | | # cols | 20 | 100 | 1000 | 20 | 100 | 1000 | 20 | 100 | 1000 | 320 | | Normal Eqns- Naive | 1.702742 | 0.000000 | NaN | 1.970767 | 1.904873 | 0.000000 | 1.978383 | 1.980449 | 1.884440 | 321 | | Normal Eqns- Cholesky | 1.702742 | 0.000000 | NaN | 1.970767 | 1.904873 | 0.000000 | 1.978383 | 1.980449 | 1.884440 | 322 | | QR Factorization | 1.702742 | 0.000000 | NaN | 1.970767 | 1.904873 | 0.000000 | 1.978383 | 1.980449 | 1.884440 | 323 | | SVD | 1.702742 | 0.000000 | NaN | 1.970767 | 1.904873 | 0.000000 | 1.978383 | 1.980449 | 1.884440 | 324 | | Scipy lstsq | 1.702742 | 0.000000 | NaN | 1.970767 | 1.904873 | 0.000000 | 1.978383 | 1.980449 | 1.884440 | 325 | 326 | ```py 327 | store = pd.HDFStore('least_squares_results.h5') 328 
| store['df'] = df 329 | 330 | ''' 331 | C:\Users\rache\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:2881: PerformanceWarning: 332 | your performance may suffer as PyTables will pickle object types that it cannot 333 | map directly to c-types [inferred_type->floating,key->block0_values] [items->[(100, 20), (100, 100), (100, 1000), (1000, 20), (1000, 100), (1000, 1000), (5000, 20), (5000, 100), (5000, 1000)]] 334 | 335 | exec(code_obj, self.user_global_ns, self.user_ns) 336 | ''' 337 | ``` 338 | 339 | ### 注解 340 | 341 | 我用魔术指令`%prun`来测量我的代码。 342 | 343 | 替代方案:最小绝对偏差(L1 回归) 344 | 345 | + 异常值的敏感度低于最小二乘法。 346 | + 没有闭式解,但可以通过线性规划解决。 347 | 348 | ### 条件作用和稳定性 349 | 350 | ### 条件数 351 | 352 | 条件数是一个指标,衡量输入的小变化导致输出变化的程度。 353 | 354 | 问题:为什么我们在数值线性代数中,关心输入的小变化的相关行为? 355 | 356 | 相对条件数由下式定义: 357 | 358 | ![\kappa = \sup_{\delta x} \frac{\|\delta f\|}{\| f(x) \|}\bigg/ \frac{\| \delta x \|}{\| x \|}](img/tex-af9ef2d9d428087a16ff027ddd2a254f.gif) 359 | 360 | 其中 ![\delta x](img/tex-58b1e9f4b9a86690cb106e65a265a34f.gif) 是无穷小。 361 | 362 | 根据 Trefethen(第 91 页),如果`κ`很小(例如 `1, 10, 10^2`),问题是良态的,如果`κ`很大(例如`10^6, 10^16`),那么问题是病态的。 363 | 364 | 条件作用:数学问题的扰动行为(例如最小二乘) 365 | 366 | 稳定性:用于在计算机上解决该问题的算法的扰动行为(例如,最小二乘算法,householder,回代,高斯消除) 367 | 368 | ### 条件作用的例子 369 | 370 | 计算非对称矩阵的特征值的问题通常是病态的。 371 | 372 | ```py 373 | A = [[1, 1000], [0, 1]] 374 | B = [[1, 1000], [0.001, 1]] 375 | 376 | wA, vrA = scipy.linalg.eig(A) 377 | wB, vrB = scipy.linalg.eig(B) 378 | 379 | wA, wB 380 | 381 | ''' 382 | (array([ 1.+0.j, 1.+0.j]), 383 | array([ 2.00000000e+00+0.j, -2.22044605e-16+0.j])) 384 | ''' 385 | ``` 386 | 387 | ### 矩阵的条件数 388 | 389 | 乘积 ![\| A\| \|A^{-1} \|](img/tex-5ca635990dbca0e677a639ea78c11b33.gif) 经常出现,它有自己的名字:`A`的条件数。注意,通常我们谈论问题的条件作用,而不是矩阵。 390 | 391 | `A`的条件数涉及: 392 | 393 | + 给定`Ax = b`中的`A`和`x`,计算`b` 394 | + 给定`Ax = b`中的`A`和`b`,计算`x` 395 | 396 | ### 未交待清楚的事情 397 | 398 | ### 完整和简化分解 399 | 400 | ### SVD 401 | 402 | 来自 Trefethen 的图: 403 | 404 | ![](img/full_svd.JPG) 405 | 406 | ![](img/reduced_svd.JPG) 407 | 408 | ### 对于所有矩阵,QR 分解都存在 409 | 410 | 与 SVD 一样,有 QR 分解的完整版和简化版。 411 | 412 | ![](img/full_qr.JPG) 413 | 414 | ![](img/reduced_qr.JPG) 415 | 416 | ### 矩阵的逆是不稳定的 417 | 418 | ```py 419 | from scipy.linalg import hilbert 420 | 421 | n = 5 422 | hilbert(n) 423 | 424 | ''' 425 | array([[ 1. , 0.5 , 0.3333, 0.25 , 0.2 ], 426 | [ 0.5 , 0.3333, 0.25 , 0.2 , 0.1667], 427 | [ 0.3333, 0.25 , 0.2 , 0.1667, 0.1429], 428 | [ 0.25 , 0.2 , 0.1667, 0.1429, 0.125 ], 429 | [ 0.2 , 0.1667, 0.1429, 0.125 , 0.1111]]) 430 | ''' 431 | 432 | n = 14 433 | A = hilbert(n) 434 | x = np.random.uniform(-10,10,n) 435 | b = A @ x 436 | 437 | A_inv = np.linalg.inv(A) 438 | 439 | np.linalg.norm(np.eye(n) - A @ A_inv) 440 | 441 | # 5.0516495470543212 442 | 443 | np.linalg.cond(A) 444 | 445 | # 2.2271635826494112e+17 446 | 447 | A @ A_inv 448 | 449 | ''' 450 | array([[ 1. , 0. , -0.0001, 0.0005, -0.0006, 0.0105, -0.0243, 451 | 0.1862, -0.6351, 2.2005, -0.8729, 0.8925, -0.0032, -0.0106], 452 | [ 0. , 1. , -0. , 0. , 0.0035, 0.0097, -0.0408, 453 | 0.0773, -0.0524, 1.6926, -0.7776, -0.111 , -0.0403, -0.0184], 454 | [ 0. , 0. , 1. , 0.0002, 0.0017, 0.0127, -0.0273, 455 | 0. , 0. , 1.4688, -0.5312, 0.2812, 0.0117, 0.0264], 456 | [ 0. , 0. , -0. , 1.0005, 0.0013, 0.0098, -0.0225, 457 | 0.1555, -0.0168, 1.1571, -0.9656, -0.0391, 0.018 , -0.0259], 458 | [-0. , 0. , -0. , 0.0007, 1.0001, 0.0154, 0.011 , 459 | -0.2319, 0.5651, -0.2017, 0.2933, -0.6565, 0.2835, -0.0482], 460 | [ 0. , -0. , 0. 
, -0.0004, 0.0059, 0.9945, -0.0078, 461 | -0.0018, -0.0066, 1.1839, -0.9919, 0.2144, -0.1866, 0.0187], 462 | [-0. , 0. , -0. , 0.0009, -0.002 , 0.0266, 0.974 , 463 | -0.146 , 0.1883, -0.2966, 0.4267, -0.8857, 0.2265, -0.0453], 464 | [ 0. , 0. , -0. , 0.0002, 0.0009, 0.0197, -0.0435, 465 | 1.1372, -0.0692, 0.7691, -1.233 , 0.1159, -0.1766, -0.0033], 466 | [ 0. , 0. , -0. , 0.0002, 0. , -0.0018, -0.0136, 467 | 0.1332, 0.945 , 0.3652, -0.2478, -0.1682, 0.0756, -0.0212], 468 | [ 0. , -0. , -0. , 0.0003, 0.0038, -0.0007, 0.0318, 469 | -0.0738, 0.2245, 1.2023, -0.2623, -0.2783, 0.0486, -0.0093], 470 | [-0. , 0. , -0. , 0.0004, -0.0006, 0.013 , -0.0415, 471 | 0.0292, -0.0371, 0.169 , 1.0715, -0.09 , 0.1668, -0.0197], 472 | [ 0. , -0. , 0. , 0. , 0.0016, 0.0062, -0.0504, 473 | 0.1476, -0.2341, 0.8454, -0.7907, 1.4812, -0.15 , 0.0186], 474 | [ 0. , -0. , 0. , -0.0001, 0.0022, 0.0034, -0.0296, 475 | 0.0944, -0.1833, 0.6901, -0.6526, 0.2556, 0.8563, 0.0128], 476 | [ 0. , 0. , 0. , -0.0001, 0.0018, -0.0041, -0.0057, 477 | -0.0374, -0.165 , 0.3968, -0.2264, -0.1538, -0.0076, 1.005 ]]) 478 | ''' 479 | 480 | row_names = ['Normal Eqns- Naive', 481 | 'QR Factorization', 482 | 'SVD', 483 | 'Scipy lstsq'] 484 | 485 | name2func = {'Normal Eqns- Naive': 'ls_naive', 486 | 'QR Factorization': 'ls_qr', 487 | 'SVD': 'ls_svd', 488 | 'Scipy lstsq': 'scipylstq'} 489 | 490 | pd.options.display.float_format = '{:,.9f}'.format 491 | df = pd.DataFrame(index=row_names, columns=['Time', 'Error']) 492 | 493 | for name in row_names: 494 | fcn = name2func[name] 495 | t = timeit.timeit(fcn + '(A,b)', number=5, globals=globals()) 496 | coeffs = locals()[fcn](A, b) 497 | df.set_value(name, 'Time', t) 498 | df.set_value(name, 'Error', regr_metrics(b, A @ coeffs)[0]) 499 | ``` 500 | 501 | ### SVD 在这里最好 502 | 503 | 不要重新运行。 504 | 505 | ```py 506 | df 507 | ``` 508 | 509 | 510 | | | Time | Error | 511 | | --- | --- | --- | 512 | | Normal Eqns- Naive | 0.001334339 | 3.598901966 | 513 | | QR Factorization | 0.002166139 | 0.000000000 | 514 | | SVD | 0.001556937 | 0.000000000 | 515 | | Scipy lstsq | 0.001871590 | 0.000000000 | 516 | 517 | 即使`A`是稀疏的,![A^{-1}](img/tex-1ff4e7c4ea49e4f89fcea2a90968d87f.gif) 通常是密集的。对于大型矩阵,![A^{-1}](img/tex-1ff4e7c4ea49e4f89fcea2a90968d87f.gif) 放不进内存。 518 | 519 | ## 运行时间 520 | 521 | + 矩阵求逆:![2n^3](img/tex-212090d41dd7136dd288934d2aa0addd.gif) 522 | + 矩阵乘法:![n^3](img/tex-25ebb7d03839869698867bbbf0a9932a.gif) 523 | + Cholesky:![1/3 n^3](img/tex-e2c7c210aaacf3c6067aab0b96f6caa6.gif) 524 | + QR,Gram Schmidt:![2mn^2](img/tex-8104a2c0754d95be6fc08c624a869f51.gif),![m \ge n](img/tex-9856f83106c82c34de1010bf61297500.gif)(Trefethen 第 8 章) 525 | + QR,Householder:2![mn^2-2/3 n^3](img/tex-b6515bc4c1610897c8eb15e9a7c41f8a.gif)(Trefethen 第 10 章) 526 | + 求解三角形系统:![n^2](img/tex-6595d679e306a127a3fe53268bcaddb2.gif) 527 | 528 | 为什么 Cholesky 较快: 529 | 530 | ![](img/cholesky_factorization_speed.png) 531 | 532 | > 来源:[斯坦福凸优化:数值线性代数背景幻灯片](http://stanford.edu/class/ee364a/lectures/num-lin-alg.pdf) 533 | 534 | ### QR 最优的一个案例 535 | 536 | ```py 537 | m=100 538 | n=15 539 | t=np.linspace(0, 1, m) 540 | 541 | # 范德蒙矩阵 542 | A=np.stack([t**i for i in range(n)], 1) 543 | b=np.exp(np.sin(4*t)) 544 | 545 | # 这将使解决方案标准化为 1 546 | b /= 2006.787453080206 547 | 548 | from matplotlib import pyplot as plt 549 | %matplotlib inline 550 | 551 | plt.plot(t, b) 552 | 553 | # [] 554 | ``` 555 | 556 | ![](img/8-1.png) 557 | 558 | 检查我们得到了 1: 559 | 560 | ```py 561 | 1 - ls_qr(A, b)[14] 562 | 563 | # 1.4137685733217609e-07 564 | ``` 565 | 566 | 不好的条件数: 567 | 
568 | ```py 569 | kappa = np.linalg.cond(A); kappa 570 | 571 | # 5.827807196683593e+17 572 | 573 | row_names = ['Normal Eqns- Naive', 574 | 'QR Factorization', 575 | 'SVD', 576 | 'Scipy lstsq'] 577 | 578 | name2func = {'Normal Eqns- Naive': 'ls_naive', 579 | 'QR Factorization': 'ls_qr', 580 | 'SVD': 'ls_svd', 581 | 'Scipy lstsq': 'scipylstq'} 582 | 583 | pd.options.display.float_format = '{:,.9f}'.format 584 | df = pd.DataFrame(index=row_names, columns=['Time', 'Error']) 585 | 586 | for name in row_names: 587 | fcn = name2func[name] 588 | t = timeit.timeit(fcn + '(A,b)', number=5, globals=globals()) 589 | coeffs = locals()[fcn](A, b) 590 | df.set_value(name, 'Time', t) 591 | df.set_value(name, 'Error', np.abs(1 - coeffs[-1])) 592 | 593 | df 594 | ``` 595 | 596 | 597 | | | Time | Error | 598 | | --- | --- | --- | 599 | | Normal Eqns- Naive | 0.001565099 | 1.357066025 | 600 | | QR Factorization | 0.002632104 | 0.000000116 | 601 | | SVD | 0.003503785 | 0.000000116 | 602 | | Scipy lstsq | 0.002763502 | 0.000000116 | 603 | 604 | 通过正规方程求解最小二乘的解决方案通常是不稳定的,尽管对于小条件数的问题是稳定的。 605 | 606 | ### 低秩 607 | 608 | ```py 609 | m = 100 610 | n = 10 611 | x = np.random.uniform(-10,10,n) 612 | A2 = np.random.uniform(-40,40, [m, int(n/2)]) # removed np.asfortranarray 613 | A = np.hstack([A2, A2]) 614 | 615 | A.shape, A2.shape 616 | 617 | # ((100, 10), (100, 5)) 618 | 619 | b = A @ x + np.random.normal(0,1,m) 620 | 621 | row_names = ['Normal Eqns- Naive', 622 | 'QR Factorization', 623 | 'SVD', 624 | 'Scipy lstsq'] 625 | 626 | name2func = {'Normal Eqns- Naive': 'ls_naive', 627 | 'QR Factorization': 'ls_qr', 628 | 'SVD': 'ls_svd', 629 | 'Scipy lstsq': 'scipylstq'} 630 | 631 | pd.options.display.float_format = '{:,.9f}'.format 632 | df = pd.DataFrame(index=row_names, columns=['Time', 'Error']) 633 | 634 | for name in row_names: 635 | fcn = name2func[name] 636 | t = timeit.timeit(fcn + '(A,b)', number=5, globals=globals()) 637 | coeffs = locals()[fcn](A, b) 638 | df.set_value(name, 'Time', t) 639 | df.set_value(name, 'Error', regr_metrics(b, A @ coeffs)[0]) 640 | 641 | df 642 | ``` 643 | 644 | | | Time | Error | 645 | | --- | --- | --- | 646 | | Normal Eqns- Naive | 0.001227640 | 300.658979382 | 647 | | QR Factorization | 0.002315920 | 0.876019803 | 648 | | SVD | 0.001745647 | 1.584746056 | 649 | | Scipy lstsq | 0.002067989 | 0.804750398 | 650 | 651 | ## 比较 652 | 653 | 比较我们的结果和上面: 654 | 655 | ```py 656 | df 657 | ``` 658 | 659 | | # rows | 100 | | | 1000 | | | 10000 | | | 660 | | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | 661 | | # cols | 20 | 100 | 1000 | 20 | 100 | 1000 | 20 | 100 | 1000 | 662 | | Normal Eqns- Naive | 0.001276 | 0.003634 | NaN | 0.000960 | 0.005172 | 0.293126 | 0.002226 | 0.021248 | 1.164655 | 663 | | Normal Eqns- Cholesky | 0.001660 | 0.003958 | NaN | 0.001665 | 0.004007 | 0.093696 | 0.001928 | 0.010456 | 0.399464 | 664 | | QR Factorization | 0.002174 | 0.006486 | NaN | 0.004235 | 0.017773 | 0.213232 | 0.019229 | 0.116122 | 2.208129 | 665 | | SVD | 0.003880 | 0.021737 | NaN | 0.004672 | 0.026950 | 1.280490 | 0.018138 | 0.130652 | 3.433003 | 666 | | Scipy lstsq | 0.004338 | 0.020198 | NaN | 0.004320 | 0.021199 | 1.083804 | 0.012200 | 0.088467 | 2.134780 | 667 | 668 | 来自 Trefethen(第 84 页): 669 | 670 | 正规方程式/ Cholesky 在生效时速度最快。 Cholesky 只能用于对称正定矩阵。 此外,对于具有高条件数或具有低秩的矩阵,正规方程/ Cholesky 是不稳定的。 671 | 672 | 数值分析师推荐通过 QR 进行线性回归,作为多年的标准方法。 它自然,优雅,适合“日常使用”。 673 | -------------------------------------------------------------------------------- /3.md: 
-------------------------------------------------------------------------------- 1 | # 三、使用 NMF 和 SVD 的主题建模 2 | 3 | 主题建模是开始使用矩阵分解的好方法。 我们从术语 - 文档矩阵开始: 4 | 5 | ![](img/document_term.png) 6 | 7 | > 来源:[信息检索导论](http://player.slideplayer.com/15/4528582/#) 8 | 9 | 我们可以将其分解为一个高的窄矩阵乘以一个宽的扁矩阵(中间可能有对角矩阵)。 10 | 11 | 请注意,此表示不考虑单词顺序或句子结构。 这是一个词袋的例子。 12 | 13 | ### 动机 14 | 15 | 考虑最极端的情况 - 使用两个向量的外积重建矩阵。 显然,在大多数情况下,我们无法准确地重建矩阵。 但是,如果我们有一个向量,带有每个单词在所有单词中的相对频率,而另一个向量具有每个文档的平均单词数,那么外积将尽可能接近。 16 | 17 | 现在考虑将矩阵增加到两列和两行。 现在最佳分解是将文档聚类成两组,每组具有尽可能彼此不同的单词分布,但在簇中的文档中尽可能相似。 我们将这两个组称为“主题”。 我们会根据每个主题中最常出现的词汇将这些词汇分为两组。 18 | 19 | ### 今天的课程中 20 | 21 | 我们将采用几个不同类别的文档数据集,并为它们查找主题(由单词组组成)。 了解实际类别有助于我们评估我们发现的主题是否有意义。 22 | 23 | 我们将尝试使用两种不同的矩阵因式分解:奇异值分解(SVD)和非负矩阵分解(NMF)。 24 | 25 | ```py 26 | import numpy as np 27 | from sklearn.datasets import fetch_20newsgroups 28 | from sklearn import decomposition 29 | from scipy import linalg 30 | import matplotlib.pyplot as plt 31 | 32 | %matplotlib inline 33 | np.set_printoptions(suppress=True) 34 | ``` 35 | 36 | ## 附加资源 37 | 38 | + [数据源](http://scikit-learn.org/stable/datasets/twenty_newsgroups.html):新闻组是 Usenet 上的讨论组,它在网络真正起飞之前的 80 年代和 90 年代很流行。 该数据集包括 18,000 个新闻组帖子,带有 20 个主题。 39 | + [Chris Manning 的矩阵分解和 LSI 的书](https://nlp.stanford.edu/IR-book/pdf/18lsi.pdf) 40 | + Sklearn 的[截断 SVD LSI 的细节](http://scikit-learn.org/stable/modules/decomposition.html#lsa) 41 | 42 | ### 其它教程 43 | 44 | + [Scikit-Learn:文本文档的核外分类](http://scikit-learn.org/stable/auto_examples/applications/plot_out_of_core_classification.html):使用 [Reuters-21578](https://archive.ics.uci.edu/ml/datasets/reuters-21578+text+categorization+collection) 数据集(标有 ~100 个类别的路透社文章),`HashingVectorizer` 45 | + [使用人文和社会科学主题模型进行文本分析](https://de.dariah.eu/tatom/index.html):使用 Jane Austen,Charlotte Bronte,Victor Hugo 等人的[英国和法国文学数据集](https://de.dariah.eu/tatom/datasets.html) 46 | 47 | ## 建立数据 48 | 49 | Scikit Learn 附带了许多内置数据集,以及加载工具来加载多个标准外部数据集。 这是一个很好的资源,数据集包括波士顿房价,人脸图像,森林斑块,糖尿病,乳腺癌等。 我们将使用新闻组数据集。 50 | 51 | 新闻组是 Usenet 上的讨论组,它在网络真正起飞之前的 80 年代和 90 年代很流行。 该数据集包括 18,000 个新闻组帖子,带有 20 个主题。 52 | 53 | ```py 54 | categories = ['alt.atheism', 'talk.religion.misc', 'comp.graphics', 'sci.space'] 55 | remove = ('headers', 'footers', 'quotes') 56 | newsgroups_train = fetch_20newsgroups(subset='train', categories=categories, remove=remove) 57 | newsgroups_test = fetch_20newsgroups(subset='test', categories=categories, remove=remove) 58 | 59 | newsgroups_train.filenames.shape, newsgroups_train.target.shape 60 | 61 | # ((2034,), (2034,)) 62 | ``` 63 | 64 | 我们来看看一些数据。 你能猜出这些消息属于哪个类别? 65 | 66 | ```py 67 | print("\n".join(newsgroups_train.data[:3])) 68 | 69 | ''' 70 | Hi, 71 | 72 | I've noticed that if you only save a model (with all your mapping planes 73 | positioned carefully) to a .3DS file that when you reload it after restarting 74 | 3DS, they are given a default position and orientation. But if you save 75 | to a .PRJ file their positions/orientation are preserved. Does anyone 76 | know why this information is not stored in the .3DS file? Nothing is 77 | explicitly said in the manual about saving texture rules in the .PRJ file. 78 | I'd like to be able to read the texture rule information, does anyone have 79 | the format for the .PRJ file? 80 | 81 | Is the .CEL file format available from somewhere? 82 | 83 | Rych 84 | 85 | 86 | Seems to be, barring evidence to the contrary, that Koresh was simply 87 | another deranged fanatic who thought it neccessary to take a whole bunch of 88 | folks with him, children and all, to satisfy his delusional mania. 
Jim 89 | Jones, circa 1993. 90 | 91 | 92 | Nope - fruitcakes like Koresh have been demonstrating such evil corruption 93 | for centuries. 94 | 95 | >In article <1993Apr19.020359.26996@sq.sq.com>, msb@sq.sq.com (Mark Brader) 96 | 97 | MB> So the 98 | MB> 1970 figure seems unlikely to actually be anything but a perijove. 99 | 100 | JG>Sorry, _perijoves_...I'm not used to talking this language. 101 | 102 | Couldn't we just say periapsis or apoapsis? 103 | ''' 104 | ``` 105 | 106 | 提示:perijove 的定义是离木星中心最近的木星卫星轨道上的点。 107 | 108 | ```py 109 | np.array(newsgroups_train.target_names)[newsgroups_train.target[:3]] 110 | 111 | ''' 112 | array(['comp.graphics', 'talk.religion.misc', 'sci.space'], 113 | dtype=' 来源:[Facebook 研究:快速随机 SVD](https://research.fb.com/fast-randomized-svd/) 170 | 171 | SVD 是精确分解,因为它产生的矩阵足够大,完全覆盖原始矩阵。 SVD 在线性代数中的使用非常广泛,特别是在数据科学中,包括: 172 | 173 | + 语义分析 174 | + 协同过滤/推荐(获的 Netflix 奖项) 175 | + 计算 Moore-Penrose 伪逆 176 | + 数据压缩 177 | + 主成分分析(将在后面介绍) 178 | 179 | ```py 180 | %time U, s, Vh = linalg.svd(vectors, full_matrices=False) 181 | 182 | ''' 183 | CPU times: user 1min 4s, sys: 8.82 s, total: 1min 13s 184 | Wall time: 13.3 s 185 | ''' 186 | 187 | print(U.shape, s.shape, Vh.shape) 188 | 189 | # (2034, 2034) (2034,) (2034, 26576) 190 | ``` 191 | 192 | 确认这是输入的分解。 193 | 194 | ### 答案 195 | 196 | ```py 197 | # 练习:确认 U,s,Vh 是 var 向量的分解 198 | 199 | # True 200 | ``` 201 | 202 | 确认`U, V`正交。 203 | 204 | ### 答案 205 | 206 | ```py 207 | # 练习:确认`U, V`正交 208 | 209 | # True 210 | ``` 211 | 212 | ### 主题 213 | 214 | 关于奇异值`s`我们能说什么? 215 | 216 | ```py 217 | plt.plot(s); 218 | ``` 219 | 220 | ![](img/3-1.png) 221 | 222 | ```py 223 | plt.plot(s[:10]) 224 | 225 | # [] 226 | ``` 227 | 228 | ![](img/3-2.png) 229 | 230 | ```py 231 | num_top_words=8 232 | 233 | def show_topics(a): 234 | top_words = lambda t: [vocab[i] for i in np.argsort(t)[:-num_top_words-1:-1]] 235 | topic_words = ([top_words(t) for t in a]) 236 | return [' '.join(t) for t in topic_words] 237 | 238 | show_topics(Vh[:10]) 239 | 240 | ''' 241 | ['critus ditto propagandist surname galacticentric kindergarten surreal imaginative', 242 | 'jpeg gif file color quality image jfif format', 243 | 'graphics edu pub mail 128 3d ray ftp', 244 | 'jesus god matthew people atheists atheism does graphics', 245 | 'image data processing analysis software available tools display', 246 | 'god atheists atheism religious believe religion argument true', 247 | 'space nasa lunar mars probe moon missions probes', 248 | 'image probe surface lunar mars probes moon orbit', 249 | 'argument fallacy conclusion example true ad argumentum premises', 250 | 'space larson image theory universe physical nasa material'] 251 | ''' 252 | ``` 253 | 254 | 我们得到的主题匹配我们期望的簇的类型! 
尽管事实上这是一个无监督的算法 - 也就是说,我们从未真正告诉算法我们的文档是如何分组的。 255 | 256 | 稍后我们将更详细地回顾 SVD。 目前,重要的一点是我们有一个工具可以让我们将矩阵精确地分解为正交列和正交行。 257 | 258 | ## 非负矩阵分解(NMF) 259 | 260 | ### 动机 261 | 262 | ![](img/face_pca.png) 263 | 264 | > 来源:[NMF 教程](http://perso.telecom-paristech.fr/~essid/teach/NMF_tutorial_ICME-2014.pdf) 265 | 266 | 更加可解释的方法: 267 | 268 | ![](img/face_outputs.png) 269 | 270 | > 来源:[NMF 教程](http://perso.telecom-paristech.fr/~essid/teach/NMF_tutorial_ICME-2014.pdf) 271 | 272 | 273 | ### 理念 274 | 275 | 不是将我们的因式限制为正交,而是另一种想法将它们限制为非负的。 NMF 是非负数据集`V`到非负矩阵`W`,`H`的因子分解: 276 | 277 | ``` 278 | V = WH 279 | ``` 280 | 281 | 通常正因式会更容易解释(这也是 NMF 受欢迎的原因)。 282 | 283 | ![](img/face_nmf.png) 284 | 285 | > 来源:[NMF 教程](http://perso.telecom-paristech.fr/~essid/teach/NMF_tutorial_ICME-2014.pdf) 286 | 287 | 288 | 非负矩阵分解(NMF)是一种非精确因子分解,它将因子分解为一个窄的和一个短的正矩阵。 NMF 是 NP 难和不唯一的。 它有许多变体,通过添加不同的约束来创建。 289 | 290 | ### NMF 的应用 291 | 292 | + [人脸分解](http://scikit-learn.org/stable/auto_examples/decomposition/plot_faces_decomposition.html#sphx-glr-auto-examples-decomposition-plot-faces-decomposition-py) 293 | + [协同过滤,例如电影推荐](http://www.quuxlabs.com/blog/2010/09/matrix-factorization-a-simple-tutorial-and-implementation-in-python/) 294 | + [音频源分离](https://pdfs.semanticscholar.org/cc88/0b24791349df39c5d9b8c352911a0417df34.pdf) 295 | + [化学](http://ieeexplore.ieee.org/document/1532909/) 296 | + [生物信息学](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-015-0485-4)和[基因表达](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2623306/) 297 | + 主题建模(我们的问题!) 298 | 299 | ![](img/nmf_doc.png) 300 | 301 | > 来源:[NMF 教程](http://perso.telecom-paristech.fr/~essid/teach/NMF_tutorial_ICME-2014.pdf) 302 | 303 | ### 阅读更多 304 | 305 | + [非负矩阵分解的原因和方法](https://arxiv.org/pdf/1401.5226.pdf) 306 | 307 | ### 来自 sklearn 的 NMF 308 | 309 | 首先我们使用 [sklearn 的 NMF 实现](http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html): 310 | 311 | ```py 312 | m,n=vectors.shape 313 | d=5 # 主题数量 314 | 315 | clf = decomposition.NMF(n_components=d, random_state=1) 316 | 317 | W1 = clf.fit_transform(vectors) 318 | H1 = clf.components_ 319 | 320 | show_topics(H1) 321 | 322 | ''' 323 | ['jpeg image gif file color images format quality', 324 | 'edu graphics pub mail 128 ray ftp send', 325 | 'space launch satellite nasa commercial satellites year market', 326 | 'jesus matthew prophecy people said messiah david isaiah', 327 | 'image data available software processing ftp edu analysis', 328 | 'god atheists atheism religious believe people religion does'] 329 | ''' 330 | ``` 331 | 332 | ### TF-IDF 333 | 334 | 主题频率 - 逆文档频率(TF-IDF)是一种规范术语计数的方法,通过考虑它们在文档中出现的频率,文档的持续时间以及该术语的常见/稀有程度。 335 | 336 | TF =(文档中术语`t`的出现次数)/(文档中的单词数) 337 | 338 | IDF = log(文档数量/包含术语`t`的文档数) 339 | 340 | ```py 341 | vectorizer_tfidf = TfidfVectorizer(stop_words='english') 342 | vectors_tfidf = vectorizer_tfidf.fit_transform(newsgroups_train.data) # (documents, vocab) 343 | 344 | W1 = clf.fit_transform(vectors_tfidf) 345 | H1 = clf.components_ 346 | 347 | show_topics(H1) 348 | 349 | ''' 350 | ['don people just think like know say religion', 351 | 'thanks graphics files image file program windows format', 352 | 'space nasa launch shuttle orbit lunar moon earth', 353 | 'ico bobbe tek beauchaine bronx manhattan sank queens', 354 | 'god jesus bible believe atheism christian does belief', 355 | 'objective morality values moral subjective science absolute claim'] 356 | ''' 357 | 358 | plt.plot(clf.components_[0]) 359 | 360 | # [] 361 | ``` 362 | 363 | ![](img/3-3.png) 364 | 365 | ```py 366 | 
clf.reconstruction_err_ 367 | 368 | # 43.71292605795277 369 | ``` 370 | 371 | ### NMF 总结 372 | 373 | 优点:快速且易于使用! 374 | 375 | 缺点:需要多年的研究和专业知识才能创建 376 | 377 | 注意: 378 | 379 | + 对于 NMF,矩阵高度需要至少与宽度一样,否则我们会遇到`fit_transform`的错误 380 | + 可以在`CountVectorizer`中使用`df_min`来仅查看至少`k`个分割文本中的单词 381 | 382 | ### 使用 SGD 在 NumPy 中从零开始编写 NMF 383 | 384 | ### 梯度下降 385 | 386 | 标准梯度下降的关键思想: 387 | 388 | + 随机选择一些权重来开始 389 | + 循环: 390 | + 使用权重来计算预测 391 | + 计算损失的导数 392 | + 更新权重 393 | + 多次重复步骤 2。最终我们得到了一些不错的权重。 394 | 395 | 关键:我们希望减少损失,导数告诉我们最陡的下降方向。 396 | 397 | 请注意,损失,误差和成本都是用于描述相同内容的术语。 398 | 399 | 让我们来看看[梯度下降导论笔记本](https://nbviewer.jupyter.org/github/fastai/numerical-linear-algebra-v2/blob/master/nbs/gradient-descent-intro.ipynb)(最初来自 [fast.ai 深度学习课程](https://github.com/fastai/courses))。 400 | 401 | ### 随机梯度下降(SGD) 402 | 403 | 随机梯度下降是一种非常有用的优化方法(它也是深度学习的核心,它用于反向传播)。 404 | 405 | 对于标准梯度下降,我们使用所有数据来计算损失,可能非常慢。 在随机梯度下降中,我们仅根据我们的数据样本(有时称为小批量)来计算我们的损失函数。 我们会在不同的数据样本上得到不同的损失值,因此这就是随机的原因。 事实证明,这仍然是一种有效的优化方式,而且效率更高! 406 | 407 | 我们可以在这个[电子表格](https://nbviewer.jupyter.org/github/fastai/numerical-linear-algebra-v2/blob/master/nbs/graddesc.xlsm)中看到它是如何工作的(最初来自 [fast.ai 深度学习课程](https://github.com/fastai/courses))。 408 | 409 | 资源: 410 | 411 | + [来自 Andrew Ng 的 Coursera ML 课程的 SGD 讲座](https://www.coursera.org/learn/machine-learning/lecture/DoRHJ/stochastic-gradient-descent) 412 | + [fast.ai wiki 页面上的 SGD](http://wiki.fast.ai/index.php/Stochastic_Gradient_Descent_(SGD)) 413 | + [机器学习的梯度下降(Jason Brownlee - Machine Learning Mastery)](http://machinelearningmastery.com/gradient-descent-for-machine-learning/) 414 | + [梯度下降优化算法概述](http://sebastianruder.com/optimizing-gradient-descent/) 415 | 416 | ### 对 NMF 应用 SGD 417 | 418 | 目标:将`V(mxn)`分解为: 419 | 420 | ``` 421 | V ≈ WH 422 | ``` 423 | 424 | 其中`W(mxd)`和`H(dxn)`,`W, H >= 0`,我们最小化`V − WH` 的 Frobenius 范式。 425 | 426 | 方法:我们将随机选择正的`W`和`H`,然后使用 SGD 进行优化。 427 | 428 | 要使用 SGD,我们需要知道损失函数的梯度。 429 | 430 | 资料来源: 431 | 432 | + [NMF 的优化性和梯度](http://users.wfu.edu/plemmons/papers/chu_ple.pdf) 433 | + [投影梯度](https://www.csie.ntu.edu.tw/~cjlin/papers/pgradnmf.pdf) 434 | 435 | ```py 436 | lam=1e3 437 | lr=1e-2 438 | m, n = vectors_tfidf.shape 439 | 440 | W1 = clf.fit_transform(vectors) 441 | H1 = clf.components_ 442 | 443 | show_topics(H1) 444 | 445 | ''' 446 | ['jpeg image gif file color images format quality', 447 | 'edu graphics pub mail 128 ray ftp send', 448 | 'space launch satellite nasa commercial satellites year market', 449 | 'jesus matthew prophecy people said messiah david isaiah', 450 | 'image data available software processing ftp edu analysis', 451 | 'god atheists atheism religious believe people religion does'] 452 | ''' 453 | 454 | mu = 1e-6 455 | def grads(M, W, H): 456 | R = W@H-M 457 | return R@H.T + penalty(W, mu)*lam, W.T@R + penalty(H, mu)*lam # dW, dH 458 | 459 | def penalty(M, mu): 460 | return np.where(M>=mu,0, np.min(M - mu, 0)) 461 | 462 | def upd(M, W, H, lr): 463 | dW,dH = grads(M,W,H) 464 | W -= lr*dW; H -= lr*dH 465 | 466 | def report(M,W,H): 467 | print(np.linalg.norm(M-W@H), W.min(), H.min(), (W<0).sum(), (H<0).sum()) 468 | 469 | W = np.abs(np.random.normal(scale=0.01, size=(m,d))) 470 | H = np.abs(np.random.normal(scale=0.01, size=(d,n))) 471 | 472 | report(vectors_tfidf, W, H) 473 | 474 | # 44.4395133509 5.67503308167e-07 2.49717354504e-07 0 0 475 | 476 | upd(vectors_tfidf,W,H,lr) 477 | 478 | report(vectors_tfidf, W, H) 479 | 480 | # 44.4194155587 -0.00186845669883 -0.000182969569359 509 788 481 | 482 | for i in range(50): 483 | upd(vectors_tfidf,W,H,lr) 484 | if i % 10 == 
0: report(vectors_tfidf,W,H) 485 | 486 | ''' 487 | 44.4071645597 -0.00145791197281 -0.00012862260312 343 1174 488 | 44.352156176 -0.000549676823494 -9.16363641124e-05 218 4689 489 | 44.3020593384 -0.000284017335617 -0.000130903875061 165 9685 490 | 44.2468609535 -0.000279317810433 -0.000182173029912 169 16735 491 | 44.199218 -0.000290092649623 -0.000198140867356 222 25109 492 | ''' 493 | 494 | show_topics(H) 495 | 496 | ''' 497 | ['cview file image edu files use directory temp', 498 | 'moral like time does don software new years', 499 | 'god jesus bible believe objective exist atheism belief', 500 | 'thanks graphics program know help looking windows advance', 501 | 'space nasa launch shuttle orbit station moon lunar', 502 | 'people don said think ico tek bobbe bronx'] 503 | ''' 504 | ``` 505 | 506 | 这种训练非常缓慢! 大量的参数要调整,但仍然很慢(或爆炸)。 507 | 508 | ### PyTorch 509 | 510 | [PyTorch](http://pytorch.org/) 是一个用于张量和动态神经网络的 Python 框架,具有 GPU 加速功能。 许多核心贡献者都在 Facebook 的 AI 团队中工作。 在许多方面,它与 Numpy 类似,只是增加了使用 GPU 的并行化。 511 | 512 | 从 [PyTorch 文档](http://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html)来看: 513 | 514 | ![](img/what_is_pytorch.png) 515 | 516 | 进一步学习:如果你想了解动态神经网络是什么,你可能希望观看 Facebook AI 研究员和 PyTorch 核心贡献者 Soumith Chintala 的[演讲](https://www.youtube.com/watch?v=Z15cBAuY7Sc)。 517 | 518 | 如果你想更加了解 PyTorch,你可以尝试[本教程](http://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html)或[通过示例学习](http://pytorch.org/tutorials/beginner/pytorch_with_examples.html)。 519 | 520 | GPU 的注意事项:如果你不使用GPU,则需要从以下方法中删除`.cuda()`。 本课程不需要使用 GPU,但我认为有些人会感兴趣。 要了解如何使用 GPU 创建 AWS 实例,你可以观看 [fast.ai 的配置课程](http://course.fast.ai/lessons/aws.html)。 521 | 522 | ```py 523 | import torch 524 | import torch.cuda as tc 525 | from torch.autograd import Variable 526 | 527 | def V(M): return Variable(M, requires_grad=True) 528 | 529 | v=vectors_tfidf.todense() 530 | 531 | t_vectors = 532 | torch.Tensor(v.astype(np.float32)).cuda() 533 | 534 | mu = 1e-5 535 | 536 | def grads_t(M, W, H): 537 | R = W.mm(H)-M 538 | return (R.mm(H.t()) + penalty_t(W, mu)*lam, 539 | W.t().mm(R) + penalty_t(H, mu)*lam) # dW, dH 540 | 541 | def penalty_t(M, mu): 542 | return (M] 591 | ``` 592 | 593 | ![](img/3-4.png) 594 | 595 | ```py 596 | t_W.mm(t_H).max() 597 | 598 | 0.43389660120010376 599 | 600 | t_vectors.max() 601 | 602 | 0.9188119769096375 603 | ``` 604 | 605 | ### PyTorch:自动梯度 606 | 607 | 在上面,我们使用了我们的损失函数梯度的知识,在 PyTorch 中从零编写 SGD。 但是,PyTorch 有一个自动梯度包,我们可以使用它。 这非常有用,因为我们可以在我们不知道导数是什么的问题上使用自动梯度。 608 | 609 | 我们在下面使用的方法非常通用,几乎可以用于任何优化问题。 610 | 611 | 在 PyTorch 中,变量与张量具有相同的 API,但变量记住了用于创建它们的操作。 这让我们可以求导。 612 | 613 | ### PyTorch 自动梯度简介 614 | 615 | 示例从官方文档中的[本教程](http://pytorch.org/tutorials/beginner/former_torchies/autograd_tutorial.html)获取。 616 | 617 | ```py 618 | x = Variable(torch.ones(2, 2), requires_grad=True) 619 | print(x) 620 | 621 | ''' 622 | Variable containing: 623 | 1 1 624 | 1 1 625 | [torch.FloatTensor of size 2x2] 626 | ''' 627 | 628 | print(x.data) 629 | 630 | ''' 631 | 1 1 632 | 1 1 633 | [torch.FloatTensor of size 2x2] 634 | ''' 635 | 636 | print(x.grad) 637 | 638 | ''' 639 | Variable containing: 640 | 0 0 641 | 0 0 642 | [torch.FloatTensor of size 2x2] 643 | ''' 644 | 645 | y = x + 2 646 | print(y) 647 | 648 | ''' 649 | Variable containing: 650 | 3 3 651 | 3 3 652 | [torch.FloatTensor of size 2x2] 653 | ''' 654 | 655 | z = y * y * 3 656 | out = z.sum() 657 | print(z, out) 658 | 659 | ''' 660 | Variable containing: 661 | 27 27 662 | 27 27 663 | [torch.FloatTensor of size 2x2] 664 | Variable containing: 665 | 108 666 | 
[torch.FloatTensor of size 1] 667 | ''' 668 | 669 | out.backward() 670 | print(x.grad) 671 | 672 | ''' 673 | Variable containing: 674 | 18 18 675 | 18 18 676 | [torch.FloatTensor of size 2x2] 677 | ''' 678 | ``` 679 | 680 | ### 对 NMF 使用自动梯度 681 | 682 | ```py 683 | lam=1e6 684 | 685 | pW = Variable(tc.FloatTensor(m,d), requires_grad=True) 686 | pH = Variable(tc.FloatTensor(d,n), requires_grad=True) 687 | pW.data.normal_(std=0.01).abs_() 688 | pH.data.normal_(std=0.01).abs_(); 689 | 690 | def report(): 691 | W,H = pW.data, pH.data 692 | print((M-pW.mm(pH)).norm(2).data[0], W.min(), H.min(), (W<0).sum(), (H<0).sum()) 693 | 694 | def penalty(A): 695 | return torch.pow((A<0).type(tc.FloatTensor)*torch.clamp(A, max=0.), 2) 696 | 697 | def penalize(): return penalty(pW).mean() + penalty(pH).mean() 698 | 699 | def loss(): return (M-pW.mm(pH)).norm(2) + penalize()*lam 700 | 701 | M = Variable(t_vectors).cuda() 702 | 703 | opt = torch.optim.Adam([pW,pH], lr=1e-3, betas=(0.9,0.9)) 704 | lr = 0.05 705 | report() 706 | 707 | # 43.66044616699219 -0.0002547535696066916 -0.00046720390673726797 319 8633 708 | ``` 709 | 710 | 使用自动梯度,如何应用 SGD: 711 | 712 | ```py 713 | for i in range(1000): 714 | opt.zero_grad() 715 | l = loss() 716 | l.backward() 717 | opt.step() 718 | if i % 100 == 99: 719 | report() 720 | lr *= 0.9 # 学习率衰减 721 | 722 | ''' 723 | 43.628597259521484 -0.022899555042386055 -0.26526615023612976 692 82579 724 | 43.62860107421875 -0.021287493407726288 -0.2440912425518036 617 77552 725 | 43.628597259521484 -0.020111067220568657 -0.22828206419944763 576 77726 726 | 43.628604888916016 -0.01912039890885353 -0.21654289960861206 553 84411 727 | 43.62861251831055 -0.018248897045850754 -0.20736189186573029 544 75546 728 | 43.62862014770508 -0.01753264293074608 -0.19999365508556366 491 78949 729 | 43.62862777709961 -0.016773322597146034 -0.194113627076149 513 83822 730 | 43.628639221191406 -0.01622121036052704 -0.18905577063560486 485 74101 731 | 43.62863540649414 -0.01574397087097168 -0.18498440086841583 478 85987 732 | 43.628639221191406 -0.015293922275304794 -0.18137598037719727 487 74023 733 | ''' 734 | 735 | h = pH.data.cpu().numpy() 736 | show_topics(h) 737 | 738 | ''' 739 | ['god jesus bible believe atheism christian belief does', 740 | 'thanks graphics files image file windows program format', 741 | 'just don think people like ve graphics religion', 742 | 'objective morality values moral subjective science absolute claim', 743 | 'ico bobbe tek beauchaine bronx manhattan sank queens', 744 | 'space nasa shuttle launch orbit lunar moon data'] 745 | ''' 746 | plt.plot(h[0]); 747 | ``` 748 | 749 | ![](img/3-5.png) 750 | 751 | ### 比较方法 752 | 753 | Scikit-Learn 的 NMF 754 | 755 | + 快 756 | + 没有参数调整 757 | + 依靠几十年的学术研究,花了专家很长时间来实现 758 | 759 | ![](img/nimfa.png) 760 | 761 | > 来源:[Python Numfa 文档](http://nimfa.biolab.si/) 762 | 763 | 使用 PyTorch 和 SGD 764 | 765 | + 花了我们一个小时来实现,没有必要成为 NMF 的专家 766 | + 参数很繁琐 767 | + 没有那么快(在 numpy 中尝试过,我们不得不切换到 PyTorch) 768 | 769 | ## 截断 SVD 770 | 771 | 当我们计算 NMF 时,通过计算我们感兴趣的列的子集,我们节省了大量时间。有没有办法通过 SVD 获得这个好处? 就在这里! 它被称为截断 SVD。 我们只对与最大奇异值对应的向量感兴趣。 772 | 773 | ![](img/svd_fb.png) 774 | 775 | > 来源:[Facebook 研究:快速随机 SVD](https://research.fb.com/fast-randomized-svd/) 776 | 777 | ### 经典算法对于分解的缺陷 778 | 779 | + 矩阵“非常大” 780 | + 数据通常缺失或不准确。 当输入的不精确限制输出的精度时,为什么要花费额外的计算资源? 
781 | + 数据传输现在在算法时间中起主要作用。 即使需要更多的浮点运算(flop),需要更少数据传递的技术也可以极大加速。 782 | + 重要的是利用 GPU。 783 | 784 | > 来源:[Halko](https://arxiv.org/abs/0909.4061) 785 | 786 | ### 随机算法的优势 787 | 788 | + 内在稳定 789 | + 性能保证不依赖于细微的光谱特性 790 | + 所需的矩阵向量积可以并行完成 791 | 792 | > 来源:[Halko](https://arxiv.org/abs/0909.4061) 793 | --------------------------------------------------------------------------------
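作为上面“随机算法的优势”的一个落地示意,下面的小草稿(假设沿用上文的 `vectors`、`vocab` 和 `show_topics`)用 sklearn 的随机化截断 SVD 只计算前几个奇异向量,而不做完整分解:

```py
from sklearn import decomposition

# 随机化截断 SVD:只计算前 10 个分量,比完整 SVD 便宜得多
tsvd = decomposition.TruncatedSVD(n_components=10, algorithm='randomized', random_state=0)
tsvd.fit(vectors)

# components_ 的每一行近似对应完整 SVD 中 Vh 的前几行,可以复用 show_topics 查看主题
show_topics(tsvd.components_)
```

sklearn 中 `algorithm='randomized'` 正是基于 Halko 等人的这类随机化方法实现的。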