├── .gitignore ├── LICENSE ├── README.md ├── labs ├── homework-1 │ ├── 01_assignment-original.zip │ └── assignment-01 │ │ ├── HM1_Canny.py │ │ ├── HM1_Convolve.py │ │ ├── HM1_HarrisCorner.py │ │ ├── HM1_RANSAC.py │ │ ├── HM1_ransac_points.txt │ │ ├── Lenna.png │ │ ├── hand_writting.png │ │ ├── pack.py │ │ ├── result │ │ ├── HM1_Canny_result.png │ │ ├── HM1_Convolve_img_blur.png │ │ ├── HM1_Convolve_img_gadient_x.png │ │ ├── HM1_Convolve_img_gadient_y.png │ │ ├── HM1_Convolve_replicate_pad.txt │ │ ├── HM1_Convolve_result_1.txt │ │ ├── HM1_Convolve_result_2.txt │ │ ├── HM1_Convolve_zero_pad.txt │ │ ├── HM1_HarrisCorner.png │ │ ├── HM1_RANSAC_fig.png │ │ └── HM1_RANSAC_plane.txt │ │ ├── test-as_strided.ipynb │ │ └── utils.py ├── homework-2 │ ├── 02_assignment-original.zip │ └── 02_assignment │ │ ├── HW1_BP.py │ │ ├── README.md │ │ ├── batch_normalization │ │ └── bn.py │ │ ├── cifar-10 │ │ ├── Lenna.png │ │ ├── dataset.py │ │ ├── network.py │ │ ├── train.py │ │ └── util.py │ │ ├── mnist_subset │ │ ├── 0.png │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── 4.png │ │ ├── 5.png │ │ ├── 6.png │ │ ├── 7.png │ │ ├── 8.png │ │ └── 9.png │ │ ├── pack.py │ │ └── results │ │ ├── Best performance of my model.png │ │ ├── HW1_BP.txt │ │ ├── Lenna.png │ │ ├── Lenna_aug1.png │ │ ├── Lenna_aug2.png │ │ ├── bn_loss.txt │ │ ├── different learning rates.png │ │ └── network_structure.png ├── homework-3 │ ├── 03_assignment-original.zip │ └── 03_assignment │ │ ├── README.md │ │ ├── camera_calibr │ │ ├── back_image.npy │ │ ├── calibr.ipynb │ │ ├── front.png │ │ └── front_image.npy │ │ ├── depth_pc │ │ ├── aligned_full_pc.txt │ │ ├── depth.png │ │ ├── depth_pc.ipynb │ │ ├── intrinsic.npy │ │ ├── raw_full_pc.txt │ │ ├── render.py │ │ ├── seg.png │ │ └── spot.obj │ │ ├── marching_cube │ │ ├── data │ │ │ ├── bob_cell.npy │ │ │ └── spot_cell.npy │ │ ├── lookup_table.py │ │ ├── marching_cube.ipynb │ │ └── render_marching_cube.py │ │ ├── mesh_pc │ │ ├── earthmover.py │ │ ├── fps_vis.txt │ │ ├── mesh_pc.ipynb │ │ ├── spot.obj │ │ └── uniform_sampling_vis.txt │ │ ├── pack.py │ │ └── results │ │ ├── bob.obj │ │ ├── calibr.npy │ │ ├── fps_results.npy │ │ ├── fps_results.png │ │ ├── marching_cube_bob_result.png │ │ ├── marching_cube_snob_result.png │ │ ├── metrics.npy │ │ ├── one_way_CD.txt │ │ ├── pc_from_depth.txt │ │ ├── spot.obj │ │ ├── uniform_sampling_results.npy │ │ └── uniform_sampling_results.png └── homework-4 │ ├── 04_assignment-original.zip │ └── 04_assignment │ ├── MaskRCNN │ ├── dataset.py │ ├── engine.py │ ├── results │ │ ├── 0_data.png │ │ ├── 0_result.png │ │ ├── 1_data.png │ │ ├── 1_result.png │ │ ├── 2_data.png │ │ ├── 2_result.png │ │ ├── 3_data.png │ │ └── 3_result.png │ ├── train.py │ ├── utils.py │ └── visualize.py │ ├── PointNet │ ├── dataset.py │ ├── feature_vis.py │ ├── model.py │ ├── modelnet10_id.txt │ ├── num_seg_classes.txt │ ├── results │ │ ├── 0.png │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── classification-256.png │ │ ├── classification1024.png │ │ └── segmentation.png │ ├── train_classficiation.py │ ├── train_segmentation.py │ └── utils.py │ ├── README.md │ ├── RNN │ ├── check_rnn.py │ ├── check_single_rnn_layer.py │ ├── results │ │ ├── pred_train_0.png │ │ ├── pred_train_1.png │ │ ├── pred_val_0.png │ │ ├── pred_val_1.png │ │ ├── reference_attention.npy │ │ ├── reference_rnn.npy │ │ ├── reference_single_rnn_layer.npy │ │ ├── rnn.npy │ │ ├── rnn_loss_history.png │ │ └── single_rnn_layer.npy │ ├── rnn.py │ ├── rnn_layers.py │ ├── train_rnn.py │ └── utils │ │ ├── captioning_solver.py │ │ ├── coco_utils.py │ │ └── optim.py │ └── pack.py ├── notes ├── README.md ├── main-latest.pdf ├── src-ele │ ├── 00.images_as_funcitons.tex │ ├── 01.edge_detection.tex │ ├── 02.keypoint_detection.tex │ ├── 03.line_fitting.tex │ ├── 04.CNN.tex │ ├── 04a.deeplearning.tex │ ├── 05.CNN_training.tex │ ├── 06.CNN_improvement.tex │ ├── 07.classification.tex │ ├── 08.CNN_for_classification.tex │ ├── 09.segmentation.tex │ ├── 10.3Dvision.tex │ ├── 11.camera_carlibration.tex │ ├── 12.single_view_geometry.tex │ ├── 13.epipolar_geometry.tex │ ├── 14.3D_data.tex │ ├── 15.3D_deep_learning.tex │ ├── 16.Sequential_Modeling.tex │ ├── 17.video_analysis.tex │ ├── 18.Transformer.tex │ ├── 19.object_detection_and_instance_segmentation.tex │ ├── 20.generative_model.tex │ ├── 21.pose_and_motion.tex │ ├── 22.Instance_Level_6D_Object_Pose_Estimation.tex │ ├── 23.motion.tex │ ├── 24.Embodied_AI.tex │ ├── 25.Summary_of_Computer_Vision.tex │ ├── DOF_and_rank.tex │ ├── appendix-QRDecomposition.tex │ ├── condition-number.tex │ ├── elegantbook.cls │ ├── figure │ │ ├── cover.jpg │ │ └── logo-blue.png │ ├── figures │ │ ├── 3DCNN.png │ │ ├── 6d_object_pose.png │ │ ├── BNsize.png │ │ ├── BinarizationviaThresholding.png │ │ ├── DETR.png │ │ ├── DiscreteConvolution.png │ │ ├── EF.png │ │ ├── Embodied_Multimodal_Large_Model.png │ │ ├── FourierTransform.png │ │ ├── GroupNorm.png │ │ ├── KLdiv.png │ │ ├── LF_FC.png │ │ ├── LF_pool.png │ │ ├── Mnistdataset.png │ │ ├── NMS.png │ │ ├── NormalizationTechniques.png │ │ ├── PointNet.png │ │ ├── PointNet_structure.png │ │ ├── RCNN_classification.png │ │ ├── RNN_grad_van.png │ │ ├── ROI_align.png │ │ ├── ResNet集成神经网络解释.png │ │ ├── RoI_pool.png │ │ ├── RoI_pool2.png │ │ ├── Screenshot_loss.png │ │ ├── Screenshot_mybest.png │ │ ├── Screenshot_mybest_resnet.png │ │ ├── UNetstructure.png │ │ ├── VAE.png │ │ ├── VAE_2.png │ │ ├── VisualizingImageGradient.png │ │ ├── YOLO.png │ │ ├── activationfunc.png │ │ ├── adam.png │ │ ├── ae.png │ │ ├── anchor.jpg │ │ ├── angle_between_line.png │ │ ├── attention.png │ │ ├── autoencoder.png │ │ ├── bilinear.png │ │ ├── camata-pose.png │ │ ├── chainrule.png │ │ ├── corner.png │ │ ├── corner_energy.png │ │ ├── corner_map.png │ │ ├── corners.png │ │ ├── cover.jpg │ │ ├── cv_tasks.png │ │ ├── ddgg.png │ │ ├── deep-test-err.png │ │ ├── edge.png │ │ ├── epi-constrain-2.png │ │ ├── epi-constrain.png │ │ ├── epi-geo-2pic.png │ │ ├── epipolargeometry.png │ │ ├── fid.png │ │ ├── g.png │ │ ├── g2.png │ │ ├── general_atten.png │ │ ├── generalgap.png │ │ ├── generative_model.png │ │ ├── grad_var.png │ │ ├── holes.png │ │ ├── horizon.png │ │ ├── hough1.png │ │ ├── hough2.png │ │ ├── image_attentoin.png │ │ ├── image_nocs_pose.png │ │ ├── image_plane.png │ │ ├── image_self_atten.png │ │ ├── image_seq2seq.png │ │ ├── learning_rate_schedule.png │ │ ├── light_invariant.png │ │ ├── logo-blue.png │ │ ├── loss1.png │ │ ├── loss2.png │ │ ├── lstm_grad_flow.png │ │ ├── marching_cube1.png │ │ ├── marching_cube2.png │ │ ├── mgtest.png │ │ ├── ministdataset.png │ │ ├── mlp.png │ │ ├── msgd1.png │ │ ├── msgd2.png │ │ ├── multilayer_rnn.png │ │ ├── not_roboust_outliner.png │ │ ├── nsloss.png │ │ ├── paralle_q.png │ │ ├── parallel-image-plane.png │ │ ├── pic_1.png │ │ ├── pinholecamera.png │ │ ├── pointnet++.png │ │ ├── pos_encoding.png │ │ ├── property-of-f-mat.png │ │ ├── rcnn_speed_comparison.png │ │ ├── rcnn_vs_frcnn.png │ │ ├── receptivefield.png │ │ ├── recu_CNN.png │ │ ├── recur_CNN.png │ │ ├── recur_CNN_detail.png │ │ ├── residual_network.png │ │ ├── rnn-seqdata.png │ │ ├── rnn.png │ │ ├── sensors.png │ │ ├── seq2seq.png │ │ ├── sigmoid.png │ │ ├── simple_NMS.png │ │ ├── simple_NMS_2.png │ │ ├── simple_VAE.png │ │ ├── single_layer_issue.png │ │ ├── single_obj_det.png │ │ ├── sparsenet.png │ │ ├── strangeknowledge.png │ │ ├── tradeoff-in-gauss-filtering.png │ │ ├── transform_all.png │ │ ├── transformer.png │ │ ├── triangulation.png │ │ ├── truncate_bp.png │ │ ├── two_stage_detector.png │ │ ├── valley.png │ │ ├── vanilla_rnn.png │ │ ├── vanishingpoints.png │ │ ├── video_cmp.png │ │ ├── vpanddir.png │ │ ├── weak_perspective.png │ │ ├── window-function.png │ │ ├── window_moving.png │ │ ├── word_model.png │ │ └── wrongDA.png │ ├── main.bcf │ ├── main.py │ ├── main.run.xml │ ├── main.synctex.gz │ ├── main.synctex.gz.sum.synctex │ ├── main.tex │ ├── now.bcf │ ├── now.run.xml │ ├── now.synctex.gz │ ├── now.tex │ ├── package.tex │ ├── package2.tex │ ├── preface.tex │ ├── reference.bib │ └── transformation-in-space.tex └── src │ ├── 00.images_as_funcitons.tex │ ├── 01.edge_detection.tex │ ├── 02.keypoint_detection.tex │ ├── 03.line_fitting.tex │ ├── 04.CNN.tex │ ├── 05.CNN_training.tex │ ├── 06.CNN_improvement.tex │ ├── 07.classification.tex │ ├── 08.CNN_for_classification.tex │ ├── 09.segmentation.tex │ ├── 10.3Dvision.tex │ ├── 11.camera_carlibration.tex │ ├── 12.single_view_geometry.tex │ ├── 13.epipolar_geometry.tex │ ├── 14.3D_data.tex │ ├── 15.3D_deep_learning.tex │ ├── 16.Sequential_Modeling.tex │ ├── 17.video_analysis.tex │ ├── 18.Transformer.tex │ ├── 19.object_detection_and_instance_segmentation.tex │ ├── 20.generative_model.tex │ ├── 21.pose_and_motion.tex │ ├── 22.Instance_Level_6D_Object_Pose_Estimation.tex │ ├── 23.motion.tex │ ├── 24.Embodied_AI.tex │ ├── 25.Summary_of_Computer_Vision.tex │ ├── CV_notes copy.bib │ ├── CV_notes.bib │ ├── CV_notes.synctex.gz │ ├── CV_notes.tex │ ├── DOF_and_rank.tex │ ├── appendix-QRDecomposition.tex │ ├── condition-number.tex │ ├── figures │ ├── 3DCNN.png │ ├── 6d_object_pose.png │ ├── BNsize.png │ ├── BinarizationviaThresholding.png │ ├── DETR.png │ ├── DiscreteConvolution.png │ ├── EF.png │ ├── Embodied_Multimodal_Large_Model.png │ ├── FourierTransform.png │ ├── GroupNorm.png │ ├── KLdiv.png │ ├── LF_FC.png │ ├── LF_pool.png │ ├── NMS.png │ ├── NormalizationTechniques.png │ ├── PointNet.png │ ├── PointNet_structure.png │ ├── RCNN_classification.png │ ├── RNN_grad_van.png │ ├── ROI_align.png │ ├── ResNet集成神经网络解释.png │ ├── RoI_pool.png │ ├── RoI_pool2.png │ ├── Screenshot_loss.png │ ├── Screenshot_mybest.png │ ├── Screenshot_mybest_resnet.png │ ├── UNetstructure.png │ ├── VAE.png │ ├── VAE_2.png │ ├── VisualizingImageGradient.png │ ├── YOLO.png │ ├── adam.png │ ├── ae.png │ ├── anchor.jpg │ ├── angle_between_line.png │ ├── attention.png │ ├── autoencoder.png │ ├── bilinear.png │ ├── camata-pose.png │ ├── corner_map.png │ ├── cover.jpg │ ├── cv_tasks.png │ ├── ddgg.png │ ├── deep-test-err.png │ ├── epi-constrain-2.png │ ├── epi-constrain.png │ ├── epi-geo-2pic.png │ ├── epipolargeometry.png │ ├── fid.png │ ├── g.png │ ├── g2.png │ ├── general_atten.png │ ├── generalgap.png │ ├── generative_model.png │ ├── grad_var.png │ ├── holes.png │ ├── horizon.png │ ├── hough1.png │ ├── hough2.png │ ├── image_attentoin.png │ ├── image_nocs_pose.png │ ├── image_plane.png │ ├── image_self_atten.png │ ├── image_seq2seq.png │ ├── learning_rate_schedule.png │ ├── light_invariant.png │ ├── logo-blue.png │ ├── lstm_grad_flow.png │ ├── marching_cube1.png │ ├── marching_cube2.png │ ├── mgtest.png │ ├── msgd1.png │ ├── msgd2.png │ ├── multilayer_rnn.png │ ├── not_roboust_outliner.png │ ├── nsloss.png │ ├── paralle_q.png │ ├── parallel-image-plane.png │ ├── pic_1.png │ ├── pinholecamera.png │ ├── pointnet++.png │ ├── pos_encoding.png │ ├── property-of-f-mat.png │ ├── rcnn_speed_comparison.png │ ├── rcnn_vs_frcnn.png │ ├── receptivefield.png │ ├── recu_CNN.png │ ├── recur_CNN.png │ ├── recur_CNN_detail.png │ ├── residual_network.png │ ├── rnn-seqdata.png │ ├── rnn.png │ ├── sensors.png │ ├── seq2seq.png │ ├── simple_NMS.png │ ├── simple_VAE.png │ ├── single_obj_det.png │ ├── sparsenet.png │ ├── strangeknowledge.png │ ├── transform_all.png │ ├── transformer.png │ ├── triangulation.png │ ├── truncate_bp.png │ ├── two_stage_detector.png │ ├── valley.png │ ├── vanilla_rnn.png │ ├── vanishingpoints.png │ ├── video_cmp.png │ ├── vpanddir.png │ ├── weak_perspective.png │ ├── window_moving.png │ ├── word_model.png │ └── wrongDA.png │ ├── now.synctex.gz.sum.synctex │ ├── now.tex │ ├── package.tex │ ├── preface.tex │ └── transformation-in-space.tex ├── resources ├── cheatsheet │ ├── cheatsheet-final.pdf │ └── cheatsheet-midterm.pdf └── 往年题 │ ├── 2022期中-部分答案.pdf │ └── 2022期末-部分答案.pdf └── slides └── readme.txt /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 EmptyBlue 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # News 2 | Notes maintenance for 2025 by [Jing Xu](iculizhi.github.io) and [Arthals](https://arthals.ink/) live updates. Version of 2024 is in branch `2024`. 3 | - latest notes are in `notes/src-ele`, this is because I refactored the previous code into the elegantbook template, so that some nice looking components can be used later. 4 | 5 | # Introduction to Computer Vision Repository 6 | 7 | - Course: Introduction to Computer Vision 8 | - Lecturer: He Wang 9 | - Year: 2024 Spring 10 | - Department: School of EECS, PKU 11 | 12 | Welcome to my [Course Review page](https://www.lyt0112.com/blog/course_review-zh) 13 | 14 | ## Contents 15 | 16 | 1. Slides (not available, Copyright belongs to Professor Wang He of Peking University.) 17 | 2. Course notes (Latex) 18 | 1. This note was originally completed by 林晓疏 (pen name) during class in the spring semester of 2022 19 | 2. I have adapted this to the latest course content for the Spring 2024 semester and added the latest content 20 | 3. You can get a compiled `.pdf` file from `Releases` or compile it yourself 21 | 3. Labs 22 | 4. Cheatsheets 23 | 1. Some contents are referenced from https://github.com/PkuCuipy/icv-assignments 24 | -------------------------------------------------------------------------------- /labs/homework-1/01_assignment-original.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-1/01_assignment-original.zip -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/HM1_HarrisCorner.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from utils import read_img, draw_corner 3 | from HM1_Convolve import convolve, Sobel_filter_x, Sobel_filter_y, padding 4 | 5 | 6 | def corner_response_function(input_img, window_size, alpha, threshold): 7 | """ 8 | The function you need to implement for Q3. 9 | Inputs: 10 | input_img: array(float) 11 | window_size: int 12 | alpha: float 13 | threshold: float 14 | Outputs: 15 | corner_list: array 16 | """ 17 | 18 | # please solve the corner_response_function of each window, 19 | # and keep windows with theta > threshold. 20 | # you can use several functions from HM1_Convolve to get 21 | # I_xx, I_yy, I_xy as well as the convolution result. 22 | # for detials of corner_response_function, please refer to the slides. 23 | 24 | t = 15 25 | 26 | grad_x = Sobel_filter_x(input_img) 27 | grad_y = Sobel_filter_y(input_img) 28 | 29 | grad_x_square = grad_x**2 30 | grad_y_square = grad_y**2 31 | grad_xy = grad_x * grad_y 32 | 33 | rectangle_kernel = np.ones((window_size, window_size)) 34 | 35 | M_xx = convolve(grad_x_square, rectangle_kernel) 36 | M_yy = convolve(grad_y_square, rectangle_kernel) 37 | M_xy = convolve(grad_xy, rectangle_kernel) 38 | 39 | M_det = M_xx * M_yy - M_xy**2 40 | M_trace = M_xx + M_yy 41 | theta = M_det - alpha * M_trace**2-t 42 | theta_mask = theta > threshold 43 | 44 | index_list = np.argwhere(theta_mask) 45 | theta_list = theta[index_list[:, 0], index_list[:, 1]][:, None] 46 | 47 | corner_list = np.concatenate((index_list, theta_list), axis=1) 48 | return corner_list # array, each row contains information about one corner, namely (index of row, index of col, theta) 49 | 50 | 51 | if __name__ == "__main__": 52 | 53 | # Load the input images 54 | input_img = read_img("hand_writting.png")/255. 55 | 56 | # you can adjust the parameters to fit your own implementation 57 | window_size = 5 58 | alpha = 0.04 59 | threshold = 10 60 | 61 | corner_list = corner_response_function(input_img, window_size, alpha, threshold) 62 | 63 | # NMS 64 | corner_list_sorted = sorted(corner_list, key=lambda x: x[2], reverse=True) 65 | NML_selected = [] 66 | NML_selected.append(corner_list_sorted[0][:-1]) 67 | dis = 10 68 | for i in corner_list_sorted: 69 | for j in NML_selected: 70 | if (abs(i[0] - j[0]) <= dis and abs(i[1] - j[1]) <= dis): 71 | break 72 | else: 73 | NML_selected.append(i[:-1]) 74 | 75 | # save results 76 | draw_corner("hand_writting.png", "result/HM1_HarrisCorner.png", NML_selected) 77 | -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/Lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-1/assignment-01/Lenna.png -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/hand_writting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-1/assignment-01/hand_writting.png -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/pack.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | 5 | def zipDir(dirpath, outFullName): 6 | zip = zipfile.ZipFile(outFullName, "w", zipfile.ZIP_DEFLATED) 7 | for path, dirnames, filenames in os.walk(dirpath): 8 | # 去掉目标跟路径,只对目标文件夹下边的文件及文件夹进行压缩 9 | fpath = path.replace(dirpath, '') 10 | 11 | for filename in filenames: 12 | print(filename) 13 | if filename.endswith('.zip'): 14 | continue 15 | zip.write(os.path.join(path, filename), os.path.join(fpath, filename)) 16 | zip.close() 17 | 18 | 19 | if __name__ == "__main__": 20 | 21 | # --------------------------------------------------------- 22 | # 请用你的学号和姓名替换下面的内容,注意参照例子的格式,使用拼音而非中文 23 | id = 21000***** 24 | name = 'EmptyBlue' 25 | # --------------------------------------------------------- 26 | 27 | zip_name = f'{id}_{name}.zip' 28 | current_file_directory_path = os.path.dirname(os.path.abspath(__file__)) 29 | input_path = current_file_directory_path 30 | output_path = os.path.join(current_file_directory_path, zip_name) 31 | 32 | zipDir(input_path, output_path) 33 | -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/result/HM1_Canny_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-1/assignment-01/result/HM1_Canny_result.png -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/result/HM1_Convolve_img_blur.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-1/assignment-01/result/HM1_Convolve_img_blur.png -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/result/HM1_Convolve_img_gadient_x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-1/assignment-01/result/HM1_Convolve_img_gadient_x.png -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/result/HM1_Convolve_img_gadient_y.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-1/assignment-01/result/HM1_Convolve_img_gadient_y.png -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/result/HM1_Convolve_replicate_pad.txt: -------------------------------------------------------------------------------- 1 | 6.121701756176186615e-01 6.121701756176186615e-01 1.690697543456364249e-01 4.360590193711701978e-01 7.692624725231228533e-01 2.953253044028761876e-01 1.491629571394897935e-01 1.491629571394897935e-01 2 | 6.121701756176186615e-01 6.121701756176186615e-01 1.690697543456364249e-01 4.360590193711701978e-01 7.692624725231228533e-01 2.953253044028761876e-01 1.491629571394897935e-01 1.491629571394897935e-01 3 | 2.247832454416176517e-02 2.247832454416176517e-02 4.202244922645577141e-01 2.386821412464460446e-01 3.376561918887923675e-01 9.907124646308786975e-01 2.377264539000227828e-01 2.377264539000227828e-01 4 | 8.119265945121156847e-02 8.119265945121156847e-02 6.696002382466298419e-01 6.212429194006969801e-01 2.742535302065184366e-01 4.662214098949730712e-01 1.183677511232835711e-01 1.183677511232835711e-01 5 | 7.395756414320420191e-02 7.395756414320420191e-02 9.007741791606836967e-01 7.939625604796284319e-01 8.405696487242811932e-01 8.152074574529406537e-01 9.909548513304101691e-01 9.909548513304101691e-01 6 | 5.772738296076793674e-01 5.772738296076793674e-01 8.137669170364877358e-01 4.213178773992187764e-01 2.744795981315373279e-02 4.541366431584699104e-01 1.053260853150931320e-01 1.053260853150931320e-01 7 | 8.172200615549912728e-01 8.172200615549912728e-01 6.977277345956326426e-01 5.652854013397264898e-01 2.742272954485119874e-01 9.984737436896493623e-01 1.380420530559359760e-01 1.380420530559359760e-01 8 | 8.172200615549912728e-01 8.172200615549912728e-01 6.977277345956326426e-01 5.652854013397264898e-01 2.742272954485119874e-01 9.984737436896493623e-01 1.380420530559359760e-01 1.380420530559359760e-01 9 | -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/result/HM1_Convolve_result_1.txt: -------------------------------------------------------------------------------- 1 | 6.726929099524359135e-01 1.299302001764504944e+00 1.242277848380564098e+00 1.990147764064087443e+00 1.979515410581911272e+00 8.042878141780547274e-01 2 | 1.233052743124086525e+00 2.113876763734376940e+00 2.199275198995008118e+00 2.326898746813730767e+00 1.975711725876759361e+00 1.312757340422121555e+00 3 | 1.373123590780443104e+00 2.444661726762888065e+00 3.120520693998054096e+00 3.237767655645754861e+00 3.132839976693769124e+00 2.301233400731500289e+00 4 | 1.989684052026016836e+00 2.631015404889353260e+00 2.756966696140569884e+00 2.457542757960983870e+00 2.250434181206241391e+00 1.502196161584189449e+00 5 | 2.328109733508479273e+00 3.096520763029277923e+00 3.029142896424832365e+00 3.081291009456641028e+00 2.682901772749619607e+00 1.796120230915411842e+00 6 | 1.152878120400029349e+00 1.967086661250722068e+00 1.516439580734680881e+00 1.356437579617560063e+00 8.570340226429324915e-01 1.101869898510777102e+00 7 | -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/result/HM1_Convolve_result_2.txt: -------------------------------------------------------------------------------- 1 | 2.113876763734377384e+00 2.199275198995008118e+00 2.326898746813730767e+00 1.975711725876759139e+00 2 | 2.444661726762888510e+00 3.120520693998053652e+00 3.237767655645754861e+00 3.132839976693769568e+00 3 | 2.631015404889353260e+00 2.756966696140569884e+00 2.457542757960983870e+00 2.250434181206241391e+00 4 | 3.096520763029277923e+00 3.029142896424831921e+00 3.081291009456641028e+00 2.682901772749619607e+00 5 | -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/result/HM1_Convolve_zero_pad.txt: -------------------------------------------------------------------------------- 1 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 2 | 0.000000000000000000e+00 6.121701756176186615e-01 1.690697543456364249e-01 4.360590193711701978e-01 7.692624725231228533e-01 2.953253044028761876e-01 1.491629571394897935e-01 0.000000000000000000e+00 3 | 0.000000000000000000e+00 2.247832454416176517e-02 4.202244922645577141e-01 2.386821412464460446e-01 3.376561918887923675e-01 9.907124646308786975e-01 2.377264539000227828e-01 0.000000000000000000e+00 4 | 0.000000000000000000e+00 8.119265945121156847e-02 6.696002382466298419e-01 6.212429194006969801e-01 2.742535302065184366e-01 4.662214098949730712e-01 1.183677511232835711e-01 0.000000000000000000e+00 5 | 0.000000000000000000e+00 7.395756414320420191e-02 9.007741791606836967e-01 7.939625604796284319e-01 8.405696487242811932e-01 8.152074574529406537e-01 9.909548513304101691e-01 0.000000000000000000e+00 6 | 0.000000000000000000e+00 5.772738296076793674e-01 8.137669170364877358e-01 4.213178773992187764e-01 2.744795981315373279e-02 4.541366431584699104e-01 1.053260853150931320e-01 0.000000000000000000e+00 7 | 0.000000000000000000e+00 8.172200615549912728e-01 6.977277345956326426e-01 5.652854013397264898e-01 2.742272954485119874e-01 9.984737436896493623e-01 1.380420530559359760e-01 0.000000000000000000e+00 8 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 9 | -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/result/HM1_HarrisCorner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-1/assignment-01/result/HM1_HarrisCorner.png -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/result/HM1_RANSAC_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-1/assignment-01/result/HM1_RANSAC_fig.png -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/result/HM1_RANSAC_plane.txt: -------------------------------------------------------------------------------- 1 | 2.521506402724194018e-01 2 | -7.428933927054595099e-01 3 | -1.433863589817142292e-01 4 | 6.032991080241606463e-01 5 | -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/test-as_strided.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "[[ 1 2 5 6]\n", 13 | " [ 2 3 6 7]\n", 14 | " [ 3 4 7 8]\n", 15 | " [ 5 6 9 10]\n", 16 | " [ 6 7 10 11]\n", 17 | " [ 7 8 11 12]]\n" 18 | ] 19 | } 20 | ], 21 | "source": [ 22 | "import numpy as np\n", 23 | "from numpy.lib.stride_tricks import as_strided\n", 24 | "\n", 25 | "\n", 26 | "def sliding_windows(arr, window_shape, step=1):\n", 27 | " \"\"\"\n", 28 | " 从二维数组中提取滑动窗口,每个窗口展平成一维数组,所有窗口形成二维数组。\n", 29 | " \n", 30 | " 参数:\n", 31 | " arr -- 输入的二维数组\n", 32 | " window_shape -- 窗口的形状,例如(2, 3)\n", 33 | " step -- 窗口滑动的步长,默认为1 返回:\n", 34 | " 一个二维数组,其中每行是一个展平后的滑动窗口\n", 35 | " \"\"\"\n", 36 | " rows, cols = arr.shape\n", 37 | " window_rows, window_cols = window_shape\n", 38 | " new_shape = ((rows - window_rows) // step + 1,\n", 39 | " (cols - window_cols) // step + 1,\n", 40 | " window_rows,\n", 41 | " window_cols)\n", 42 | " new_strides = (arr.strides[0]*step, arr.strides[1]*step) + arr.strides\n", 43 | "\n", 44 | " strided_arr = as_strided(arr, shape=new_shape, strides=new_strides)\n", 45 | " return strided_arr.reshape(-1, window_rows * window_cols)\n", 46 | "\n", 47 | "\n", 48 | "# 示例使用\n", 49 | "arr = np.array([[1, 2, 3, 4],\n", 50 | " [5, 6, 7, 8],\n", 51 | " [9, 10, 11, 12]])\n", 52 | "\n", 53 | "# 提取 2x2 的滑窗口\n", 54 | "window_shape = (2, 2)\n", 55 | "result = sliding_windows(arr, window_shape)\n", 56 | "\n", 57 | "print(result)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 2, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "[[3 4 5]\n", 70 | " [3 4 5]]\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "arr=np.arange(10)\n", 76 | "idx=np.array([[3,4,5],[3,4,5]])\n", 77 | "print(arr[idx])" 78 | ] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "CV", 84 | "language": "python", 85 | "name": "python3" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.11.8" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 2 102 | } 103 | -------------------------------------------------------------------------------- /labs/homework-1/assignment-01/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import matplotlib.pyplot as plt 3 | from mpl_toolkits.mplot3d import Axes3D 4 | import numpy as np 5 | 6 | 7 | def read_img(path): 8 | return cv2.imread(path, cv2.IMREAD_GRAYSCALE) 9 | 10 | 11 | def write_img(path, img): 12 | cv2.imwrite(path, img) 13 | 14 | 15 | def draw_corner(img_path, save_path, coner_list): 16 | vis_img = cv2.imread(img_path) 17 | for point in coner_list: 18 | cv2.circle(vis_img, (int(point[1]), int(point[0])), 2, (0, 0, 255), -1) 19 | 20 | write_img(save_path, vis_img) 21 | 22 | 23 | def plane_func(pf, p_xy): 24 | return (pf[0]*p_xy[:, 0] + pf[1]*p_xy[:, 1] + pf[3])/-pf[2] 25 | 26 | 27 | def draw_save_plane_with_points(estimated_pf, p, path): # plane function: pf[0]*x+pf[1]*y+pf[2]*z+pf[3]=0 28 | fig = plt.figure() 29 | ax = fig.add_subplot(111, projection='3d') 30 | ax.scatter(p[:, 0], p[:, 1], p[:, 2], c="g", s=10) 31 | x = np.linspace(-1, 1, 100) 32 | y = np.linspace(-1, 1, 100) 33 | x, y = np.meshgrid(x, y) 34 | estimated_plane_z = plane_func(estimated_pf, np.concatenate((x.reshape(-1, 1), y.reshape(-1, 1)), axis=1)) 35 | ax.plot_surface(x, y, estimated_plane_z.reshape(100, 100), alpha=0.5) 36 | ax.view_init(elev=15, azim=15) 37 | plt.savefig(path) 38 | plt.show() 39 | 40 | 41 | def normalize(pf): 42 | return pf / np.linalg.norm(pf) * np.sign(pf[0]) 43 | -------------------------------------------------------------------------------- /labs/homework-2/02_assignment-original.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment-original.zip -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/HW1_BP.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | 5 | 6 | def read_img(path): 7 | return cv2.imread(path, cv2.IMREAD_GRAYSCALE) 8 | 9 | 10 | if __name__ == "__main__": 11 | 12 | # input 13 | input_vector = np.zeros((10, 784)) 14 | for i in range(10): 15 | input_vector[i, :] = read_img("mnist_subset/"+str(i)+".png").reshape(-1)/255. 16 | gt_y = np.zeros((10, 1)) 17 | gt_y[0] = 1 18 | 19 | np.random.seed(14) 20 | 21 | # Intialization MLP (784 -> 16 -> 1) 22 | MLP_layer_1 = np.random.randn(784, 16) 23 | MLP_layer_2 = np.random.randn(16, 1) 24 | lr = 1e-1 25 | loss_list = [] 26 | 27 | for i in range(50): 28 | # Forward 29 | output_layer_1 = input_vector.dot(MLP_layer_1) 30 | output_layer_1_act = 1 / (1 + np.exp(-output_layer_1)) # sigmoid activation function 31 | output_layer_2 = output_layer_1_act.dot(MLP_layer_2) 32 | pred_y = 1 / (1 + np.exp(-output_layer_2)) # sigmoid activation function 33 | loss = -(gt_y * np.log(pred_y) + (1 - gt_y) * np.log(1 - pred_y)).sum() # cross-entroy loss 34 | print("iteration: %d, loss: %f" % (i + 1, loss)) 35 | loss_list.append(loss) 36 | 37 | # 反向传播 38 | loss_batch = -(gt_y * np.log(pred_y) + (1 - gt_y) * np.log(1 - pred_y)) 39 | d_pred_y = -gt_y/pred_y + (1-gt_y)/(1-pred_y) 40 | d_output_layer_2 = d_pred_y * pred_y * (1-pred_y) 41 | d_MLP_layer_2 = output_layer_1_act.T.dot(d_output_layer_2) # 16*1 42 | d_output_layer_1_act = d_output_layer_2.dot(MLP_layer_2.T) # 10*16 43 | d_output_layer_1 = d_output_layer_1_act * output_layer_1_act * (1-output_layer_1_act) # 10*16 44 | d_MLP_layer_1 = input_vector.T.dot(d_output_layer_1) # 784*16 45 | 46 | # Backward : compute the gradient of paratmerters of layer1 (grad_layer_1) and layer2 (grad_layer_2) 47 | grad_layer_1 = d_MLP_layer_1 48 | grad_layer_2 = d_MLP_layer_2 49 | 50 | MLP_layer_1 -= lr * grad_layer_1 51 | MLP_layer_2 -= lr * grad_layer_2 52 | 53 | os.makedirs("results", exist_ok=True) 54 | np.savetxt("results/HW1_BP.txt", loss_list) 55 | -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/README.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | - We recommand using [Anaconda](https://www.anaconda.com/) to manage your python environments. Use the following command to create a new environment. 4 | ```bash 5 | conda create -n hw2 python=3.7 6 | conda activate hw2 7 | ``` 8 | 9 | - We recommand using [Tsinghua Mirror](https://mirrors.tuna.tsinghua.edu.cn/) to install dependent packages. 10 | 11 | ```bash 12 | # pip 13 | python -m pip install --upgrade pip 14 | pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple 15 | 16 | # conda 17 | conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch 18 | conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main 19 | conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free 20 | conda config --set show_channel_urls yes 21 | ``` 22 | 23 | - Now you can install [pytorch](https://pytorch.org/get-started/previous-versions/) and other dependencies as below. 24 | ```bash 25 | conda install pytorch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2 cpuonly # remember to remove "-c pytorh"! 26 | 27 | # tips: try "pip install xxx" first before "conda install xxx" 28 | pip install opencv-python 29 | pip install pillow 30 | pip install tensorboardx 31 | pip install tensorflow # for tensorboardx 32 | ``` 33 | The specific version of pytorch should make no difference for this assignment, since we only use some basic functions. You can also install the GPU version if you can access a GPU. 34 | 35 | # CIFAR-10 36 | 37 | ## Dataset 38 | - Download cifar-10 dataset from [here](https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz) and save to `datasets`. Then unzip it. 39 | 40 | ```bash 41 | cd datasets 42 | wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz 43 | tar -zxvf cifar-10-python.tar.gz 44 | ``` 45 | 46 | ## Visualization 47 | - Visualize the structure of your network 48 | ```bash 49 | cd cifar-10 50 | python network.py 51 | cd ../experiments 52 | tensorboard --logdir . 53 | ``` 54 | 55 | - Visualize and check the input data 56 | ```bash 57 | cd cifar-10 58 | python dataset.py 59 | ``` 60 | 61 | - Train network and visualize the curves 62 | ```bash 63 | cd cifar-10 64 | python train.py 65 | cd ../experiments 66 | tensorboard --logdir . 67 | ``` 68 | 69 | # Submission 70 | - update your personal information in pack.py, and run the script to compress your code and results. Subsequently, submit the zip file to course.pku.edu.cn. 71 | 72 | # Appendix 73 | We list some libraries that may help you solve this assignment. 74 | 75 | - [TensorboardX](https://pytorch.org/docs/stable/tensorboard.html) 76 | - [OpenCV-Python](https://docs.opencv.org/4.x/d6/d00/tutorial_py_root.html) 77 | - [Pillow (PIL)](https://pillow.readthedocs.io/en/stable/) 78 | - [Torchvision.transforms](https://pytorch.org/vision/0.9/transforms.html) 79 | -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/cifar-10/Lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/cifar-10/Lenna.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/cifar-10/network.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class ConvNet(nn.Module): 5 | def __init__(self, num_classes=10): 6 | super(ConvNet, self).__init__() 7 | # VGG11 8 | self.features = nn.Sequential( 9 | # 第一个卷积块 10 | nn.Conv2d(3, 64, kernel_size=3, padding=1), 11 | nn.BatchNorm2d(64), 12 | nn.ReLU(inplace=True), 13 | nn.MaxPool2d(kernel_size=2, stride=2), 14 | # 第二个卷积块 15 | nn.Conv2d(64, 128, kernel_size=3, padding=1), 16 | nn.BatchNorm2d(128), 17 | nn.ReLU(inplace=True), 18 | nn.MaxPool2d(kernel_size=2, stride=2), 19 | # 第三个卷积块 20 | nn.Conv2d(128, 256, kernel_size=3, padding=1), 21 | nn.BatchNorm2d(256), 22 | nn.ReLU(inplace=True), 23 | nn.Conv2d(256, 256, kernel_size=3, padding=1), 24 | nn.BatchNorm2d(256), 25 | nn.ReLU(inplace=True), 26 | nn.MaxPool2d(kernel_size=2, stride=2), 27 | # 第四个卷积块 28 | nn.Conv2d(256, 512, kernel_size=3, padding=1), 29 | nn.BatchNorm2d(512), 30 | nn.ReLU(inplace=True), 31 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 32 | nn.BatchNorm2d(512), 33 | nn.ReLU(inplace=True), 34 | nn.MaxPool2d(kernel_size=2, stride=2), 35 | # 第五个卷积块 36 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 37 | nn.BatchNorm2d(512), 38 | nn.ReLU(inplace=True), 39 | nn.Conv2d(512, 512, kernel_size=3, padding=1), 40 | nn.BatchNorm2d(512), 41 | nn.ReLU(inplace=True), 42 | nn.MaxPool2d(kernel_size=2, stride=2), 43 | ) 44 | self.classifier = nn.Sequential( 45 | nn.Dropout(), 46 | nn.Linear(512, 4096), 47 | nn.ReLU(inplace=True), 48 | nn.Dropout(), 49 | nn.Linear(4096, 4096), 50 | nn.ReLU(inplace=True), 51 | nn.Linear(4096, num_classes), 52 | ) 53 | 54 | def forward(self, x): 55 | x = self.features(x) 56 | x = x.view(x.size(0), -1) 57 | x = self.classifier(x) 58 | return x 59 | 60 | 61 | if __name__ == '__main__': 62 | import torch 63 | from torch.utils.tensorboard import SummaryWriter 64 | from dataset import CIFAR10 65 | writer = SummaryWriter(log_dir='../experiments/network_structure') 66 | net = ConvNet() 67 | train_dataset = CIFAR10() 68 | train_loader = torch.utils.data.DataLoader( 69 | train_dataset, batch_size=2, shuffle=False, num_workers=2) 70 | # Write a CNN graph. 71 | # Please save a figure/screenshot to '../results' for submission. 72 | for imgs, labels in train_loader: 73 | writer.add_graph(net, imgs) 74 | writer.close() 75 | break 76 | -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/cifar-10/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AverageMeter(object): 5 | """Computes and stores the average and current value 6 | Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262 7 | """ 8 | 9 | def __init__(self): 10 | self.reset() 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | self.avg = self.sum / self.count 23 | 24 | 25 | def evaluate(output, target, topk=(1,)): 26 | """Computes the accuracy over the k top predictions for the specified values of k""" 27 | with torch.no_grad(): 28 | maxk = max(topk) 29 | batch_size = target.size(0) 30 | 31 | _, pred = output.topk(maxk, 1, True, True) 32 | pred = pred.t() 33 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 34 | 35 | res = [] 36 | for k in topk: 37 | correct_k = correct[:k].reshape(-1).float().sum(0) 38 | res.append(correct_k.mul_(100.0 / batch_size)) 39 | return res 40 | -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/mnist_subset/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/mnist_subset/0.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/mnist_subset/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/mnist_subset/1.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/mnist_subset/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/mnist_subset/2.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/mnist_subset/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/mnist_subset/3.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/mnist_subset/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/mnist_subset/4.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/mnist_subset/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/mnist_subset/5.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/mnist_subset/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/mnist_subset/6.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/mnist_subset/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/mnist_subset/7.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/mnist_subset/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/mnist_subset/8.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/mnist_subset/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/mnist_subset/9.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/pack.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | 5 | def zipHW2(input_path: str, output_path: str): 6 | zip = zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) 7 | for path, dirnames, filenames in os.walk(os.path.join(input_path, "batch_normalization")): 8 | fpath = path.replace(input_path, 'HW2') 9 | for filename in filenames: 10 | if filename in ["bn.py"]: 11 | zip.write(os.path.join(path, filename), os.path.join(fpath, filename)) 12 | for path, dirnames, filenames in os.walk(os.path.join(input_path, "cifar-10")): 13 | fpath = path.replace(input_path, 'HW2') 14 | for filename in filenames: 15 | if filename in ["dataset.py", "network.py", "train.py"]: 16 | zip.write(os.path.join(path, filename), os.path.join(fpath, filename)) 17 | for path, dirnames, filenames in os.walk(os.path.join(input_path, "results")): 18 | fpath = path.replace(input_path, 'HW2') 19 | for filename in filenames: 20 | zip.write(os.path.join(path, filename), os.path.join(fpath, filename)) 21 | zip.write(os.path.join(input_path, "HW1_BP.py"), os.path.join(input_path.replace(input_path, 'HW2'), "HW1_BP.py")) 22 | zip.close() 23 | 24 | 25 | if __name__ == "__main__": 26 | 27 | # --------------------------------------------------------- 28 | # 请用你的学号和姓名替换下面的内容,注意参照例子的格式,使用拼音而非中文 29 | id = 21000***** 30 | name = 'EmptyBlue' 31 | # --------------------------------------------------------- 32 | 33 | zip_name = f'{id}_{name}.zip' 34 | input_path = os.path.dirname(__file__) 35 | output_path = os.path.join(os.path.dirname(__file__), zip_name) 36 | 37 | zipHW2(input_path, output_path) 38 | -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/results/Best performance of my model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/results/Best performance of my model.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/results/HW1_BP.txt: -------------------------------------------------------------------------------- 1 | 9.779284769618636020e+00 2 | 4.179958911953550960e+00 3 | 3.212628089564451006e+00 4 | 2.781939232574513809e+00 5 | 2.521983696361398408e+00 6 | 2.317483164400497841e+00 7 | 2.140868130341873510e+00 8 | 1.984310588985535251e+00 9 | 1.844338086451177583e+00 10 | 1.718717435254833337e+00 11 | 1.605697625090467762e+00 12 | 1.503788222428893206e+00 13 | 1.411693541447843092e+00 14 | 1.328286516684635821e+00 15 | 1.252585864633001478e+00 16 | 1.183731285334008509e+00 17 | 1.120961515703425393e+00 18 | 1.063599520962807743e+00 19 | 1.011044783556032334e+00 20 | 9.627693681274920312e-01 21 | 9.183140418425477236e-01 22 | 8.772823295082271544e-01 23 | 8.393323255945466244e-01 24 | 8.041673595907686822e-01 25 | 7.715269910867809111e-01 26 | 7.411794933070362523e-01 27 | 7.129163281032775412e-01 28 | 6.865484872314184805e-01 29 | 6.619042068756150199e-01 30 | 6.388274911054412364e-01 31 | 6.171770013305253322e-01 32 | 5.968250494395516137e-01 33 | 5.776565842351080171e-01 34 | 5.595681544049054823e-01 35 | 5.424668735082945892e-01 36 | 5.262694221302973707e-01 37 | 5.109011162768259950e-01 38 | 4.962950599135952379e-01 39 | 4.823913885516623590e-01 40 | 4.691366018603405075e-01 41 | 4.564829767179175435e-01 42 | 4.443880476090865117e-01 43 | 4.328141385444576050e-01 44 | 4.217279295930684846e-01 45 | 4.111000417449203193e-01 46 | 4.009046262801188587e-01 47 | 3.911189491179152822e-01 48 | 3.817229664558677293e-01 49 | 3.726988946794458912e-01 50 | 3.640307839412160740e-01 51 | -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/results/Lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/results/Lenna.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/results/Lenna_aug1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/results/Lenna_aug1.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/results/Lenna_aug2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/results/Lenna_aug2.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/results/bn_loss.txt: -------------------------------------------------------------------------------- 1 | 1.390185479780855005e+01 2 | 3.033448550031815572e+00 3 | 2.752286567016787977e+00 4 | 2.656001582022864849e+00 5 | 2.593305208061171374e+00 6 | 2.539740282361730994e+00 7 | 2.489078261615607968e+00 8 | 2.439408137224943651e+00 9 | 2.390089215026078406e+00 10 | 2.340904016676573107e+00 11 | 2.291789582483166221e+00 12 | 2.242744965600917073e+00 13 | 2.193797333728852994e+00 14 | 2.144988604485112749e+00 15 | 2.096369477225719535e+00 16 | 2.047996165041164218e+00 17 | 1.999928135915148175e+00 18 | 1.952226276004763950e+00 19 | 1.904951297351569206e+00 20 | 1.858162358771124456e+00 21 | 1.811915912627082070e+00 22 | 1.766264793810403688e+00 23 | 1.721257555994122557e+00 24 | 1.676938045352085016e+00 25 | 1.633345188631723177e+00 26 | 1.590512963047900685e+00 27 | 1.548470510562641511e+00 28 | 1.507242358394743409e+00 29 | 1.466848710181751114e+00 30 | 1.427305777002585652e+00 31 | 1.388626123398267298e+00 32 | 1.350819009712002750e+00 33 | 1.313890717861643243e+00 34 | 1.277844852656450314e+00 35 | 1.242682614788496398e+00 36 | 1.208403044640071533e+00 37 | 1.175003238132246386e+00 38 | 1.142478537135842132e+00 39 | 1.110822697637114720e+00 40 | 1.080028039056951661e+00 41 | 1.050085578008018050e+00 42 | 1.020985149458546459e+00 43 | 9.927155178477751196e-01 44 | 9.652644802354031217e-01 45 | 9.386189631135047895e-01 46 | 9.127651140943586761e-01 47 | 8.876883893285623106e-01 48 | 8.633736372114765301e-01 49 | 8.398051787026914461e-01 50 | 8.169668844084373438e-01 51 | 2.765909178387937484e-02 52 | -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/results/different learning rates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/results/different learning rates.png -------------------------------------------------------------------------------- /labs/homework-2/02_assignment/results/network_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-2/02_assignment/results/network_structure.png -------------------------------------------------------------------------------- /labs/homework-3/03_assignment-original.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment-original.zip -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/README.md: -------------------------------------------------------------------------------- 1 | # Calibration 2 | 3 | - We provide a reference answer in code because the data is fake and the result is strange. 4 | 5 | 6 | 7 | # Depth_pc 8 | 9 | - The `depth.png` and `seg.png` are generated by `render.py`, which uses `pyrender` to render `spot.obj` to 2D image plane. 10 | - If you implement the backprojection correctly, the result should be overlapped with `aligned_full_pc.txt`. 11 | - For interest readers, `aligned_full_pc.txt` is different from `raw_full_pc.txt`, which is sampled from `spot.obj`, because the default camera in `pyrender` looks at -z axis. 12 | 13 | 14 | 15 | # Mesh_pc 16 | 17 | - Note that we use the mean distance in this assignment, which is different from the slides. 18 | - We recommend using [this repo](https://github.com/j2kun/earthmover/) to compute earth move distance between two point clouds. 19 | ```bash 20 | cd mesh_pc 21 | git clone git@github.com:j2kun/earthmover.git 22 | ``` 23 | 24 | 25 | 26 | 27 | # Submission 28 | - Compress the entire folder using our provided `pack.py` and submit to [course.pku.edu.cn](https://course.pku.edu.cn/). 29 | - The folder named `results` in the main directoy should be structed as follows. 30 | ```bash 31 | results 32 | ├── bob.obj 33 | ├── spot.obj 34 | ├── calibr.npy 35 | ├── uniform_sampling_results.npy 36 | ├── fps_results.npy 37 | ├── metrics.npy 38 | ├── pc_from_depth.txt 39 | └── one_way_CD.txt 40 | ``` 41 | 42 | 43 | 44 | # Appendix 45 | 46 | - We recommand to use some softwares to visualize the results to help debug. 47 | - CloudCompare: https://www.danielgm.net/cc/ 48 | - Meshlab: https://www.meshlab.net/ 49 | - ... 50 | -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/camera_calibr/back_image.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/camera_calibr/back_image.npy -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/camera_calibr/front.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/camera_calibr/front.png -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/camera_calibr/front_image.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/camera_calibr/front_image.npy -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/depth_pc/depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/depth_pc/depth.png -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/depth_pc/intrinsic.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/depth_pc/intrinsic.npy -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/depth_pc/render.py: -------------------------------------------------------------------------------- 1 | # This code is used to render a depth image from a mesh. 2 | # This code is only provided for interested readers and you don't have to run this. 3 | 4 | import pyrender 5 | import trimesh 6 | import numpy as np 7 | import cv2 8 | 9 | # # if you are using a headless server, you may need below lines 10 | # import os 11 | # os.environ['PYOPENGL_PLATFORM'] = 'egl' 12 | 13 | # load object and preprocess 14 | mesh = trimesh.load('spot.obj', force='mesh') 15 | base_scale = np.sqrt(((mesh.vertices.max(axis=0)-mesh.vertices.min(axis=0))**2).sum()) 16 | obj_center = np.array(mesh.vertices.max(axis=0) + mesh.vertices.min(axis=0)) 17 | mesh.vertices = (mesh.vertices - obj_center) / base_scale 18 | mesh.vertices = mesh.vertices + [0.5, 0.5, -1] 19 | np.savetxt('raw_full_pc.txt', np.array(mesh.vertices)) 20 | 21 | # pyrender load object 22 | scene = pyrender.Scene() 23 | obj_mesh = pyrender.Mesh.from_trimesh(mesh) 24 | obj_node = pyrender.Node(mesh=obj_mesh, matrix=np.eye(4)) 25 | scene.add_node(obj_node) 26 | 27 | # initialize camera 28 | pw = 640 29 | ph = 480 30 | camera_pose = np.eye(4) 31 | camera = pyrender.PerspectiveCamera(yfov=np.deg2rad(60), aspectRatio=pw / ph, znear=0.1, zfar=10) 32 | scene.add(camera, camera_pose) 33 | 34 | # render 35 | r = pyrender.OffscreenRenderer(pw, ph) 36 | seg_img, depth = r.render(scene, flags=pyrender.constants.RenderFlags.SEG, seg_node_map={obj_node: [255, 0, 0]}) 37 | 38 | depth_scale = 0.00012498664727900177 39 | print(depth.mean(), depth.max(), depth.min()) 40 | 41 | depth = (depth / depth_scale).astype(np.int32) 42 | depth_img = np.zeros_like(seg_img) 43 | depth_img[..., 1] = depth // 256 44 | depth_img[..., 2] = depth % 256 45 | 46 | cv2.imwrite('seg.png', seg_img) 47 | cv2.imwrite('depth.png', depth_img) 48 | 49 | 50 | # intrinsic 51 | projection = camera.get_projection_matrix() 52 | K = np.eye(3) 53 | K[0, 0] = projection[0, 0] * pw / 2 54 | K[1, 1] = projection[1, 1] * ph / 2 55 | K[0, 2] = pw / 2 56 | K[1, 2] = ph / 2 57 | np.save('intrinsic', K) 58 | -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/depth_pc/seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/depth_pc/seg.png -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/marching_cube/data/bob_cell.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/marching_cube/data/bob_cell.npy -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/marching_cube/data/spot_cell.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/marching_cube/data/spot_cell.npy -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/mesh_pc/earthmover.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A python implementation of the Earthmover distance metric. 3 | ''' 4 | 5 | import math 6 | 7 | from collections import Counter 8 | from collections import defaultdict 9 | from ortools.linear_solver import pywraplp 10 | 11 | 12 | def euclidean_distance(x, y): 13 | return math.sqrt(sum((a - b)**2 for (a, b) in zip(x, y))) 14 | 15 | 16 | def earthmover_distance(p1, p2): 17 | ''' 18 | Output the Earthmover distance between the two given points. 19 | 20 | Arguments: 21 | 22 | - p1: an iterable of hashable iterables of numbers (i.e., list of tuples) 23 | - p2: an iterable of hashable iterables of numbers (i.e., list of tuples) 24 | ''' 25 | dist1 = {x: float(count) / len(p1) for (x, count) in Counter(p1).items()} 26 | dist2 = {x: float(count) / len(p2) for (x, count) in Counter(p2).items()} 27 | solver = pywraplp.Solver('earthmover_distance', pywraplp.Solver.GLOP_LINEAR_PROGRAMMING) 28 | 29 | variables = dict() 30 | 31 | # for each pile in dist1, the constraint that says all the dirt must leave this pile 32 | dirt_leaving_constraints = defaultdict(lambda: 0) 33 | 34 | # for each hole in dist2, the constraint that says this hole must be filled 35 | dirt_filling_constraints = defaultdict(lambda: 0) 36 | 37 | # the objective 38 | objective = solver.Objective() 39 | objective.SetMinimization() 40 | 41 | for (x, dirt_at_x) in dist1.items(): 42 | for (y, capacity_of_y) in dist2.items(): 43 | amount_to_move_x_y = solver.NumVar(0, solver.infinity(), 'z_{%s, %s}' % (x, y)) 44 | variables[(x, y)] = amount_to_move_x_y 45 | dirt_leaving_constraints[x] += amount_to_move_x_y 46 | dirt_filling_constraints[y] += amount_to_move_x_y 47 | objective.SetCoefficient(amount_to_move_x_y, euclidean_distance(x, y)) 48 | 49 | for x, linear_combination in dirt_leaving_constraints.items(): 50 | solver.Add(linear_combination == dist1[x]) 51 | 52 | for y, linear_combination in dirt_filling_constraints.items(): 53 | solver.Add(linear_combination == dist2[y]) 54 | 55 | status = solver.Solve() 56 | if status not in [solver.OPTIMAL, solver.FEASIBLE]: 57 | raise Exception('Unable to find feasible solution') 58 | 59 | for ((x, y), variable) in variables.items(): 60 | if variable.solution_value() != 0: 61 | cost = euclidean_distance(x, y) * variable.solution_value() 62 | print("move {} dirt from {} to {} for a cost of {}".format( 63 | variable.solution_value(), x, y, cost)) 64 | 65 | return objective.Value() 66 | 67 | 68 | if __name__ == "__main__": 69 | p1 = [ 70 | (0, 0), 71 | (0, 0), 72 | (0, 0), 73 | ] 74 | 75 | p2 = [ 76 | (0, 0), 77 | (0, 1), 78 | (0, 2), 79 | ] 80 | 81 | print(earthmover_distance(p1, p2)) 82 | -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/pack.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | 5 | def zipHW3(input_path: str, output_path: str, zip_name: str): 6 | zip = zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) 7 | for path, dirnames, filenames in os.walk(os.path.join(input_path, "camera_calibr")): 8 | fpath = path.replace(input_path, f'HW3_{zip_name}') 9 | for filename in filenames: 10 | if filename in ["calibr.ipynb"]: 11 | zip.write(os.path.join(path, filename), os.path.join(fpath, filename)) 12 | for path, dirnames, filenames in os.walk(os.path.join(input_path, "depth_pc")): 13 | fpath = path.replace(input_path, f'HW3_{zip_name}') 14 | for filename in filenames: 15 | if filename in ["depth_pc.ipynb"]: 16 | zip.write(os.path.join(path, filename), os.path.join(fpath, filename)) 17 | for path, dirnames, filenames in os.walk(os.path.join(input_path, "marching_cube")): 18 | fpath = path.replace(input_path, f'HW3_{zip_name}') 19 | for filename in filenames: 20 | if filename in ["marching_cube.ipynb"]: 21 | zip.write(os.path.join(path, filename), os.path.join(fpath, filename)) 22 | for path, dirnames, filenames in os.walk(os.path.join(input_path, "mesh_pc")): 23 | fpath = path.replace(input_path, f'HW3_{zip_name}') 24 | for filename in filenames: 25 | if filename in ["mesh_pc.ipynb"]: 26 | zip.write(os.path.join(path, filename), os.path.join(fpath, filename)) 27 | for path, dirnames, filenames in os.walk(os.path.join(input_path, "results")): 28 | fpath = path.replace(input_path, f'HW3_{zip_name}') 29 | for filename in filenames: 30 | zip.write(os.path.join(path, filename), os.path.join(fpath, filename)) 31 | zip.close() 32 | 33 | 34 | if __name__ == "__main__": 35 | 36 | # --------------------------------------------------------- 37 | # 请用你的学号和姓名替换下面的内容,注意参照例子的格式,使用拼音而非中文 38 | id = 21000***** 39 | name = 'EmptyBlue' 40 | # --------------------------------------------------------- 41 | 42 | zip_name = f'{id}_{name}.zip' 43 | input_path = os.path.dirname(os.path.abspath(__file__)) 44 | output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), zip_name) 45 | 46 | zipHW3(input_path, output_path, zip_name.split(".")[0]) 47 | -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/results/calibr.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/results/calibr.npy -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/results/fps_results.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/results/fps_results.npy -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/results/fps_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/results/fps_results.png -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/results/marching_cube_bob_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/results/marching_cube_bob_result.png -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/results/marching_cube_snob_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/results/marching_cube_snob_result.png -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/results/metrics.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/results/metrics.npy -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/results/one_way_CD.txt: -------------------------------------------------------------------------------- 1 | 9.971174980812829575e-03 2 | -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/results/uniform_sampling_results.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/results/uniform_sampling_results.npy -------------------------------------------------------------------------------- /labs/homework-3/03_assignment/results/uniform_sampling_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-3/03_assignment/results/uniform_sampling_results.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment-original.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment-original.zip -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/MaskRCNN/engine.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | import utils 4 | import cv2 5 | 6 | def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): 7 | model.train() 8 | metric_logger = utils.MetricLogger(delimiter=" ") 9 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 10 | header = 'Epoch: [{}]'.format(epoch) 11 | 12 | 13 | 14 | lr_scheduler = None 15 | for images, targets in metric_logger.log_every(data_loader, print_freq, header): 16 | images = list(image.to(device) for image in images) 17 | 18 | 19 | targets = [{k: v.to(device) for k, v in t.items()} for t in targets] 20 | 21 | loss_dict = model(images, targets) 22 | 23 | losses = sum(loss for loss in loss_dict.values()) 24 | 25 | # reduce losses over all GPUs for logging purposes 26 | loss_dict_reduced = utils.reduce_dict(loss_dict) 27 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 28 | 29 | loss_value = losses_reduced.item() 30 | if not math.isfinite(loss_value): 31 | print("Loss is {}, stopping training".format(loss_value)) 32 | print(loss_dict_reduced) 33 | sys.exit(1) 34 | 35 | optimizer.zero_grad() 36 | losses.backward() 37 | optimizer.step() 38 | 39 | if lr_scheduler is not None: 40 | lr_scheduler.step() 41 | 42 | metric_logger.update(loss=losses_reduced, **loss_dict_reduced) 43 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 44 | return metric_logger 45 | -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/MaskRCNN/results/0_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/MaskRCNN/results/0_data.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/MaskRCNN/results/0_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/MaskRCNN/results/0_result.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/MaskRCNN/results/1_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/MaskRCNN/results/1_data.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/MaskRCNN/results/1_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/MaskRCNN/results/1_result.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/MaskRCNN/results/2_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/MaskRCNN/results/2_data.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/MaskRCNN/results/2_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/MaskRCNN/results/2_result.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/MaskRCNN/results/3_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/MaskRCNN/results/3_data.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/MaskRCNN/results/3_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/MaskRCNN/results/3_result.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/MaskRCNN/train.py: -------------------------------------------------------------------------------- 1 | 2 | import utils 3 | from engine import train_one_epoch 4 | from dataset import MultiShapeDataset 5 | import torch 6 | import torch.utils.data 7 | import os 8 | import time 9 | 10 | os.makedirs("results", exist_ok=True) 11 | # writer = utils.log_writer("results", "maskrcnn") 12 | num_classes = 4 # 0 for backgroud 13 | 14 | model = utils.get_instance_segmentation_model(num_classes).double() 15 | 16 | model.load_state_dict(torch.load(r'./intro2cv_maskrcnn_pretrained.pth',map_location='cpu')) 17 | 18 | dataset = MultiShapeDataset(10) 19 | 20 | torch.manual_seed(233) 21 | 22 | data_loader = torch.utils.data.DataLoader( 23 | dataset, batch_size=2, num_workers=0, shuffle=True, 24 | collate_fn=utils.collate_fn) 25 | 26 | params = [p for p in model.parameters() if p.requires_grad] 27 | optimizer = torch.optim.SGD(params, lr=0.001, 28 | momentum=0.9, weight_decay=0.0005) 29 | 30 | lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 31 | step_size=3, 32 | gamma=0.1) 33 | 34 | num_epochs = 3 35 | device = torch.device('cpu') 36 | 37 | count = 0 38 | for epoch in range(num_epochs): 39 | t0 = time.time() 40 | # count = train_one_epoch(model, optimizer, data_loader, device, count, writer) 41 | count = train_one_epoch(model, optimizer, data_loader, device, count, 1) 42 | torch.save(model.state_dict(), "results/maskrcnn_"+str(epoch)+".pth") 43 | lr_scheduler.step() 44 | 45 | print(f"Epoch {epoch} finished, time: {int(time.time()-t0) / 60.0} min.") -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/MaskRCNN/visualize.py: -------------------------------------------------------------------------------- 1 | import utils 2 | from dataset import SingleShapeDataset 3 | from utils import plot_save_output 4 | import torch 5 | import torch.utils.data 6 | 7 | 8 | dataset_test = SingleShapeDataset(10) 9 | data_loader_test = torch.utils.data.DataLoader( 10 | dataset_test, batch_size=1, shuffle=False, num_workers=0, 11 | collate_fn=utils.collate_fn) 12 | 13 | 14 | num_classes = 4 15 | 16 | # get the model using the helper function 17 | model = utils.get_instance_segmentation_model(num_classes).double() 18 | 19 | model.load_state_dict(torch.load(r'./intro2cv_maskrcnn_pretrained.pth', map_location='cpu')) 20 | 21 | 22 | model.eval() 23 | path = "results/" 24 | for i in range(4): 25 | imgs, labels = dataset_test[i] 26 | output = model([imgs]) 27 | plot_save_output(path+str(i)+"_result.png", imgs, output[0]) -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/PointNet/feature_vis.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import random 4 | import torch 5 | import torch.nn.parallel 6 | import torch.utils.data 7 | from dataset import ShapeNetClassficationDataset 8 | from model import PointNetCls1024D 9 | import numpy as np 10 | from utils import write_points, setting 11 | import cv2 12 | import matplotlib.pyplot as plt 13 | 14 | if __name__ == '__main__': 15 | feat_dim = 1024 16 | batch_size = 16 17 | num_classes = 16 18 | opt = setting() 19 | def blue(x): return '\033[94m' + x + '\033[0m' 20 | random.seed(opt.manualSeed) 21 | torch.manual_seed(opt.manualSeed) 22 | 23 | test_dataset = ShapeNetClassficationDataset( 24 | root=opt.dataset, 25 | split='test', 26 | npoints=opt.num_points, 27 | class_choice=['Airplane', 'Lamp', 'Guitar', 'Laptop', 'Car'], 28 | with_data_augmentation=False) 29 | 30 | testdataloader = torch.utils.data.DataLoader( 31 | test_dataset, 32 | batch_size=batch_size, 33 | shuffle=True, 34 | num_workers=int(opt.workers)) 35 | 36 | print('classes', num_classes) 37 | 38 | classifier = PointNetCls1024D(k=num_classes) 39 | 40 | # load weights: 41 | classifier.load_state_dict(torch.load(f"{opt.expf}/cls_{feat_dim}D/model.pth")) 42 | 43 | classifier.eval() 44 | 45 | for i, data in enumerate(testdataloader, 0): 46 | points, target = data 47 | target = target[:, 0] 48 | classifier = classifier.eval() 49 | 50 | pred, heat_feat = classifier(points) 51 | heat_feat = heat_feat.detach().numpy() 52 | heat_feat = np.max(heat_feat, 2) 53 | heat_feat = (heat_feat - np.min(heat_feat, axis=0))/(np.max(heat_feat, axis=0) - np.min(heat_feat, axis=0)) 54 | color_heat_feat = cv2.applyColorMap((heat_feat*255).astype(np.uint8), cv2.COLORMAP_JET) # BGR 55 | 56 | for i in range(batch_size): 57 | fig = plt.figure() 58 | ax = fig.add_subplot(111, projection='3d') 59 | 60 | point = points.numpy()[i, ...] 61 | ax.scatter(point[:, 0], point[:, 1], point[:, 2], c=color_heat_feat[i, ...]/255, marker='o', s=1) 62 | 63 | ax.set_xlim(-1, 1) 64 | ax.set_ylim(-1, 1) 65 | ax.set_zlim(-1, 1) 66 | 67 | save_dir = os.path.join(opt.expf, 'cls_1024D', 'vis') 68 | if not os.path.exists(save_dir): 69 | os.makedirs(save_dir) 70 | plt.savefig(f"{opt.expf}/cls_{feat_dim}D/vis/{i}.png") 71 | 72 | break 73 | -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/PointNet/modelnet10_id.txt: -------------------------------------------------------------------------------- 1 | bathtub 0 2 | bed 1 3 | chair 2 4 | desk 3 5 | dresser 4 6 | monitor 5 7 | night_stand 6 8 | sofa 7 9 | table 8 10 | toilet 9 -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/PointNet/num_seg_classes.txt: -------------------------------------------------------------------------------- 1 | Airplane 4 2 | Bag 2 3 | Cap 2 4 | Car 4 5 | Chair 4 6 | Earphone 3 7 | Guitar 3 8 | Knife 2 9 | Lamp 4 10 | Laptop 2 11 | Motorbike 6 12 | Mug 2 13 | Pistol 3 14 | Rocket 3 15 | Skateboard 3 16 | Table 3 -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/PointNet/results/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/PointNet/results/0.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/PointNet/results/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/PointNet/results/1.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/PointNet/results/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/PointNet/results/2.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/PointNet/results/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/PointNet/results/3.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/PointNet/results/classification-256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/PointNet/results/classification-256.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/PointNet/results/classification1024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/PointNet/results/classification1024.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/PointNet/results/segmentation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/PointNet/results/segmentation.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/PointNet/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from torch.utils.tensorboard import SummaryWriter 3 | 4 | 5 | class setting: 6 | dataset = "./shapenetcore_partanno_segmentation_benchmark_v0" 7 | batchSize = 32 8 | num_points = 1024 9 | workers = 1 10 | nepoch = 5 11 | expf = "exps" 12 | model = "" 13 | dataset_type = "shapenet" 14 | manualSeed = 233 15 | 16 | 17 | class log_writer: 18 | def __init__(self, path, log_name) -> None: 19 | 20 | output_path = os.path.join(path, log_name) 21 | if not os.path.exists(output_path): 22 | os.makedirs(output_path) 23 | self.writer = SummaryWriter(log_dir=output_path) 24 | 25 | def add_train_scalar(self, name, data, n): 26 | self.writer.add_scalar(name+'/train', data, n) 27 | 28 | def add_test_scalar(self, name, data, n): 29 | self.writer.add_scalar(name+'/test', data, n) 30 | 31 | 32 | def write_points(finename, points, color, weight): 33 | point_count = points.shape[0] 34 | ply_file = open(finename, 'w') 35 | ply_file.write("ply\n") 36 | ply_file.write("format ascii 1.0\n") 37 | ply_file.write("element vertex " + str(point_count) + "\n") 38 | ply_file.write("property float x\n") 39 | ply_file.write("property float y\n") 40 | ply_file.write("property float z\n") 41 | 42 | ply_file.write("property uchar red\n") 43 | ply_file.write("property uchar green\n") 44 | ply_file.write("property uchar blue\n") 45 | 46 | ply_file.write("property float weight\n") 47 | 48 | ply_file.write("end_header\n") 49 | 50 | for i in range(point_count): 51 | ply_file.write(str(points[i, 0]) + " " + 52 | str(points[i, 1]) + " " + 53 | str(points[i, 2])) 54 | 55 | ply_file.write(" "+str(int(color[i, 2])) + " " + 56 | str(int(color[i, 1])) + " " + 57 | str(int(color[i, 0])) + " ") 58 | 59 | ply_file.write(str(weight[i])) 60 | ply_file.write("\n") 61 | ply_file.close() 62 | -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/RNN/check_rnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rnn_layers import rnn_forward, rnn_backward 3 | 4 | 5 | def unit_test(x, h0, Wx, Wh, b, dout): 6 | h, cache = rnn_forward(x, h0, Wx, Wh, b) 7 | dx, dh0, dWx, dWh, db = rnn_backward(dout, cache) 8 | answer_dict = { 9 | 'h': h, 10 | 'dx': dx, 11 | 'dh0': dh0, 12 | 'dWx': dWx, 13 | 'dWh': dWh, 14 | 'db': db 15 | } 16 | return answer_dict 17 | 18 | 19 | def check_answer(answer_dict, gt_answer_dict): 20 | for key in answer_dict.keys(): 21 | if key not in gt_answer_dict: 22 | print('Unrecognized key %s in answer_dict' % key) 23 | return False 24 | if not np.allclose(answer_dict[key], gt_answer_dict[key], atol=1e-6, rtol=1e-5): 25 | print('The %s does not match the gt_answer.' % key) 26 | return False 27 | print('All items in the answer_dict match the gt_answer!') 28 | return True 29 | 30 | 31 | # ------------------------------------------------------------------- # 32 | # You can use the following example to check your implementation of # 33 | # rnn_step_forward and rnn_step_backward. # 34 | # ------------------------------------------------------------------- # 35 | N, T, D, H = 2, 3, 4, 5 36 | 37 | x = np.linspace(-0.1, 0.3, num=N*T*D).reshape(N, T, D) 38 | h0 = np.linspace(-0.3, 0.1, num=N*H).reshape(N, H) 39 | Wx = np.linspace(-0.2, 0.4, num=D*H).reshape(D, H) 40 | Wh = np.linspace(-0.4, 0.1, num=H*H).reshape(H, H) 41 | b = np.linspace(-0.7, 0.1, num=H) 42 | dout = np.linspace(-0.1, 0.2, num=N*T*H).reshape(N, T, H) 43 | 44 | answer_dict = unit_test(x, h0, Wx, Wh, b, dout) 45 | gt_dict = np.load('results/reference_rnn.npy', allow_pickle=True).item() 46 | check_answer(answer_dict, gt_dict) 47 | 48 | # ------------------------------------------------------------------- # 49 | # Now, we run your code again and get your results for fair evaluation. 50 | # -------------------------------------------------------------------# 51 | np.random.seed(233) 52 | x = np.random.randn(N, T, D) 53 | h0 = np.random.randn(N, H) 54 | Wx = np.random.randn(D, H) 55 | Wh = np.random.randn(H, H) 56 | b = np.random.randn(H) 57 | dout = np.random.randn(N, T, H) 58 | 59 | answer_dict = unit_test(x, h0, Wx, Wh, b, dout) 60 | np.save('results/rnn.npy', answer_dict) -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/RNN/check_single_rnn_layer.py: -------------------------------------------------------------------------------- 1 | from rnn_layers import rnn_step_forward, rnn_step_backward 2 | import numpy as np 3 | 4 | 5 | def check_answer(answer_dict, gt_answer_dict): 6 | for key in answer_dict.keys(): 7 | if key not in gt_answer_dict: 8 | print('Unrecognized key %s in answer_dict' % key) 9 | return False 10 | if not np.allclose(answer_dict[key], gt_answer_dict[key], atol=1e-6, rtol=1e-5): 11 | print('The %s does not match the gt_answer.' % key) 12 | return False 13 | print('All items in the answer_dict match the gt_answer!') 14 | return True 15 | 16 | 17 | def unit_test(x, prev_h, Wx, Wh, b): 18 | next_h, cache = rnn_step_forward(x, prev_h, Wx, Wh, b) 19 | dx, dprev_h, dWx, dWh, db = rnn_step_backward(dnext_h, cache) 20 | answer_dict = { 21 | 'next_h': next_h, 22 | 'dx': dx, 23 | 'dprev_h': dprev_h, 24 | 'dWx': dWx, 25 | 'dWh': dWh, 26 | 'db': db 27 | } 28 | return answer_dict 29 | 30 | # ------------------------------------------------------------------- # 31 | # You can use the following example to check your implementation of # 32 | # rnn_step_forward and rnn_step_backward. # 33 | # ------------------------------------------------------------------- # 34 | N, D, H = 3, 10, 4 35 | 36 | x = np.linspace(-0.4, 0.7, num=N*D).reshape(N, D) 37 | prev_h = np.linspace(-0.2, 0.5, num=N*H).reshape(N, H) 38 | Wx = np.linspace(-0.1, 0.9, num=D*H).reshape(D, H) 39 | Wh = np.linspace(-0.3, 0.7, num=H*H).reshape(H, H) 40 | b = np.linspace(-0.2, 0.4, num=H) 41 | dnext_h = np.linspace(-3, 3, num=N*H).reshape(N, H) 42 | 43 | answer_dict = unit_test(x, prev_h, Wx, Wh, b) 44 | gt_answer_dict = np.load('results/reference_single_rnn_layer.npy', allow_pickle=True).item() 45 | check_answer(answer_dict, gt_answer_dict) 46 | 47 | # ------------------------------------------------------------------- # 48 | # Now, we run your code again and get your results for fair evaluation. 49 | # -------------------------------------------------------------------# 50 | np.random.seed(233) 51 | x = np.random.randn(N, D) 52 | prev_h = np.random.randn(N, H) 53 | Wx = np.random.randn(D, H) 54 | Wh = np.random.randn(H, H) 55 | b = np.random.randn(H) 56 | dnext_h = np.random.randn(N, H) 57 | 58 | answer_dict = unit_test(x, prev_h, Wx, Wh, b) 59 | np.save('results/single_rnn_layer.npy', answer_dict) -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/RNN/results/pred_train_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/RNN/results/pred_train_0.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/RNN/results/pred_train_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/RNN/results/pred_train_1.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/RNN/results/pred_val_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/RNN/results/pred_val_0.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/RNN/results/pred_val_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/RNN/results/pred_val_1.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/RNN/results/reference_attention.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/RNN/results/reference_attention.npy -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/RNN/results/reference_rnn.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/RNN/results/reference_rnn.npy -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/RNN/results/reference_single_rnn_layer.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/RNN/results/reference_single_rnn_layer.npy -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/RNN/results/rnn.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/RNN/results/rnn.npy -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/RNN/results/rnn_loss_history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/RNN/results/rnn_loss_history.png -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/RNN/results/single_rnn_layer.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/labs/homework-4/04_assignment/RNN/results/single_rnn_layer.npy -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/RNN/train_rnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from utils.coco_utils import load_coco_data, decode_captions 3 | from rnn import CaptioningRNN 4 | from utils.captioning_solver import CaptioningSolver 5 | import matplotlib.pyplot as plt 6 | 7 | np.random.seed(233) 8 | 9 | 10 | # ------------------------------------------------------------------- # 11 | # Overfit your RNN on 50 training data # 12 | # ------------------------------------------------------------------- # 13 | 14 | small_data = load_coco_data(max_train=50) 15 | 16 | small_rnn_model = CaptioningRNN( 17 | word_to_idx=small_data['word_to_idx'], 18 | input_dim=small_data['train_features'].shape[1], 19 | hidden_dim=512, 20 | wordvec_dim=256, 21 | ) 22 | 23 | small_rnn_solver = CaptioningSolver( 24 | small_rnn_model, small_data, 25 | update_rule='adam', 26 | num_epochs=50, 27 | batch_size=25, 28 | optim_config={ 29 | 'learning_rate': 5e-3, 30 | }, 31 | lr_decay=0.95, 32 | verbose=True, print_every=10, 33 | ) 34 | 35 | small_rnn_solver.train() 36 | 37 | # Plot the training losses. 38 | plt.plot(small_rnn_solver.loss_history) 39 | plt.xlabel('Iteration') 40 | plt.ylabel('Loss') 41 | plt.title('Training loss history') 42 | plt.savefig('results/rnn_loss_history.png') 43 | plt.close() 44 | 45 | 46 | # ------------------------------------------------------------------- # 47 | # Inference: please uncomment these lines after completing the sample # 48 | # function in rnn.py. 49 | # ------------------------------------------------------------------- # 50 | for split in ['train', 'val']: 51 | for i in range(2): 52 | data_dict = np.load(f'datasets/samples/{split}_{i}.npy', allow_pickle=True).item() 53 | feature = data_dict['feature'].reshape(1, -1) 54 | image = plt.imread(f'datasets/samples/{split}_{i}.png') 55 | 56 | sample_captions = small_rnn_model.sample(feature) 57 | sample_captions = decode_captions(sample_captions, small_data['idx_to_word']) 58 | 59 | # set image size 60 | plt.figure(figsize=(8, 4)) 61 | plt.imshow(image) 62 | plt.title('Your prediction: %s\nGT: %s' % (sample_captions[0], data_dict['gt_caption'])) 63 | plt.axis('off') 64 | plt.savefig(f'results/pred_{split}_{i}.png') 65 | plt.close() -------------------------------------------------------------------------------- /labs/homework-4/04_assignment/pack.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | 4 | 5 | def zipHW4(input_path: str, output_path: str, zip_name: str): 6 | zip = zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) 7 | 8 | result_files = { 9 | 'PointNet': ['model.py'] + [f'results/{i}.png' for i in range(4)] + ['results/classification_256.png', 'results/classification_1024.png', 'results/segmentation.png'], 10 | 'MaskRCNN': ['dataset.py'] + [f'results/{i}_data.png' for i in range(4)] + [f'results/{i}_result.png' for i in range(4)], 11 | 'RNN': ['rnn.py', 'rnn_layers.py'] + [f'results/pred_{split}_{i}.png' for i in range(2) for split in ['train', 'val']] + ['results/rnn_loss_history.png', 'results/single_rnn_layer.npy', 'results/rnn.npy'] 12 | } 13 | 14 | for folder, file_names in result_files.items(): 15 | for file_name in file_names: 16 | if os.path.exists(os.path.join(input_path, folder, file_name)): 17 | zip.write(os.path.join(input_path, folder, file_name), os.path.join(f'HW4_{zip_name}', folder, file_name)) 18 | else: 19 | print(f"File {file_name} not found in {folder}") 20 | raise FileNotFoundError 21 | zip.close() 22 | 23 | 24 | if __name__ == "__main__": 25 | 26 | # --------------------------------------------------------- 27 | # 请用你的学号和姓名替换下面的内容,注意参照例子的格式,使用拼音而非中文 28 | id = 21000xxxxx 29 | name = 'EmptyBlue' 30 | # --------------------------------------------------------- 31 | 32 | zip_name = f'{id}_{name}.zip' 33 | input_path = os.path.dirname(os.path.abspath(__file__)) 34 | output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), zip_name) 35 | 36 | zipHW4(input_path, output_path, zip_name.split(".")[0]) 37 | -------------------------------------------------------------------------------- /notes/README.md: -------------------------------------------------------------------------------- 1 | ## Compile 2 | 3 | ```shell 4 | xelatex CV_notes.tex 5 | bibtex CV_notes 6 | xelatex CV_notes.tex 7 | xelatex CV_notes.tex 8 | ``` 9 | 10 | Until catalogs are integrated. -------------------------------------------------------------------------------- /notes/main-latest.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/main-latest.pdf -------------------------------------------------------------------------------- /notes/src-ele/07.classification.tex: -------------------------------------------------------------------------------- 1 | \chapter{Classification} 2 | 3 | 图片分类是CV领域的核心问题.简单来说,就是给定一张图片,判断其属于何种分类,比如是不是猫或狗等等,这对图片的语义理解非常重要. 4 | 5 | 但是传统的方法对此类问题难以下手,因为图片通常是由数字的矩阵来描述,而从数字到语义有很大的鸿沟,很难设计某个规则来判定是否属于某类. 6 | 比如:对象不同的姿势,不同的摄像机视角,不同的背景信息,不同的光照条件,以及对象被隐藏和类内差异等问题. 7 | 8 | 对于一个好的图片分类器,应该对上述无关因素不敏感,而这也是data augmentation的意义.比如rotation代表姿势和视角的改变,颜色改变代表光照的变化等. 9 | 10 | 对于图片分类,我们有下列方法:无参方法有最近邻法,参数方法则可以采用CNN. 11 | 12 | \section{Nearest Neighbour Classifier} 13 | 14 | 所谓最近邻,就是将图片视为高维空间的点,将每个训练数据作为已知点,定义一种图片间距离的度量,选取最近的一个 (或几个) 15 | 训练数据的类别作为待判断图片的类别.这是一种非常低效的方法,其完美避开了我们上面说到的应具有的标准,对光照/背景/视角/姿势极为敏感,正确率极低,而且需要存储所有训练集.因此,实际中从不使用此种方法.但是最近邻方法在度量学习当中仍有广泛应用. 16 | 17 | \section{Using CNN for image Classification} 18 | 19 | 选用CNN之后,我们需要面对的问题有两个:选取何种网络结构,以及如何设计损失函数. 20 | 如今分类问题的网络范式是Softmax classifier + cross-entropy loss. 21 | \footnote{对二分类问题,也可采用SVM loss.但是扩展的多分类SVM loss在如今已经极少使用了.} 22 | 23 | \textbf{\\SoftMax} 24 | 25 | SoftMax就是一个${\mathbb R}^k \to {(0, 1)}^k$的映射. 26 | 27 | \begin{equation} 28 | \sigma(z)_i = \frac{\exp{\beta z_i}}{\sum \exp(\beta z_j)} 29 | \end{equation} 30 | 31 | 一般取$\beta = 1.$当$\beta \to \infty$时,SoftMax变成Argmax. 32 | 33 | 所以 SoftMax 是 Soft 的 Argmax. 34 | 35 | 关于loss的设计,如果正确标签是one-hot的,那么我们可以使用负对数概率(NLL)作为损失函数. 36 | 但是如果ground truth也是一个概率分布(有时这是人为的), 37 | 那么我们就需要对两个概率分布的距离度量给出定义.在信息论领域常用的度量是KL divergence $D(P \parallel Q)$,其定义如下: 38 | 39 | \begin{equation} 40 | D(P \parallel Q) = \sum_{x \in \mathcal X} P(x) \log \frac{P(x)}{Q(x)}. 41 | \end{equation} 42 | 43 | 这个度量并不满足距离的定义,因为其满足正定性,而不满足对称性和三角不等式. 44 | 45 | 我们不难看出 46 | \begin{equation} 47 | D(P \parallel Q) = \underbrace{-\sum_{x \in \mathcal X} P(x)\log Q(x)}_{H(P, Q)} - \underbrace{\xk{-\sum_{x \in \mathcal X} P(x) \log P(x)}}_{H(P)}. 48 | \end{equation} 49 | 即KL divergence是相对熵和分布$P$的熵之差.如果$P$是groud truth的分布,那么第二项成为常数,就得到了我们的交叉熵损失函数: 50 | \begin{equation} 51 | \mathcal L_{CE} = H(P, Q) = -\sum_{x \in \mathcal X} P(x) \log Q(x). 52 | \end{equation} 53 | 54 | 交叉熵函数在随机初始化时,取值约为$\log (\text{sum of classes})$.它没有上界,有下界$0$. 55 | 56 | 所以 CrossEntrophyLoss 应该在 $\log{\text{类别数}}$ 开始下降 57 | 58 | \section{Cross Entrophy Loss V.S. Accuracy} 59 | 60 | 1.CEL有可能已经降到了log2,acc仍是0.例子:$\Pr=[0.499,0.501]$,仍然输出错误答案,但是 $loss=\log2$ 很小 61 | 62 | 2.$acc=100\%$的时候,CEL仍然可能是初始化的 $\log(N)$ , 同理举一个例子:$\Pr=[0.498,0.001,0.001]$ 63 | 64 | 综上所述,两者没有确定关系,训练一定要同时画两个曲线 -------------------------------------------------------------------------------- /notes/src-ele/08.CNN_for_classification.tex: -------------------------------------------------------------------------------- 1 | \chapter{CNNs for Image Classification} 2 | 3 | 当我们分析一个CNN的结构时,需要考虑以下的方面: 4 | 5 | \begin{enumerate} 6 | \item 表示能力 7 | \item 是否适合任务 8 | \item 是否容易优化 9 | \item 代价 10 | \end{enumerate} 11 | 12 | \section{Reception Field} 13 | 14 | 如图,使用三层3*3卷积层,感受野与一层7*7卷积层相同. 15 | 16 | \begin{figure}[htbp] 17 | \centering 18 | \includegraphics[scale=0.85]{figures/receptivefield.png} 19 | \caption{三层3*3卷积核的感受野} 20 | \end{figure} 21 | 22 | 感受野是一个很重要的概念,它表征一个数据可以接收到原图多大范围的信息. 23 | 我们这里姑且认为感受野相同则表达能力相同.我们希望可以在神经网络的中部将整个图片纳入感受野, 24 | 这样在后面可以进行全图的pixel信息的交流,有利于结合多个特征.例如:结合狗的耳朵和毛色进行判断. 25 | 26 | 既然三层3*3卷积层,感受野与一层7*7卷积层相同,那么为什么要选用小而深的网络呢? 27 | 其一,层数增加,网络的非线性性增加,分割能力更强.此外,参数量也更小 ($3\times 3^2 C^2 < 7^2C^2$). -------------------------------------------------------------------------------- /notes/src-ele/22.Instance_Level_6D_Object_Pose_Estimation.tex: -------------------------------------------------------------------------------- 1 | \chapter{Instance-Level 6D Object Pose Estimation} 2 | 3 | \textbf{这一章节在2024年教学中已经被删去,为了让有兴趣的读者了解,故保留} 4 | 5 | Instance-level: a small set of known instances. 6 | 7 | Pose is defined for each instance according to their CAD model. 8 | 9 | Input: RGB/RGBD.如果有相机内参,那么没有D也可以.有D可以做得更好.\marginpar{\kaishu 为什么有内参没有深度也是可以的呢?因为这里我们是Instance-level的姿态估计,换言之我们已经有了这个物体的形状参数,其大小规格也是已知的.理论上我们甚至可以不停地试$\bd R, \bm t$使得转换后的形状与照片符合.} 10 | 11 | 2D center localization.先预测2d图片的中心位置和深度.随后利用相机内参得到translation. 12 | 13 | PoseCNN: Translation Estimation:Voting.每个pixel给出一个指向中心的向量,得到center. 14 | 15 | PoseCNN: Rotation Estimation. RoI? 16 | 17 | loss: $\mathcal{L}(\bd q, \bd q^{*})$.我们发现$\bd{q}$和$-\bd{q}$在旋转意义上是相同的,double coverage.因此一种可行的regression loss是取两者的最小值. 18 | 19 | PoseCNN则采用了另一种loss: 20 | \begin{equation} 21 | \mathrm{PLoss}(\widetilde{\bd{q}}, \bd{q}) = \frac{1}{2m}\sum_{\bd x \in \mathcal{M}} \norm{R(\widetilde{\bd{q}}) \bd x - R(\bd{q}) \bd x}^2 22 | \end{equation} 23 | 24 | 对称性:(表示旋转的等价类) 25 | \begin{equation} 26 | \operatorname{SLoss}(\widetilde{\mathbf{q}}, \mathbf{q})=\frac{1}{2 m} \sum_{\mathbf{x}_{1} \in \mathcal{M}} \min _{\mathbf{x}_{2} \in \mathcal{M}}\left\|R(\tilde{\mathbf{q}}) \mathbf{x}_{1}-R(\mathbf{q}) \mathbf{x}_{2}\right\|^{2} 27 | \end{equation} 28 | 29 | PoseCNN的translation表现尚可,但是rotation的表现一般,这受限于四元数的性能. 30 | 31 | 6D pose要求已知物体的cad模型,这在现实中不太可能. 32 | 33 | category-level 6D pose.希望能够泛化,输入3d输出6d pose,Without the need to use CAD model. 34 | 35 | 王鹤老师的论文:Normalized Object Coordinate Space for Category-Level 6D Object Pose and Size Estimation,CVPR2019 oral. 36 | 37 | Detecting and estimating 6D pose and 3D size of previously unseen objects from certain categories from RGBD images. 38 | 39 | 为什么要depth呢?因为对于未知的物体来说,仅有rgb而没有depth是无法确定其大小的.有了depth和相机内参,才能消除scale的不确定性. 40 | 41 | 问题的主要难点是rotation的估计.前面我们看到PoseCNN即使对于已知的物体,做得也相当不好. 42 | 43 | 间接法.Representation: Normalized Object Coordinate Space(NOCS) 44 | 45 | 简而言之,我们需要对一张图片的像素预测其在CAD model 中的位置.你可能会问:不是没有CAD model吗?在此我们建立了一个reference space:NOCS. 46 | 47 | step 1:rotation Normalization:align object orientations.将所有物体对齐成同样的姿态,如马克杯的方向都向左,此时旋转矩阵为0.\marginpar{\kaishu 这里我们隐含了一个假设,即我们可以在没有其CAD的情形下讨论其朝向.如马克杯的把手.} 48 | 49 | Step 2 (translation normalization): zero-center the objects.对于新物体,将其紧bbox的中心作为原点. 50 | 51 | Step 3 (scale normalization): uniformly normalize the scales.将bbox的对角线长度设置为1.这样所有的都可以放入一个对角线长为1的正方体里了.NOCS = Reference frame. 52 | 53 | NOCS = Reference frame transformation from NOCS to camera space. 54 | 55 | \begin{figure}[htbp] 56 | \centering 57 | \includegraphics[scale=0.65]{figures/image_nocs_pose.png} 58 | \caption{From Image to NOCS map to Pose.} 59 | \label{} 60 | \end{figure} 61 | 62 | \section{Beyond Object Pose} 63 | human/hand pose extimation.人体可以按照关节活动,并不是刚体. 64 | -------------------------------------------------------------------------------- /notes/src-ele/23.motion.tex: -------------------------------------------------------------------------------- 1 | \chapter{Motion} 2 | 3 | \textbf{这一章节在2024年教学中没有讲授,为了让有兴趣的读者了解,故保留} 4 | 5 | Today let’s focus on motions between two consecutive frames! 6 | 7 | Optical Flow 光流. 8 | 9 | 图片的亮的部分在两帧之间的表象运动. 10 | 11 | 几个假设:亮度相对稳定,小移动,一个点的运动与其邻居相似. 12 | 13 | \begin{equation} 14 | \begin{array}{l} 15 | I(x+u, y+v, t) \approx I(x, y, t-1)+I_{x} \cdot u(x, y)+I_{y} \cdot v(x, y)+I_{t} \\ 16 | I(x+u, y+v, t)-I(x, y, t-1)=I_{x} \cdot u(x, y)+I_{y} \cdot v(x, y)+I_{t} \\ 17 | \text { Hence, } I_{x} \cdot u+I_{y} \cdot v+I_{t} \approx 0 \quad \rightarrow \nabla I \cdot[u v]^{T}+I_{t}=0 18 | \end{array} 19 | \end{equation} 20 | 21 | 那么,这个方程足够解出所有$(u, v)$吗?我们有$n^2$个方程,但有$2n^2$个未知数,因此不够. 22 | 23 | The Aperture Problem.单纯从图像来看,运动可能并不完整.Barberpole Illusion.沿着线的方向不容易观测,垂直的容易被观察到. 24 | 25 | 更多约束: Spatial coherence constraint. 1981年Lucas和Kanade提出了假设在每个pixel的5*5window当中flow相同. 26 | 27 | \begin{equation} 28 | \left[\begin{array}{cc} 29 | I_{x}\left(\mathrm{p}_{1}\right) & I_{y}\left(\mathbf{p}_{1}\right) \\ 30 | I_{x}\left(\mathbf{p}_{2}\right) & I_{y}\left(\mathbf{p}_{2}\right) \\ 31 | \vdots & \vdots \\ 32 | I_{x}\left(\mathbf{p}_{25}\right) & I_{y}\left(\mathbf{p}_{25}\right) 33 | \end{array}\right]\left[\begin{array}{l} 34 | u \\ 35 | v 36 | \end{array}\right]=-\left[\begin{array}{c} 37 | I_{t}\left(\mathbf{p}_{1}\right) \\ 38 | I_{t}\left(\mathbf{p}_{2}\right) \\ 39 | \vdots \\ 40 | I_{t}\left(\mathbf{p}_{25}\right) 41 | \end{array}\right] 42 | \end{equation} 43 | 44 | 即$\bd A_{25\times 2} \bm d_{2\times 1} = \bm b_{25\times 1}$ 45 | 46 | 得到 47 | \begin{equation} 48 | \bd A^\top \bd A \bm d = \bd A^\top \bm b 49 | \end{equation} 50 | 51 | 什么时候可解?\marginpar{\kaishu 这和我们之前的Harris Corner Detector非常相似.光流当中最容易被捕捉的也是corner.corner与光流紧密相关.} 52 | \begin{enumerate} 53 | \item 可逆 54 | \item 特征值不能太小 55 | \item 良态 56 | \end{enumerate} 57 | 58 | FlowNet:最简单的想法:两张三通道图merge在一起,卷.dense regression.early fusion. 59 | 60 | 或者:分别提取feature.两个网络share weight.然后结合到一起.middle fusion. 61 | 62 | 过早fusion会使得问题空间变大.过完fusion会使得微观细节缺失. 63 | -------------------------------------------------------------------------------- /notes/src-ele/25.Summary_of_Computer_Vision.tex: -------------------------------------------------------------------------------- 1 | \chapter{Summary of Computer Vision} 2 | 3 | Compared to human vision, computer vision deals with the following tasks: 4 | 5 | \begin{enumerate} 6 | \item visual data acquisition (similar to human eyes but comes withmany more choices) 7 | \item image processing and feature extraction (mostly low-level) 8 | \item analyze local structures and then 3D reconstruct the original scene (from mid-level to high-level) 9 | \item understanding (mostly high-level) 10 | \item generation (beyond the scope of human vision system) 11 | \item and further serving embodied agents to make decisions and take actions. 12 | \end{enumerate} 13 | 14 | \textbf{谢谢老师的 wonderful lecture!} 15 | 16 | \textbf{谢谢看到这里的同学! 欢迎在 github 上 star \href{https://github.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU}{本项目}或者访问我的\href{https://www.lyt0112.com/blog/course}{个人网站上关于课程的测评}!} -------------------------------------------------------------------------------- /notes/src-ele/DOF_and_rank.tex: -------------------------------------------------------------------------------- 1 | \chapter{DOF and rank in essential matrix and fundamental matrix} 2 | \label{DOFandRank} 3 | 所谓矩阵的自由度,实际就是指矩阵中有多少个元素可以独立变化.例如,一个$m \times n$的矩阵在不加任何限制的情况下有$mn$个自由度,而对于$n$阶上三角矩阵,其自由度为$n(n+1)/2$. 4 | 5 | 从三维的物体投影成二维的图像,这个过程其实就是射影变换.在射影空间当中的单应矩阵$\bd H$(可以理解为我们的投影变换矩阵)天然少一个自由度,因为自原点出发位于同一条线上的两个点,在射影之后无法区分,所以$\bd H \sim \alpha \bd H$. 6 | 7 | 测地线距离:geodesic distance. 8 | -------------------------------------------------------------------------------- /notes/src-ele/appendix-QRDecomposition.tex: -------------------------------------------------------------------------------- 1 | \chapter{QR Decomposition} 2 | \label{QR Decomposition} 3 | 矩阵的QR分解就是将矩阵分解为正交矩阵和上三角矩阵的乘积,它可以对任意形状的矩阵进行.常用的方法有Gram–Schmidt process, Givens rotaitons和Householder reflections等.我们用最容易理解的施密特正交化方法来推导方阵的情形. 4 | 5 | 我们先将分解后的形式写出: 6 | \begin{equation} 7 | \begin{bmatrix} 8 | \bm a_1 & \bm a_2 & \cdots & \bm a_n 9 | \end{bmatrix} 10 | = 11 | \begin{bmatrix} 12 | \bm e_1 & \bm e_2 & \cdots & \bm e_n 13 | \end{bmatrix} 14 | \begin{bmatrix} 15 | r_{11} & r_{12} & \cdots & r_{1n} 16 | \\ 17 | 0 & r_{22} & \cdots & r_{2n} 18 | \\ 19 | 0 & 0 & \ddots & \vdots 20 | \\ 21 | 0 & 0 & \cdots & r_{nn} 22 | \end{bmatrix} 23 | \end{equation} 24 | 也就是满足 25 | \begin{equation} 26 | \bm a_i = \sum_{j = 1}^{i} r_{ji} \bm e_j 27 | \end{equation} 28 | 由此可以定出 29 | \begin{equation} 30 | \begin{aligned} 31 | \bm u_1 &= \bm a_1, \quad &\bm e_1 = \frac{\bm u_1}{\norm{\bm u_1}} 32 | \\ 33 | \bm u_2 &= \bm a_2 - \text{proj}_{\bm u_1} \bm a_2, & \bm e_2 = \frac{\bm u_2}{\norm{\bm u_2}} 34 | \\ 35 | &\vdots 36 | \\ 37 | \bm u_n &= \bm a_n - \sum_{j=1}^{n-1} \text{proj}_{\bm u_j} \bm a_n , &\bm e_n = \frac{\bm u_n}{\norm{\bm u_n}} 38 | \end{aligned} 39 | \end{equation} 40 | 以及 41 | \begin{equation} 42 | r_{ij} = \langle \bm e_i, \bm a_j \rangle 43 | \end{equation} 44 | 45 | 我们考虑一个方阵$\bd P$,其副对角线上的元素为$1$,其余为$0$.不难验证$\bd P\bd P = \bd I, \bd P = \bd P^\top = \bd P^{-1}$.左乘矩阵$\bd P$会使得矩阵上下翻转,右乘会使得矩阵左右翻转.将一个上三角矩阵上下翻转后左右翻转,即变为下三角矩阵.记$\widetilde{\bd A} = \bd P \bd A$,对$\widetilde{\bd A}^\top$进行QR分解,得到 46 | \begin{equation} 47 | \widetilde{\bd A}^\top = \widetilde{\bd Q} \widetilde{\bd R} 48 | \end{equation} 49 | 50 | 由此得到 51 | \begin{equation} 52 | \bd A = \bd P \widetilde{\bd R}^\top \widetilde{\bd Q}^\top = \xk{\bd P \widetilde{\bd R}^\top \bd P} \xk{\bd P \widetilde{\bd Q}^\top} \xlongequal{\text{def}} \bd R \bd Q 53 | \end{equation} 54 | 55 | 同样的,对于$\bd A^{\top}$进行QR分解就可以得到$\bd A$的LQ分解,可以用同样的方法得到QL分解.不过在使用时要注意:$\bd{P}$不一定是旋转矩阵. -------------------------------------------------------------------------------- /notes/src-ele/condition-number.tex: -------------------------------------------------------------------------------- 1 | \chapter{Condition Number} 2 | 问题的条件数是数值分析中常见的概念,是导数的一种推广.简单来说,就是对于输入发生微小变化时,输出的变化程度的大小.如果一个问题的条件数较小,那么就称其是良置的(well conditioned),否则称为病态的(ill conditioned). 3 | 4 | 考虑一个线性系统$\bd A \bm x = \bm b$,那么若$\det \bd A \ne 0$,则$\bm x = \bd{A}^{-1}\bm b$.若输入变化$\delta \bm x$,则输出变化为$\bd A \delta \bm x$.考虑相对变化之比的上界: -------------------------------------------------------------------------------- /notes/src-ele/figure/cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figure/cover.jpg -------------------------------------------------------------------------------- /notes/src-ele/figure/logo-blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figure/logo-blue.png -------------------------------------------------------------------------------- /notes/src-ele/figures/3DCNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/3DCNN.png -------------------------------------------------------------------------------- /notes/src-ele/figures/6d_object_pose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/6d_object_pose.png -------------------------------------------------------------------------------- /notes/src-ele/figures/BNsize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/BNsize.png -------------------------------------------------------------------------------- /notes/src-ele/figures/BinarizationviaThresholding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/BinarizationviaThresholding.png -------------------------------------------------------------------------------- /notes/src-ele/figures/DETR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/DETR.png -------------------------------------------------------------------------------- /notes/src-ele/figures/DiscreteConvolution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/DiscreteConvolution.png -------------------------------------------------------------------------------- /notes/src-ele/figures/EF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/EF.png -------------------------------------------------------------------------------- /notes/src-ele/figures/Embodied_Multimodal_Large_Model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/Embodied_Multimodal_Large_Model.png -------------------------------------------------------------------------------- /notes/src-ele/figures/FourierTransform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/FourierTransform.png -------------------------------------------------------------------------------- /notes/src-ele/figures/GroupNorm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/GroupNorm.png -------------------------------------------------------------------------------- /notes/src-ele/figures/KLdiv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/KLdiv.png -------------------------------------------------------------------------------- /notes/src-ele/figures/LF_FC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/LF_FC.png -------------------------------------------------------------------------------- /notes/src-ele/figures/LF_pool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/LF_pool.png -------------------------------------------------------------------------------- /notes/src-ele/figures/Mnistdataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/Mnistdataset.png -------------------------------------------------------------------------------- /notes/src-ele/figures/NMS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/NMS.png -------------------------------------------------------------------------------- /notes/src-ele/figures/NormalizationTechniques.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/NormalizationTechniques.png -------------------------------------------------------------------------------- /notes/src-ele/figures/PointNet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/PointNet.png -------------------------------------------------------------------------------- /notes/src-ele/figures/PointNet_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/PointNet_structure.png -------------------------------------------------------------------------------- /notes/src-ele/figures/RCNN_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/RCNN_classification.png -------------------------------------------------------------------------------- /notes/src-ele/figures/RNN_grad_van.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/RNN_grad_van.png -------------------------------------------------------------------------------- /notes/src-ele/figures/ROI_align.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/ROI_align.png -------------------------------------------------------------------------------- /notes/src-ele/figures/ResNet集成神经网络解释.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/ResNet集成神经网络解释.png -------------------------------------------------------------------------------- /notes/src-ele/figures/RoI_pool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/RoI_pool.png -------------------------------------------------------------------------------- /notes/src-ele/figures/RoI_pool2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/RoI_pool2.png -------------------------------------------------------------------------------- /notes/src-ele/figures/Screenshot_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/Screenshot_loss.png -------------------------------------------------------------------------------- /notes/src-ele/figures/Screenshot_mybest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/Screenshot_mybest.png -------------------------------------------------------------------------------- /notes/src-ele/figures/Screenshot_mybest_resnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/Screenshot_mybest_resnet.png -------------------------------------------------------------------------------- /notes/src-ele/figures/UNetstructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/UNetstructure.png -------------------------------------------------------------------------------- /notes/src-ele/figures/VAE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/VAE.png -------------------------------------------------------------------------------- /notes/src-ele/figures/VAE_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/VAE_2.png -------------------------------------------------------------------------------- /notes/src-ele/figures/VisualizingImageGradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/VisualizingImageGradient.png -------------------------------------------------------------------------------- /notes/src-ele/figures/YOLO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/YOLO.png -------------------------------------------------------------------------------- /notes/src-ele/figures/activationfunc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/activationfunc.png -------------------------------------------------------------------------------- /notes/src-ele/figures/adam.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/adam.png -------------------------------------------------------------------------------- /notes/src-ele/figures/ae.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/ae.png -------------------------------------------------------------------------------- /notes/src-ele/figures/anchor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/anchor.jpg -------------------------------------------------------------------------------- /notes/src-ele/figures/angle_between_line.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/angle_between_line.png -------------------------------------------------------------------------------- /notes/src-ele/figures/attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/attention.png -------------------------------------------------------------------------------- /notes/src-ele/figures/autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/autoencoder.png -------------------------------------------------------------------------------- /notes/src-ele/figures/bilinear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/bilinear.png -------------------------------------------------------------------------------- /notes/src-ele/figures/camata-pose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/camata-pose.png -------------------------------------------------------------------------------- /notes/src-ele/figures/chainrule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/chainrule.png -------------------------------------------------------------------------------- /notes/src-ele/figures/corner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/corner.png -------------------------------------------------------------------------------- /notes/src-ele/figures/corner_energy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/corner_energy.png -------------------------------------------------------------------------------- /notes/src-ele/figures/corner_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/corner_map.png -------------------------------------------------------------------------------- /notes/src-ele/figures/corners.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/corners.png -------------------------------------------------------------------------------- /notes/src-ele/figures/cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/cover.jpg -------------------------------------------------------------------------------- /notes/src-ele/figures/cv_tasks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/cv_tasks.png -------------------------------------------------------------------------------- /notes/src-ele/figures/ddgg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/ddgg.png -------------------------------------------------------------------------------- /notes/src-ele/figures/deep-test-err.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/deep-test-err.png -------------------------------------------------------------------------------- /notes/src-ele/figures/edge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/edge.png -------------------------------------------------------------------------------- /notes/src-ele/figures/epi-constrain-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/epi-constrain-2.png -------------------------------------------------------------------------------- /notes/src-ele/figures/epi-constrain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/epi-constrain.png -------------------------------------------------------------------------------- /notes/src-ele/figures/epi-geo-2pic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/epi-geo-2pic.png -------------------------------------------------------------------------------- /notes/src-ele/figures/epipolargeometry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/epipolargeometry.png -------------------------------------------------------------------------------- /notes/src-ele/figures/fid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/fid.png -------------------------------------------------------------------------------- /notes/src-ele/figures/g.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/g.png -------------------------------------------------------------------------------- /notes/src-ele/figures/g2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/g2.png -------------------------------------------------------------------------------- /notes/src-ele/figures/general_atten.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/general_atten.png -------------------------------------------------------------------------------- /notes/src-ele/figures/generalgap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/generalgap.png -------------------------------------------------------------------------------- /notes/src-ele/figures/generative_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/generative_model.png -------------------------------------------------------------------------------- /notes/src-ele/figures/grad_var.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/grad_var.png -------------------------------------------------------------------------------- /notes/src-ele/figures/holes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/holes.png -------------------------------------------------------------------------------- /notes/src-ele/figures/horizon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/horizon.png -------------------------------------------------------------------------------- /notes/src-ele/figures/hough1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/hough1.png -------------------------------------------------------------------------------- /notes/src-ele/figures/hough2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/hough2.png -------------------------------------------------------------------------------- /notes/src-ele/figures/image_attentoin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/image_attentoin.png -------------------------------------------------------------------------------- /notes/src-ele/figures/image_nocs_pose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/image_nocs_pose.png -------------------------------------------------------------------------------- /notes/src-ele/figures/image_plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/image_plane.png -------------------------------------------------------------------------------- /notes/src-ele/figures/image_self_atten.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/image_self_atten.png -------------------------------------------------------------------------------- /notes/src-ele/figures/image_seq2seq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/image_seq2seq.png -------------------------------------------------------------------------------- /notes/src-ele/figures/learning_rate_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/learning_rate_schedule.png -------------------------------------------------------------------------------- /notes/src-ele/figures/light_invariant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/light_invariant.png -------------------------------------------------------------------------------- /notes/src-ele/figures/logo-blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/logo-blue.png -------------------------------------------------------------------------------- /notes/src-ele/figures/loss1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/loss1.png -------------------------------------------------------------------------------- /notes/src-ele/figures/loss2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/loss2.png -------------------------------------------------------------------------------- /notes/src-ele/figures/lstm_grad_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/lstm_grad_flow.png -------------------------------------------------------------------------------- /notes/src-ele/figures/marching_cube1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/marching_cube1.png -------------------------------------------------------------------------------- /notes/src-ele/figures/marching_cube2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/marching_cube2.png -------------------------------------------------------------------------------- /notes/src-ele/figures/mgtest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/mgtest.png -------------------------------------------------------------------------------- /notes/src-ele/figures/ministdataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/ministdataset.png -------------------------------------------------------------------------------- /notes/src-ele/figures/mlp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/mlp.png -------------------------------------------------------------------------------- /notes/src-ele/figures/msgd1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/msgd1.png -------------------------------------------------------------------------------- /notes/src-ele/figures/msgd2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/msgd2.png -------------------------------------------------------------------------------- /notes/src-ele/figures/multilayer_rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/multilayer_rnn.png -------------------------------------------------------------------------------- /notes/src-ele/figures/not_roboust_outliner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/not_roboust_outliner.png -------------------------------------------------------------------------------- /notes/src-ele/figures/nsloss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/nsloss.png -------------------------------------------------------------------------------- /notes/src-ele/figures/paralle_q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/paralle_q.png -------------------------------------------------------------------------------- /notes/src-ele/figures/parallel-image-plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/parallel-image-plane.png -------------------------------------------------------------------------------- /notes/src-ele/figures/pic_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/pic_1.png -------------------------------------------------------------------------------- /notes/src-ele/figures/pinholecamera.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/pinholecamera.png -------------------------------------------------------------------------------- /notes/src-ele/figures/pointnet++.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/pointnet++.png -------------------------------------------------------------------------------- /notes/src-ele/figures/pos_encoding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/pos_encoding.png -------------------------------------------------------------------------------- /notes/src-ele/figures/property-of-f-mat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/property-of-f-mat.png -------------------------------------------------------------------------------- /notes/src-ele/figures/rcnn_speed_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/rcnn_speed_comparison.png -------------------------------------------------------------------------------- /notes/src-ele/figures/rcnn_vs_frcnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/rcnn_vs_frcnn.png -------------------------------------------------------------------------------- /notes/src-ele/figures/receptivefield.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/receptivefield.png -------------------------------------------------------------------------------- /notes/src-ele/figures/recu_CNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/recu_CNN.png -------------------------------------------------------------------------------- /notes/src-ele/figures/recur_CNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/recur_CNN.png -------------------------------------------------------------------------------- /notes/src-ele/figures/recur_CNN_detail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/recur_CNN_detail.png -------------------------------------------------------------------------------- /notes/src-ele/figures/residual_network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/residual_network.png -------------------------------------------------------------------------------- /notes/src-ele/figures/rnn-seqdata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/rnn-seqdata.png -------------------------------------------------------------------------------- /notes/src-ele/figures/rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/rnn.png -------------------------------------------------------------------------------- /notes/src-ele/figures/sensors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/sensors.png -------------------------------------------------------------------------------- /notes/src-ele/figures/seq2seq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/seq2seq.png -------------------------------------------------------------------------------- /notes/src-ele/figures/sigmoid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/sigmoid.png -------------------------------------------------------------------------------- /notes/src-ele/figures/simple_NMS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/simple_NMS.png -------------------------------------------------------------------------------- /notes/src-ele/figures/simple_NMS_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/simple_NMS_2.png -------------------------------------------------------------------------------- /notes/src-ele/figures/simple_VAE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/simple_VAE.png -------------------------------------------------------------------------------- /notes/src-ele/figures/single_layer_issue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/single_layer_issue.png -------------------------------------------------------------------------------- /notes/src-ele/figures/single_obj_det.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/single_obj_det.png -------------------------------------------------------------------------------- /notes/src-ele/figures/sparsenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/sparsenet.png -------------------------------------------------------------------------------- /notes/src-ele/figures/strangeknowledge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/strangeknowledge.png -------------------------------------------------------------------------------- /notes/src-ele/figures/tradeoff-in-gauss-filtering.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/tradeoff-in-gauss-filtering.png -------------------------------------------------------------------------------- /notes/src-ele/figures/transform_all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/transform_all.png -------------------------------------------------------------------------------- /notes/src-ele/figures/transformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/transformer.png -------------------------------------------------------------------------------- /notes/src-ele/figures/triangulation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/triangulation.png -------------------------------------------------------------------------------- /notes/src-ele/figures/truncate_bp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/truncate_bp.png -------------------------------------------------------------------------------- /notes/src-ele/figures/two_stage_detector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/two_stage_detector.png -------------------------------------------------------------------------------- /notes/src-ele/figures/valley.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/valley.png -------------------------------------------------------------------------------- /notes/src-ele/figures/vanilla_rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/vanilla_rnn.png -------------------------------------------------------------------------------- /notes/src-ele/figures/vanishingpoints.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/vanishingpoints.png -------------------------------------------------------------------------------- /notes/src-ele/figures/video_cmp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/video_cmp.png -------------------------------------------------------------------------------- /notes/src-ele/figures/vpanddir.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/vpanddir.png -------------------------------------------------------------------------------- /notes/src-ele/figures/weak_perspective.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/weak_perspective.png -------------------------------------------------------------------------------- /notes/src-ele/figures/window-function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/window-function.png -------------------------------------------------------------------------------- /notes/src-ele/figures/window_moving.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/window_moving.png -------------------------------------------------------------------------------- /notes/src-ele/figures/word_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/word_model.png -------------------------------------------------------------------------------- /notes/src-ele/figures/wrongDA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/figures/wrongDA.png -------------------------------------------------------------------------------- /notes/src-ele/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from pathlib import Path 4 | 5 | def modify_headings(tex_content, mode='promote'): 6 | rules = { 7 | 'promote': [ 8 | # 修正顺序:从高级别到低级别替换 9 | ('section', 'chapter'), 10 | ('subsection', 'section'), 11 | ('subsubsection', 'subsection') 12 | ], 13 | 'demote': [ 14 | ('chapter', 'section'), 15 | ('section', 'subsection'), 16 | ('subsection', 'subsubsection'), 17 | ('subsubsection', 'paragraph'), 18 | ('paragraph', 'subparagraph') 19 | ] 20 | } 21 | 22 | replace_rules = rules[mode] 23 | 24 | # 正则表达式优化(支持可选参数和嵌套内容) 25 | pattern = r'\\%s(\*?)($$.*?$$)?\{((?:[^{}]|{(?:[^{}]|{[^{}]*})*})*)\}' 26 | 27 | for original, target in replace_rules: 28 | compiled = re.compile(pattern % original, re.DOTALL) 29 | replacement = r'\\%s\g<1>\g<2>{\g<3>}' % target 30 | tex_content = compiled.sub(replacement, tex_content) 31 | 32 | return tex_content 33 | 34 | def process_directory(root_dir, mode='promote'): 35 | """ 36 | 递归处理目录下所有 .tex 文件 37 | """ 38 | for root, _, files in os.walk(root_dir): 39 | for file in files: 40 | if file.endswith('.tex'): 41 | file_path = Path(root) / file 42 | with open(file_path, 'r', encoding='utf-8') as f: 43 | content = f.read() 44 | modified = modify_headings(content, mode) 45 | with open(file_path, 'w', encoding='utf-8') as f: 46 | f.write(modified) 47 | print(f'已处理: {file_path}') 48 | 49 | if __name__ == "__main__": 50 | process_directory('.', mode='promote') # 或 mode='demote' -------------------------------------------------------------------------------- /notes/src-ele/main.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 23 | 28 | 33 | 36 | 39 | 42 | ]> 43 | 44 | 45 | latex 46 | 47 | main.bcf 48 | 49 | 50 | main.bbl 51 | 52 | 53 | blx-dm.def 54 | blx-unicode.def 55 | blx-compat.def 56 | biblatex.def 57 | standard.bbx 58 | numeric.bbx 59 | numeric-comp.cbx 60 | biblatex.cfg 61 | english.lbx 62 | 63 | 64 | 65 | biber 66 | 67 | biber 68 | main 69 | 70 | 71 | main.bcf 72 | 73 | 74 | main.bbl 75 | 76 | 77 | main.bbl 78 | 79 | 80 | main.bcf 81 | 82 | 83 | reference.bib 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /notes/src-ele/main.synctex.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/main.synctex.gz -------------------------------------------------------------------------------- /notes/src-ele/main.synctex.gz.sum.synctex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/main.synctex.gz.sum.synctex -------------------------------------------------------------------------------- /notes/src-ele/main.tex: -------------------------------------------------------------------------------- 1 | \documentclass[lang=cn,10pt,green]{elegantbook} 2 | \include{package2.tex} 3 | 4 | \title{Introduction to Computer Vision Notes} 5 | \subtitle{授课教师:\href{https://hughw19.github.io}{王鹤}} 6 | \author{林晓疏,\href{https://lyt0112.com/}{梁昱桐}, \href{https://iculizhi.github.io/}{徐靖}, \href{https://arthals.ink/}{卓致用}} 7 | \institute{PKU EECS} 8 | \version{2025 Spring} 9 | \bioinfo{声明}{\textcolor{red}{请勿用于个人学习外其他用途!}} 10 | \extrainfo{个人笔记,如有谬误,欢迎指正!\\ 联系方式:2200012917@stu.pku.edu.cn} 11 | 12 | % 自定义封面元素 13 | \cover{cover.jpg} 14 | \logo{logo-blue.png} 15 | 16 | % 本文档命令 17 | \usepackage{array} 18 | \newcommand{\ccr}[1]{\makecell{{\color{#1}\rule{1cm}{1cm}}}} 19 | 20 | % 修改标题页的橙色带 21 | \definecolor{customcolor}{RGB}{214, 136, 0} 22 | \colorlet{coverlinecolor}{customcolor} 23 | \setcounter{tocdepth}{1} % 设置目录深度 24 | \begin{document} 25 | 26 | \maketitle 27 | \frontmatter 28 | 29 | \tableofcontents 30 | 31 | \mainmatter 32 | 33 | 34 | \include{00.images_as_funcitons} 35 | \include{01.edge_detection} 36 | \include{02.keypoint_detection} 37 | \include{03.line_fitting} 38 | \include{04a.deeplearning} 39 | \include{04.CNN} 40 | \include{05.CNN_training} 41 | \include{06.CNN_improvement} 42 | \include{07.classification} 43 | \include{08.CNN_for_classification} 44 | \include{09.segmentation} 45 | \include{10.3Dvision} 46 | \include{11.camera_carlibration} 47 | \include{12.single_view_geometry} 48 | \include{13.epipolar_geometry} 49 | \include{14.3D_data} 50 | \include{15.3D_deep_learning} 51 | \include{16.Sequential_Modeling} 52 | \include{17.video_analysis} 53 | \include{18.Transformer} 54 | \include{19.object_detection_and_instance_segmentation} 55 | \include{20.generative_model} 56 | \include{21.pose_and_motion} 57 | \include{22.Instance_Level_6D_Object_Pose_Estimation} 58 | \include{23.motion} 59 | \include{24.Embodied_AI} 60 | \include{25.Summary_of_Computer_Vision} 61 | \clearpage 62 | % appendix: appendix-QRDecomposition, condition-number, transformation-in-space 63 | \appendix 64 | \include{condition-number} 65 | \include{transformation-in-space} 66 | \include{DOF_and_rank} 67 | \include{appendix-QRDecomposition} 68 | 69 | \printbibliography[heading=bibintoc, title=\ebibname] 70 | \end{document} 71 | -------------------------------------------------------------------------------- /notes/src-ele/now.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 23 | 28 | 33 | 36 | 39 | 42 | ]> 43 | 44 | 45 | latex 46 | 47 | now.bcf 48 | 49 | 50 | now.bbl 51 | 52 | 53 | blx-dm.def 54 | blx-unicode.def 55 | blx-compat.def 56 | biblatex.def 57 | standard.bbx 58 | numeric.bbx 59 | numeric-comp.cbx 60 | biblatex.cfg 61 | english.lbx 62 | 63 | 64 | 65 | biber 66 | 67 | biber 68 | now 69 | 70 | 71 | now.bcf 72 | 73 | 74 | now.bbl 75 | 76 | 77 | now.bbl 78 | 79 | 80 | now.bcf 81 | 82 | 83 | reference.bib 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /notes/src-ele/now.synctex.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src-ele/now.synctex.gz -------------------------------------------------------------------------------- /notes/src-ele/package2.tex: -------------------------------------------------------------------------------- 1 | \usepackage{float} 2 | \usepackage[normalem]{ulem} % \sout{想加删除线的中文} 3 | \usepackage{wrapfig} 4 | \makeatletter 5 | \let\c@lofdepth\relax % 重置 lofdepth 计数器 6 | \let\c@lotdepth\relax % 重置 lotdepth 计数器 7 | \makeatother 8 | \usepackage{subfigure} 9 | \usepackage{extarrows} 10 | \newcommand{\incfig}[1]{% 11 | \def\svgwidth{\columnwidth} 12 | \import{./figures/}{#1.pdf_tex} 13 | } 14 | 15 | 16 | \renewcommand{\proofname}{\indent Pr} 17 | 18 | \newcommand{\argmin}[1]{\underset{#1}{\arg \min}\ } 19 | \newcommand{\ceil}[1]{\left\lceil #1 \right \rceil } 20 | \newcommand{\norm}[1]{\left \Vert #1 \right \Vert} 21 | \newcommand{\tform}[1]{\left \Vert #1 \right \Vert_2} 22 | \newcommand{\tnorm}[1]{\left \Vert #1 \right \Vert_2} 23 | \newcommand{\onorm}[1]{\left \Vert #1 \right \Vert_1} 24 | \newcommand{\abs}[1]{\left|#1 \right|} 25 | \newcommand{\var}[1]{\text{Var}\left[ #1\right]} 26 | \newcommand{\xk}[1]{\left( #1\right)} 27 | \newcommand{\zk}[1]{\left[ #1\right]} 28 | \newcommand{\dk}[1]{\left\{ #1\right\}} 29 | \newcommand{\bd}[1]{\bold{#1}} 30 | 31 | %量子力学符号------ 32 | \newcommand{\xde}{\text{Schrödinger}} 33 | \newcommand{\avg}[1]{\left \langle #1 \right \rangle} 34 | \newcommand{\lvec}[1]{\left \langle #1 \right |} 35 | \newcommand{\rvec}[1]{\left | #1 \right \rangle} 36 | 37 | 38 | \newtheorem{lproof}{证明}[section] 39 | \newtheorem{tuilun}{推论} 40 | \newtheorem{eg}{例}[section] 41 | \newtheorem{solve}{解}[section] 42 | \newcommand\ii{\textup{i}} 43 | \newcommand\dd{\mathrm{d}} 44 | 45 | 46 | %自定义数学符号 47 | \newcommand{\diag}{\textup{diag}} 48 | \newcommand{\Frobenius}[1]{\left\Vert #1 \right\Vert} 49 | \newcommand{\fform}[1]{\left\Vert #1 \right\Vert_F} 50 | \newcommand{\parr}[2]{\frac{\partial #1}{\partial #2}}%一阶偏微分 51 | \newcommand{\parrr}[2]{\frac{\partial^2 #1}{\partial #2^2}}%二阶偏微分 52 | \newcommand{\lap}[1]{\parrr{#1}{x} + \parrr{#1}{y} = 0}%二元拉普拉斯方程 53 | \newcommand{\ddd}[2]{\frac{\textup{d} #1}{\textup{d} #2}}%微商 54 | \newcommand{\dddd}[2]{\frac{\textup{d}^2 #1}{\textup{d} #2^2}}%微商 55 | 56 | %二重以上环路积分,强迫症了属于是 57 | \def\ooint{{\bigcirc}\kern-11.5pt{\int}\kern-6.5pt{\int}} 58 | \def\oooint{{\bigcirc}\kern-12.3pt{\int}\kern-7pt{\int}\kern-7pt{\int}} 59 | 60 | 61 | \newcommand{\tu}{\textup} 62 | \newcommand{\ol}[1]{$\overline{#1}$} 63 | \newcommand{\re}[1]{\textup{Re}(#1)} 64 | \newcommand{\im}[1]{\textup{Im}(#1)} 65 | \newcommand{\fa}{\forall} 66 | \newcommand{\ex}{\exists} 67 | \newcommand{\st}{\textup{ s.t. }} 68 | \newcommand{\ve}{\varepsilon} 69 | \newcommand{\disp}{\displaystyle} 70 | \newcommand{\chj}{\textup{Cauchy}积分公式} 71 | \newcommand{\res}[1]{\textup{Res}\left(#1\right)} 72 | \newcommand{\mysum}[1][n]{\sum_{i = 1}^{#1}}%求和 73 | \newcommand{\series}[1]{\sum_{n = 0}^{\infty} #1_{n}}%级数 74 | \newcommand{\seriesa}[1]{\sum_{n = 0}^{\infty} \left| #1_{n}\right|}%绝对级数 75 | \newcommand{\fseries}[1]{\sum_{k = 1}^{\infty} #1_k (z)} 76 | 77 | \newcommand*{\num}{pi} 78 | 79 | %书写横线 80 | \newcommand{\horrule}[1]{\rule[0.5ex]{\linewidth}{#1}} % Horizontal rule -------------------------------------------------------------------------------- /notes/src-ele/preface.tex: -------------------------------------------------------------------------------- 1 | \chapter*{前言} 2 | 3 | 这本笔记是作者于2022年春信息科学技术学院王鹤老师开设的计算机视觉导论课程期间的笔记.王鹤老师在Stanford获得Ph.D学位,课程中也毫不令人意外地带有许多\href{https://cs231n.github.io/}{CS231n: Convolutional Neural Network for Visial Recognition}和\href{https://web.stanford.edu/class/cs231a/course_notes.html}{CS231A: Computer Vision, From 3D Reconstruction to Recognition}等课程的影子.课程从对计算机视觉领域的传统方法的介绍开始,介绍了CNN和诸多深度学习的基本知识,如BatchNorm,Regularization等.随后进入3D视觉部分,详细介绍了Pinhole Camera这一模型以及相机标定,对极几何等相关知识.期中之后转入3D数据,语义分割,物体位姿判定以及RNN和生成模型部分. 4 | 5 | 笔记主要是对王鹤老师上课内容的记录,部分内容由笔者在课余时间了解后添加,这些内容都给出了参考文献或链接.除此之外,笔者还依惯例添加了几节附录,以补充正文当中一些没有展开的细节,以供参考. 6 | 7 | 这门课是笔者三年以来在信科上过的水准最高的课程,无论是课程内容,教师讲授水平,作业质量,考试区分度还是答疑,都是笔者体验过的课程中最高水准的一档.若信科未来能有一半专业课能达到本课的水平,则世界一流大学指日可待 (. 8 | 9 | 最后,感谢王鹤老师和张嘉曌,陈嘉毅两位助教.笔者曾多次向张助教询问问题,均得到了细致的回答,在此一并表示感谢. 10 | 11 | \rightline{林晓疏} 12 | 13 | \rightline{2022年春} 14 | 15 | 作为北京大学信息科学技术学院的学生,长期以来饱受糟糕课程质量、糟糕课程作业、糟糕考试难度的折磨. 16 | 比如算法设计与分析的等课程的教学质量极低,教考分离,ICS考试一面黑板的考试错误题目订正等等. 17 | 在这样的环境下,幸运地遇到了王鹤老师开设的计算机视觉导论课程,内容丰富,作业质量高,考试难度适中, 18 | 绝对称得上是精品课程\sout{(与算分这种国家精品课程相区别)}. 19 | 20 | 王鹤老师将计算机视觉的发展脉络呈现给大家,在这个深度学习时代, 21 | 老师并没有完全忽视传统CV的方法,而是挑选了其中具有代表性的工作,这些工作为深度学习时代的CV打下了良好的基础,提供了许多基础工具和数据集的构建方式. 22 | 同时老师也更加注重深度学习的基础知识,如 BatchNorm 的特性和与其他 Norm 的区别,许多人仅仅只是会 PyTorch 的积木搭建,但是对于这些基础知识的原理和性质却不甚了解, 23 | 导致在实际使用中遇到问题时无法解决,王老师在这方面往往提出 intuitive 的问题,引人深思. 24 | 25 | 我是在大三下学期选修了这门课程,即使我已经具有了一定的深度学习基础,但是我仍然很享受上课\sout{看回放}的过程,因为对于许多已经了解的知识,王老师会再度给出解释, 26 | 总是让我在同一个地方有不同的收获. 27 | 28 | 我在本学期期中考试之前偶然了解到曾经有学长撰写了一本笔记,但是许多内容已经进行了更新或者删改,因此我联系上林晓疏(笔名)学长,获取了这份笔记的源代码, 29 | 并在此基础上进行更新,以飨后人. 30 | 31 | 该笔记按照讲授先后顺序进行排列,但是章节编排按照知识结构划分,因此章节划分可能与课程进度有所不同. 32 | 同时本笔记不能替代课程,只是对这部分知识的总结和思考,建议与课程回放配合食用. 33 | 34 | \rightline{Yutong Liang} 35 | \rightline{2024年4月24日} 36 | -------------------------------------------------------------------------------- /notes/src/01.edge_detection.tex: -------------------------------------------------------------------------------- 1 | \section{Edge Detection} 2 | 3 | \subsection{What is an Edge?} 4 | 5 | “边缘”是图像中的一个区域,在这个区域中,沿着图像的一个方向, 6 | 像素强度值 (或者说对比度) 发生了“显著”的变化,而在其正交方向上, 7 | 像素强度值 (或对比度) 几乎没有变化. 8 | 9 | \subsection{Criteria for Optimal Edge Detection} 10 | 11 | \begin{equation} 12 | \text{Accuracy}=\frac{\text{TP}+\text{TN}}{\text{TP}+\text{FP}+\text{TN}+\text{FN}} 13 | \end{equation} 14 | 15 | \begin{equation} 16 | \text{Precision}=\frac{\text{TP}}{\text{TP}+\text{FP}} 17 | \end{equation} 18 | 19 | \begin{equation} 20 | \text{Recall}=\frac{\text{TP}}{\text{TP}+\text{FN}} 21 | \end{equation} 22 | 23 | Precision 和 Recall 都代表着你检测出的真正边缘所占比例,但是 Precision 的分母 24 | 是你检测出的边缘,Recall 的分母是真正的边缘. 25 | 26 | \subsection{Non-Maximal Suppression (NMS)} 27 | 28 | 非最大值抑制,顾名思义,就是抑制非最大值,这里的最大值指的是梯度的局部最大值. 29 | 30 | 在计算出了所有点的梯度之后,会有很多像素的梯度大于设定的阈值,而我们希望最后得出的边缘像素真的看起来 31 | 像一条线而不是一块区域,所以 NMS 的目的是为了抑制那些不是边缘的像素,只保留那些是边缘的像素. 32 | 33 | \begin{figure}[htbp] 34 | \centering 35 | \includegraphics[scale=0.2]{figures/NMS.png} 36 | \caption{NMS示意图} 37 | \end{figure} 38 | 39 | 对于一个边缘像素的候选点,我们认为它是边缘当:它比它梯度方向的两个点 $q+\nabla q$ 和 $q-\nabla q$ 的梯度值大, 40 | 也就是这个点的梯度大小是局部最大值的时候. 41 | 42 | \begin{figure}[htbp] 43 | \centering 44 | \includegraphics[scale=0.4]{figures/bilinear.png} 45 | \caption{双线性插值} 46 | \end{figure} 47 | 48 | 计算这个点梯度方向的点的梯度值可以使用双线性插值法,就是把这个点周围的四个点的梯度按照横纵距离反比加权. 49 | 50 | 当然,NMS 是一个思想而不是针对边缘检测的算法,比如对于 keypoint detection,object detection (like YOLO) 都可以使用 NMS, 51 | 实现的思路都很类似,使用一个打分函数看这个备选点 (bounding box) 是不是比跟它相邻 (冲突) 的点 (bounding box) 好,如果是就保留,否则就抑制. 52 | 53 | \subsection{A Simplified Version of NMS} 54 | 55 | \begin{figure}[htbp] 56 | \centering 57 | \includegraphics[scale=0.55]{figures/simple_NMS.png} 58 | \caption{简化版本的双线性插值} 59 | \end{figure} 60 | 61 | 一个 NMS 的简化版本是把双线性插值省去,直接让这个像素的梯度大于它梯度方向的那两个相邻像素的梯度. 62 | 63 | \subsection{Hysteresis Thresholding} 64 | 65 | 使用高阈值 (maxVal) 开始边缘曲线,使用低阈值 (minVal) 继续它们. 66 | 67 | \begin{itemize} 68 | \item Pixels with gradient magnitudes > maxVal should be reserved. 69 | \item Pixels with gradient magnitudes < minVal should be removed. 70 | \end{itemize} 71 | 72 | How to decide maxVal and minVal? Examples: 73 | 74 | \begin{itemize} 75 | \item maxVal = 0.3 $\times$ average magnitude of the pixels that pass NMS 76 | \item minVal = 0.1 $\times$ average magnitude of the pixels that pass NMS 77 | \end{itemize} 78 | -------------------------------------------------------------------------------- /notes/src/02.keypoint_detection.tex: -------------------------------------------------------------------------------- 1 | \section{Keypoint Detection} 2 | 3 | \subsection{The Basic Idea of Harris Corner} 4 | 5 | \begin{figure}[htbp] 6 | \centering 7 | \includegraphics[width=0.8\textwidth]{figures/window_moving.png} 8 | \caption{移动窗口} 9 | \end{figure} 10 | 11 | Move a window and explore intensity changes within the window. 12 | 13 | Corner: significant change in all directions. 14 | 15 | \subsection{Harris Corner} 16 | 17 | 一个 window,给定它的移动方向 $(u,v)$: 18 | 19 | \begin{equation} 20 | \begin{aligned} 21 | E(u,v) &= \sum_{x,y} w(x,y) [I(x+u,y+v) - I(x,y)]^2\\ 22 | &\approx \sum_{x,y} w(x,y) [I(x,y) + uI_x + vI_y - I(x,y)]^2\\ 23 | &= \sum_{x,y} w(x,y) [uI_x + vI_y]^2\\ 24 | &= w \ast \begin{bmatrix} u & v \end{bmatrix} \begin{bmatrix} I_x^2 & I_xI_y \\ I_xI_y & I_y^2 \end{bmatrix} \begin{bmatrix} u \\ v \end{bmatrix}\\ 25 | &= \begin{bmatrix} u & v \end{bmatrix} \begin{bmatrix} w \ast I_x^2 & w \ast I_xI_y \\ w \ast I_xI_y & w \ast I_y^2 \end{bmatrix} \begin{bmatrix} u \\ v \end{bmatrix}\\ 26 | &= \begin{bmatrix} u & v \end{bmatrix} R^{-1} \begin{bmatrix} \lambda_1 & 0\\ 0 & \lambda_2 \end{bmatrix} R \begin{bmatrix} u \\ v \end{bmatrix}\\ 27 | &= \lambda_1 u_R^2 + \lambda_2 v_R^2 28 | \end{aligned} 29 | \end{equation} 30 | 31 | 根据这两个特征值的大小可以判断这个点是不是角点. 32 | 33 | \begin{figure}[htbp] 34 | \centering 35 | \includegraphics[width=0.6\textwidth]{figures/corner_map.png} 36 | \caption{特征值大小和这个点的是什么种类的点的关系} 37 | \end{figure} 38 | 39 | 这个点是角点一般需要满足: 40 | 41 | \begin{itemize} 42 | \item $\lambda_1, \lambda_2>b$ 43 | \item $\frac{1}{k}<\frac{\lambda_1}{\lambda_2} \frac{\log(1-p)}{\log(1-w^n)} 51 | \end{equation} 52 | 53 | \subsection{Hough Transform} 54 | 55 | 其实就是把一条直线从实际空间的表示转换到参数空间的表示.但是如果存在垂直的直线,可能需要考虑使用极坐标来作为参数空间. 56 | 57 | \begin{figure}[htbp] 58 | \centering 59 | \includegraphics[width=0.8\textwidth]{figures/hough1.png} 60 | \caption{Hough Transform w/o Noise} 61 | \end{figure} 62 | 63 | \begin{figure}[htbp] 64 | \centering 65 | \includegraphics[width=0.8\textwidth]{figures/hough2.png} 66 | \caption{Hough Transform w/ Noise and Outliers} 67 | \end{figure} -------------------------------------------------------------------------------- /notes/src/07.classification.tex: -------------------------------------------------------------------------------- 1 | \section{Classification} 2 | 3 | 图片分类是CV领域的核心问题.简单来说,就是给定一张图片,判断其属于何种分类,比如是不是猫或狗等等,这对图片的语义理解非常重要. 4 | 5 | 但是传统的方法对此类问题难以下手,因为图片通常是由数字的矩阵来描述,而从数字到语义有很大的鸿沟,很难设计某个规则来判定是否属于某类. 6 | 比如:对象不同的姿势,不同的摄像机视角,不同的背景信息,不同的光照条件,以及对象被隐藏和类内差异等问题. 7 | 8 | 对于一个好的图片分类器,应该对上述无关因素不敏感,而这也是data augmentation的意义.比如rotation代表姿势和视角的改变,颜色改变代表光照的变化等. 9 | 10 | 对于图片分类,我们有下列方法:无参方法有最近邻法,参数方法则可以采用CNN. 11 | 12 | \subsection{Nearest Neighbour Classifier} 13 | 14 | 所谓最近邻,就是将图片视为高维空间的点,将每个训练数据作为已知点,定义一种图片间距离的度量,选取最近的一个 (或几个) 15 | 训练数据的类别作为待判断图片的类别.这是一种非常低效的方法,其完美避开了我们上面说到的应具有的标准,对光照/背景/视角/姿势极为敏感,正确率极低,而且需要存储所有训练集.因此,实际中从不使用此种方法.但是最近邻方法在度量学习当中仍有广泛应用. 16 | 17 | \subsection{Using CNN for image Classification} 18 | 19 | 选用CNN之后,我们需要面对的问题有两个:选取何种网络结构,以及如何设计损失函数. 20 | 如今分类问题的网络范式是Softmax classifier + cross-entropy loss. 21 | \footnote{对二分类问题,也可采用SVM loss.但是扩展的多分类SVM loss在如今已经极少使用了.} 22 | 23 | \textbf{\\SoftMax} 24 | 25 | SoftMax就是一个${\mathbb R}^k \to {(0, 1)}^k$的映射. 26 | 27 | \begin{equation} 28 | \sigma(z)_i = \frac{\exp{\beta z_i}}{\sum \exp(\beta z_j)} 29 | \end{equation} 30 | 31 | 一般取$\beta = 1.$当$\beta \to \infty$时,SoftMax变成Argmax. 32 | 33 | 所以 SoftMax 是 Soft 的 Argmax. 34 | 35 | 关于loss的设计,如果正确标签是one-hot的,那么我们可以使用负对数概率(NLL)作为损失函数. 36 | 但是如果ground truth也是一个概率分布(有时这是人为的), 37 | 那么我们就需要对两个概率分布的距离度量给出定义.在信息论领域常用的度量是KL divergence $D(P \parallel Q)$,其定义如下: 38 | 39 | \begin{equation} 40 | D(P \parallel Q) = \sum_{x \in \mathcal X} P(x) \log \frac{P(x)}{Q(x)}. 41 | \end{equation} 42 | 43 | 这个度量并不满足距离的定义,因为其满足正定性,而不满足对称性和三角不等式. 44 | 45 | 我们不难看出 46 | \begin{equation} 47 | D(P \parallel Q) = \underbrace{-\sum_{x \in \mathcal X} P(x)\log Q(x)}_{H(P, Q)} - \underbrace{\xk{-\sum_{x \in \mathcal X} P(x) \log P(x)}}_{H(P)}. 48 | \end{equation} 49 | 即KL divergence是相对熵和分布$P$的熵之差.如果$P$是groud truth的分布,那么第二项成为常数,就得到了我们的交叉熵损失函数: 50 | \begin{equation} 51 | \mathcal L_{CE} = H(P, Q) = -\sum_{x \in \mathcal X} P(x) \log Q(x). 52 | \end{equation} 53 | 54 | 交叉熵函数在随机初始化时,取值约为$\log (\text{sum of classes})$.它没有上界,有下界$0$. 55 | 56 | 所以 CrossEntrophyLoss 应该在 $\log{\text{类别数}}$ 开始下降 57 | 58 | \subsection{Cross Entrophy Loss V.S. Accuracy} 59 | 60 | 1.CEL有可能已经降到了log2,acc仍是0.例子:$\Pr=[0.499,0.501]$,仍然输出错误答案,但是 $loss=\log2$ 很小 61 | 62 | 2.$acc=100\%$的时候,CEL仍然可能是初始化的 $\log(N)$ , 同理举一个例子:$\Pr=[0.498,0.001,0.001]$ 63 | 64 | 综上所述,两者没有确定关系,训练一定要同时画两个曲线 -------------------------------------------------------------------------------- /notes/src/08.CNN_for_classification.tex: -------------------------------------------------------------------------------- 1 | \section{CNNs for Image Classification} 2 | 3 | 当我们分析一个CNN的结构时,需要考虑以下的方面: 4 | 5 | \begin{enumerate} 6 | \item 表示能力 7 | \item 是否适合任务 8 | \item 是否容易优化 9 | \item 代价 10 | \end{enumerate} 11 | 12 | \subsection{Reception Field} 13 | 14 | 如图,使用三层3*3卷积层,感受野与一层7*7卷积层相同. 15 | 16 | \begin{figure}[htbp] 17 | \centering 18 | \includegraphics[scale=0.85]{figures/receptivefield.png} 19 | \caption{三层3*3卷积核的感受野} 20 | \end{figure} 21 | 22 | 感受野是一个很重要的概念,它表征一个数据可以接收到原图多大范围的信息. 23 | 我们这里姑且认为感受野相同则表达能力相同.我们希望可以在神经网络的中部将整个图片纳入感受野, 24 | 这样在后面可以进行全图的pixel信息的交流,有利于结合多个特征.例如:结合狗的耳朵和毛色进行判断. 25 | 26 | 既然三层3*3卷积层,感受野与一层7*7卷积层相同,那么为什么要选用小而深的网络呢? 27 | 其一,层数增加,网络的非线性性增加,分割能力更强.此外,参数量也更小 ($3\times 3^2 C^2 < 7^2C^2$). -------------------------------------------------------------------------------- /notes/src/22.Instance_Level_6D_Object_Pose_Estimation.tex: -------------------------------------------------------------------------------- 1 | \section{Instance-Level 6D Object Pose Estimation} 2 | 3 | \textbf{这一章节在2024年教学中已经被删去,为了让有兴趣的读者了解,故保留} 4 | 5 | Instance-level: a small set of known instances. 6 | 7 | Pose is defined for each instance according to their CAD model. 8 | 9 | Input: RGB/RGBD.如果有相机内参,那么没有D也可以.有D可以做得更好.\marginpar{\kaishu 为什么有内参没有深度也是可以的呢?因为这里我们是Instance-level的姿态估计,换言之我们已经有了这个物体的形状参数,其大小规格也是已知的.理论上我们甚至可以不停地试$\bd R, \bm t$使得转换后的形状与照片符合.} 10 | 11 | 2D center localization.先预测2d图片的中心位置和深度.随后利用相机内参得到translation. 12 | 13 | PoseCNN: Translation Estimation:Voting.每个pixel给出一个指向中心的向量,得到center. 14 | 15 | PoseCNN: Rotation Estimation. RoI? 16 | 17 | loss: $\mathcal{L}(\bd q, \bd q^{*})$.我们发现$\bd{q}$和$-\bd{q}$在旋转意义上是相同的,double coverage.因此一种可行的regression loss是取两者的最小值. 18 | 19 | PoseCNN则采用了另一种loss: 20 | \begin{equation} 21 | \mathrm{PLoss}(\widetilde{\bd{q}}, \bd{q}) = \frac{1}{2m}\sum_{\bd x \in \mathcal{M}} \norm{R(\widetilde{\bd{q}}) \bd x - R(\bd{q}) \bd x}^2 22 | \end{equation} 23 | 24 | 对称性:(表示旋转的等价类) 25 | \begin{equation} 26 | \operatorname{SLoss}(\widetilde{\mathbf{q}}, \mathbf{q})=\frac{1}{2 m} \sum_{\mathbf{x}_{1} \in \mathcal{M}} \min _{\mathbf{x}_{2} \in \mathcal{M}}\left\|R(\tilde{\mathbf{q}}) \mathbf{x}_{1}-R(\mathbf{q}) \mathbf{x}_{2}\right\|^{2} 27 | \end{equation} 28 | 29 | PoseCNN的translation表现尚可,但是rotation的表现一般,这受限于四元数的性能. 30 | 31 | 6D pose要求已知物体的cad模型,这在现实中不太可能. 32 | 33 | category-level 6D pose.希望能够泛化,输入3d输出6d pose,Without the need to use CAD model. 34 | 35 | 王鹤老师的论文:Normalized Object Coordinate Space for Category-Level 6D Object Pose and Size Estimation,CVPR2019 oral. 36 | 37 | Detecting and estimating 6D pose and 3D size of previously unseen objects from certain categories from RGBD images. 38 | 39 | 为什么要depth呢?因为对于未知的物体来说,仅有rgb而没有depth是无法确定其大小的.有了depth和相机内参,才能消除scale的不确定性. 40 | 41 | 问题的主要难点是rotation的估计.前面我们看到PoseCNN即使对于已知的物体,做得也相当不好. 42 | 43 | 间接法.Representation: Normalized Object Coordinate Space(NOCS) 44 | 45 | 简而言之,我们需要对一张图片的像素预测其在CAD model 中的位置.你可能会问:不是没有CAD model吗?在此我们建立了一个reference space:NOCS. 46 | 47 | step 1:rotation Normalization:align object orientations.将所有物体对齐成同样的姿态,如马克杯的方向都向左,此时旋转矩阵为0.\marginpar{\kaishu 这里我们隐含了一个假设,即我们可以在没有其CAD的情形下讨论其朝向.如马克杯的把手.} 48 | 49 | Step 2 (translation normalization): zero-center the objects.对于新物体,将其紧bbox的中心作为原点. 50 | 51 | Step 3 (scale normalization): uniformly normalize the scales.将bbox的对角线长度设置为1.这样所有的都可以放入一个对角线长为1的正方体里了.NOCS = Reference frame. 52 | 53 | NOCS = Reference frame transformation from NOCS to camera space. 54 | 55 | \begin{figure}[htbp] 56 | \centering 57 | \includegraphics[scale=0.65]{figures/image_nocs_pose.png} 58 | \caption{From Image to NOCS map to Pose.} 59 | \label{} 60 | \end{figure} 61 | 62 | \subsection{Beyond Object Pose} 63 | human/hand pose extimation.人体可以按照关节活动,并不是刚体. 64 | -------------------------------------------------------------------------------- /notes/src/23.motion.tex: -------------------------------------------------------------------------------- 1 | \section{Motion} 2 | 3 | \textbf{这一章节在2024年教学中没有讲授,为了让有兴趣的读者了解,故保留} 4 | 5 | Today let’s focus on motions between two consecutive frames! 6 | 7 | Optical Flow 光流. 8 | 9 | 图片的亮的部分在两帧之间的表象运动. 10 | 11 | 几个假设:亮度相对稳定,小移动,一个点的运动与其邻居相似. 12 | 13 | \begin{equation} 14 | \begin{array}{l} 15 | I(x+u, y+v, t) \approx I(x, y, t-1)+I_{x} \cdot u(x, y)+I_{y} \cdot v(x, y)+I_{t} \\ 16 | I(x+u, y+v, t)-I(x, y, t-1)=I_{x} \cdot u(x, y)+I_{y} \cdot v(x, y)+I_{t} \\ 17 | \text { Hence, } I_{x} \cdot u+I_{y} \cdot v+I_{t} \approx 0 \quad \rightarrow \nabla I \cdot[u v]^{T}+I_{t}=0 18 | \end{array} 19 | \end{equation} 20 | 21 | 那么,这个方程足够解出所有$(u, v)$吗?我们有$n^2$个方程,但有$2n^2$个未知数,因此不够. 22 | 23 | The Aperture Problem.单纯从图像来看,运动可能并不完整.Barberpole Illusion.沿着线的方向不容易观测,垂直的容易被观察到. 24 | 25 | 更多约束: Spatial coherence constraint. 1981年Lucas和Kanade提出了假设在每个pixel的5*5window当中flow相同. 26 | 27 | \begin{equation} 28 | \left[\begin{array}{cc} 29 | I_{x}\left(\mathrm{p}_{1}\right) & I_{y}\left(\mathbf{p}_{1}\right) \\ 30 | I_{x}\left(\mathbf{p}_{2}\right) & I_{y}\left(\mathbf{p}_{2}\right) \\ 31 | \vdots & \vdots \\ 32 | I_{x}\left(\mathbf{p}_{25}\right) & I_{y}\left(\mathbf{p}_{25}\right) 33 | \end{array}\right]\left[\begin{array}{l} 34 | u \\ 35 | v 36 | \end{array}\right]=-\left[\begin{array}{c} 37 | I_{t}\left(\mathbf{p}_{1}\right) \\ 38 | I_{t}\left(\mathbf{p}_{2}\right) \\ 39 | \vdots \\ 40 | I_{t}\left(\mathbf{p}_{25}\right) 41 | \end{array}\right] 42 | \end{equation} 43 | 44 | 即$\bd A_{25\times 2} \bm d_{2\times 1} = \bm b_{25\times 1}$ 45 | 46 | 得到 47 | \begin{equation} 48 | \bd A^\top \bd A \bm d = \bd A^\top \bm b 49 | \end{equation} 50 | 51 | 什么时候可解?\marginpar{\kaishu 这和我们之前的Harris Corner Detector非常相似.光流当中最容易被捕捉的也是corner.corner与光流紧密相关.} 52 | \begin{enumerate} 53 | \item 可逆 54 | \item 特征值不能太小 55 | \item 良态 56 | \end{enumerate} 57 | 58 | FlowNet:最简单的想法:两张三通道图merge在一起,卷.dense regression.early fusion. 59 | 60 | 或者:分别提取feature.两个网络share weight.然后结合到一起.middle fusion. 61 | 62 | 过早fusion会使得问题空间变大.过完fusion会使得微观细节缺失. 63 | -------------------------------------------------------------------------------- /notes/src/25.Summary_of_Computer_Vision.tex: -------------------------------------------------------------------------------- 1 | \section{Summary of Computer Vision} 2 | 3 | Compared to human vision, computer vision deals with the following tasks: 4 | 5 | \begin{enumerate} 6 | \item visual data acquisition (similar to human eyes but comes withmany more choices) 7 | \item image processing and feature extraction (mostly low-level) 8 | \item analyze local structures and then 3D reconstruct the original scene (from mid-level to high-level) 9 | \item understanding (mostly high-level) 10 | \item generation (beyond the scope of human vision system) 11 | \item and further serving embodied agents to make decisions and take actions. 12 | \end{enumerate} 13 | 14 | \textbf{谢谢老师的 wonderful lecture!} 15 | 16 | \textbf{谢谢看到这里的同学! 欢迎在 github 上 star \href{https://github.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU}{本项目}或者访问我的\href{https://www.lyt0112.com/blog/course}{个人网站上关于课程的测评}!} -------------------------------------------------------------------------------- /notes/src/CV_notes.synctex.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/CV_notes.synctex.gz -------------------------------------------------------------------------------- /notes/src/CV_notes.tex: -------------------------------------------------------------------------------- 1 | \include{package} 2 | 3 | % 文档标题 4 | \title{ 5 | {\normalfont\normalsize\textsc{ 6 | Peking University\\ 7 | Introduction to Computer Vision, Spring 2024 \\[25pt]}} 8 | \horrule{0.5pt}\\ 9 | \sffamily{Introduction to Computer Vision\\Course Notes}\\ 10 | \horrule{1.8pt}\\[20pt] 11 | } 12 | 13 | % 作者和联系方式 14 | \author[1]{Prof. He Wang\thanks{\href{https://hughw19.github.io/}{Prof. He Wang}}} 15 | \author[2]{林晓疏\thanks{wangyuanqing@pku.edu.cn}} 16 | \author[2]{Yutong Liang\thanks{\href{https://lyt0112.com/}{Yutong Liang's Website}}} 17 | \affil[1]{主讲教师} 18 | \affil[2]{笔记整理} 19 | 20 | % 文档日期 21 | \date{\today} 22 | 23 | \pagestyle{fancy} 24 | \fancyhf{} 25 | \fancyhead[L]{\leftmark} % 在页眉左侧显示章节名 26 | \fancyfoot[C]{\thepage} % 在页脚中间显示页码 27 | 28 | 29 | \begin{document} 30 | \maketitle 31 | \include{preface} 32 | \clearpage 33 | \tableofcontents 34 | \include{00.images_as_funcitons} 35 | \include{01.edge_detection} 36 | \include{02.keypoint_detection} 37 | \include{03.line_fitting} 38 | \include{04.CNN} 39 | \include{05.CNN_training} 40 | \include{06.CNN_improvement} 41 | \include{07.classification} 42 | \include{08.CNN_for_classification} 43 | \include{09.segmentation} 44 | \include{10.3Dvision} 45 | \include{11.camera_carlibration} 46 | \include{12.single_view_geometry} 47 | \include{13.epipolar_geometry} 48 | \include{14.3D_data} 49 | \include{15.3D_deep_learning} 50 | \include{16.Sequential_Modeling} 51 | \include{17.video_analysis} 52 | \include{18.Transformer} 53 | \include{19.object_detection_and_instance_segmentation} 54 | \include{20.generative_model} 55 | \include{21.pose_and_motion} 56 | \include{22.Instance_Level_6D_Object_Pose_Estimation} 57 | \include{23.motion} 58 | \include{24.Embodied_AI} 59 | \include{25.Summary_of_Computer_Vision} 60 | 61 | 62 | \clearpage 63 | % appendix: appendix-QRDecomposition, condition-number, transformation-in-space 64 | \appendix 65 | \include{condition-number} 66 | \include{transformation-in-space} 67 | \include{DOF_and_rank} 68 | \include{appendix-QRDecomposition} 69 | 70 | \bibliographystyle{plain} 71 | \bibliography{CV_notes} 72 | \end{document} -------------------------------------------------------------------------------- /notes/src/DOF_and_rank.tex: -------------------------------------------------------------------------------- 1 | \section{DOF and rank in essential matrix and fundamental matrix} 2 | \label{DOFandRank} 3 | 所谓矩阵的自由度,实际就是指矩阵中有多少个元素可以独立变化.例如,一个$m \times n$的矩阵在不加任何限制的情况下有$mn$个自由度,而对于$n$阶上三角矩阵,其自由度为$n(n+1)/2$. 4 | 5 | 从三维的物体投影成二维的图像,这个过程其实就是射影变换.在射影空间当中的单应矩阵$\bd H$(可以理解为我们的投影变换矩阵)天然少一个自由度,因为自原点出发位于同一条线上的两个点,在射影之后无法区分,所以$\bd H \sim \alpha \bd H$. 6 | 7 | 测地线距离:geodesic distance. 8 | -------------------------------------------------------------------------------- /notes/src/appendix-QRDecomposition.tex: -------------------------------------------------------------------------------- 1 | \section{QR Decomposition} 2 | \label{QR Decomposition} 3 | 矩阵的QR分解就是将矩阵分解为正交矩阵和上三角矩阵的乘积,它可以对任意形状的矩阵进行.常用的方法有Gram–Schmidt process, Givens rotaitons和Householder reflections等.我们用最容易理解的施密特正交化方法来推导方阵的情形. 4 | 5 | 我们先将分解后的形式写出: 6 | \begin{equation} 7 | \begin{bmatrix} 8 | \bm a_1 & \bm a_2 & \cdots & \bm a_n 9 | \end{bmatrix} 10 | = 11 | \begin{bmatrix} 12 | \bm e_1 & \bm e_2 & \cdots & \bm e_n 13 | \end{bmatrix} 14 | \begin{bmatrix} 15 | r_{11} & r_{12} & \cdots & r_{1n} 16 | \\ 17 | 0 & r_{22} & \cdots & r_{2n} 18 | \\ 19 | 0 & 0 & \ddots & \vdots 20 | \\ 21 | 0 & 0 & \cdots & r_{nn} 22 | \end{bmatrix} 23 | \end{equation} 24 | 也就是满足 25 | \begin{equation} 26 | \bm a_i = \sum_{j = 1}^{i} r_{ji} \bm e_j 27 | \end{equation} 28 | 由此可以定出 29 | \begin{equation} 30 | \begin{aligned} 31 | \bm u_1 &= \bm a_1, \quad &\bm e_1 = \frac{\bm u_1}{\norm{\bm u_1}} 32 | \\ 33 | \bm u_2 &= \bm a_2 - \text{proj}_{\bm u_1} \bm a_2, & \bm e_2 = \frac{\bm u_2}{\norm{\bm u_2}} 34 | \\ 35 | &\vdots 36 | \\ 37 | \bm u_n &= \bm a_n - \sum_{j=1}^{n-1} \text{proj}_{\bm u_j} \bm a_n , &\bm e_n = \frac{\bm u_n}{\norm{\bm u_n}} 38 | \end{aligned} 39 | \end{equation} 40 | 以及 41 | \begin{equation} 42 | r_{ij} = \langle \bm e_i, \bm a_j \rangle 43 | \end{equation} 44 | 45 | 我们考虑一个方阵$\bd P$,其副对角线上的元素为$1$,其余为$0$.不难验证$\bd P\bd P = \bd I, \bd P = \bd P^\top = \bd P^{-1}$.左乘矩阵$\bd P$会使得矩阵上下翻转,右乘会使得矩阵左右翻转.将一个上三角矩阵上下翻转后左右翻转,即变为下三角矩阵.记$\widetilde{\bd A} = \bd P \bd A$,对$\widetilde{\bd A}^\top$进行QR分解,得到 46 | \begin{equation} 47 | \widetilde{\bd A}^\top = \widetilde{\bd Q} \widetilde{\bd R} 48 | \end{equation} 49 | 50 | 由此得到 51 | \begin{equation} 52 | \bd A = \bd P \widetilde{\bd R}^\top \widetilde{\bd Q}^\top = \xk{\bd P \widetilde{\bd R}^\top \bd P} \xk{\bd P \widetilde{\bd Q}^\top} \xlongequal{\text{def}} \bd R \bd Q 53 | \end{equation} 54 | 55 | 同样的,对于$\bd A^{\top}$进行QR分解就可以得到$\bd A$的LQ分解,可以用同样的方法得到QL分解.不过在使用时要注意:$\bd{P}$不一定是旋转矩阵. -------------------------------------------------------------------------------- /notes/src/condition-number.tex: -------------------------------------------------------------------------------- 1 | \section{Condition Number} 2 | 问题的条件数是数值分析中常见的概念,是导数的一种推广.简单来说,就是对于输入发生微小变化时,输出的变化程度的大小.如果一个问题的条件数较小,那么就称其是良置的(well conditioned),否则称为病态的(ill conditioned). 3 | 4 | 考虑一个线性系统$\bd A \bm x = \bm b$,那么若$\det \bd A \ne 0$,则$\bm x = \bd{A}^{-1}\bm b$.若输入变化$\delta \bm x$,则输出变化为$\bd A \delta \bm x$.考虑相对变化之比的上界: -------------------------------------------------------------------------------- /notes/src/figures/3DCNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/3DCNN.png -------------------------------------------------------------------------------- /notes/src/figures/6d_object_pose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/6d_object_pose.png -------------------------------------------------------------------------------- /notes/src/figures/BNsize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/BNsize.png -------------------------------------------------------------------------------- /notes/src/figures/BinarizationviaThresholding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/BinarizationviaThresholding.png -------------------------------------------------------------------------------- /notes/src/figures/DETR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/DETR.png -------------------------------------------------------------------------------- /notes/src/figures/DiscreteConvolution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/DiscreteConvolution.png -------------------------------------------------------------------------------- /notes/src/figures/EF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/EF.png -------------------------------------------------------------------------------- /notes/src/figures/Embodied_Multimodal_Large_Model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/Embodied_Multimodal_Large_Model.png -------------------------------------------------------------------------------- /notes/src/figures/FourierTransform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/FourierTransform.png -------------------------------------------------------------------------------- /notes/src/figures/GroupNorm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/GroupNorm.png -------------------------------------------------------------------------------- /notes/src/figures/KLdiv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/KLdiv.png -------------------------------------------------------------------------------- /notes/src/figures/LF_FC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/LF_FC.png -------------------------------------------------------------------------------- /notes/src/figures/LF_pool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/LF_pool.png -------------------------------------------------------------------------------- /notes/src/figures/NMS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/NMS.png -------------------------------------------------------------------------------- /notes/src/figures/NormalizationTechniques.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/NormalizationTechniques.png -------------------------------------------------------------------------------- /notes/src/figures/PointNet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/PointNet.png -------------------------------------------------------------------------------- /notes/src/figures/PointNet_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/PointNet_structure.png -------------------------------------------------------------------------------- /notes/src/figures/RCNN_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/RCNN_classification.png -------------------------------------------------------------------------------- /notes/src/figures/RNN_grad_van.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/RNN_grad_van.png -------------------------------------------------------------------------------- /notes/src/figures/ROI_align.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/ROI_align.png -------------------------------------------------------------------------------- /notes/src/figures/ResNet集成神经网络解释.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/ResNet集成神经网络解释.png -------------------------------------------------------------------------------- /notes/src/figures/RoI_pool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/RoI_pool.png -------------------------------------------------------------------------------- /notes/src/figures/RoI_pool2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/RoI_pool2.png -------------------------------------------------------------------------------- /notes/src/figures/Screenshot_loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/Screenshot_loss.png -------------------------------------------------------------------------------- /notes/src/figures/Screenshot_mybest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/Screenshot_mybest.png -------------------------------------------------------------------------------- /notes/src/figures/Screenshot_mybest_resnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/Screenshot_mybest_resnet.png -------------------------------------------------------------------------------- /notes/src/figures/UNetstructure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/UNetstructure.png -------------------------------------------------------------------------------- /notes/src/figures/VAE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/VAE.png -------------------------------------------------------------------------------- /notes/src/figures/VAE_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/VAE_2.png -------------------------------------------------------------------------------- /notes/src/figures/VisualizingImageGradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/VisualizingImageGradient.png -------------------------------------------------------------------------------- /notes/src/figures/YOLO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/YOLO.png -------------------------------------------------------------------------------- /notes/src/figures/adam.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/adam.png -------------------------------------------------------------------------------- /notes/src/figures/ae.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/ae.png -------------------------------------------------------------------------------- /notes/src/figures/anchor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/anchor.jpg -------------------------------------------------------------------------------- /notes/src/figures/angle_between_line.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/angle_between_line.png -------------------------------------------------------------------------------- /notes/src/figures/attention.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/attention.png -------------------------------------------------------------------------------- /notes/src/figures/autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/autoencoder.png -------------------------------------------------------------------------------- /notes/src/figures/bilinear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/bilinear.png -------------------------------------------------------------------------------- /notes/src/figures/camata-pose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/camata-pose.png -------------------------------------------------------------------------------- /notes/src/figures/corner_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/corner_map.png -------------------------------------------------------------------------------- /notes/src/figures/cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/cover.jpg -------------------------------------------------------------------------------- /notes/src/figures/cv_tasks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/cv_tasks.png -------------------------------------------------------------------------------- /notes/src/figures/ddgg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/ddgg.png -------------------------------------------------------------------------------- /notes/src/figures/deep-test-err.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/deep-test-err.png -------------------------------------------------------------------------------- /notes/src/figures/epi-constrain-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/epi-constrain-2.png -------------------------------------------------------------------------------- /notes/src/figures/epi-constrain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/epi-constrain.png -------------------------------------------------------------------------------- /notes/src/figures/epi-geo-2pic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/epi-geo-2pic.png -------------------------------------------------------------------------------- /notes/src/figures/epipolargeometry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/epipolargeometry.png -------------------------------------------------------------------------------- /notes/src/figures/fid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/fid.png -------------------------------------------------------------------------------- /notes/src/figures/g.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/g.png -------------------------------------------------------------------------------- /notes/src/figures/g2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/g2.png -------------------------------------------------------------------------------- /notes/src/figures/general_atten.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/general_atten.png -------------------------------------------------------------------------------- /notes/src/figures/generalgap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/generalgap.png -------------------------------------------------------------------------------- /notes/src/figures/generative_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/generative_model.png -------------------------------------------------------------------------------- /notes/src/figures/grad_var.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/grad_var.png -------------------------------------------------------------------------------- /notes/src/figures/holes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/holes.png -------------------------------------------------------------------------------- /notes/src/figures/horizon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/horizon.png -------------------------------------------------------------------------------- /notes/src/figures/hough1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/hough1.png -------------------------------------------------------------------------------- /notes/src/figures/hough2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/hough2.png -------------------------------------------------------------------------------- /notes/src/figures/image_attentoin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/image_attentoin.png -------------------------------------------------------------------------------- /notes/src/figures/image_nocs_pose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/image_nocs_pose.png -------------------------------------------------------------------------------- /notes/src/figures/image_plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/image_plane.png -------------------------------------------------------------------------------- /notes/src/figures/image_self_atten.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/image_self_atten.png -------------------------------------------------------------------------------- /notes/src/figures/image_seq2seq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/image_seq2seq.png -------------------------------------------------------------------------------- /notes/src/figures/learning_rate_schedule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/learning_rate_schedule.png -------------------------------------------------------------------------------- /notes/src/figures/light_invariant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/light_invariant.png -------------------------------------------------------------------------------- /notes/src/figures/logo-blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/logo-blue.png -------------------------------------------------------------------------------- /notes/src/figures/lstm_grad_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/lstm_grad_flow.png -------------------------------------------------------------------------------- /notes/src/figures/marching_cube1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/marching_cube1.png -------------------------------------------------------------------------------- /notes/src/figures/marching_cube2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/marching_cube2.png -------------------------------------------------------------------------------- /notes/src/figures/mgtest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/mgtest.png -------------------------------------------------------------------------------- /notes/src/figures/msgd1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/msgd1.png -------------------------------------------------------------------------------- /notes/src/figures/msgd2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/msgd2.png -------------------------------------------------------------------------------- /notes/src/figures/multilayer_rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/multilayer_rnn.png -------------------------------------------------------------------------------- /notes/src/figures/not_roboust_outliner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/not_roboust_outliner.png -------------------------------------------------------------------------------- /notes/src/figures/nsloss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/nsloss.png -------------------------------------------------------------------------------- /notes/src/figures/paralle_q.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/paralle_q.png -------------------------------------------------------------------------------- /notes/src/figures/parallel-image-plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/parallel-image-plane.png -------------------------------------------------------------------------------- /notes/src/figures/pic_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/pic_1.png -------------------------------------------------------------------------------- /notes/src/figures/pinholecamera.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/pinholecamera.png -------------------------------------------------------------------------------- /notes/src/figures/pointnet++.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/pointnet++.png -------------------------------------------------------------------------------- /notes/src/figures/pos_encoding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/pos_encoding.png -------------------------------------------------------------------------------- /notes/src/figures/property-of-f-mat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/property-of-f-mat.png -------------------------------------------------------------------------------- /notes/src/figures/rcnn_speed_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/rcnn_speed_comparison.png -------------------------------------------------------------------------------- /notes/src/figures/rcnn_vs_frcnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/rcnn_vs_frcnn.png -------------------------------------------------------------------------------- /notes/src/figures/receptivefield.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/receptivefield.png -------------------------------------------------------------------------------- /notes/src/figures/recu_CNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/recu_CNN.png -------------------------------------------------------------------------------- /notes/src/figures/recur_CNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/recur_CNN.png -------------------------------------------------------------------------------- /notes/src/figures/recur_CNN_detail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/recur_CNN_detail.png -------------------------------------------------------------------------------- /notes/src/figures/residual_network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/residual_network.png -------------------------------------------------------------------------------- /notes/src/figures/rnn-seqdata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/rnn-seqdata.png -------------------------------------------------------------------------------- /notes/src/figures/rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/rnn.png -------------------------------------------------------------------------------- /notes/src/figures/sensors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/sensors.png -------------------------------------------------------------------------------- /notes/src/figures/seq2seq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/seq2seq.png -------------------------------------------------------------------------------- /notes/src/figures/simple_NMS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/simple_NMS.png -------------------------------------------------------------------------------- /notes/src/figures/simple_VAE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/simple_VAE.png -------------------------------------------------------------------------------- /notes/src/figures/single_obj_det.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/single_obj_det.png -------------------------------------------------------------------------------- /notes/src/figures/sparsenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/sparsenet.png -------------------------------------------------------------------------------- /notes/src/figures/strangeknowledge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/strangeknowledge.png -------------------------------------------------------------------------------- /notes/src/figures/transform_all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/transform_all.png -------------------------------------------------------------------------------- /notes/src/figures/transformer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/transformer.png -------------------------------------------------------------------------------- /notes/src/figures/triangulation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/triangulation.png -------------------------------------------------------------------------------- /notes/src/figures/truncate_bp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/truncate_bp.png -------------------------------------------------------------------------------- /notes/src/figures/two_stage_detector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/two_stage_detector.png -------------------------------------------------------------------------------- /notes/src/figures/valley.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/valley.png -------------------------------------------------------------------------------- /notes/src/figures/vanilla_rnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/vanilla_rnn.png -------------------------------------------------------------------------------- /notes/src/figures/vanishingpoints.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/vanishingpoints.png -------------------------------------------------------------------------------- /notes/src/figures/video_cmp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/video_cmp.png -------------------------------------------------------------------------------- /notes/src/figures/vpanddir.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/vpanddir.png -------------------------------------------------------------------------------- /notes/src/figures/weak_perspective.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/weak_perspective.png -------------------------------------------------------------------------------- /notes/src/figures/window_moving.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/window_moving.png -------------------------------------------------------------------------------- /notes/src/figures/word_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/word_model.png -------------------------------------------------------------------------------- /notes/src/figures/wrongDA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/figures/wrongDA.png -------------------------------------------------------------------------------- /notes/src/now.synctex.gz.sum.synctex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/notes/src/now.synctex.gz.sum.synctex -------------------------------------------------------------------------------- /notes/src/now.tex: -------------------------------------------------------------------------------- 1 | \include{package} 2 | 3 | % 文档标题 4 | \title{ 5 | {\normalfont\normalsize\textsc{ 6 | Peking University\\ 7 | Introduction to Computer Vision, Spring 2024 \\[25pt]}} 8 | \horrule{0.5pt}\\ 9 | \sffamily{Introduction to Computer Vision\\Course Notes}\\ 10 | \horrule{1.8pt}\\[20pt] 11 | } 12 | 13 | % 作者和联系方式 14 | \author[1]{Prof. He Wang\thanks{\href{https://hughw19.github.io/}{Prof. He Wang}}} 15 | \author[2]{林晓疏\thanks{wangyuanqing@pku.edu.cn}} 16 | \author[2]{Yutong Liang\thanks{\href{https://lyt0112.com/}{Yutong Liang's Website}}} 17 | \affil[1]{主讲教师} 18 | \affil[2]{笔记整理} 19 | 20 | % 文档日期 21 | \date{\today} 22 | 23 | \pagestyle{fancy} 24 | \fancyhf{} 25 | \fancyhead[L]{\leftmark} % 在页眉左侧显示章节名 26 | \fancyfoot[C]{\thepage} % 在页脚中间显示页码 27 | 28 | \begin{document} 29 | 30 | 31 | \section{Edge Detection} 32 | 33 | \subsection{What is an Edge?} 34 | 35 | “边缘”是图像中的一个区域,在这个区域中,沿着图像的一个方向, 36 | 像素强度值 (或者说对比度) 发生了“显著”的变化,而在其正交方向上, 37 | 像素强度值 (或对比度) 几乎没有变化. 38 | 39 | \subsection{Criteria for Optimal Edge Detection} 40 | 41 | \begin{equation} 42 | \text{Accuracy}=\frac{\text{TP}+\text{TN}}{\text{TP}+\text{FP}+\text{TN}+\text{FN}} 43 | \end{equation} 44 | 45 | \begin{equation} 46 | \text{Precision}=\frac{\text{TP}}{\text{TP}+\text{FP}} 47 | \end{equation} 48 | 49 | \begin{equation} 50 | \text{Recall}=\frac{\text{TP}}{\text{TP}+\text{FN}} 51 | \end{equation} 52 | 53 | Precision 和 Recall 都代表着你检测出的真正边缘所占比例,但是 Precision 的分母 54 | 是你检测出的边缘,Recall 的分母是真正的边缘. 55 | 56 | \subsection{Non-Maximal Suppression (NMS)} 57 | 58 | 非最大值抑制,顾名思义,就是抑制非最大值,这里的最大值指的是梯度的局部最大值. 59 | 60 | 在计算出了所有点的梯度之后,会有很多像素的梯度大于设定的阈值,而我们希望最后得出的边缘像素真的看起来 61 | 像一条线而不是一块区域,所以 NMS 的目的是为了抑制那些不是边缘的像素,只保留那些是边缘的像素. 62 | 63 | \begin{figure}[htbp] 64 | \centering 65 | \includegraphics[scale=0.2]{figures/NMS.png} 66 | \caption{NMS示意图} 67 | \end{figure} 68 | 69 | 对于一个边缘像素的候选点,我们认为它是边缘当:它比它梯度方向的两个点 $q+\nabla q$ 和 $q-\nabla q$ 的梯度值大, 70 | 也就是这个点的梯度大小是局部最大值的时候. 71 | 72 | \begin{figure}[htbp] 73 | \centering 74 | \includegraphics[scale=0.4]{figures/bilinear.png} 75 | \caption{双线性插值} 76 | \end{figure} 77 | 78 | 计算这个点梯度方向的点的梯度值可以使用双线性插值法,就是把这个点周围的四个点的梯度按照横纵距离反比加权. 79 | 80 | 当然,NMS 是一个思想而不是针对边缘检测的算法,比如对于 keypoint detection,object detection (like YOLO) 都可以使用 NMS, 81 | 实现的思路都很类似,使用一个打分函数看这个备选点 (bounding box) 是不是比跟它相邻 (冲突) 的点 (bounding box) 好,如果是就保留,否则就抑制. 82 | 83 | \subsection{A Simplified Version of NMS} 84 | 85 | \begin{figure}[htbp] 86 | \centering 87 | \includegraphics[scale=0.55]{figures/simple_NMS.png} 88 | \caption{简化版本的双线性插值} 89 | \end{figure} 90 | 91 | 一个 NMS 的简化版本是把双线性插值省去,直接让这个像素的梯度大于它梯度方向的那两个相邻像素的梯度. 92 | 93 | \subsection{Hysteresis Thresholding} 94 | 95 | 使用高阈值 (maxVal) 开始边缘曲线,使用低阈值 (minVal) 继续它们. 96 | 97 | \begin{itemize} 98 | \item Pixels with gradient magnitudes > maxVal should be reserved. 99 | \item Pixels with gradient magnitudes < minVal should be removed. 100 | \end{itemize} 101 | 102 | How to decide maxVal and minVal? Examples: 103 | 104 | \begin{itemize} 105 | \item maxVal = 0.3 $\times$ average magnitude of the pixels that pass NMS 106 | \item minVal = 0.1 $\times$ average magnitude of the pixels that pass NMS 107 | \end{itemize} 108 | 109 | 110 | 111 | \end{document} -------------------------------------------------------------------------------- /notes/src/preface.tex: -------------------------------------------------------------------------------- 1 | \section*{前言} 2 | 3 | 这本笔记是作者于2022年春信息科学技术学院王鹤老师开设的计算机视觉导论课程期间的笔记.王鹤老师在Stanford获得Ph.D学位,课程中也毫不令人意外地带有许多\href{https://cs231n.github.io/}{CS231n: Convolutional Neural Network for Visial Recognition}和\href{https://web.stanford.edu/class/cs231a/course_notes.html}{CS231A: Computer Vision, From 3D Reconstruction to Recognition}等课程的影子.课程从对计算机视觉领域的传统方法的介绍开始,介绍了CNN和诸多深度学习的基本知识,如BatchNorm,Regularization等.随后进入3D视觉部分,详细介绍了Pinhole Camera这一模型以及相机标定,对极几何等相关知识.期中之后转入3D数据,语义分割,物体位姿判定以及RNN和生成模型部分. 4 | 5 | 笔记主要是对王鹤老师上课内容的记录,部分内容由笔者在课余时间了解后添加,这些内容都给出了参考文献或链接.除此之外,笔者还依惯例添加了几节附录,以补充正文当中一些没有展开的细节,以供参考. 6 | 7 | 这门课是笔者三年以来在信科上过的水准最高的课程,无论是课程内容,教师讲授水平,作业质量,考试区分度还是答疑,都是笔者体验过的课程中最高水准的一档.若信科未来能有一半专业课能达到本课的水平,则世界一流大学指日可待 (. 8 | 9 | 最后,感谢王鹤老师和张嘉曌,陈嘉毅两位助教.笔者曾多次向张助教询问问题,均得到了细致的回答,在此一并表示感谢. 10 | 11 | \rightline{林晓疏} 12 | 13 | \rightline{2022年春} 14 | 15 | 作为北京大学信息科学技术学院的学生,长期以来饱受糟糕课程质量、糟糕课程作业、糟糕考试难度的折磨. 16 | 比如算法设计与分析的等课程的教学质量极低,教考分离,ICS考试一面黑板的考试错误题目订正等等. 17 | 在这样的环境下,幸运地遇到了王鹤老师开设的计算机视觉导论课程,内容丰富,作业质量高,考试难度适中, 18 | 绝对称得上是精品课程\sout{(与算分这种国家精品课程相区别)}. 19 | 20 | 王鹤老师将计算机视觉的发展脉络呈现给大家,在这个深度学习时代, 21 | 老师并没有完全忽视传统CV的方法,而是挑选了其中具有代表性的工作,这些工作为深度学习时代的CV打下了良好的基础,提供了许多基础工具和数据集的构建方式. 22 | 同时老师也更加注重深度学习的基础知识,如 BatchNorm 的特性和与其他 Norm 的区别,许多人仅仅只是会 PyTorch 的积木搭建,但是对于这些基础知识的原理和性质却不甚了解, 23 | 导致在实际使用中遇到问题时无法解决,王老师在这方面往往提出 intuitive 的问题,引人深思. 24 | 25 | 我是在大三下学期选修了这门课程,即使我已经具有了一定的深度学习基础,但是我仍然很享受上课\sout{看回放}的过程,因为对于许多已经了解的知识,王老师会再度给出解释, 26 | 总是让我在同一个地方有不同的收获. 27 | 28 | 我在本学期期中考试之前偶然了解到曾经有学长撰写了一本笔记,但是许多内容已经进行了更新或者删改,因此我联系上林晓疏(笔名)学长,获取了这份笔记的源代码, 29 | 并在此基础上进行更新,以飨后人. 30 | 31 | 该笔记按照讲授先后顺序进行排列,但是章节编排按照知识结构划分,因此章节划分可能与课程进度有所不同. 32 | 同时本笔记不能替代课程,只是对这部分知识的总结和思考,建议与课程回放配合食用. 33 | 34 | \rightline{Yutong Liang} 35 | \rightline{2024年4月24日} 36 | -------------------------------------------------------------------------------- /resources/cheatsheet/cheatsheet-final.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/resources/cheatsheet/cheatsheet-final.pdf -------------------------------------------------------------------------------- /resources/cheatsheet/cheatsheet-midterm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/resources/cheatsheet/cheatsheet-midterm.pdf -------------------------------------------------------------------------------- /resources/往年题/2022期中-部分答案.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/resources/往年题/2022期中-部分答案.pdf -------------------------------------------------------------------------------- /resources/往年题/2022期末-部分答案.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EmptyBlueBox/Introduction_to_Computer_Vision-wh-2024Spring-PKU/364eb097a2ed8e634db9674cb984a3873ab49e37/resources/往年题/2022期末-部分答案.pdf -------------------------------------------------------------------------------- /slides/readme.txt: -------------------------------------------------------------------------------- 1 | Copyright belongs to Professor Wang He of Peking University. 2 | --------------------------------------------------------------------------------