├── .gitignore ├── .idea ├── ML_Notes.iml ├── misc.xml ├── modules.xml ├── vcs.xml ├── workspace-DESKTOP-EHUP4JQ.xml └── workspace.xml ├── README.md ├── __init__.py ├── ml_models ├── __init__.py ├── bayes │ ├── __init__.py │ └── linear_regression.py ├── cluster │ ├── __init__.py │ ├── agnes.py │ ├── dbscan.py │ ├── kmeans.py │ ├── lvq.py │ └── spectral.py ├── decomposition │ ├── __init__.py │ ├── isomap.py │ ├── lda.py │ ├── lle.py │ ├── mds.py │ ├── nmf.py │ └── pca.py ├── em │ ├── __init__.py │ ├── gmm_classifier.py │ └── gmm_cluster.py ├── ensemble │ ├── __init__.py │ ├── adaboost_classifier.py │ ├── adaboost_regressor.py │ ├── bagging_classifier.py │ ├── bagging_regressor.py │ ├── dart_classifier.py │ ├── dart_regressor.py │ ├── gradient_boosting_classifier.py │ ├── gradient_boosting_regressor.py │ ├── random_forest_classifier.py │ ├── random_forest_regressor.py │ ├── xgboost_base_tree.py │ ├── xgboost_classifier.py │ └── xgboost_regressor.py ├── explain │ ├── __init__.py │ ├── shap.py │ └── tree_shap.h ├── fm │ ├── __init__.py │ ├── ffm.py │ └── fm.py ├── kernel_functions.py ├── latent_dirichlet_allocation │ ├── __init__.py │ └── lda.py ├── linear_model │ ├── __init__.py │ ├── linear_regression.py │ ├── logistic_regression.py │ ├── max_entropy.py │ └── perceptron.py ├── optimization.py ├── outlier_detect │ ├── __init__.py │ ├── hbos.py │ ├── iforest.py │ ├── knn.py │ ├── lof.py │ └── phbos.py ├── pgm │ ├── __init__.py │ ├── conditional_random_field.py │ ├── gaussian_naive_bayes_classifier.py │ ├── gaussian_naive_bayes_cluster.py │ ├── hidden_markov_model.py │ ├── naive_bayes_classifier.py │ ├── naive_bayes_cluster.py │ ├── page_rank.py │ ├── semi_gaussian_naive_bayes_classifier.py │ └── simple_markov_model.py ├── svm │ ├── __init__.py │ ├── hard_margin_svm.py │ ├── soft_margin_svm.py │ └── svc.py ├── tree │ ├── __init__.py │ ├── cart_classifier.py │ ├── cart_regressor.py │ └── decision_tree.py ├── utils.py ├── vi │ ├── __init__.py │ ├── gmm.py │ └── linear_regression.py └── wrapper_models │ ├── __init__.py │ ├── data_bin_wrapper.py │ └── multi_class_wrapper.py ├── notebooks ├── 01_线性模型_线性回归.ipynb ├── 01_线性模型_线性回归_正则化(Lasso,Ridge,ElasticNet).ipynb ├── 02_线性模型_逻辑回归.ipynb ├── 03_二分类转多分类的一般实现.ipynb ├── 04_线性模型_感知机.ipynb ├── 05_线性模型_最大熵模型.ipynb ├── 06_优化_拟牛顿法实现(DFP,BFGS).ipynb ├── 07_01_svm_硬间隔支持向量机与SMO.ipynb ├── 07_02_svm_软间隔支持向量机.ipynb ├── 07_03_svm_核函数与非线性支持向量机.ipynb ├── 08_代价敏感学习_添加sample_weight支持.ipynb ├── 09_01_决策树_ID3与C4.5.ipynb ├── 09_02_决策树_CART.ipynb ├── 10_01_集成学习_简介.ipynb ├── 10_02_集成学习_boosting_adaboost_classifier.ipynb ├── 10_03_集成学习_boosting_adaboost_regressor.ipynb ├── 10_04_集成学习_boosting_提升树.ipynb ├── 10_05_集成学习_boosting_gbm_regressor.ipynb ├── 10_06_集成学习_boosting_gbm_classifier.ipynb ├── 10_07_集成学习_bagging.ipynb ├── 10_08_集成学习_bagging_randomforest.ipynb ├── 10_09_集成学习_bagging_高阶组合_stacking.ipynb ├── 10_10_集成学习_xgboost_原理介绍及回归树的简单实现.ipynb ├── 10_11_集成学习_xgboost_回归的简单实现.ipynb ├── 10_12_集成学习_xgboost_回归的更多实现:泊松回归、gamma回归、tweedie回归.ipynb ├── 10_13_集成学习_xgboost_分类的简单实现.ipynb ├── 10_14_集成学习_xgboost_优化介绍.ipynb ├── 10_15_集成学习_lightgbm_进一步优化.ipynb ├── 10_16_集成学习_dart_提升树与dropout的碰撞.ipynb ├── 10_17_集成学习_树模型的可解释性_模型的特征重要性及样本的特征重要性(sabaas,shap,tree shap).ipynb ├── 11_01_EM_GMM引入问题.ipynb ├── 11_02_EM_算法框架.ipynb ├── 11_03_EM_GMM聚类实现.ipynb ├── 11_04_EM_GMM分类实现及其与LogisticRegression的关系.ipynb ├── 12_01_PGM_贝叶斯网(有向无环图)初探.ipynb ├── 12_02_PGM_朴素贝叶斯分类器实现.ipynb ├── 12_03_PGM_半朴素贝叶斯分类器实现.ipynb ├── 12_04_PGM_朴素贝叶斯的聚类实现.ipynb ├── 12_05_PGM_马尔科夫链_初探及代码实现.ipynb ├── 12_06_PGM_马尔科夫链_语言模型及文本生成.ipynb ├── 12_07_PGM_马尔科夫链_PageRank算法.ipynb ├── 12_08_PGM_HMM_隐马模型:介绍及概率计算(前向、后向算法).ipynb ├── 12_09_PGM_HMM_隐马模型:参数学习(有监督、无监督).ipynb ├── 12_10_PGM_HMM_隐马模型:隐状态预测.ipynb ├── 12_11_PGM_HMM_隐马模型实战:中文分词.ipynb ├── 12_12_PGM_马尔科夫随机场(无向图)介绍.ipynb ├── 12_13_PGM_CRF_条件随机场:定义及形式(简化、矩阵形式).ipynb ├── 12_14_PGM_CRF_条件随机场:如何定义特征函数.ipynb ├── 12_15_PGM_CRF_条件随机场:概率及期望值计算(前向后向算法).ipynb ├── 12_16_PGM_CRF_条件随机场:参数学习.ipynb ├── 12_17_PGM_CRF_条件随机场:标签预测.ipynb ├── 12_18_PGM_CRF_代码优化及中文分词实践.ipynb ├── 12_19_PGM_CRF与HMM之间的区别与联系.ipynb ├── 13_01_sampling_为什么要采样(求期望、积分等).ipynb ├── 13_02_sampling_MC采样:接受-拒绝采样、重要采样.ipynb ├── 13_03_sampling_MCMC:采样原理(再探马尔可夫链).ipynb ├── 13_04_sampling_MCMC:MH采样的算法框架.ipynb ├── 13_05_sampling_MCMC:单分量MH采样算法.ipynb ├── 13_06_sampling_MCMC:Gibbs采样算法.ipynb ├── 14_01_概率分布:二项分布及beta分布.ipynb ├── 14_02_概率分布:多项分布及狄利克雷分布.ipynb ├── 14_03_概率分布:高斯分布(正态分布)及其共轭先验.ipynb ├── 14_04_概率分布:指数族分布.ipynb ├── 15_01_VI_变分推断的原理推导.ipynb ├── 15_02_VI_变分推断与EM的关系.ipynb ├── 15_03_VI_一元高斯分布的变分推断实现.ipynb ├── 15_04_VI_高斯混合模型(GMM)的变分推断实现.ipynb ├── 15_05_VI_线性回归模型的贝叶斯估计推导.ipynb ├── 15_06_VI_线性回归模型的贝叶斯估计实现:证据近似.ipynb ├── 15_07_VI_线性回归模型的变分推断实现.ipynb ├── 15_08_VI_线性回归模型的贝叶斯估计实现:进一步扩展VI.ipynb ├── 15_09_VI_拉普拉斯近似.ipynb ├── 16_01_LDA_主题模型原理.ipynb ├── 16_02_LDA_Gibss采样实现.ipynb ├── 16_03_LDA_变分EM实现.ipynb ├── 17_01_FM_因子分解机的原理介绍及实现.ipynb ├── 17_02_FM_FFM的原理介绍及实现.ipynb ├── 17_03_FM_FFM的损失函数扩展(possion,gamma,tweedie回归实现以及分类实现).ipynb ├── 18_01_聚类_距离度量以及性能评估.ipynb ├── 18_02_聚类_层次聚类_AGNES.ipynb ├── 18_03_聚类_密度聚类_DBSCAN.ipynb ├── 18_04_聚类_原型聚类_K均值.ipynb ├── 18_05_聚类_原型聚类_LVQ.ipynb ├── 18_06_聚类_谱聚类.ipynb ├── 19_01_降维_奇异值分解(SVD).ipynb ├── 19_02_降维_主成分分析(PCA).ipynb ├── 19_03_降维_线性判别分析(LDA).ipynb ├── 19_04_降维_多维缩放(MDS).ipynb ├── 19_05_降维_流形学习_等度量映射(Isomap).ipynb ├── 19_06_降维_流形学习_局部线性嵌入(LLE).ipynb ├── 19_07_降维_非负矩阵分解_NMF.ipynb ├── 20_01_异常检测_HBOS.ipynb ├── 20_01_异常检测_pHBOS.ipynb ├── 20_02_异常检测_iForest.ipynb ├── 20_03_异常检测_KNN.ipynb ├── 20_04_异常检测_LOF.ipynb ├── data │ ├── people_daily_mini.txt │ └── toutiao_mini.txt └── source │ ├── 01_adam.drawio │ ├── 01_adam.png │ ├── 01_归一化对梯度下降的影响.drawio │ ├── 01_归一化对梯度下降的影响.png │ ├── 05_最大熵模型.drawio │ ├── 05_最大熵模型.svg │ ├── 06_原问题与其min-max问题.png │ ├── 06_核技巧.drawio │ ├── 06_核技巧.png │ ├── 06_核技巧.svg │ ├── 06_软间隔的支持向量.png │ ├── 09_决策树学习.drawio │ ├── 09_决策树学习.jpg │ ├── 10_SHAP.drawio │ ├── 10_SHAP.png │ ├── 10_Saabas.drawio │ ├── 10_Saabas.png │ ├── 10_adaboost训练.drawio │ ├── 10_adaboost训练.png │ ├── 10_bagging.drawio │ ├── 10_bagging.png │ ├── 10_bagging简介.drawio │ ├── 10_bagging简介.png │ ├── 10_boosting简介.drawio │ ├── 10_boosting简介.png │ ├── 10_efb.drawio │ ├── 10_efb.png │ ├── 10_goss.png │ ├── 10_level_wise_vs_leaf_wise.drawio │ ├── 10_level_wise_vs_leaf_wise.png │ ├── 10_lgb中类别特征的处理.drawio │ ├── 10_lgb中类别特征的处理.png │ ├── 10_randomforest.drawio │ ├── 10_randomforest.png │ ├── 10_stacking简介.drawio │ ├── 10_stacking简介.png │ ├── 10_捆绑特征合并.png │ ├── 10_提升树训练.drawio │ ├── 10_提升树训练.png │ ├── 10_梯度提升分类树.drawio │ ├── 10_梯度提升分类树.png │ ├── 10_特征捆绑.png │ ├── 10_直方图算法.drawio │ ├── 10_直方图算法.png │ ├── 10_集成学习_极大似然估计.png │ ├── 11_Z概率分布.png │ ├── 11_凹函数的性质.png │ ├── 12_CRF_线性链.drawio │ ├── 12_CRF_线性链.png │ ├── 12_HMM_NER.drawio │ ├── 12_HMM_NER.png │ ├── 12_HMM_中文分词1.drawio │ ├── 12_HMM_中文分词1.png │ ├── 12_HMM_中文分词2.drawio │ ├── 12_HMM_中文分词2.png │ ├── 12_HMM前向后向.drawio │ ├── 12_HMM前向后向.png │ ├── 12_HMM前向后向2.drawio │ ├── 12_HMM前向后向2.png │ ├── 12_HMM定义.drawio │ ├── 12_HMM定义.png │ ├── 12_HMM维特比1.drawio │ ├── 12_HMM维特比1.png │ ├── 12_HMM维特比2.drawio │ ├── 12_HMM维特比2.png │ ├── 12_HMM维特比3.drawio │ ├── 12_HMM维特比3.png │ ├── 12_MCMC总结.drawio │ ├── 12_MCMC总结.png │ ├── 12_MC_天气demo.png │ ├── 12_MC初探.drawio │ ├── 12_MC初探.png │ ├── 12_MC并行计算.drawio │ ├── 12_MC并行计算.png │ ├── 12_beta分布的超参数.png │ ├── 12_gamma分布.png │ ├── 12_pagerank_demo1.drawio │ ├── 12_pagerank_demo1.png │ ├── 12_pagerank_demo2.drawio │ ├── 12_pagerank_demo2.png │ ├── 12_sampling_经验分布demo.drawio │ ├── 12_sampling_经验分布demo.png │ ├── 12_半朴素贝叶斯SPODE.drawio │ ├── 12_半朴素贝叶斯SPODE.png │ ├── 12_半朴素贝叶斯TAN1.drawio │ ├── 12_半朴素贝叶斯TAN1.png │ ├── 12_半朴素贝叶斯TAN2.drawio │ ├── 12_半朴素贝叶斯TAN2.png │ ├── 12_半朴素贝叶斯TAN3.drawio │ ├── 12_半朴素贝叶斯TAN3.png │ ├── 12_单纯形.png │ ├── 12_接受拒绝采样demo.png │ ├── 12_接受拒绝采样率低demo.png │ ├── 12_无向图demo.drawio │ ├── 12_无向图demo.png │ ├── 12_朴素贝叶斯概率图.drawio │ ├── 12_朴素贝叶斯概率图.png │ ├── 12_概率分布之间的关系.drawio │ ├── 12_概率分布之间的关系.png │ ├── 12_概率图初探demo.drawio │ ├── 12_概率图初探demo.png │ ├── 12_概率图初探demo2.drawio │ ├── 12_概率图初探demo2.png │ ├── 12_概率图初探demo3.drawio │ ├── 12_概率图初探demo3.png │ ├── 12_狄利克雷分布.png │ ├── 12_重要采样效率分析demo.png │ ├── 12_高斯分布_均值的贝叶斯推断.png │ ├── 12_高斯条件概率与边缘概率.png │ ├── 15_EM中E步更新.png │ ├── 15_EM中M步更新.png │ ├── 15_EM中三者间的关系.png │ ├── 15_VI_GMM1.png │ ├── 15_VI_GMM2.png │ ├── 15_VI_一元高斯分布的迭代优化.png │ ├── 15_VI_贝叶斯线性回归图模型.png │ ├── 15_VI介绍.png │ ├── 15_t分布1.png │ ├── 15_t分布2.png │ ├── 15_对数似然_ELBO_KL距离之间的关系.png │ ├── 16_LDA_文本生成过程.png │ ├── 16_LDA_盘子图.png │ ├── 16_变分EM的LDA模型.png │ ├── 16_变分EM的变分分布.png │ ├── 17_FFM1.png │ ├── 17_FFM2.png │ ├── 17_FFM3.png │ ├── 17_FM_one-hot1.png │ ├── 17_FM_one-hot2.png │ ├── 18_聚类_AGNES.png │ ├── 18_聚类_DBSCAN1.png │ ├── 19_isomap1.png │ ├── 19_isomap2.png │ ├── 19_lda1.png │ ├── 19_lle1.png │ ├── 19_mds.png │ ├── 19_pca1.png │ ├── 19_pca2.png │ ├── 19_降维_svd_demo.jpg │ ├── 20_iforest.jpg │ ├── 20_lof_01.png │ ├── 20_phbos_01.png │ └── 20_phbos_02.png └── tests ├── adaboost_classifier_test.py ├── adaboost_regressor_test.py ├── agnes_test.py ├── bagging_classifier_test.py ├── cart_classifier_test.py ├── cart_regressor_test.py ├── conditional_random_field_test.py ├── dart_classifier_test.py ├── dart_regressor_test.py ├── dbscan_test.py ├── decision_tree_test.py ├── ffm_test1.py ├── fm_test.py ├── fmm_test2.py ├── gaussian_naive_bayes_classifier_test.py ├── gaussian_naive_bayes_cluster_test.py ├── gmm_classifier_test.py ├── gmm_cluster_test(vi).py ├── gmm_cluster_test.py ├── gradient_boosting_classifier_test.py ├── gradient_boosting_regressor_test.py ├── hard_soft_margin_svm_test.py ├── hbos_test.py ├── hidden_markov_model_test1.py ├── hidden_markov_model_test2.py ├── hidden_markov_model_test3.py ├── iforest_test.py ├── isomap_test.py ├── kmeans_test.py ├── knn_odd_detect_test.py ├── lda_test(线性判别分析).py ├── lda_test.py ├── linear_regresion_test(bayes).py ├── linear_regresion_test.py ├── linear_regression_test(vi).py ├── lle_test.py ├── lof_test.py ├── logist_test.py ├── lvq_test.py ├── maxent_test.py ├── mds_test.py ├── naive_bayes_classifier_test.py ├── naive_bayes_cluster_test.py ├── nmf_test.py ├── pca_test.py ├── perceptron_test.py ├── phbos_test.py ├── random_forest_regressor_test.py ├── random_forest_test.py ├── sample_weight_test.py ├── semi_gaussian_naive_bayes_classifier_test.py ├── shap_test.py ├── simple_markov_model_test.py ├── spectral_test.py ├── svc_test.py ├── xgboost_classifier_test.py └── xgboost_regressor_test.py /.gitignore: -------------------------------------------------------------------------------- 1 | */.idea/* 2 | */__pycache__/ 3 | *.pyc 4 | */tests/* 5 | */.ipynb_checkpoints/ 6 | -------------------------------------------------------------------------------- /.idea/ML_Notes.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from . import ml_models -------------------------------------------------------------------------------- /ml_models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import * -------------------------------------------------------------------------------- /ml_models/bayes/__init__.py: -------------------------------------------------------------------------------- 1 | from .linear_regression import * -------------------------------------------------------------------------------- /ml_models/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | from .agnes import * 2 | from .dbscan import * 3 | from .kmeans import * 4 | from .lvq import * 5 | from .spectral import * -------------------------------------------------------------------------------- /ml_models/cluster/agnes.py: -------------------------------------------------------------------------------- 1 | """ 2 | 层次聚类:AGNES的实现 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | # 定义默认的距离函数 9 | def euclidean_average_dist(Gi, Gj): 10 | return np.sum(np.power(np.mean(Gi, axis=0) - np.mean(Gj, axis=0), 2)) 11 | 12 | 13 | class AGNES(object): 14 | def __init__(self, k=3, dist_method=None): 15 | """ 16 | :param k: 聚类数量 17 | :param dist_method: 距离函数定义 18 | """ 19 | self.k = k 20 | self.dist_method = dist_method 21 | if self.dist_method is None: 22 | self.dist_method = euclidean_average_dist 23 | self.G = None 24 | self.cluster_center = {} # 记录聚类中心点 25 | 26 | def fit(self, X): 27 | m, _ = X.shape 28 | # 初始化簇 29 | G = {} 30 | for row in range(m): 31 | G[row] = X[[row]] 32 | # 计算簇间距离 33 | M = np.zeros(shape=(m, m)) 34 | for i in range(0, m): 35 | for j in range(0, m): 36 | M[i, j] = self.dist_method(G[i], G[j]) 37 | M[j, i] = M[i, j] 38 | q = m 39 | while q > self.k: 40 | # 寻找最近的簇 41 | min_dist = np.infty 42 | i_ = None 43 | j_ = None 44 | for i in range(0, q - 1): 45 | for j in range(i + 1, q): 46 | if M[i, j] < min_dist: 47 | i_ = i 48 | j_ = j 49 | min_dist = M[i, j] 50 | # 合并 51 | G[i_] = np.concatenate([G[i_], G[j_]]) 52 | # 重编号 53 | for j in range(j_ + 1, q): 54 | G[j - 1] = G[j] 55 | # 删除G[q] 56 | del G[q-1] 57 | # 删除 58 | M = np.delete(M, j_, axis=0) 59 | M = np.delete(M, j_, axis=1) 60 | # 更新距离 61 | for j in range(q - 1): 62 | M[i_, j] = self.dist_method(G[i_], G[j]) 63 | M[j, i_] = M[i_, j] 64 | # 更新q 65 | q = q - 1 66 | # self.G = G 67 | for idx in G: 68 | self.cluster_center[idx] = np.mean(G[idx], axis=0) 69 | 70 | def predict(self, X): 71 | rst = [] 72 | rows, _ = X.shape 73 | for row in range(rows): 74 | # vec = X[[row]] 75 | vec = X[row] 76 | min_dist = np.infty 77 | bst_label = None 78 | for idx in self.cluster_center: 79 | # dist = self.dist_method(vec, self.G[idx]) < min_dist 80 | dist = np.sum(np.power(vec - self.cluster_center[idx], 2)) 81 | if dist < min_dist: 82 | bst_label = idx 83 | min_dist = dist 84 | rst.append(bst_label) 85 | return np.asarray(rst) -------------------------------------------------------------------------------- /ml_models/cluster/dbscan.py: -------------------------------------------------------------------------------- 1 | """ 2 | DBSCAN密度聚类的代码实现 3 | """ 4 | import numpy as np 5 | from queue import Queue 6 | 7 | 8 | class DBSCAN(object): 9 | def __init__(self, eps=0.5, min_sample=3, dist_method=None): 10 | """ 11 | :param eps:epsilon领域半径 12 | :param min_sample: 核心对象的epsilon领域半径内的最少样本量 13 | :param dist_method:样本距离度量,默认欧氏距离 14 | """ 15 | self.eps = eps 16 | self.min_sample = min_sample 17 | self.dist_method = dist_method 18 | if self.dist_method is None: 19 | self.dist_method = lambda x, y: np.sqrt(np.sum(np.power(x - y, 2))) 20 | self.label_ = None # 记录样本标签,-1表示异常点 21 | 22 | def fit(self, X): 23 | rows = X.shape[0] 24 | self.label_ = np.ones(rows) * -1 25 | M = np.zeros(shape=(rows, rows)) 26 | # 计算样本间的距离 27 | for i in range(rows - 1): 28 | for j in range(i, rows): 29 | M[i, j] = self.dist_method(X[i], X[j]) 30 | M[j, i] = M[i, j] 31 | # 记录核心矩阵 32 | H = set() 33 | for i in range(0, rows): 34 | if np.sum(M[i] <= self.eps) >= self.min_sample: 35 | H.add(i) 36 | # 初始化聚类簇数 37 | k = 0 38 | # 初始化未访问样本集合 39 | W = set(range(rows)) 40 | while len(H) > 0: 41 | # 记录当前未访问样本集合 42 | W_old = W.copy() 43 | # 随机选择一个核心对象 44 | o = np.random.choice(list(H)) 45 | # 初始化队列 46 | Q = Queue() 47 | Q.put(o) 48 | # 未访问队列中去掉核心对象o 49 | W = W - set([o]) 50 | while not Q.empty(): 51 | # 取出首个样本 52 | q = Q.get() 53 | # 判断是否为核心对象 54 | if q in H: 55 | # 获取领域内样本与未访问样本的交集 56 | delta = set(np.argwhere(M[q] <= self.eps).reshape(-1).tolist()) & W 57 | # 将其放入队列 58 | for d in delta: 59 | Q.put(d) 60 | # 从未访问集合中去掉 61 | W = W - delta 62 | # 获取聚类簇idx 63 | C_k = W_old - W 64 | k_idx = list(C_k) 65 | self.label_[k_idx] = k 66 | k += 1 67 | # 去掉在当前簇中的核心对象 68 | H = H - C_k 69 | 70 | def fit_predict(self, X): 71 | self.fit(X) 72 | return self.label_ 73 | -------------------------------------------------------------------------------- /ml_models/cluster/kmeans.py: -------------------------------------------------------------------------------- 1 | """ 2 | kmeans聚类实现 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | class KMeans(object): 9 | def __init__(self, k=3, epochs=100, tol=1e-3, dist_method=None): 10 | """ 11 | :param k: 聚类簇数量 12 | :param epochs: 最大迭代次数 13 | :param tol: 终止条件 14 | :param dist_method:距离函数,默认欧氏距离 15 | """ 16 | self.k = k 17 | self.epochs = epochs 18 | self.tol = tol 19 | self.dist_method = dist_method 20 | if self.dist_method is None: 21 | self.dist_method = lambda x, y: np.sqrt(np.sum(np.power(x - y, 2))) 22 | self.cluster_centers_ = {} # 记录簇中心坐标 23 | 24 | def fit(self, X): 25 | m = X.shape[0] 26 | # 初始化 27 | for idx, data_idx in enumerate(np.random.choice(list(range(m)), self.k, replace=False).tolist()): 28 | self.cluster_centers_[idx] = X[data_idx] 29 | # 迭代 30 | for _ in range(self.epochs): 31 | C = {} 32 | for idx in range(self.k): 33 | C[idx] = [] 34 | for j in range(m): 35 | best_k = None 36 | min_dist = np.infty 37 | for idx in range(self.k): 38 | dist = self.dist_method(self.cluster_centers_[idx], X[j]) 39 | if dist < min_dist: 40 | min_dist = dist 41 | best_k = idx 42 | C[best_k].append(j) 43 | # 更新 44 | eps = 0 45 | for idx in range(self.k): 46 | vec_k = np.mean(X[C[idx]], axis=0) 47 | eps += self.dist_method(vec_k, self.cluster_centers_[idx]) 48 | self.cluster_centers_[idx] = vec_k 49 | # 判断终止条件 50 | if eps < self.tol: 51 | break 52 | 53 | def predict(self, X): 54 | m = X.shape[0] 55 | rst = [] 56 | for i in range(m): 57 | vec = X[i] 58 | best_k = None 59 | min_dist = np.infty 60 | for idx in range(self.k): 61 | dist = self.dist_method(self.cluster_centers_[idx], vec) 62 | if dist < min_dist: 63 | min_dist = dist 64 | best_k = idx 65 | rst.append(best_k) 66 | return np.asarray(rst) -------------------------------------------------------------------------------- /ml_models/cluster/lvq.py: -------------------------------------------------------------------------------- 1 | """ 2 | 原型聚类LVQ的实现 3 | """ 4 | 5 | import numpy as np 6 | import copy 7 | 8 | 9 | class LVQ(object): 10 | def __init__(self, class_label=None, epochs=100, eta=1e-3, tol=1e-3, dist_method=None): 11 | """ 12 | :param class_label: 原型向量类别标记 13 | :param epochs: 最大迭代次数 14 | :param eta:学习率 15 | :param tol: 终止条件 16 | :param dist_method:距离函数,默认欧氏距离 17 | """ 18 | self.class_label = class_label 19 | self.epochs = epochs 20 | self.eta = eta 21 | self.tol = tol 22 | self.dist_method = dist_method 23 | if self.dist_method is None: 24 | self.dist_method = lambda x, y: np.sqrt(np.sum(np.power(x - y, 2))) 25 | self.cluster_centers_ = {} # 记录簇中心坐标 26 | 27 | def fit(self, X, y): 28 | m = X.shape[0] 29 | # 随机初始化一组原型向量 30 | for idx, random_idx in enumerate(np.random.choice(list(range(m)), len(self.class_label), replace=False)): 31 | self.cluster_centers_[idx] = X[random_idx] 32 | # 更新 33 | for _ in range(self.epochs): 34 | eps = 0 35 | cluster_centers_old = copy.deepcopy(self.cluster_centers_) 36 | idxs = list(range(m)) 37 | np.random.shuffle(idxs) 38 | # 随机选择样本点 39 | for idx in idxs: 40 | vec = X[idx] 41 | yi = y[idx] 42 | bst_distance = np.infty 43 | bst_cid = None 44 | for cid in range(len(self.class_label)): 45 | center_vec = self.cluster_centers_[cid] 46 | if self.dist_method(vec, center_vec) < bst_distance: 47 | bst_distance = self.dist_method(vec, center_vec) 48 | bst_cid = cid 49 | # 更新 50 | if yi == self.class_label[bst_cid]: 51 | self.cluster_centers_[bst_cid] = (1-self.eta)*self.cluster_centers_[bst_cid] + self.eta * vec 52 | else: 53 | self.cluster_centers_[bst_cid] = self.cluster_centers_[bst_cid] - self.eta * (vec - self.cluster_centers_[bst_cid]) 54 | # 判断终止条件 55 | for key in self.cluster_centers_: 56 | eps += self.dist_method(cluster_centers_old[key], self.cluster_centers_[key]) 57 | eps /= len(self.cluster_centers_) 58 | if eps < self.tol: 59 | break 60 | 61 | def predict(self, X): 62 | m = X.shape[0] 63 | rst = [] 64 | for i in range(m): 65 | vec = X[i] 66 | best_k = None 67 | min_dist = np.infty 68 | for idx in range(len(self.cluster_centers_)): 69 | dist = self.dist_method(self.cluster_centers_[idx], vec) 70 | if dist < min_dist: 71 | min_dist = dist 72 | best_k = idx 73 | rst.append(best_k) 74 | return np.asarray(rst) 75 | -------------------------------------------------------------------------------- /ml_models/cluster/spectral.py: -------------------------------------------------------------------------------- 1 | """ 2 | 谱聚类实现 3 | """ 4 | 5 | import numpy as np 6 | from ml_models.cluster import KMeans 7 | 8 | 9 | class Spectral(object): 10 | def __init__(self, n_clusters=None, n_components=None, gamma=None): 11 | """ 12 | :param n_clusters: 聚类数量 13 | :param n_components: 降维数量 14 | :param gamma: rbf函数的超参数 15 | """ 16 | self.n_clusters = n_clusters 17 | self.n_components = n_components 18 | self.gamma = gamma 19 | if self.n_components is None: 20 | self.n_components = 10 21 | if self.gamma is None: 22 | self.gamma = 1 23 | if self.n_clusters is None: 24 | self.n_clusters = 3 25 | 26 | def fit_transform(self, X): 27 | rows, cols = X.shape 28 | # 1.构建拉普拉斯矩阵 29 | W = np.zeros(shape=(rows, rows)) 30 | for i in range(0, rows): 31 | for j in range(i, rows): 32 | w = np.exp(-1 * np.sum(np.power(X[i] - X[j], 2)) / (2 * self.gamma * self.gamma)) 33 | W[i, j] = w 34 | W[j, i] = w 35 | D = np.diag(np.sum(W, axis=0)) 36 | L = D - W 37 | # 2.对拉普拉斯矩阵特征分解 38 | eig_vals, eig_vecs = np.linalg.eig(L) 39 | sorted_indice = np.argsort(eig_vals) # 默认升序排序 40 | eig_vecs[:] = eig_vecs[:, sorted_indice] 41 | return eig_vecs[:, 0:self.n_components].real 42 | 43 | def fit_predict(self, X): 44 | # 3.对特征矩阵进行聚类 45 | transform_matrix = self.fit_transform(X) 46 | transform_matrix = transform_matrix / np.sqrt(np.sum(np.power(transform_matrix, 2), axis=1, keepdims=True)) 47 | kmeans = KMeans(k=self.n_clusters) 48 | kmeans.fit(transform_matrix) 49 | return kmeans.predict(transform_matrix) -------------------------------------------------------------------------------- /ml_models/decomposition/__init__.py: -------------------------------------------------------------------------------- 1 | from .pca import * 2 | from .lda import * 3 | from .mds import * 4 | from .isomap import * 5 | from .lle import * 6 | from .nmf import * 7 | -------------------------------------------------------------------------------- /ml_models/decomposition/isomap.py: -------------------------------------------------------------------------------- 1 | """ 2 | 等度量映射实现 3 | """ 4 | import numpy as np 5 | 6 | 7 | class Isomap(object): 8 | def __init__(self, n_components=None, epsilon=1e-3): 9 | self.n_components = n_components 10 | self.epsilon = epsilon 11 | 12 | @staticmethod 13 | def floyd(dist_matrix): 14 | vex_num = len(dist_matrix) 15 | for k in range(vex_num): 16 | for i in range(vex_num): 17 | for j in range(vex_num): 18 | if dist_matrix[i][k] == np.inf or dist_matrix[k][j] == np.inf: 19 | temp = np.inf 20 | else: 21 | temp = dist_matrix[i][k] + dist_matrix[k][j] 22 | if dist_matrix[i][j] > temp: 23 | dist_matrix[i][j] = temp 24 | return dist_matrix 25 | 26 | def fit_transform(self, data=None, D=None): 27 | 28 | if D is None: 29 | m = data.shape[0] 30 | D = np.zeros(shape=(m, m)) 31 | # 初始化D 32 | for i in range(0, m): 33 | for j in range(i, m): 34 | D[i, j] = np.sqrt(np.sum(np.power(data[i] - data[j], 2))) 35 | D[j, i] = D[i, j] 36 | else: 37 | m = D.shape[0] 38 | 39 | # floyd更新 40 | D = np.where(D < self.epsilon, D, np.inf) 41 | D = self.floyd(D) 42 | D_i = np.sum(np.power(D, 2), axis=0) / m 43 | D_j = np.sum(np.power(D, 2), axis=1) / m 44 | D_2 = np.sum(np.power(D, 2)) / (m * m) 45 | 46 | # 计算B 47 | B = np.zeros(shape=(m, m)) 48 | for i in range(0, m): 49 | for j in range(i, m): 50 | B[i, j] = -0.5 * (D[i, j] * D[i, j] - D_i[i] - D_j[j] + D_2) 51 | B[j, i] = B[i, j] 52 | # 求Z 53 | eig_vals, eig_vecs = np.linalg.eig(B) 54 | sorted_indice = np.argsort(-1 * eig_vals) 55 | eig_vals = eig_vals[sorted_indice] 56 | eig_vecs[:] = eig_vecs[:, sorted_indice] 57 | # 简化:取前n_components维 58 | Lambda = np.diag(eig_vals.real[:self.n_components]) 59 | vecs = eig_vecs.real[:, :self.n_components] 60 | return vecs.dot(np.sqrt(Lambda)) -------------------------------------------------------------------------------- /ml_models/decomposition/lda.py: -------------------------------------------------------------------------------- 1 | """ 2 | 线性判别分析的代码实现 3 | """ 4 | import numpy as np 5 | 6 | 7 | class LDA(object): 8 | def __init__(self, n_components=None): 9 | """ 10 | :param n_components: 主成分数量,原则上只需要(类别数-1)即可 11 | """ 12 | self.n_components = n_components 13 | self.trans_matrix = None # 保存前n_components个特征向量 14 | 15 | def fit(self, X, y): 16 | 17 | x_mean = np.mean(X, axis=0) 18 | k = np.max(y) + 1 # 类别 19 | dims = len(x_mean) # 数据维度 20 | if self.n_components is None: 21 | self.n_components = dims 22 | S_b = np.zeros(shape=(dims, dims)) 23 | S_w = np.zeros(shape=(dims, dims)) 24 | for j in range(0, k): 25 | idx = np.argwhere(y == j).reshape(-1) 26 | N_j = len(idx) 27 | X_j = X[idx] 28 | x_mean_j = np.mean(X_j, axis=0) 29 | S_b += N_j * ((x_mean - x_mean_j).reshape(-1, 1).dot((x_mean - x_mean_j).reshape(1, -1))) 30 | S_w += (X_j - x_mean_j).T.dot(X_j - x_mean_j) 31 | eig_vals, eig_vecs = np.linalg.eig(np.linalg.inv(S_w).dot(S_b)) 32 | sorted_indice = np.argsort(-1 * eig_vals) # 默认升序排序 33 | eig_vecs[:] = eig_vecs[:, sorted_indice] 34 | self.trans_matrix = eig_vecs[:, 0:self.n_components] 35 | 36 | def transform(self, X): 37 | return X.dot(self.trans_matrix) 38 | -------------------------------------------------------------------------------- /ml_models/decomposition/lle.py: -------------------------------------------------------------------------------- 1 | """ 2 | 局部线性嵌入LLE 3 | """ 4 | import numpy as np 5 | 6 | 7 | class LLE(object): 8 | def __init__(self, k=5, n_components=3): 9 | """ 10 | :param k: 最近邻数 11 | :param n_components: 降维后的维度 12 | """ 13 | self.k = k 14 | self.n_components = n_components 15 | 16 | def fit_transform(self, data): 17 | # 构建距离矩阵 18 | m = data.shape[0] 19 | D = np.zeros(shape=(m, m)) 20 | for i in range(0, m): 21 | for j in range(i, m): 22 | D[i, j] = np.sqrt(np.sum(np.power(data[i] - data[j], 2))) 23 | D[j, i] = D[i, j] 24 | 25 | # 保留最近的k个坐标 26 | idx = [] 27 | for i in range(0, m): 28 | idx.append(np.argsort(D[i])[1:self.k + 1].tolist()) 29 | # 构建权重矩阵W 30 | C = np.zeros(shape=(m, self.k, self.k)) # 辅助计算W 31 | W = np.zeros(shape=(m, m)) 32 | for i in range(0, m): 33 | nn_idx = idx[i] # Q_i 34 | for cj1 in range(0, self.k): 35 | for cj2 in range(cj1, self.k): 36 | j1 = nn_idx[cj1] 37 | j2 = nn_idx[cj2] 38 | c = np.dot(data[i] - data[j1], data[i] - data[j2]) 39 | c = 1.0 / c 40 | C[i, cj1, cj2] = c 41 | C[i, cj2, cj1] = c 42 | C = np.sum(C, axis=1) 43 | C = C / np.sum(C, axis=1, keepdims=True) 44 | for i in range(0, m): 45 | nn_idx = idx[i] 46 | for cj in range(0, self.k): 47 | W[i, nn_idx[cj]] = C[i, cj] 48 | # 对M特征分解 49 | M = (np.eye(m) - W).T.dot(np.eye(m) - W) 50 | eig_vals, eig_vecs = np.linalg.eig(M) 51 | sorted_indice = np.argsort(eig_vals.real) 52 | eig_vals = eig_vals[sorted_indice] 53 | eig_vecs[:] = eig_vecs[:, sorted_indice] 54 | # 保留前n_compnent维 55 | return eig_vecs.real[:, :self.n_components] 56 | -------------------------------------------------------------------------------- /ml_models/decomposition/mds.py: -------------------------------------------------------------------------------- 1 | """ 2 | 多维缩放的代码实现 3 | """ 4 | import numpy as np 5 | 6 | 7 | class MDS(object): 8 | def __init__(self, n_components=None): 9 | self.n_components = n_components 10 | 11 | def fit_transform(self, data=None, D=None): 12 | if D is None: 13 | m = data.shape[0] 14 | D = np.zeros(shape=(m, m)) 15 | # 初始化D 16 | for i in range(0, m): 17 | for j in range(i, m): 18 | D[i, j] = np.sqrt(np.sum(np.power(data[i] - data[j], 2))) 19 | D[j, i] = D[i, j] 20 | else: 21 | m = D.shape[0] 22 | D_i = np.sum(np.power(D, 2), axis=0) / m 23 | D_j = np.sum(np.power(D, 2), axis=1) / m 24 | D_2 = np.sum(np.power(D, 2)) / (m * m) 25 | # 计算B 26 | B = np.zeros(shape=(m, m)) 27 | for i in range(0, m): 28 | for j in range(i, m): 29 | B[i, j] = -0.5 * (D[i, j] * D[i, j] - D_i[i] - D_j[j] + D_2) 30 | B[j, i] = B[i, j] 31 | # 求Z 32 | eig_vals, eig_vecs = np.linalg.eig(B) 33 | sorted_indice = np.argsort(-1 * eig_vals) 34 | eig_vals = eig_vals[sorted_indice] 35 | eig_vecs[:] = eig_vecs[:, sorted_indice] 36 | # 简化:取前n_components维 37 | Lambda = np.diag(eig_vals.real[:self.n_components]) 38 | vecs = eig_vecs.real[:, :self.n_components] 39 | return vecs.dot(np.sqrt(Lambda)) 40 | -------------------------------------------------------------------------------- /ml_models/decomposition/nmf.py: -------------------------------------------------------------------------------- 1 | """ 2 | 非负矩阵分解,NMF 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | class NMF(object): 9 | def __init__(self, n_components=None, epochs=100, tol=1e-3): 10 | """ 11 | :param n_components: 降维数 12 | :param epochs:最大迭代次数 13 | :param tol:最大误差 14 | """ 15 | self.n_components = n_components 16 | if self.n_components is None: 17 | self.n_components = 2 18 | self.epochs = epochs 19 | self.tol = tol 20 | 21 | def fit_transform(self, X): 22 | m, n = X.shape 23 | W = np.abs(np.random.random(size=(m, self.n_components))) 24 | H = np.abs(np.random.random(size=(self.n_components, n))) 25 | # update 26 | for _ in range(self.epochs): 27 | W_ratio = (X @ H.T) / (W @ H @ H.T) 28 | H_ratio = (W.T @ X) / (W.T @ W @ H) 29 | W_u = W * W_ratio 30 | H_u = H * H_ratio 31 | if np.mean(np.abs(W - W_u)) < self.tol: 32 | return W_u 33 | W = W_u 34 | H = H_u 35 | return W 36 | -------------------------------------------------------------------------------- /ml_models/decomposition/pca.py: -------------------------------------------------------------------------------- 1 | """ 2 | 主成分分析 3 | """ 4 | import numpy as np 5 | 6 | 7 | class PCA(object): 8 | def __init__(self, n_components=None): 9 | """ 10 | :param n_components: 主成分数量 11 | """ 12 | self.n_components = n_components 13 | self.mean_info = None # 保存均值信息 14 | self.trans_matrix = None # 保存前n_components个主成分矩阵 15 | 16 | def fit(self, X): 17 | self.mean_info = np.mean(X, axis=0, keepdims=True) 18 | if self.n_components is None: 19 | self.n_components = X.shape[1] 20 | X_ = X - self.mean_info 21 | xTx = X_.T.dot(X_) 22 | eig_vals, eig_vecs = np.linalg.eig(xTx) 23 | sorted_indice = np.argsort(-1 * eig_vals) # 默认升序排序 24 | eig_vecs[:] = eig_vecs[:, sorted_indice] 25 | self.trans_matrix = eig_vecs[:, 0:self.n_components] 26 | 27 | def transform(self, X): 28 | X_ = X - self.mean_info 29 | return X_.dot(self.trans_matrix) -------------------------------------------------------------------------------- /ml_models/em/__init__.py: -------------------------------------------------------------------------------- 1 | from .gmm_cluster import * 2 | from .gmm_classifier import * -------------------------------------------------------------------------------- /ml_models/em/gmm_classifier.py: -------------------------------------------------------------------------------- 1 | """ 2 | 利用GMMCluster实现分类 3 | """ 4 | from ml_models.em import GMMCluster 5 | import numpy as np 6 | 7 | 8 | class GMMClassifier(object): 9 | def __init__(self, cluster_each_class=1, n_iter=100, tol=1e-3, shr_cov=False): 10 | """ 11 | :param cluster_each_class:每个类需要几个高斯模型去拟合,默认1个 12 | :param n_iter:迭代次数 13 | :param tol: -log likehold增益 0).astype(int) * 2 - 1 43 | elif self.loss == 'huber': 44 | return np.where(np.abs(y - y_pred) > self.huber_threshold, 45 | self.huber_threshold * ((y - y_pred > 0).astype(int) * 2 - 1), y - y_pred) 46 | elif self.loss == "quantile": 47 | return np.where(y - y_pred > 0, self.quantile_threshold, self.quantile_threshold-1) 48 | 49 | def fit(self, x, y): 50 | # 拟合第一个模型 51 | self.base_estimator[0].fit(x, y) 52 | y_pred = self.base_estimator[0].predict(x) 53 | new_y = self._get_gradient(y, y_pred) 54 | for index in range(1, self.n_estimators): 55 | self.base_estimator[index].fit(x, new_y) 56 | y_pred += self.base_estimator[index].predict(x) * self.learning_rate 57 | new_y = self._get_gradient(y, y_pred) 58 | 59 | def predict(self, x): 60 | return np.sum( 61 | [self.base_estimator[0].predict(x)] + 62 | [self.learning_rate * self.base_estimator[i].predict(x) for i in 63 | range(1, self.n_estimators - 1)] + 64 | [self.base_estimator[self.n_estimators - 1].predict(x)] 65 | , axis=0) 66 | -------------------------------------------------------------------------------- /ml_models/ensemble/random_forest_classifier.py: -------------------------------------------------------------------------------- 1 | """ 2 | randomforest分类实现 3 | """ 4 | from ml_models.tree import CARTClassifier 5 | import copy 6 | import numpy as np 7 | 8 | 9 | class RandomForestClassifier(object): 10 | def __init__(self, base_estimator=None, n_estimators=10, feature_sample=0.66): 11 | """ 12 | :param base_estimator: 基学习器,允许异质;异质的情况下使用列表传入比如[estimator1,estimator2,...,estimator10],这时n_estimators会失效; 13 | 同质的情况,单个estimator会被copy成n_estimators份 14 | :param n_estimators: 基学习器迭代数量 15 | :param feature_sample:特征抽样率 16 | """ 17 | self.base_estimator = base_estimator 18 | self.n_estimators = n_estimators 19 | if self.base_estimator is None: 20 | # 默认使用决策树 21 | self.base_estimator = CARTClassifier() 22 | # 同质分类器 23 | if type(base_estimator) != list: 24 | estimator = self.base_estimator 25 | self.base_estimator = [copy.deepcopy(estimator) for _ in range(0, self.n_estimators)] 26 | # 异质分类器 27 | else: 28 | self.n_estimators = len(self.base_estimator) 29 | self.feature_sample = feature_sample 30 | # 记录每个基学习器选择的特征 31 | self.feature_indices = [] 32 | 33 | def fit(self, x, y): 34 | # TODO:并行优化 35 | n_sample, n_feature = x.shape 36 | for estimator in self.base_estimator: 37 | # 重采样训练集 38 | indices = np.random.choice(n_sample, n_sample, replace=True) 39 | x_bootstrap = x[indices] 40 | y_bootstrap = y[indices] 41 | # 对特征抽样 42 | feature_indices = np.random.choice(n_feature, int(n_feature * self.feature_sample), replace=False) 43 | self.feature_indices.append(feature_indices) 44 | x_bootstrap = x_bootstrap[:, feature_indices] 45 | estimator.fit(x_bootstrap, y_bootstrap) 46 | 47 | def predict_proba(self, x): 48 | # TODO:并行优化 49 | probas = [] 50 | for index, estimator in enumerate(self.base_estimator): 51 | probas.append(estimator.predict_proba(x[:, self.feature_indices[index]])) 52 | return np.mean(probas, axis=0) 53 | 54 | def predict(self, x): 55 | return np.argmax(self.predict_proba(x), axis=1) 56 | -------------------------------------------------------------------------------- /ml_models/ensemble/random_forest_regressor.py: -------------------------------------------------------------------------------- 1 | """ 2 | random forest回归实现 3 | """ 4 | from ml_models.tree import CARTRegressor 5 | import copy 6 | import numpy as np 7 | 8 | 9 | class RandomForestRegressor(object): 10 | def __init__(self, base_estimator=None, n_estimators=10, feature_sample=0.66): 11 | """ 12 | :param base_estimator: 基学习器,允许异质;异质的情况下使用列表传入比如[estimator1,estimator2,...,estimator10],这时n_estimators会失效; 13 | 同质的情况,单个estimator会被copy成n_estimators份 14 | :param n_estimators: 基学习器迭代数量 15 | :param feature_sample:特征抽样率 16 | """ 17 | self.base_estimator = base_estimator 18 | self.n_estimators = n_estimators 19 | if self.base_estimator is None: 20 | # 默认使用决策树 21 | self.base_estimator = CARTRegressor() 22 | # 同质 23 | if type(base_estimator) != list: 24 | estimator = self.base_estimator 25 | self.base_estimator = [copy.deepcopy(estimator) for _ in range(0, self.n_estimators)] 26 | # 异质 27 | else: 28 | self.n_estimators = len(self.base_estimator) 29 | self.feature_sample = feature_sample 30 | # 记录每个基学习器选择的特征 31 | self.feature_indices = [] 32 | 33 | def fit(self, x, y): 34 | # TODO:并行优化 35 | n_sample, n_feature = x.shape 36 | for estimator in self.base_estimator: 37 | # 重采样训练集 38 | indices = np.random.choice(n_sample, n_sample, replace=True) 39 | x_bootstrap = x[indices] 40 | y_bootstrap = y[indices] 41 | # 对特征抽样 42 | feature_indices = np.random.choice(n_feature, int(n_feature * self.feature_sample), replace=False) 43 | self.feature_indices.append(feature_indices) 44 | x_bootstrap = x_bootstrap[:, feature_indices] 45 | estimator.fit(x_bootstrap, y_bootstrap) 46 | 47 | def predict(self, x): 48 | # TODO:并行优化 49 | preds = [] 50 | for index, estimator in enumerate(self.base_estimator): 51 | preds.append(estimator.predict(x[:, self.feature_indices[index]])) 52 | 53 | return np.mean(preds, axis=0) 54 | -------------------------------------------------------------------------------- /ml_models/ensemble/xgboost_classifier.py: -------------------------------------------------------------------------------- 1 | """ 2 | xgboost分类树的实现 3 | """ 4 | from ml_models.ensemble import XGBoostBaseTree 5 | from ml_models import utils 6 | import copy 7 | import numpy as np 8 | 9 | 10 | class XGBoostClassifier(object): 11 | def __init__(self, base_estimator=None, n_estimators=10, learning_rate=1.0): 12 | """ 13 | :param base_estimator: 基学习器 14 | :param n_estimators: 基学习器迭代数量 15 | :param learning_rate: 学习率,降低后续基学习器的权重,避免过拟合 16 | """ 17 | self.base_estimator = base_estimator 18 | self.n_estimators = n_estimators 19 | self.learning_rate = learning_rate 20 | if self.base_estimator is None: 21 | self.base_estimator = XGBoostBaseTree() 22 | # 同质分类器 23 | if type(base_estimator) != list: 24 | estimator = self.base_estimator 25 | self.base_estimator = [copy.deepcopy(estimator) for _ in range(0, self.n_estimators)] 26 | # 异质分类器 27 | else: 28 | self.n_estimators = len(self.base_estimator) 29 | 30 | # 扩展class_num组分类器 31 | self.expand_base_estimators = [] 32 | 33 | def fit(self, x, y): 34 | # 将y转one-hot编码 35 | class_num = np.amax(y) + 1 36 | y_cate = np.zeros(shape=(len(y), class_num)) 37 | y_cate[np.arange(len(y)), y] = 1 38 | 39 | # 扩展分类器 40 | self.expand_base_estimators = [copy.deepcopy(self.base_estimator) for _ in range(class_num)] 41 | 42 | # 第一个模型假设预测为0 43 | y_pred_score_ = np.zeros(shape=(x.shape[0], class_num)) 44 | # 计算一阶、二阶导数 45 | g = utils.softmax(y_pred_score_) - y_cate 46 | h = utils.softmax(y_pred_score_) * (1 - utils.softmax(y_pred_score_)) 47 | # 训练后续模型 48 | for index in range(0, self.n_estimators): 49 | y_pred_score = [] 50 | for class_index in range(0, class_num): 51 | self.expand_base_estimators[class_index][index].fit(x, g[:, class_index], h[:, class_index]) 52 | y_pred_score.append(self.expand_base_estimators[class_index][index].predict(x)) 53 | y_pred_score_ += np.c_[y_pred_score].T * self.learning_rate 54 | g = utils.softmax(y_pred_score_) - y_cate 55 | h = utils.softmax(y_pred_score_) * (1 - utils.softmax(y_pred_score_)) 56 | 57 | def predict_proba(self, x): 58 | # TODO:并行优化 59 | y_pred_score = [] 60 | for class_index in range(0, len(self.expand_base_estimators)): 61 | estimator_of_index = self.expand_base_estimators[class_index] 62 | y_pred_score.append( 63 | np.sum( 64 | [estimator_of_index[0].predict(x)] + 65 | [self.learning_rate * estimator_of_index[i].predict(x) for i in 66 | range(1, self.n_estimators - 1)] + 67 | [estimator_of_index[self.n_estimators - 1].predict(x)] 68 | , axis=0) 69 | ) 70 | return utils.softmax(np.c_[y_pred_score].T) 71 | 72 | def predict(self, x): 73 | return np.argmax(self.predict_proba(x), axis=1) 74 | -------------------------------------------------------------------------------- /ml_models/explain/__init__.py: -------------------------------------------------------------------------------- 1 | from .shap import * 2 | -------------------------------------------------------------------------------- /ml_models/fm/__init__.py: -------------------------------------------------------------------------------- 1 | from .fm import * 2 | from .ffm import * 3 | -------------------------------------------------------------------------------- /ml_models/kernel_functions.py: -------------------------------------------------------------------------------- 1 | """ 2 | 定义一些常见的核函数 3 | """ 4 | import numpy as np 5 | 6 | 7 | def linear(): 8 | """ 9 | 线性核函数 10 | :return:linear function 11 | """ 12 | 13 | def _linear(x, y): 14 | return np.dot(x, y) 15 | 16 | return _linear 17 | 18 | 19 | def poly(p=2): 20 | """ 21 | 多项式核函数 22 | :param p: 23 | :return: poly function 24 | """ 25 | 26 | def _poly(x, y): 27 | return np.power(np.dot(x, y) + 1, p) 28 | 29 | return _poly 30 | 31 | 32 | def rbf(sigma=0.1): 33 | """ 34 | 径向基/高斯核函数 35 | :param sigma: 36 | :return: 37 | """ 38 | 39 | def _rbf(x, y): 40 | np_x = np.asarray(x) 41 | if np_x.ndim <= 1: 42 | return np.exp((-1 * np.dot(x - y, x - y) / (2 * sigma * sigma))) 43 | else: 44 | return np.exp((-1 * np.multiply(x - y, x - y).sum(axis=1) / (2 * sigma * sigma))) 45 | 46 | return _rbf 47 | -------------------------------------------------------------------------------- /ml_models/latent_dirichlet_allocation/__init__.py: -------------------------------------------------------------------------------- 1 | from .lda import * 2 | -------------------------------------------------------------------------------- /ml_models/linear_model/__init__.py: -------------------------------------------------------------------------------- 1 | from .linear_regression import * 2 | from .logistic_regression import * 3 | from .perceptron import * 4 | from .max_entropy import * -------------------------------------------------------------------------------- /ml_models/optimization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | """ 4 | DPF拟牛顿法 5 | """ 6 | 7 | 8 | class DFP(object): 9 | def __init__(self, x0, g0): 10 | """ 11 | 12 | :param x0: 初始的x 13 | :param g0: 初始x对应的梯度 14 | """ 15 | self.x0 = x0 16 | self.g0 = g0 17 | # 初始化G0 18 | self.G0 = np.eye(len(x0)) 19 | 20 | def update_quasi_newton_matrix(self, x1, g1): 21 | """ 22 | 更新拟牛顿矩阵 23 | :param x1: 24 | :param g1: 25 | :return: 26 | """ 27 | # 进行一步更新 28 | y0 = g1 - self.g0 29 | delta0 = x1 - self.x0 30 | self.G0 = self.G0 + delta0.dot(delta0.T) / delta0.T.dot(y0)[0][0] - self.G0.dot(y0).dot(y0.T).dot(self.G0) / \ 31 | y0.T.dot(self.G0).dot(y0)[0][0] 32 | 33 | def adjust_gradient(self, gradient): 34 | """ 35 | 对原始的梯度做调整 36 | :param gradient: 37 | :return: 38 | """ 39 | return self.G0.dot(gradient) 40 | 41 | 42 | """ 43 | BFGS拟牛顿法 44 | """ 45 | 46 | 47 | class BFGS(object): 48 | def __init__(self, x0, g0): 49 | """ 50 | 51 | :param x0: 初始的x 52 | :param g0: 初始x对应的梯度 53 | """ 54 | self.x0 = x0 55 | self.g0 = g0 56 | # 初始化B0 57 | self.B0 = np.eye(len(x0)) 58 | 59 | def update_quasi_newton_matrix(self, x1, g1): 60 | """ 61 | 更新拟牛顿矩阵 62 | :param x1: 63 | :param g1: 64 | :return: 65 | """ 66 | # 进行一步更新 67 | y0 = g1 - self.g0 68 | delta0 = x1 - self.x0 69 | # 使用sherman-morrison公式不稳定 70 | # divide_value = delta0.T.dot(y0)[0][0] 71 | # tmp = np.eye(len(y0)) - delta0.dot(y0.T) / divide_value 72 | # self.G0 = self.G0 + tmp.dot(self.G0).dot(tmp.T) + delta0.dot(delta0.T) / divide_value 73 | self.B0 = self.B0 + y0.dot(y0.T) / y0.T.dot(delta0)[0][0] - self.B0.dot(delta0).dot(delta0.T).dot(self.B0) / \ 74 | delta0.T.dot(self.B0).dot(delta0)[0][0] 75 | 76 | def adjust_gradient(self, gradient): 77 | """ 78 | 对原始的梯度做调整 79 | :param gradient: 80 | :return: 81 | """ 82 | return np.linalg.pinv(self.B0).dot(gradient) 83 | -------------------------------------------------------------------------------- /ml_models/outlier_detect/__init__.py: -------------------------------------------------------------------------------- 1 | from .hbos import * 2 | from .iforest import * 3 | from .knn import * 4 | from .phbos import * 5 | from .lof import * 6 | -------------------------------------------------------------------------------- /ml_models/outlier_detect/hbos.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class HBOS(object): 5 | def __init__(self, bins=10, thresh=0.01): 6 | """ 7 | :param bins:分箱数 8 | :param thresh: 9 | """ 10 | self.bins = bins 11 | self.thresh = thresh 12 | self.thresh_value = None 13 | self.hist_bins = {} 14 | 15 | def fit_transform(self, X): 16 | # 计算直方图概率 17 | hist_X = np.zeros_like(X) 18 | for i in range(0, X.shape[1]): 19 | hist, bins = np.histogram(X[:, i]) 20 | hist = hist / hist.sum() 21 | hist_X[:, i] = np.asarray([hist[idx - 1] for idx in np.digitize(X[:, i], bins[:-1])]) 22 | self.hist_bins[i] = (hist, bins) 23 | # 计算HBOS异常值 24 | hbos = np.zeros_like(hist_X[:, 0]) 25 | for i in range(0, hist_X.shape[1]): 26 | hbos += np.log(1.0 / (hist_X[:, i] + 1e-7)) 27 | # 计算异常阈值 28 | self.thresh_value = sorted(hbos)[int(len(hbos) * (1 - self.thresh))] 29 | return (hbos >= self.thresh_value).astype(int) 30 | 31 | def transform(self, X): 32 | # 计算直方图概率 33 | hist_X = np.zeros_like(X) 34 | for i in range(0, X.shape[1]): 35 | hist, bins = self.hist_bins[i] 36 | hist_X[:, i] = np.asarray([hist[idx - 1] for idx in np.digitize(X[:, i], bins[:-1])]) 37 | # 计算HBOS异常值 38 | hbos = np.zeros_like(hist_X[:, 0]) 39 | for i in range(0, hist_X.shape[1]): 40 | hbos += np.log(1.0 / (hist_X[:, i] + 1e-7)) 41 | return (hbos >= self.thresh_value).astype(int) 42 | -------------------------------------------------------------------------------- /ml_models/outlier_detect/knn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class KNN(object): 5 | def __init__(self, n_neighbors=3): 6 | """ 7 | :param n_neighbors: 最近的样本量 8 | """ 9 | self.n_neighbors = n_neighbors 10 | 11 | def fit_transform(self, X): 12 | # 构建距离矩阵,这里默认使用欧式距离 13 | m = X.shape[0] 14 | D = np.zeros(shape=(m, m)) 15 | for i in range(0, m): 16 | for j in range(i, m): 17 | D[i, j] = np.sqrt(np.sum(np.power(X[i] - X[j], 2))) 18 | D[j, i] = D[i, j] 19 | # 对每个样本,求最近的n_neighbors个非0距离之和 20 | rst = [] 21 | for i in range(0, m): 22 | d = D[i] 23 | d = d[d > 0] 24 | d.sort() 25 | rst.append(np.sum(d[:min(self.n_neighbors, len(d))])) 26 | return np.asarray(rst) 27 | -------------------------------------------------------------------------------- /ml_models/outlier_detect/lof.py: -------------------------------------------------------------------------------- 1 | """ 2 | Local Outlier Factor实现 3 | """ 4 | import numpy as np 5 | 6 | 7 | class LOF(object): 8 | def __init__(self, n_neighbors=5): 9 | """ 10 | :param n_neighbors: 考虑最近的样本量 11 | """ 12 | self.n_neighbors = n_neighbors 13 | 14 | def fit_transform(self, X): 15 | # 构建距离矩阵,这里默认使用欧式距离 16 | m = X.shape[0] 17 | D = np.zeros(shape=(m, m)) 18 | for i in range(0, m): 19 | for j in range(i, m): 20 | D[i, j] = np.sqrt(np.sum(np.power(X[i] - X[j], 2))) 21 | D[j, i] = D[i, j] 22 | # 对每个样本,求最近的n_neighbors距离(非0) 23 | d_k = [] 24 | for i in range(0, m): 25 | d = D[i] 26 | d = d[d > 0] 27 | d.sort() 28 | d_k.append(d[min(self.n_neighbors, len(d)) - 1]) 29 | # 计算局部可达密度 30 | lrd = [] 31 | for i in range(0, m): 32 | d = D[i] 33 | indices = d.argsort() 34 | k = 0 35 | neighbor_distances = [] 36 | for idx in indices: 37 | if k == self.n_neighbors: 38 | break 39 | if D[i, idx] > 0: 40 | neighbor_distances.append(max(D[i, idx], d_k[idx])) 41 | k += 1 42 | lrd.append(len(neighbor_distances) / np.sum(neighbor_distances)) 43 | # 计算lof 44 | lof = [] 45 | for i in range(0, m): 46 | d = D[i] 47 | indices = d.argsort() 48 | k = 0 49 | neighbor_lrd = [] 50 | for idx in indices: 51 | if k == self.n_neighbors: 52 | break 53 | if D[i, idx] > 0: 54 | neighbor_lrd.append(lrd[idx]) 55 | k += 1 56 | lof.append(np.sum(neighbor_lrd) / (len(neighbor_lrd) * lrd[i])) 57 | return np.asarray(lof) 58 | -------------------------------------------------------------------------------- /ml_models/outlier_detect/phbos.py: -------------------------------------------------------------------------------- 1 | """ 2 | pca(去相关性)+hbos 3 | """ 4 | import numpy as np 5 | from ..decomposition import PCA 6 | 7 | 8 | class pHbos(object): 9 | 10 | def __init__(self, bins=20, thresh=0.01): 11 | """ 12 | :param bins:分箱数 13 | :param thresh: 14 | """ 15 | self.bins = bins 16 | self.thresh = thresh 17 | self.thresh_value = None 18 | self.hist_bins = {} 19 | self.pca = None 20 | self.pca_mins = None 21 | 22 | def fit_transform(self, X): 23 | # pca 24 | self.pca = PCA(n_components=X.shape[1]) 25 | self.pca.fit(X) 26 | X = self.pca.transform(X) 27 | # 计算直方图概率 28 | hist_X = np.zeros_like(X) 29 | for i in range(0, X.shape[1]): 30 | hist, bins = np.histogram(X[:, i]) 31 | hist = hist / hist.sum() 32 | hist_X[:, i] = np.asarray([hist[idx - 1] for idx in np.digitize(X[:, i], bins[:-1])]) 33 | self.hist_bins[i] = (hist, bins) 34 | 35 | # 计算HBOS异常值 36 | hbos = np.zeros_like(hist_X[:, 0]) 37 | for i in range(0, hist_X.shape[1]): 38 | hbos += np.log(1.0 / (hist_X[:, i] + 1e-7)) 39 | # 计算异常阈值 40 | self.thresh_value = sorted(hbos)[int(len(hbos) * (1 - self.thresh))] 41 | return (hbos >= self.thresh_value).astype(int) 42 | 43 | def transform(self, X): 44 | # pca 45 | X = self.pca.transform(X) 46 | # 计算直方图概率 47 | hist_X = np.zeros_like(X) 48 | for i in range(0, X.shape[1]): 49 | hist, bins = self.hist_bins[i] 50 | hist_X[:, i] = np.asarray([hist[idx - 1] for idx in np.digitize(X[:, i], bins[:-1])]) 51 | # 计算HBOS异常值 52 | hbos = np.zeros_like(hist_X[:, 0]) 53 | for i in range(0, hist_X.shape[1]): 54 | hbos += np.log(1.0 / (hist_X[:, i] + 1e-7)) 55 | return (hbos >= self.thresh_value).astype(int) 56 | -------------------------------------------------------------------------------- /ml_models/pgm/__init__.py: -------------------------------------------------------------------------------- 1 | from .naive_bayes_classifier import * 2 | from .gaussian_naive_bayes_classifier import * 3 | from .semi_gaussian_naive_bayes_classifier import * 4 | from .naive_bayes_cluster import * 5 | from .gaussian_naive_bayes_cluster import * 6 | from .simple_markov_model import * 7 | from .page_rank import * 8 | from .hidden_markov_model import * 9 | from .conditional_random_field import * 10 | -------------------------------------------------------------------------------- /ml_models/pgm/gaussian_naive_bayes_classifier.py: -------------------------------------------------------------------------------- 1 | """ 2 | 高斯朴素贝叶斯分类器实现 3 | """ 4 | import numpy as np 5 | from .. import utils 6 | 7 | 8 | class GaussianNBClassifier(object): 9 | def __init__(self): 10 | self.p_y = {} # p(y) 11 | self.p_x_y = {} # p(x | y) 12 | self.class_num = None 13 | 14 | def fit(self, x, y): 15 | # 参数估计 16 | self.class_num = y.max() + 1 17 | for y_index in range(0, self.class_num): 18 | # p(y) 19 | y_n_sample = np.sum(y == y_index) 20 | self.p_y[y_index] = np.log(y_n_sample / len(y)) 21 | self.p_x_y[y_index] = {} 22 | # p(x | y) 23 | x_y = x[y == y_index] 24 | for i in range(0, x_y.shape[1]): 25 | u = np.mean(x_y[:, i]) 26 | sigma = np.std(x_y[:, i]) 27 | self.p_x_y[y_index][i] = [u, sigma] 28 | 29 | def predict_proba(self, x): 30 | rst = [] 31 | for x_row in x: 32 | tmp = [] 33 | for y_index in range(0, self.class_num): 34 | p_y_log = self.p_y[y_index] 35 | for j in range(0, len(x_row)): 36 | xij = x_row[j] 37 | p_y_log += np.log(utils.gaussian_1d(xij, self.p_x_y[y_index][j][0], self.p_x_y[y_index][j][1])) 38 | tmp.append(p_y_log) 39 | rst.append(tmp) 40 | return utils.softmax(np.asarray(rst)) 41 | 42 | def predict(self, x): 43 | return np.argmax(self.predict_proba(x), axis=1) 44 | -------------------------------------------------------------------------------- /ml_models/pgm/naive_bayes_classifier.py: -------------------------------------------------------------------------------- 1 | """ 2 | 朴素贝叶斯分类器实现 3 | """ 4 | import numpy as np 5 | from .. import utils 6 | from ..wrapper_models import DataBinWrapper 7 | 8 | 9 | class NaiveBayesClassifier(object): 10 | def __init__(self, max_bins=10): 11 | """ 12 | :param max_bins:为了方便,对x每维特征做分箱 13 | """ 14 | self.dbw = DataBinWrapper(max_bins=max_bins) 15 | # 记录模型参数 16 | self.default_y_prob = None # y的默认概率 17 | self.default_x_prob = {} # x的默认概率 18 | self.p_y = {} # p(y) 19 | self.p_x_y = {} # p(x | y) 20 | self.class_num = None 21 | 22 | def fit(self, x, y): 23 | self.default_y_prob = np.log(0.5 / (y.max()+1)) 24 | # 分箱 25 | self.dbw.fit(x) 26 | x_bins = self.dbw.transform(x) 27 | # 参数估计 28 | self.class_num = y.max() + 1 29 | for y_index in range(0, self.class_num): 30 | # p(y) 31 | y_n_sample = np.sum(y == y_index) 32 | self.default_x_prob[y_index] = np.log(0.5 / y_n_sample) 33 | x_y = x_bins[y == y_index] 34 | self.p_y[y_index] = np.log(y_n_sample / (self.class_num + len(y))) 35 | self.p_x_y[y_index] = {} 36 | # p(x | y) 37 | for i in range(0, x_y.shape[1]): 38 | self.p_x_y[y_index][i] = {} 39 | x_i_feature_set = set(x_y[:, i]) 40 | for x_feature in x_i_feature_set: 41 | self.p_x_y[y_index][i][x_feature] = np.log( 42 | np.sum(x_y[:, i] == x_feature) / (y_n_sample + len(x_i_feature_set))) 43 | 44 | def predict_proba(self, x): 45 | x_bins = self.dbw.transform(x) 46 | rst = [] 47 | for x_row in x_bins: 48 | tmp = [] 49 | for y_index in range(0, self.class_num): 50 | try: 51 | p_y_log = self.p_y[y_index] 52 | except: 53 | p_y_log = self.default_y_prob 54 | for i,xij in enumerate(x_row): 55 | try: 56 | p_y_log += self.p_x_y[y_index][i][xij] 57 | except: 58 | p_y_log += self.default_x_prob[y_index] 59 | tmp.append(p_y_log) 60 | rst.append(tmp) 61 | return utils.softmax(np.asarray(rst)) 62 | 63 | def predict(self, x): 64 | return np.argmax(self.predict_proba(x), axis=1) 65 | -------------------------------------------------------------------------------- /ml_models/pgm/page_rank.py: -------------------------------------------------------------------------------- 1 | """ 2 | PageRank算法实现 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | class PageRank(object): 9 | def __init__(self, init_prob, trans_matrix): 10 | self.init_prob = init_prob 11 | self.trans_matrix = trans_matrix 12 | 13 | def get_page_rank_values(self, time_steps=None, set_init_prob=None, set_prob_trans_matrix=None): 14 | """ 15 | 计算time_steps后的概率分布,允许通过set_init_prob和set_prob_trans_matrix设置初始概率分布和概率转移矩阵 16 | :param time_steps: 17 | :param set_init_prob: 18 | :param set_prob_trans_matrix: 19 | :return: 20 | """ 21 | init_prob = self.init_prob if set_init_prob is None else set_init_prob 22 | trans_matrix = self.trans_matrix if set_prob_trans_matrix is None else set_prob_trans_matrix 23 | for _ in range(0, time_steps): 24 | init_prob = trans_matrix.dot(init_prob) 25 | init_prob = init_prob / np.max(np.abs(init_prob)) 26 | return init_prob / np.sum(init_prob) 27 | -------------------------------------------------------------------------------- /ml_models/pgm/semi_gaussian_naive_bayes_classifier.py: -------------------------------------------------------------------------------- 1 | """ 2 | 半朴素贝叶斯分类器实现 3 | """ 4 | import numpy as np 5 | from .. import utils 6 | 7 | 8 | class SemiGaussianNBClassifier(object): 9 | def __init__(self, link_rulers=None): 10 | """ 11 | :param link_rulers: 属性间的链接关系[(x1,x2),(x3,x4)] 12 | """ 13 | self.p_y = {} # p(y) 14 | self.p_x_y = {} # p(x | y) 15 | self.class_num = None 16 | self.link_rulers = link_rulers 17 | # check link_rulers,由于某一个属性最多仅依赖于另一个属性,所以某一属性在尾部出现次数不可能大于1次 18 | self.tail_link_rulers = {} 19 | if self.link_rulers is not None and len(self.link_rulers) > 0: 20 | for x1, x2 in self.link_rulers: 21 | if x2 in self.tail_link_rulers: 22 | raise Exception("属性依赖超过1次") 23 | self.tail_link_rulers[x2] = [x1, x2] 24 | 25 | def fit(self, x, y): 26 | # 参数估计 27 | self.class_num = y.max() + 1 28 | for y_index in range(0, self.class_num): 29 | # p(y) 30 | y_n_sample = np.sum(y == y_index) 31 | self.p_y[y_index] = np.log(y_n_sample / len(y)) 32 | self.p_x_y[y_index] = {} 33 | # p(x | y) 34 | x_y = x[y == y_index] 35 | for i in range(0, x_y.shape[1]): 36 | # 如果i在link_rulers的尾部,则需要构建二维高斯分布 37 | if i in self.tail_link_rulers: 38 | first_feature, second_feature = self.tail_link_rulers[i] 39 | u = np.mean(x_y[:, [first_feature, second_feature]], axis=0) 40 | sigma = np.cov(x_y[:, [first_feature, second_feature]].T) 41 | else: 42 | u = np.mean(x_y[:, i]) 43 | sigma = np.std(x_y[:, i]) 44 | self.p_x_y[y_index][i] = [u, sigma] 45 | 46 | def predict_proba(self, x): 47 | rst = [] 48 | for x_row in x: 49 | tmp = [] 50 | for y_index in range(0, self.class_num): 51 | p_y_log = self.p_y[y_index] 52 | for j in range(0, len(x_row)): 53 | if j in self.tail_link_rulers: 54 | first_feature, second_feature = self.tail_link_rulers[j] 55 | xij = x_row[[first_feature, second_feature]] 56 | p_y_log += np.log(utils.gaussian_nd(xij, self.p_x_y[y_index][j][0], self.p_x_y[y_index][j][1])) 57 | else: 58 | xij = x_row[j] 59 | p_y_log += np.log(utils.gaussian_1d(xij, self.p_x_y[y_index][j][0], self.p_x_y[y_index][j][1])) 60 | tmp.append(p_y_log) 61 | rst.append(tmp) 62 | return utils.softmax(np.asarray(rst)).reshape(x.shape[0], self.class_num) 63 | 64 | def predict(self, x): 65 | return np.argmax(self.predict_proba(x), axis=1).reshape(-1) 66 | -------------------------------------------------------------------------------- /ml_models/svm/__init__.py: -------------------------------------------------------------------------------- 1 | from .hard_margin_svm import * 2 | from .soft_margin_svm import * 3 | from .svc import * 4 | -------------------------------------------------------------------------------- /ml_models/tree/__init__.py: -------------------------------------------------------------------------------- 1 | from .decision_tree import * 2 | from .cart_classifier import * 3 | from .cart_regressor import * 4 | -------------------------------------------------------------------------------- /ml_models/vi/__init__.py: -------------------------------------------------------------------------------- 1 | from .gmm import * 2 | from .linear_regression import * 3 | -------------------------------------------------------------------------------- /ml_models/wrapper_models/__init__.py: -------------------------------------------------------------------------------- 1 | from .multi_class_wrapper import * 2 | from .data_bin_wrapper import * 3 | -------------------------------------------------------------------------------- /ml_models/wrapper_models/data_bin_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class DataBinWrapper(object): 5 | def __init__(self, max_bins=10): 6 | # 分段数 7 | self.max_bins = max_bins 8 | # 记录x各个特征的分段区间 9 | self.XrangeMap = None 10 | 11 | def fit(self, x): 12 | n_sample, n_feature = x.shape 13 | # 构建分段数据 14 | self.XrangeMap = [[] for _ in range(0, n_feature)] 15 | for index in range(0, n_feature): 16 | tmp = sorted(x[:, index]) 17 | for percent in range(1, self.max_bins): 18 | percent_value = np.percentile(tmp, (1.0 * percent / self.max_bins) * 100.0 // 1) 19 | self.XrangeMap[index].append(percent_value) 20 | self.XrangeMap[index] = sorted(list(set(self.XrangeMap[index]))) 21 | 22 | def transform(self, x): 23 | """ 24 | 抽取x_bin_index 25 | :param x: 26 | :return: 27 | """ 28 | if x.ndim == 1: 29 | return np.asarray([np.digitize(x[i], self.XrangeMap[i]) for i in range(0, x.size)]) 30 | else: 31 | return np.asarray([np.digitize(x[:, i], self.XrangeMap[i]) for i in range(0, x.shape[1])]).T 32 | -------------------------------------------------------------------------------- /notebooks/10_09_集成学习_bagging_高阶组合_stacking.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### 介绍\n", 8 | "前面对模型的组合主要用了两种方式: \n", 9 | "\n", 10 | "(1)一种是平均/投票; \n", 11 | "\n", 12 | "(2)另外一种是加权平均/投票; \n", 13 | "\n", 14 | "所以,我们有时就会陷入纠结,是平均的好,还是加权的好,那如果是加权,权重又该如何分配的好?如果我们在这些模型预测的结果上再训练一个模型对结果做预测,那么岂不是就免除了这些烦恼;而训练后,也可以方便的获取这些基分类器的权重(等价于下一层级模型的特征权重),且结果也更为客观!简单流程如下: \n", 15 | "\n", 16 | "![avatar](./source/10_stacking简介.png) \n", 17 | "\n", 18 | "\n", 19 | "当然实际训练时回更加细致和复杂,比如: \n", 20 | "\n", 21 | "(1)通常会对数据做$k$折切分,切分为$k$份,然后将每个基学习器扩展为$k$个基学习器,每个学习器学习$k-1$份训练数据; \n", 22 | "\n", 23 | "(2)对分类器,预测结果通常会取概率分布,这样可以提取更多的信息; \n", 24 | "\n", 25 | "(3)上面的结构还可以无限叠加,构建更加复杂的stacking结构,比如对新的拼接特征又训练几组基分类器,然后再组合..." 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "stacking的代码实现,[跳转>>>](https://github.com/zhulei227/Stacking_Ensembles)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [] 41 | } 42 | ], 43 | "metadata": { 44 | "kernelspec": { 45 | "display_name": "Python 3", 46 | "language": "python", 47 | "name": "python3" 48 | }, 49 | "language_info": { 50 | "codemirror_mode": { 51 | "name": "ipython", 52 | "version": 3 53 | }, 54 | "file_extension": ".py", 55 | "mimetype": "text/x-python", 56 | "name": "python", 57 | "nbconvert_exporter": "python", 58 | "pygments_lexer": "ipython3", 59 | "version": "3.6.4" 60 | } 61 | }, 62 | "nbformat": 4, 63 | "nbformat_minor": 2 64 | } 65 | -------------------------------------------------------------------------------- /notebooks/10_15_集成学习_lightgbm_进一步优化.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "lightgbm是继xgboost之后的又一大杀器,它训练速度快,且精度高,下面就其主要创新点做介绍,本文主要[参考自>>>](https://zhuanlan.zhihu.com/p/99069186)" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### 一.单边梯度采样算法\n", 15 | "GOSS(Gradient-based One-Side Sampling)是一种样本采样方法,它基于梯度的绝对值对样本进行采样,主要包含如下几个步骤: \n", 16 | "\n", 17 | "(1)对梯度的绝对值进行降序排序,选择前$a*100\\%$个样本; \n", 18 | "\n", 19 | "(2)从剩下的小梯度数据中随机选择$b*100\\%$个样本,为了不破坏原始数据的分布特性,对这$b*100\\%$个样本权重扩大$\\frac{1-a}{b}$倍(代价敏感学习) \n", 20 | "\n", 21 | "(3)将这$(a+b)*100\\%$个样本用作为下一颗树的训练\n", 22 | "\n", 23 | "算法流程如下: \n", 24 | "\n", 25 | "![avatar](./source/10_goss.png)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "### 二.互斥特征捆绑算法\n", 33 | "EFB(Exclusive Feature Bundling)可以粗略的理解为one-hot的逆过程,我们可以将这个过程拆解为两步: \n", 34 | "\n", 35 | "(1)第一步,需要寻找那些特征进行捆绑,理想情况下所选择的几个特征,每行只有一个非0值,但现实数据可能没有那么离线,lgb用了一种允许一定误差的图着色算法求解: \n", 36 | "\n", 37 | "![avatar](./source/10_特征捆绑.png) \n", 38 | "\n", 39 | "(2)第二步,便是合并这些特征,如果直接合并其实是有问题的,比如下面第一张图片,因为第一列特征的取值1和第二列特征的取值1其实具有不同含义,所以lgb的做法是对每一列加上一个bias,这个bias为前面所有特征列的max取值之和,如下面第二张图例所示: \n", 40 | "\n", 41 | "![avatar](./source/10_efb.png) \n", 42 | "\n", 43 | "算法流程如下,但有一点需要注意是,计算特征的切分点是在各自的range内计算的,如上图,虽然合并后为一列特征,但需要分别在[1,2],[3,4],[5,8]这三个区间内分别计算一个切分点 \n", 44 | "\n", 45 | "![avatar](./source/10_捆绑特征合并.png)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "### 三.类别特征优化\n", 53 | "\n", 54 | "类别特征,特别是高基类别特征的处理一直很棘手,比如日期,id等特征,如果按照常规方法对其作one-hot展开,在训练决策树时会切分出很多不平衡的结点来(比如左节点样本数远大于右节点),这本质是一种one-vs-rest的切分方法,收益很低,决策树还容易过拟合,而lgb提出了使用many-vs-many的方法对类别特征进行划分,这样既能平衡左右节点的训练样本量,又能一定程度上避免过拟合,示例如下(图中决策判断条件如果为True则走左分支,否则走右边): \n", 55 | "\n", 56 | "![avatar](./source/10_lgb中类别特征的处理.png) \n", 57 | "\n", 58 | "当然,如果暴力搜索时间复杂度会有些高,假如某类别特征有$k$种取值的可能,那么共有如下的$2^{k-1}-1$种搜索结果,时间复杂度为$O(2^k)$: \n", 59 | "\n", 60 | "$$\n", 61 | "C_k^1+C_k^2+\\cdots+C_k^{k//2}=2^{k-1}-1\n", 62 | "$$ \n", 63 | "\n", 64 | "lgb基于《On Grouping For Maximum Homogeneity》实现了时间复杂度为$O(klogk)$的求法" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [] 73 | } 74 | ], 75 | "metadata": { 76 | "kernelspec": { 77 | "display_name": "Python 3", 78 | "language": "python", 79 | "name": "python3" 80 | }, 81 | "language_info": { 82 | "codemirror_mode": { 83 | "name": "ipython", 84 | "version": 3 85 | }, 86 | "file_extension": ".py", 87 | "mimetype": "text/x-python", 88 | "name": "python", 89 | "nbconvert_exporter": "python", 90 | "pygments_lexer": "ipython3", 91 | "version": "3.6.4" 92 | } 93 | }, 94 | "nbformat": 4, 95 | "nbformat_minor": 2 96 | } 97 | -------------------------------------------------------------------------------- /notebooks/12_01_PGM_贝叶斯网(有向无环图)初探.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### 一.概率与图的关系\n", 8 | "概率图模型(PGM)其实可以拆开看做“概率”+“图”,它是以图的方式呈现随机变量的联合概率分布,这一节主要主要介绍有向无环图,即贝叶斯网,假如我们有如下的一个联合概率分布,它可以按照链式规则展开如下: \n", 9 | "\n", 10 | "\n", 11 | "$$\n", 12 | "p(a,b,c)=p(a)\\cdot p(b\\mid a)\\cdot p(c\\mid a,b)\n", 13 | "$$ \n", 14 | "\n", 15 | "那么它可以用有向图表示为: \n", 16 | "![avatar](./source/12_概率图初探demo.png)\n", 17 | "\n", 18 | "但是,很多情况下变量之间是具有条件独立性的,即某些变量之间并不会互相影响,比如对上面的模型做一个马尔科夫假设,让每一个随机变量只与它的前一个随机变量有关: \n", 19 | "\n", 20 | "$$\n", 21 | "p(a,b,c)=p(a)\\cdot p(b\\mid a)\\cdot p(c\\mid b)\n", 22 | "$$ \n", 23 | "\n", 24 | "那么,此时的概率图可以表示为: \n", 25 | "\n", 26 | "![avatar](./source/12_概率图初探demo2.png) \n", 27 | "\n", 28 | "#### 小结一下\n", 29 | "\n", 30 | "通过概率图的方式,我们可以非常简单直接的表示概率的分布关系,反过来,我们也可以通过图很容易的推导出它的联合概率分布公式: \n", 31 | "\n", 32 | "$$\n", 33 | "p(x)=\\prod_ip(x_i\\mid x_{Pa(i)})\n", 34 | "$$ \n", 35 | "\n", 36 | "这里$Pa(i)$表示第$i$个节点的前驱节点,比如上面$b$的前驱节点即为: $Pa(b)=a$ " 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "#### 案列演示\n", 44 | "再比如如下的概率图: \n", 45 | "\n", 46 | "![avatar](./source/12_概率图初探demo3.png)\n", 47 | "\n", 48 | "可以很快写出它的联合概率分布: \n", 49 | "\n", 50 | "$$\n", 51 | "p(a,b,c,d,e)=p(a)\\cdot p(b\\mid a)\\cdot p(c\\mid a,b)\\cdot p(d\\mid b)\\cdot p(e\\mid c)\n", 52 | "$$" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "### 二.条件独立性的好处\n", 60 | "上面提到了条件独立性,接下来聊一下它,假如: \n", 61 | "\n", 62 | "$$\n", 63 | "p(c\\mid b)=p(c\\mid a,b)\n", 64 | "$$ \n", 65 | "\n", 66 | "可以发现不管$a$存在与否,对条件概率$p(c\\mid b)$都没有影响,那么就称在$b$被观测的条件下,$a$与$c$条件独立,之所以可以这样做,因为随机变量大多满足这样的客观规律,比如你当前的心情可能只与最近一个小时内所遇到的事情相关,难道你还会被一星期前某个瞬间的心情所影响吗? \n", 67 | "\n", 68 | "\n", 69 | "对随机变量做条件独立假设还有一个计算上的好处:**极大地降低了参数量**\n", 70 | "\n", 71 | "比如对$p(a,b,c)$如果每个随机变量都有10种取值情况,那么$p(a,b,c)$的参数量有$10^3$,但如果对其做条件独立性假设:$p(a,b,c)=p(a)\\cdot p(b\\mid a)\\cdot p(c\\mid b)$后参数量将只有:10($p(a)$)+100($p(b\\mid a)$)+100($p(c\\mid b)$)=210,对于更加高维的随机变量,将会极大地降低参数量" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [] 80 | } 81 | ], 82 | "metadata": { 83 | "kernelspec": { 84 | "display_name": "Python 3", 85 | "language": "python", 86 | "name": "python3" 87 | }, 88 | "language_info": { 89 | "codemirror_mode": { 90 | "name": "ipython", 91 | "version": 3 92 | }, 93 | "file_extension": ".py", 94 | "mimetype": "text/x-python", 95 | "name": "python", 96 | "nbconvert_exporter": "python", 97 | "pygments_lexer": "ipython3", 98 | "version": "3.6.4" 99 | } 100 | }, 101 | "nbformat": 4, 102 | "nbformat_minor": 2 103 | } 104 | -------------------------------------------------------------------------------- /notebooks/source/01_adam.drawio: -------------------------------------------------------------------------------- 1 | 7ZtNc6M4EIZ/jY9D6QMJdEwyyexlpqYqs5XdI2VkmxqMXJhM7Pn1K4wESOCYOHxld6kcUANt9OpxS+p2Fvhue/iSBrvNVxHyeIFAeFjgzwuEoIvQIv8D4bGweNAtDOs0CtVNleEx+s2VESjrcxTyvXFjJkScRTvTuBRJwpeZYQvSVLyYt61EbH7qLljzhuFxGcRN61MUZpvC6iOvsv/Bo/VGfzKkrLiyDfTNqif7TRCKl5oJ3y/wXSpEVpxtD3c8zsXTuhTPPZy5Wr5YypOsywNPP+5XAjztPn39gp7S47c/f+yWn5SXX0H8rDqsXjY7agWkFym2bNy+bKKMP+6CZX7lRY63tG2ybSxbUJ4qVzzN+OHsO8Ky5xIZLrY8S4/yFvUAUVppWFTzpVIeatumprqrNQ7UaK9Lz5Ug8kRp8gZ90Lz0oaY+rEUf0qIP9ofSB89LH48Y+kDYIhAiLQK5QwnkzksgZgIEcVMg1qYPGEofMi99ILAEok2B2iIQGgwgOjOBkClQOb3VBMItBMHBQrTXo0DBflcsIlbRgcsPvV1FcXwnYpGeHOHQB8DD0r7PUvGT167cEAAkA/IJkWQ1++p09KQ9Ro4Z4MqAVw9wLXjCocT3O4ifhDf5Qky2lnGw30dLU3N+iLK/5DlwMPBU++9TG2Cm2p9zBYBuHGuN7zyNZFd4qm2J7FbhzkO6rdwR3a7cnVrHest22Bjphwcsj9Pter3KyvHlYWM5aY2u1EY8p0t+edGRBemaZ5fua9JSY6FtMaBtKY+DLPplvm4bIOoTvotIdqSEEVtTSWMNVnRTPVVfl1qOXNd0hD3LUaFDw9GJ17Lb1yPM3o/wa4yUOCLc4BFfA2R30C4ChKcECFqLWWgHqa4AQc+3HLFRAdLvPRBBZXhERUCrwiNF7wqP1MLRY8PieDHu4Q+BrWdhe23cgzoTUq7FfQdAVh3+uBR3SQX85ymejDrgmbAQ1wFyMVoe7pUQUit2Eub4dFzwumyR+5iAoc9sVryJJ2B3VhOwvYftHsksR2Tk0NUlifARCZqODGoOKIXXkoEdRtxqTsOmX891YC2IATQuN11yK/9PeVNBCK0pzyP9THn2dsEHY095qEPOar8JdvnpKuYHReDtm2EshPoAg42sVTWyKxadswn0gqOhh7aZEbsJg21jeDN+yF4bvEQk3Mo/KlMQR+skJ0COXP5lvs1zitEyiG/UhW0UhvG5ZGcqnpMwT23Wklu6sIlU+vJRvSbU7SLLBUk/aUyMrSFqKULAtizmYDl21CUHNGYZ1Po2TF8HxV1yHGMq5JsKzaASOrdSOptdLRSjeUlE7X3L5NVQPLN6OrWqfdOXQ/HMCurUql5MXw/FfZbU510QpWR2BVHcw55yoIooNLaNF7aME5Q9dfS9nP+fdKvCrFnj6sIns7YqYxc+3S6TzTvSZnVMaukL4Fv5Czh04ekyUJNWlLxzvzl6K1AehA6BFGPpgwHErPwJZQ6GABDqQ99jALrj0tZDmn+owOgAF5tMYgR7CpD/bnYpYg7BjQSuno2BHJn6Qa8EG7gOY+fARsR1XFYDm4wLdg/VBw02NKDGwH0V6s4ItsDf57w9LYH4VQKv/VEJJWaWuL88omxWP/ovbq/+dQLf/wM= -------------------------------------------------------------------------------- /notebooks/source/01_adam.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/01_adam.png -------------------------------------------------------------------------------- /notebooks/source/01_归一化对梯度下降的影响.drawio: -------------------------------------------------------------------------------- 1 | 7VpLk5s4EP41HEPpAQIdxx5PcslWqiZbs3ukjGxTi5ELM7G9v36FEQ8JTax4DHhqQ/mgbolG+vS5u/Vw8Hx7/JxHu81XHrPUQSA+OvjRQQh6CDnlD8SnShMQXCnWeRLLRq3iOfmXSSWQ2tckZnulYcF5WiQ7VbnkWcaWhaKL8pwf1GYrnqpf3UVr1lM8L6O0r31J4mJTaUMUtPovLFlv6i9DQquabVQ3liPZb6KYHzoqvHDwPOe8qErb45ylJXg1LtV7T2/UNh3LWVbYvPDyfbHi4GX36etn9JKf/vjz+275SVr5EaWvcsCys8WpRkBYEWALYXbYJAV73kXLsuYg5lvoNsU2FRIURWmK5QU7vtlH2IxcUIbxLSvyk2giX/AlVjVZpHhokYe1btNB3asxjuRsrxvLLSCiIDH5BXzQfeFDVHyoAR/fgA8Oh8IH3xc+ga/gA6EBIOQbAPKGAsi7L4CoSiCI+wBREz5gKHz8+8IHAg0g0gfI5IHQYAQidwYQUgFqwlsHIGxgEBzMRQc3BCja76okYpUcmfjobJWk6ZynPD8bwnEIQICFfl/k/B/WqXnwARAcEG/wrOjoV+fnRthj5KoOrnF4XQdnoCccCvzQAvwsfigTMSEt02i/T5Yq5uyYFH+JMnAxCKT891kGmEr5sUQA1MKpI3xjeSKGwvJal4lhVeYCVMvSnF/LrbmzdOpKusHeTD89YfGcm9f5Km3ml8W9dFKbXYENf82X7HLSUUT5mhWX2vXZ0uGCKRmodTlLoyL5oXbXRBD5hW88EQNpyIi1UNLLwaphyre6ealmyPNUQzjQDFU49Ayd+doM+3oK0/dT+GccaeiIcI+P+BpC2hPtIoHwlASCWjILdSdlSyAYhJohOiqB6n4PxKDGPaLKobXukaB3uUei0TGgw9Lxot/DH4K2gUbba/0ehJ5qCIcugLR9wnFZbLMV8L9n8WSsA4FKFt9zgUhGm8e7koRE850+dUMyLvFslsi3CMAwpDpXgokDsHdXAVhfw9p7Ms2QP7LrstlE+IgMmo4ZRJ1QAq9lBnap77UxDat2A8+FHScG0Li8sdlb+R3ypiIh1EJe4N8m5OnLhRCMHvIG3bOStt91DhMCHaNK7kw9Aoa5R4NtFEObraZJMaPa4QM17M2Z9o6hvuFxO8xs9jamxIxomQMyHdh4JswG229HNqv5STHDGmaGMxxo+m/Cwf6byGbtOClm2sFyE6M6mBEDZGQwxNCQiN3VqQUJ7uzMAtkcWt9d2qfkfBfyPed2K9QqhEyVA+oxFSHPLXOM5sGqRdsckPjqAqfxoSMlgGisPQ/QKioSTn3kMO2aoh87r1xEEA+4PkA+8jEk1IMaTc21Y5Fr2GWt0SPB8TxSNZrJGKRnEnrybs0gAl0aarbG3TdDFsvQ/SbalcVVyo6SMbNfJk/lqz/AngPSDneQfnHOdnZ9/ZqMbmjoqTWslhfEEeF0BspC+Og8LJyFEB8dKqo8ZxY6oo9CE84dSsoCBU646BFCJILFz6Y74xnTUlCpitJknZWcEXNdhpZZmVYmyyh9kBXbJI7Tt/LdnL9mcZnddm5l1Ddykcxgn2U3YS1X1zOgf5tMFtusvEyp7BWLVSG2V4MrVrQXrPHiPw== -------------------------------------------------------------------------------- /notebooks/source/01_归一化对梯度下降的影响.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/01_归一化对梯度下降的影响.png -------------------------------------------------------------------------------- /notebooks/source/05_最大熵模型.drawio: -------------------------------------------------------------------------------- 1 | 7Z1dk6I4FIZ/DVU9F2tBPki4VNueremaqpntrZqPmy1asWVWxUV6tOfXbxCikqCiHUikx+rqggAR3vOQnHMS0IL92fp97C8mH6NRMLWAPVpb8NYCwKMO+58WvGQFGLhZwVMcjrIiZ1fwEP4K8kI7L30OR8GysGMSRdMkXBQLh9F8HgyTQpkfx9GquNs4mha/deE/BVLBw9CfyqVfwlEyyUopILvyP4PwacK/2XG9bMvM5zvnV7Kc+KNotVcEBxbsx1GUZEuzdT+YptpxXbLj7g5s3Z5YHMyTKgeg2YcZGr9Pvn7/8P2/1eceWXz564+8lp/+9Dm/4HVetExeuAZx9DwfBWk1tgV7q0mYBA8Lf5huXTGjs7JJMpuyNYctjsPptB9No3hzLByPA3c4ZOXLJI7+Dfa2jIj3aKcVyhfCzyqIk2C9V5Rf2PsgmgVJ/MJ24VttnB2SU7bFZ7WzGbfDZM9cvMzPKXna1rwTki3kWp6hK5B1BW3QFWrWFcq6wjboijXrimRdURt0JZp1xZKuHQu4U/a1vceYLT2lS9WK1uGlR3baYEmvmiUd5NVkSle+ReYtEBZBT+8tQiRdx84N80LYIryzQD89T+edUqUDZ4QDUqa05xLou2qUBkRAGMkIb63RiNS0VGpQkBq2QmpXt9SeLDXYp1pQvR2AU92qc7d/X3Z4w1yZrexhQXbQBtmhrV12OYy82M0ZhzeCke7fmeH4aLIuqGhdrzbrysHseCb20K24k1BJ2qDhO0kOcK0BtXoDy4PWAFsUW12sVOkxHQblXucjxQgr8jo5nLnQTkn/jEucTlibznLAaw2IRT2r56U6M8GpsxGcWL1ba+BaHiu0U1t0B+nWbKHryI2ONbizen2LUmuArF43/UsrJBvD3aUHsjq7m0LatbqU7+9doVnFG6jMrqU3UH2GlSPuNfex1ArcSLSGPFB0sUoSGrTJaM0pCYN5F9AGfUvSDM3qK4fDXN9rTHSK+sKSHrZZfeXArCzXlkv+ox3JNMkKJWn8MivguqwA5DiNS37fBn1LAuFGKQdyQJa6N6k3cpc6M+xkuoR7IPbGvbm1KEwXeo5FQbqJXRPtX6FPgojXMc0rAXIItbjJfBLSX19j6OR6xCzHBMiRUyYxaI3Eun0TIAdNmcSwLRJrd09ApQHDTPUfueptyHpJhtDuochxTqb6fWtY1+6kyKHOMhonM3+9SXndWt3+xldhC4pDy0b8EJci8/wQeQRQUjaYj7rpxDm2Npz6y2U4LIoZrMPk697yt9QG7Eqztdt1bpLNygtfmbOz/7q/sndUuro7bLPGjztohmX0HA+DI1fK5wr68VOQHNkvZzAYFaYBykY9kajkZXEw9ZPwZ1A42TIz5t/wKQrZlR1Kk2IRhOyy84N2LEj1OKhYEaJCRZkuUkUbqLZX/QrO5Cj7jXPGB1EMAc2BwtAt8Dre/ucy7gCVxuE79t5HOM+aKYRylqGlFIKKFNK3CCESW9G6sStJvjSB3eX4ILOxwEcbkcpYiAPrTsNYyDmg87G4sGXZ4tSxES4ihb3jbRlb+RTEIbv6IH41aLhqb+mYRSQoulPQxWqaJgi09o8lkw3a2T+6V8odRB0WNO4+tEgPBR3okR09riIqCdZJpZxlXMlD97W3lCo63tO84VLeKhBNdHIJ8YHHV84lD9tF0my3WC/V2jzKuVjNGJK6HEBalVez0iXbfrhauFmVSiRGsZ5WDOVM9Mr0gMIzCxNxnjpRg4k4d4jCSmAwO/kve7st0h2Wh0//0PfsOMtqVEudnJxfyROQ6mr+LiePB3WnmzLXLEbFNud413dGB1uolgW9OpsyefhhtdDcp9bVBDpVfUAeW5oCondW51cZRMeoyFcen1iFjfSpGlpEcJ0tIhKbLltNr01ssdpG0UMqBiWuowWsmu3jChgCHibCvCs1XXHJXCONLSBSMUhxHRhWTf7xGYSGYIhQ0e0/0WNW7ohdodrjoyt1Y6giBW0ChoZAg4loXdTBVH3bdbzauqEpeXjvNzQKoaFeLdAcr7ZuaIzL5ur3u8zq8FxbwBAq6fAIkKYW6+zw5GxuYxgeROo0KeXjVKY0WECc3liZDSiwgbSyIedc33wTZVZOwgVF8E44z5UxRKJ7pRVDFZPED2BIDOOwcmxoVorChQKHagY+CSbnVFs3hyomkV9Hc1gZQ7P6YVdMUagZWCUuOafamjHEbydhWxlDw3plLMavajAk0gNzOjG8ulnlhk0WcsXEp5qBTekp7YYfteLsv4HmiVRtnsx6nsEVolWkZiCTCOCdqLZuDBUk8g+iYYghxYHB7asZX50ZbXgMGmtMoB808rU5v8pooI5wI2udGoMV5Mnf7o0svoS1bmOpyCY324mbNTWY8NlR/N47ngC+GqdQQSbZ/JsYFe89fOFDedp74xrzrYZ58F7Vrt6sBAMR0q1ITbqVCunWE9XWjaGCdKv5bYaUA7jOBt59M0lJ7ppd22wGyce8FDViixUJ02iEbq9u8oAC8rQQZAoYwuNcyD5uz4vdGDGoqRsMTW8TuByMA09fa8sTCvOfkNMSMH5nm2rxXDp89PX8WU8CaOKMOFIXH5/j8f0/j39/6v368HP+ALpR9Hld8hu0jbQbr0NKemNDnS+P83SSV/I6psuwk14OV5/PXMqZJsfFAM7AVXCGpJc5XMiZ8ISf9MtYNXOm8ZGW6sycfhrU1goDEF9JdGGORYRB3Vvf2Orux9uz3WN/MfkYjYJ0j/8B -------------------------------------------------------------------------------- /notebooks/source/06_原问题与其min-max问题.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/06_原问题与其min-max问题.png -------------------------------------------------------------------------------- /notebooks/source/06_核技巧.drawio: -------------------------------------------------------------------------------- 1 | 7ZtRb5swEIB/DS+TKsUYG+ex67JNmjZt6sO0RxecwEbiDJyF7tfPCYbEhipe1cTB20sEZzjj75zz3RkCeLes35V0nX3kKSuCcJLWAXwThCHBSP7uBI+NIEJxI1iUedqIwEFwn/9mSjhR0k2eskq7UHBeiHytCxO+WrFEaDJalnyrXzbnhd7rmi5YT3Cf0KIv/ZqnIlPDCuOD/D3LF1nbM8DTpmVJ24vVSKqMpnx7JIKzAN6VnIvmaFnfsWLHruXS3Pf2idbuwUq2EjY3RCENv9x8+vg9u+Hk24939f2H9zdRo+UXLTZqwOphxWNLQGqRsOXJ622WC3a/psmuZSvNLWWZWBbyDMjDlFYZS9WJ0stKweonHxh0GOT0YXzJRPkoL+nmTnOHmjkAqofdHuwAiKKdaTZQxKmy/aJTfcAjDxShv6AFLGit0tvdtJNnSUGrKk90SM0NLO3NupNUjoaNJv1Rt7KSFVTkv3T1QyRUD595LjvuoAMy0ahDE2bFN2XC1F3H0+2EImzoEbRcMNHTs7dLN+rnmwr6byqoAwbTZ1oKRicUndlU8WlTlXyzSve+ZXLaD83zorjjBS/398IUMZJGUl6Jkv9gRy0kfIAYv4yzAqY12vl+7K0G5sLZfBXxgWo4uTKqUx+oAmRQRY6ptopHjhXoWLvJ6wyrRbwyAqzGbO0CamdYQx+wmq7VPVaLkG0EWKNrcwIW4ZV1ikerdZNkz/N6ZwaT8ZwkLEmGGD8QFKHJyzCeGohh2EccDiAOz4XYIioYGWIwneqMhwKvSzIOLUKEsTGGRGMMhlzFRRnb1DfGxbhXQyKOEVvEDiNDbK54oB2SM8Y2tZ+RMTaXPODaVbxk4fhKGJu1BufuGPnHGEN9Hkeu/TH2j7FFXndRxBaVyLGX4pHhO+DESOdsa/EEnFB05lp8aJPK/DfWsCJ0WVvBv9uOXPEVG7ehQPRMQ8XmpuaF/1VtgWLcpS301Iajq9IW9KJiiIzJ2e3rOsNqEduPAKtZiHW9yQUtwvnrx4rhtWG1iOBHgNVM8IlrrF68lYHNTS7nWG2SoXHlmwgbYUE7JFcJJ/RvOwabb2zEjpP6dnH1iHFspN4AOa5NRf5tx8TtELr6n+t5bJGZjYwxNrcVW3jOGPu3H4OJwZi49hX+7cfgq/PH/u3HmIUx9/64n83VoEdZDljoOHVKqtR5jFSJaJEvVvI0kYCYlL/e4csTWtyqhmWepsVT9tMTnhfgDy1eGY0G8pPwXPlJ1M9P6v4s94Y/IEjjP/CW02Xx/wPbLzA2/Ppzv1uJTik6c1UfDcXnuNj9Mx7kwULsjdEI5nz/5Acz4p8b3jbcVPvPHm/lBSBa14fGTssMBeRtMCXBDAfkTXAb7iRyuGTWdiBH0PSh9yvFR8/i698YGtWzoc9E8ND/+FzLGOqnFTV45fNKFmP9TTs48IbSRV0p6mcddfjK58UMGcHE+RYzeXr4brlxp4ePv+HsDw== -------------------------------------------------------------------------------- /notebooks/source/06_核技巧.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/06_核技巧.png -------------------------------------------------------------------------------- /notebooks/source/06_软间隔的支持向量.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/06_软间隔的支持向量.png -------------------------------------------------------------------------------- /notebooks/source/09_决策树学习.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/09_决策树学习.jpg -------------------------------------------------------------------------------- /notebooks/source/10_SHAP.drawio: -------------------------------------------------------------------------------- 1 | 7VjBcpswEP0aH5MBBBgfaztOD800Mz60PcqwgBqBGCHbuF9fAZKBkNrNTByYsS+29mm1Evu0D4kJWiTFI8dZ/MQCoBPLCIoJWk4sa+aZ8rcEDjXgWG4NRJwENWQ2wJr8AQUaCt2SAPKOo2CMCpJ1QZ+lKfiig2HO2b7rFjLanTXDEfSAtY9pH/1BAhHXqGdNG/wrkCjWM5vurO5JsHZWT5LHOGD7FoQeJmjBGRN1KykWQMvc6bzU41b/6D0ujEMq/mdAmHzn5HmVv8yXeYG8p3Rlzu6QV4fZYbpVT1wuUQXPxUHnAQKZFmWmLJV/c862aQBldENajIuYRSzF9BtjWRUDzX+DEAdFKt4KJqFYJFT1QkHEz1b7Vxnq3lHWslCRK+OgjVTwQznozrg3HA3UI6eutpuxldUZ/AycJCCAKzBkqVBLNG1p9/Oqs8G23IdTydQbFPMIxClHq3Ysk9qaQvH2CEyujx+kAweKBdl19yJWWzo6+jWsy4Yi/j2bYDZ+uo0u1c4ZpocgFY2LVLNX2V/6NFMqhbRkdx8TAesMV7nYSynvcofzrFbXkBTlHpiHhNIFo4xXgVAYguv7Es8FZy/Q6gmms41xpERNbb6Hoh1wAcXJlOpeR2mtetkgQ9n7RrptBcUt1dbYh5OgA7+S13L/3irunDqerzh7VBVn9yvuRvbHke2Mimy97hbZ88vJa4DBC9+UV9f3YBN+jrwedVLJq9TXgeXVulXW2fPI+cpyR1VZNrqR+gGkTkdFql73NcslcgeWS31eapFg6hPyRS4Fng9vXwo2nmM7n3QpQK8uBeZsaBqcHg3W1bFgTYdmwe2xcHUkoKEPcPotddUkXE6PpNl8X676Wh/p0cNf -------------------------------------------------------------------------------- /notebooks/source/10_SHAP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_SHAP.png -------------------------------------------------------------------------------- /notebooks/source/10_Saabas.drawio: -------------------------------------------------------------------------------- 1 | 7VlNc9owEP01PiYjW7YxxwAhPTRtZji0OQp7jdUIi5EFmP76yljCNk4otOGjJTPMoH0rydK+3ScLLNyf5g+CzJJHHgGzHBTlFh5YjmO7jmMVHxStSqTj4xKYCBrpThUwoj9Bg0ijcxpB1ugoOWeSzppgyNMUQtnAiBB82ewWc9Z86oxMoAWMQsLa6DcayaREA6dT4Z+AThLzZNvvlp4pMZ31TrKERHxZg/C9hfuCc1m2pnkfWBE8E5dy3PAN72ZhAlK5z4B4+lXQp2H20htkOQ4e06HdvcFBOc2CsLne8bNerlyZGECkQqLNlKfqqyf4PI2gmNlWFhcy4ROeEvaZ85kGf4CUK00omUuuoEROmfZCTuX3WvtZtdGtp61BkT/IGCtjpFKsikE36BZ5BihHdnxjV2PXVmPwEwg6BQlCgzFPpV6i7Rpb77TwZ1Lwlw3xWCHtqGsiMj4XIewKtUlfIiYgd3XUJVOEvfYIzeoDcLUDsVIdBDAi6aKZqUQn/GTTr8oJ1dBpcUiKdFsp8uWwFEFnSBHUTA/vN9mxnQinoBlfFs12i+Y7C99h1OaaMaW+BcXLhEoYzcg6IEt1ADQJJNmslOSY5kUi9GLKWJ8zLtYT4TgGPww3ZVbzRJ3uGKGtgrQP4WkBQkK+M67G6+kt6hMKI20vK713NZTUpN5g786EmfjPNfkfLLh3VVpn3xJ0L6oE3XYJfijte9DsXRTNZt01mntKaf0jKm1EIIhfVVo/DGAcn0ZpN5KpldbB51baNhP/v9L+RcHhfQvOv6iCc/GHrh6F5s5F0YzbNBe6enWyiv0zy6p5raoRYaNj3iOCEF6/R4wDz/VOdI/AW/cIu3tuGrwWDc7VseB0zs2C32Lh6kjA537RMyfVVZNwPD1SZvU79tpX+zcA3/8C -------------------------------------------------------------------------------- /notebooks/source/10_Saabas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_Saabas.png -------------------------------------------------------------------------------- /notebooks/source/10_adaboost训练.drawio: -------------------------------------------------------------------------------- 1 | 7Vvdc5s4EP9r9NgMEl/iEWy3vetlpnN5uOZebjDINi1GHiw3Tv/6k0CYL2HjxDZ2mkwe0CIJtL/9rXZXGOij5fZT6q8W9zQkMUBauAX6GCAEDYSA+NfC51xiQyMXzNMolJ1KwUP0i0ihJqWbKCTrWkdGacyiVV0Y0CQhAavJ/DSlT/VuMxrXn7ry56QleAj8uC39JwrZIpdiZJfyzySaL4onQ8vJ7yz9orNcyXrhh/SpItInQB+llLL8arkdkVgor9BLPu5jx93di6UkYX0GzJ3/vqB/9ftPfz5uRl8m7uaPvxcf5Cw//XgjFwwmGLgT4I7BxAaeB7AOJhZwNeDZ4sIZAXckF8SeCy2ldJOERDxIA7r3tIgYeVj5gbj7xO2CyxZsGfMW5JezKI5HNKZpNlYnMDSJzeVrltIfpHLHsWzdt8QImrAH+TRYtHNTgSZvy0WQlJFtp3bgTufcWAldEpY+8y7bur1JM9WL9lMFdClaVPA2pMyXZjbfTVwiwS8kGEcAY/UBpgkDCbntyiZN2YLOaeLHk1Lq1YEq+/xF6Uoq9zth7Flq198wWgePbCP2rXL9KKa6M2VrvJUzZ43nopFwjXyrNiqjRLMclrWKcVWY9R3MYpH7QeY6oZs0IHvUK70S89M5YXv6mWqjSUnss+hn/T1ObgNIYQMtKoqLccZSB2Ab4PEYtu0ijrm/JD2o2dB4g6qhT/AsUFHVCjCZzlpUfT0vkVYnJnTaxLQVxDTPRUy9DUpL3Unoiq2Ht4LYX6+joFvL5oU5dUIOFdvxIQ4hNcAVAM09APammnzCVxrxhezsx2jYj+40DCNfphxV3T6bE5mNiZquP9dDa6LMyHbLfrndGQpnYAKMBfv5hcup72WSEXCslklyArK6EfpxNE+EhXLcCWexJ2ga8cDHlTeWURjmewZZR7/8aTaVMKGVWGK2aNMD5phLYn9KYs8Pfsyz/aXiF2bZX6cvV7gGGcjJ55XhU9VCu4nZ6Uc+aHfQ1KwahIVOX2piRRc6m63JWUDHQ27xpQt6rNw55I5KD/RYc0AX3+LNnu7JHnKLd5RhnqfJsNsxgWf8jmTGB8ms6bhGZnSS7aIx4HzMhliJvPsRYANMDOCNhBXcWoAvgDH04yKSrPWVpBFXqbDda/ELcFDHYCq3e2EVUO772MoSQShyQS5xHODiduhf2gM8Lvw3VXl3Ix0wCQ4NVTqA0VS3rNNk5jpuZgBSN0Ol5rBX0eTWqHs9ubndl6Ca2m4uQ1BbYQQ9knNFtvh2knNV1eyiyTlUZOeH46krjanPSLECp8Mc60jeL1QA6/C0NxkkXQWczpBwFq95ZFDTdplvL6jR4dBBjXkrPLrCiAX2PU7IDz8H49+LDpV+g2oD7DjmkdNrdzyGMmp8LfKz6y0dFjb5HqBWYDPQwAEqUgeotxnRXKMnNnp6YjRoYAvVpzmHIqG2c357kZCBBo6E0Hsk9Ar+9S3eoGEjIVX1ppN27V3y/nzfWMxmxAqUu2RoO1NNa/H29RQ0jWaFdehdsn2A/U7B3hR0boOCqqPPYyh4vkB1CAo2vy4ZupJafOD6TsGXfF3Ytx4wLAWL13wxBRWnXDdMQePacsXuJMHdh9Mlc4QZDogapyk2xSdzJ8kRbP3KqqVI9fUGR8QV9ZWSJ3lJzcig0QReh2trdT0mNCENpUtR/xqcCvC6o25x8cTMshopHlKc4BsK+NDx8PFm+YuKvJRW/i5Fn/wP -------------------------------------------------------------------------------- /notebooks/source/10_adaboost训练.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_adaboost训练.png -------------------------------------------------------------------------------- /notebooks/source/10_bagging.drawio: -------------------------------------------------------------------------------- 1 | 7Vpdc6IwFP01eWwHEgjwKJZtd7ud2Vkftu1Lh0IEdpE4GKvur98gQUAiaq1iu+10psnJh8k999ybxALUH82vU3cc3lGfxAAq/hygKwChqkEIsl/FX+SIgVEOBGnki04lMIj+EgEqAp1GPpnUOjJKYxaN66BHk4R4rIa5aUpn9W5DGtc/dewGpAEMPDduor8in4U5akKjxG9IFITFJ6vYyltGbtFZ7GQSuj6dVSDkANRPKWV5aTTvkzgzXmGXfNyXDa2rhaUkYbsMeIoeJzPrJhpAzB7Z7cLW8c0Fzmd5ceOp2DBwLGAawLwqCgZwMOgpwBZbnrBFYZyUThOfZPMrANk0ZSENaOLG3ykdc1Dl4G/C2ELQ6k4Z5VDIRrFoJfOI3VfKD9lUl7qoXc3FzMvKoqgkLF3cVyuVUVm1HLasFeOa9hImnNBp6okdBdbTLXxEd9ffHqb9W6c3/fozvCj8zk0DwlqMqef9iF9zKcHGNaEjwtfDO6Qkdln0UvcwVzhqsOpXcskLgs49qIXWQdxmuxiIakIT/sf+pLtmYOus6EbKJ91HpRvirvhuW3aNbhP0HNDjdBvAtoGJMt4tXsBbIvcsjBgZjN2lnWY8kdd5HEZx3KcxTZdjEVF9nRgcn7CU/iGVFgsbyMXZCJqwwpvUop67iaq3sfVCUkbmreYVrUVaFecKhEV9VsnSAgorCVpTjsSHLuFDB3Yf2GpW4Kz0MHA0YFuZAjli8YKptjCj7sDMml0bdq8zx3kzfU3GnAmfEcZvw4y2Rg1UOqYGNv2/Gu7KyOaU6BtEv1dGsiqn6KDIpjcjm9w+2ltHNjH0B434mleOgdc1i/RLpfKzNmG+PzHHmgusFnWAV2gbFdtrUexJBTs0PeJ5MsE+m7qmtya+3QVrKN3FUmmqNXbJbV1I+hUHmp3k23ad2Crf87p2yPLgxnNJW/rb/2AyHBIsV4tvWM+KItPbEQ4m6vb0Z5xSTarsIriXnLq8ELxeP1ZTP3L7dHawb1v2bgKC/4GAZOnopAJCH1Edhdd3cDo8LJrJniy3X7TahPJRLlqdn9sQfM+Z5qhPT61PSltTFFLPSoNQpsGNOarJ+MfLUTrsOkfprbo6owvR6WVWqOe9pbpi3Xumuja9fZRUJ9PbaVOdLATmkU9bMqEAs/lWxHfL6tauW02kv6qJBeTGUZDwqsdtSDhuZ7aLPDfuiYZR5PtLUcvorQu9zujh5OD1A7veJEeTkAP3J4dXy2/M85fA8v8OkPMP -------------------------------------------------------------------------------- /notebooks/source/10_bagging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_bagging.png -------------------------------------------------------------------------------- /notebooks/source/10_bagging简介.drawio: -------------------------------------------------------------------------------- 1 | 1VjLcpswFP0aLdPhDVpig/NqFq2n06abjgzi0WLkkUVs5+srgTBQZCdtnNSxveCeKwnpHp0jDDCny+0lRavsjsS4AIYWb4EZAMPQLcMA4qfFuwZxdasBUprHslEHzPNHLEFNolUe4/WgISOkYPlqCEakLHHEBhiilGyGzRJSDO+6QikeAfMIFWP0ax6zrEE9w+3wK5ynWXtn3YFNZonaxnIl6wzFZNODzBCYU0oIa66W2ykuRPHaujT9Zgey+4lRXLLndPh0k19Vd9ezEOpkfmM+Bl8e9QvDlJNju3bFOOYFkCGhLCMpKVERduiEkqqMsRhW41HX5iMhKw7qHPyJGdtJNlHFCIcytixkFm9z9k10/2DL6L6XCbZy5DrYtUHJ6K7XSYT3/VzXrY7afgkpmZyIbvK4Wa9Y5ME6SmhNKhrJVin8cWt8N+8ub+6r6W3oV9efswtb7kdEU8yOFFk39nRznWCyxHyCvCPFBWL5w3AiSG7YdN+u45RfSFrVFB+b5QMqKnknEDoAOmACQQgBDIAvIA/4OvA9ELrAC8FEA6ENJiHwPHHh8+tgtFO6fSCo22Q5w/MVqku24W4w5LzPg93Gcqw6nxfFlBSE1mObiS2+HF8zSn7hXsapP3KEHt589gwrCH3AlOFtDxpT0mahFK10LbONN50HuBLKevK3tJeTqN5C8L3o9N/0pV60dm4COzpNtcIsoaLJzFQpzAMeBP7sPyksSYwoUiksdhaO7ey5HMlGweZBJZmm9aSSvLdU0v44Pnsl/euJd0oFto9cZ67A1uCPnXGWSoFcnVyEQd3IAnDssq+rQOwcUKALF5p2GgVa7tNn2ZsqUDfUbHEHtGpGAgCVdslTUwBrxOfgWMivSpYXYTVZC8+27BORZbhDu7TtMVnOm5Llvhe7PKXtOe/D9pxn2J5+drYXI+wlSiU5kYcXyWmUZJ+Z6xmqM8oG3kzwJXjg9jdrubIFV3AG/PEjIa8IG5Z/WMaSlPiPmksIFXla8jDiFcQcn4j65hEqfJlY5nFc61fF91DTI4qV/7Jf9uSoDxm0tedZofH3DPKwew9T53pvs8zwNw== -------------------------------------------------------------------------------- /notebooks/source/10_bagging简介.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_bagging简介.png -------------------------------------------------------------------------------- /notebooks/source/10_boosting简介.drawio: -------------------------------------------------------------------------------- 1 | 7VfbjpswEP2aedyKe+CRAHvJNg9tVHW3L5UDDtASHDkmt6+vDSaAIGmkZtNVVZIHz5mxGfvMGQB0b7l7oGiVTEmEM9CUaAe6D5qmGpoG4q9E+woZqUYFxDSNZFADzNIDlqAi0SKN8LoTyAjJWLrqgiHJcxyyDoYoJdtu2IJk3buuUIx7wCxEWR/9mkYsqVBbGzX4I07jpL6zajmVZ4nqYLmTdYIism1BegC6Rwlh1Wi583AmDq8+l2re/QnvMTGKc3bJhE+T9LGYPt0HjkpmE/3gfzmod6peLbNBWSF3DIEJrg+uBYEBYwdcRSC2A7YPgQWOJcBgBGMPxq4Y2JpAqj2yfX1wOOLnKE1CWUJikqMsaNAxJUUeYZGdwq0m5iMhKw6qHPyBGdvLokAFIxxK2DKTXrxL2Utr/CqW+mBKy9/JlUtjXxs5o/uXttGaJcxmWmnV86r9iU2dPH4JrUlBQxkVO9+ftW/69GHyWnjPgVs8fU7uTFnGiMaYneOmqRIuL0yWmCfEJ1KcIZZuuokgWefxMa4pBT6Q1TBcGeey7BSGDa4Krl2SHsBYKSvEBlsRLpt7vV4ZNCQLlrZJyvBshcrz2fKO0SV0QXIm2VbN2pZrlf40yzySEVqurS9M8eP4mlHyE7c8VnnJFVp4dR3pHGBvgynDuxbUP3/prXUtG5ted6xtq01IKGl1CEP5c8KGy8X4L+U3lPIZiV6i5RO1dHUtn03zIjHz0nDAMcDpF8Kbinmx0MJwSMyRNbdM60hcT6ED1J0WrfreVDvUZ/9l1V5TffUL4W/Vp/1V9akDFPfVZ8I4EA9RwT4f+zdWH7ZOqG/kzBXlOuoz6mfUu1Gfdmlj9MApEXcEdYa3osYO8TA1c9s0zCtRY92uMXKz+eopfa1vRz34BQ== -------------------------------------------------------------------------------- /notebooks/source/10_boosting简介.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_boosting简介.png -------------------------------------------------------------------------------- /notebooks/source/10_efb.drawio: -------------------------------------------------------------------------------- 1 | 7Z1db9pIFIZ/DZeV5sP2jC9bmnT3YqVKqbTaq5VjD2CtwciYQPrr1wQPYA5pmizTtz3a3ASO7cF+zjDwHHvMSI/n209Ntpz9UReuGilRbEf640gpKVTU/dtFHveROBL7wLQpi36lY+Cu/Or8ln10XRZuNVixreuqLZfDYF4vFi5vB7GsaerNcLVJXQ1fdZlNHQnc5VlFo3+WRTvbR60yx/hvrpzO/CvLJN0vmWd+5f5IVrOsqDcnIX0z0uOmrtv9o/l27KodPM9lv93tM0sPO9a4Rfs9G6wL/fv2b/l+8ji++dDe/rX88nDzrm/lIavWwwNetY8eQVOvF4XbtSJG+sNmVrbubpnlu6WbLuldbNbOq+6Z7B5Oyqoa11XdPG2ri8zZSd7FV21T/+NOliS5dfeTbgk9Dr9Trmnd9iTUH9cnV89d2zx2q/ilSc+472TKP98cU6b70OwkWz6W9Z1kemj5yLF70KN8BVZFsAoOWFMwVs0Sq1ZgrBHLQUDHYKwxwao4YLVgrAnLQSCSYKyGJ9YIjNWyHAQiA8aasuytsQBj9YNQMK6TiUvyi1wLk94LcSWu6ZAr3AdkaM8CcUULgQwtWhiucCOQoU0LxBWtBJKqFguuaCeQ1LU4cIVLgaSydd2vryCuaCuQoW0LxBWtBZLqFgeueC8I7luxs0V0iatV9zpJrsP1sNc/ixf4/eHGFe0FKrRvYbjCvcB/+jPrr3AvUMHPbGG4or1AUd+KGHCFe4EK7VsgrmgvUMFPbmG4or1AUd/SDLjCvUBR37pyXdvm7rJv3ds4iq/kW4f++dN4AfUtFlzRXuAbZsYV7gU6+PktDFe0F2jqW4oDV7QXaOpb1/UCDFe4F2jqWxz6K9wLdOhrCUFc0V6gqW9d1wswXOFeoKlvEaxuUbzfTdDonuVVtlqV+ZDks1xcMZiyQamcHHV84ah9rHFV1pYPw+8rl1D0r/C5Lrs9OcqYPevM4ozmql43ueu3OgIlDR1Gl+caarNm6lrS0FNmDof9H5JFJe5byVrUCzfM1L5v++kx0QU7tkIYfeldcHuru79fINtJpAdJOnyFeW22EzFsKFI/ONtULf/P9kvZJkl6a7ZJtwmc7YQOxBzELEF/wCV0zOTwxSHRaK50dIo5cEUXFP3+cBsH0AVFQwtfHMTMoAuKRhGulgNXdEHR8Cx8GXRB0fAsfFl0QdHQwpfhwBVdUDS08EWwcivQmOiM5lsLNKShwBJnXldNe07ZT/q2+miSp4uVX5T5RGSpNL9AtpOzayOMfGO2E3S2r1OOY57tl5L03dk+GyQORvGjsh36Sg/MTGZ4gcanldkMcXiBxlLhZcEVXaCxVHhZjAPoAo2lwntdMcNwhRdobOiZzCCu6AKNDX5lPYYrukBjQ89kxnCFF2gsz/tGwQs0ludMZrwXBL+yHsMV7QV+f7j1V7QXpKF9C8QV7QUp9S0OXOFekPK8cxTcC9LgJxgxXNFekPK8cxTcC9LQM5lBXNFekAb3LchMZrgXpKF9C8QV7gXBz29huKK9QPo3DLcOixYDKYKf4YKAhZuBFFS5ONyMB64GUvC8exTcDaQIfpILAhYuB1JQ6+JwP56AdtA9Pf5U2P7io+MPrumbfwE= -------------------------------------------------------------------------------- /notebooks/source/10_efb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_efb.png -------------------------------------------------------------------------------- /notebooks/source/10_goss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_goss.png -------------------------------------------------------------------------------- /notebooks/source/10_level_wise_vs_leaf_wise.drawio: -------------------------------------------------------------------------------- 1 | 7V1bb6M4GP01fWyFDRjzOG33qh1ppK60O/tGg5MwS+KUkCbdX78m2EkwJNAGbGqQKjV8gCHn2Mf+LpAb+2Gx+yUJVvOvNCTxDbTC3Y39eAMhcCC8yf6s8C23eMjODbMkCvlBR8NT9B/hRotbN1FI1oUDU0rjNFoVjRO6XJJJWrAFSUK3xcOmNC5edRXMSMnwNAnisvWvKEznuRVD72j/lUSzubgyQH6+ZxGIg/k3Wc+DkG5PTPZPN/ZDQmmaf1rsHkicgSdwyc/7+czew40lZJk2OeE3j/zY2FY6X+Hw5dvv//z5sn24hZyNdfomvjEJGQB8c0mX7N99QjfLkGTtWGyLJumczugyiP+gdMWMgBl/kDR94/QFm5Qy0zxdxHwv2UXp39npdy7f+n6y53HHW95vvImNZZq8nZyUbX4/3Xc8bb8lzsu/UPYtzgLFTWu6SSbkAjqAc5kGyYykl2AEB0LZSCB0QdgdsRMTEgdp9Fq8k4B3ydnhuCNr7AMn7j0kOiOJbZAIdZIobvM1iDf8UmVW45jpXkbmdh6l5GkV7L/5lklvkapgvcrFcBrtMsrvp1EcP9CYJvuG7DAgeDph9nWa0H/JyR40weR5emDglSQp2V3moIyYOAFx6ePaDx3+HbdHJRXqOD8RUWFrf6AA8zC2JYx93RhD8zD2+4axOwr+BXS8poKPtc7aaCSxDRJ9rSR6xqmdI6udY2lWO2wexvLKyNeNsXmrTxf2DGOxUhuk4NfquH2t98VP/UYjdunjSPOKvcAGEr35TMTPkhg+3MYVpIOR9Avo2GaSrtIHmk4JmlSqaej5z5bVjpo6jgSp7lWBbZuHsYd7hvGQA4v14oU6ES9XTBm6xGvIwYV60j0zSUfGqalrSZBqV1OVsQJFGNt+zzBuECtYz4NV9nEak92XLEW814uQf3ycxMF6HU2KcMserEtw6FShi+GzjdA7NKeM7Ql2bgV2wnal4kDnjHtcozjlxbZV01Cuqd1JV1XoAsUMrftn9mGW7snIDVO6v/NjZ0AvGyp23K73c9UXdgBwV7vjTtFKTF5JfLuN2Pjk7bEbzpssXoaZTy4tdT822tJi5yp2Ij7BnvY4bgriaLbMeijrQYTZ77OxG02C+AvfsYjCMD4nHsUZu4XBDyUvqyq951d0YLurwe8MOcBSG1F3YMMljnOtU34diUMOmLRIoqOVRJUBED3FDI7ucLKjMgCip5jB1b2cdBzzMPb7hvGQ4w31gt80D+5oLWZwxmKGVkjUWszgGFnMcOcW522kW++MLGeQUHaBbpSNLGjoG8rukD3uWjV34ZVqfja7LfcDOaTScbbAHbKPXk97Z0UNumlX6dUrK2uQQdW9PnCNLGzoHcoKswbBdEwanEsaOLjcEZQmDdCQlzC1niuCDSc9pDVpgIa8IGmRRK1JA3GbBrlsWFSC9iX4gMxLGmCriLF2pxiZlzTAqG8YDzdp0LpjiYFEbkO3stSQ50sNIamhjiuB0JCTEPWT+7UJonZ7T2ukm1dV6eEz40ib2qpMWajBGEtPB+rH2B/F6zw6XkdTn7Q+d7Fa8fKGHHyoJx2aSbrKt9AoUlPJB0O6/QNPZSxBEcZSwb5+jN0KjMcQeuchdGSXaloqalGVBtG9IXtetfFX8Vhv/YSntfzL80YSWyARW1pJNK+6DEvusPYXEnnm1ZZhAKQ5RfvrcsSq3CCUfZF+6Q3GQ86b1it5N4VcGMpjTfXT/rhiJPWO9i5I7xGX5aZceOdD7NsQYAxsUYx/eLK+cq+izAo2rybNK5VTan95AR5DwBfQ8bsZwR4u9QM5bdmxGvtjEPgSOt0EgXtAu3lhYM8tgap7hStKEkxCGcvV1PpRVlnypCrc7vRrdeAPt+SpyTTRzXvZsCv1AtWTxJAD6/Wkd1TSpJt080qaMJQg1T5jGVjSJBXpascYWKNDewke0I1H63vFKLPq6CIAn8Gh/RThxY9SWWoIWJZz5/Q0usgHglFi7ENJjHW7DwBUYNq7UalRjK999u3MCJaKDVWvJQEY8i98NqDdNZR28yIyvlRkqn99C6oqID85yPLvfOgH2bxX0/u+27fcIwBDrv5rME9cW5p5dl1e6grKpwo8Mn8BHnhtPWePmVdZyKhIWr0SqtpnMNGyQTADC9j9w7lBGGH88Y/sCGTJiXooio/e/dB/6d2fpaa6Dh8JmsdfANH+JJL23wABsEGp4igCe65s6Z188IMSYPs1DXUuAA2CHSPnufCDdjj3UE1DH+acbSY0U93j4Uy+5l9pSLIj/gc= -------------------------------------------------------------------------------- /notebooks/source/10_level_wise_vs_leaf_wise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_level_wise_vs_leaf_wise.png -------------------------------------------------------------------------------- /notebooks/source/10_lgb中类别特征的处理.drawio: -------------------------------------------------------------------------------- 1 | 7ZpRb5swEIB/DY+bgsGEPCY06yptUqdM2ro3Bxvw5uAInIb0188EEyCmKVHbUKFIkWKffca+z747kxiWt8puE7SOvnNMmAFGODOsGwMAcwRs+ZVLdoUE2qNCECYUq06VYEGfSKmppBuKSdroKDhngq6bQp/HMfFFQ4aShG+b3QLOmk9do5BogoWPmC79RbGICqkLxpX8K6FhVD7ZdCZFywqVndVK0ghhvq2JrLlheQnnoiitMo+w3HilXQq9L8+0HiaWkFh0UXgCf2zyg87u7366tx6ebO6c8JOl5iZ25YITvokxyXVGhjXjiYh4yGPEvnG+lkJTCv8SIXYKFdoILkWRWDHVSjIqfufqn6GqPdRabjI18r6yKyuxSHY1pbz6UG+r1Pa1Ui/gsVATMSeyXqyGYI1pZSQlSvkm8ckJy5SbDSUhESf6gQNKeQYIXxE5P6mXEIYEfWzOA6nNGB76VbxkQSE7Ax/U8OVrX6hqzGP5NbsSPY+o3SdRNe4jYhv1pNyI0xPHNOewjaggizXar38rPXET4N6oStdsMXJAGfM448l+bAsj4ga+lKci4f9IrcXxXbIMDlgeSSJIdhqMbkilMFZ+UQWGQ31budlSFNU8bBk/3tzyQLO8rZ8uxmTgIS9bHKXrIhoFNMsp6SY/RnKEABIX220IXLC0HOdtEFhNBFYLAtiCAL4XgvHVnXV3Z3ZHd+b06c4mV6JvT9Ttk6jdGqBmAwxQZgf3eNEI5Wimb8n/hhWhJk0Edt8RyjR1i18d2klH1cGhmaP2TXChlFu/BF+Zvp5prxdjtzVKeUOMUu7LLvKiUap8c1ezvTP0MGXaTQjQ6TtO6bfZwecKwLY/GAT3GljOeD3X9fpjwl6zheuV9l2g9vqawmy/1U6lyBh7+QeMhppAOObRNRfqbtMEF80goEajnONwoxc8zuN6j17664bhUxiDD0YB6Nm0xiCN0DovBoxk0/w3772Hx6p44zOUptRvcUXPu54gCIDf6nqws3Sgc0aU0G3d0ZadXb96wj2n8sHVjxvHKI8ZFcFLaVWYtIEgfGGgIrhpA+15H5b9ii2g/yZozG1j5hkT15hDw5UFJy9MRoY713aHPAyiyb7JVGUs9Q2gRIjRMM43kARKpHyWHy3qIzZVDSuKMXvu7DdToNOn/fUH1xo3k38b6AfXdfTNBs4/uLJa/WujgFz998Wa/wc= -------------------------------------------------------------------------------- /notebooks/source/10_lgb中类别特征的处理.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_lgb中类别特征的处理.png -------------------------------------------------------------------------------- /notebooks/source/10_randomforest.drawio: -------------------------------------------------------------------------------- 1 | 7Vptb6M4EP41/tgV2GDgY0jYdturdHfRaXf7ZeWCE7gjOCJOk/TXnwmmgeAQmje6batK9YxfYs8zz8zYKUD9yfI6JdPwngU0BlALlgANAIS6ASHIfrVglWssjHLFOI0COWijGEbPVCo1qZ1HAZ1VBnLGYh5Nq0qfJQn1eUVH0pQtqsNGLK5+6pSMaU0x9Elc136PAh7mWhtaG/0NjcZh8ck6dvKeCSkGy5PMQhKwRUmFPID6KWM8b02WfRpnxivsks/7uqP3ZWMpTXibCb+ih9nCuYmGEPMHfrdyTXxzhfNVnkg8lwcGngNsC9iDomEBD4OeBlx55BlfFcZJ2TwJaLa+BpDLUh6yMUtI/AdjU6HUhfJfyvlKwkrmnAlVyCex7KXLiP8otX9mS30xpTRYypXXwqoQEp6ufpSF0qxM3ExbS8W8ur2kCWdsnvryRGPn1x18QPfXtz/n/TuvN//2d3hV+B1Jx5Q3GNPMx9Gg4lISjWvKJlTsRwxIaUx49FT1MCIddfwyboOlaEg4XwEtdI7CNjvFUIoJS8Qf9xPuioGdNwU30j7hPivcEHeFd9O2K3DboOeBnoDbAq4LbJTh7ogG3hO5F2HE6XBK1nZaiERexXEUxXGfxSxdz0VUD0xqCf2Mp+w/WupxsIUIzmawhBfepBdy7ia62YTWE005XTaaV/YWaVXWFQhLeVHK0lIVlhK0oZ0JD1OBhwncPnD1rCFQ6WHgGcB1MgYKjSMatt6AjN4CmS271uxeRU7gZgeGCjkbPiKMT4OMaVShgVrH0MC6/5fD3SayeRvtCaLfgZGsjCk6KrKZ9cimto9x6sgmp/7JIrHnF8ewthwDIfOLVvrZWjA/n1xjywVeNnWEVxg7GdtrYOxFCTuyfer7KsI+2qZhNia+9oS1O4ylf91GN/P7b189R2fDW/Q8+OdZv1IhI8oXJ4Mig8gDInCKUsbuAXfQrqY5F8kPKHGUZNvL8KYbR5nhSoN2WpmqMuPOSqUpIb6+VBmNKFbzJ7CcR01TMfAMpYq+PyFal+SXCpH3w6/jrhAn4aXTkpewS16q3gd28hJ+AF6q8t65eKm+w6NGjnV5Lz+cCzrurAw9Cgxd9Ta6/0bXRJT3cqPrvEBUQfOZwE6YwKCCtEokUJcUhSo/2JnC6sH1/aUwE3adwsyPwbJDSIUUT82/QyYs9v3KTNjEt/eSCVV8O1cmVGOjToVZ5DPWSGjArr9ZidPyqrWrVpOVZtnEUkXiaJwI0Rc2pELvZraLfBL3ZMckCoI1qVXwVoleRfR4cCy0VaaYdXAMBTjwXGWK1SY9/Sbh8rDiYvdrVIswaF4o6DVtsoIdBg4Grnax56uAUHukrDGwb9PHUesao8YhBXA7aYXs/c9XziWrf/uTVjsfk1p8kZ1fbLviFTyIV6d9fnqbvFLdqi/Kq1YPgh+PV6glr9ClivSmXb6SV6e9E79NXqlq9BPxSoibf+DMv5je/Bss8v4H -------------------------------------------------------------------------------- /notebooks/source/10_randomforest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_randomforest.png -------------------------------------------------------------------------------- /notebooks/source/10_stacking简介.drawio: -------------------------------------------------------------------------------- 1 | 7Vtdb6M4FP01fpxVwMaYR0jIzI62UlfVaptHAk7CLsFZ4rRJf/0aMF+BZhwlhEynVaXii+04PuceX9u3AI7X+6+Jt1k9sIBGQB8FewAnQNc1pOsg/R0Fh9xiYpgblkkYyEqV4Sl8o9I4ktZdGNBtoyJnLOLhpmn0WRxTnzdsXpKw12a1BYuan7rxlrRlePK9qG39Owz4KrcS3azs32i4XBWfrGErf7P2isrym2xXXsBeayboAjhOGOP503o/plE6ecW85O2m77wtB5bQmKs0+PN7+G338PvUtTT29B2+Tf56077oJO/mxYt28hs/y+HyQzEHNBBTIoss4Su2ZLEXuZXVSdguDmj6QSNRqur8wdhGGDVh/IdyfpD4ejvOhGnF15F8S/chf649z9KufjNkabKXPWeFgywsWMxlhxoU5Xzc6WDfnSFp2rJd4tNT04Ik1bxkSfmpirhEUrgAZWvKk4NomNDI4+FLcySe5OKyrFfBJR4kYuegh1roAR1HYsDOvAEh/m+X0iybsi/bbM5sUUGzNvvqpXhaZn9dAmwX2JOyr6R81ba4JnAcQKBaZQwsAzgj1cpkko5EofLz+5XEzM4r2xG1m8R9XYWcPm28jBuvQtCaJD0m3CKMojGLWJL1BQODkgAJ+5Yn7F9ae0P0OcS4/PwXmnC6P03SNqVkAyTlROophLL8WqkTlqZVTZhQIahXJyE02iR0LWDrgKAUQ8cApD3xd6opAoTk8Fwv1FqlxapZVrqFFhmKWgT1QbWoiwaFkJQiIfhgC6MGXANYbkoMaD9oLXZc1S09ShZ+l1tin9D54jpuWQYsJ/xSG3U4ptGbX+JPv+zVL7GqX8Ih/bLgV40GsyFRr5Ce1YH+YYSHBkIv17XBVLXLidVUVe9VVcfiZzq9taoa2uCqan6qaq+qaqqqKhrSL1F73/ypqmegN/C+ucuJ1VQV9qqqljWd3l5VsTW4qraDlA9/DHUB+WXTRxaKHktYS8iKo1bNaHaRu69sVT8pPOromB+tjnL3bnWUQV9+nwvYoH2y4epsgMaV2NDqqGc2FAdeHxd7hYUV6qphERlyYS2G2VhYs/XUytZTsYxaY7GMPvZ75GPjMZyO+15GoXVvRz6oPa2/oKtcetzSLYNYP9qKwhsvilDRtfrd99uu7fQfoR671vD7ftSe1jt1rTvc1EPV69SBV6+O69ROF+t3E2jCW2wCj11s+E0gat8gPf4sTten8xBF50GDnojB9olYcQO/3XhxA8Xz0hGqjIZaFgFWziIwAbGAY6lVNoDjAqINm8zwqJrMUNryGb5BjoODXeLALm26Zo4DVrhMvW2SA+qIvs7KtEGC2hlkrWQbnJ4sOuPswQW2oZS3IuaWN4FrAhKzmB6hJ01eFC5jUfQFFlTYnRSp0PciW75Yh0GQCWkXU5pc6j5Jvgh6A/848kMd0Ou9Id8VFKidFffqiQviU78zrWFODGSMrgOHad5bIG78NHvcKhCvYu9Z0YdiIH7l+xmkeus9bCxhtI+RBr1d+1VQz+/FBrtT7ch1uDybdaYantkjQEzVkFJov3MilvvYOacmUQvHNPN8qohilRKfn51V/1gA3f8B -------------------------------------------------------------------------------- /notebooks/source/10_stacking简介.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_stacking简介.png -------------------------------------------------------------------------------- /notebooks/source/10_捆绑特征合并.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_捆绑特征合并.png -------------------------------------------------------------------------------- /notebooks/source/10_提升树训练.drawio: -------------------------------------------------------------------------------- 1 | 7VtRd6I4FP41eZw5kECAR7E43e3MOXO2D1v7ModKVHaReDBW3V+/QYKARIqVITod+1ByQzDky3fvdy8I0HCx/ZL4y/k3GpAIQC3YAnQHINR12+T/Ussus2CEM8MsCQNxUmF4DP8jwqgJ6zoMyKpyIqM0YuGyapzQOCYTVrH5SUI31dOmNKp+69KfkZrhceJHdevfYcDmmdWGVmG/J+Fsnn+zjp2sZ+HnJ4s7Wc39gG5KJuQBNEwoZdnRYjskUbp4+bpk40Yneg8TS0jM2gz4ET6vNs59+Agxe2YPO9fE958EGCu2y284oes4IOkYDSCXJmxOZzT2o6+ULrlR58Z/CGM7AZW/ZpSb5mwRiV6yDdlT6XicXuqzKVp3W3HlfWOXN2KW7J7KjdKotFkM27fycdnsSVDDsFgUYVrRdTIRZ82cHw/wGX378ud4PXzwBus//pp/yjeXn8wIa1gx8wAd3/OELgifDx+XkMhn4Wt1Hr7YfLPDeQU+/EBAdAZc0Knhld78o2jGNOb/3N8QNkPoqIQQab8hvBxCiHvCsGmWr360Ft8EPBsMPDC4A54FXBfYCHgOcPgBfnrDxW7mISOPS3+/LhseRatATsMoGtKIJvuxiOiBSSxuX7GE/ktKPQ62kI/TETRm+XbS83a2T3TzAJcEnVeSMLItmerrK3rzmCaCOsKivSmFSGGal6Kjof0kQGwJIA4YQGAbwMPANYHt1nEo866gmFdYu6NhQb1xqUdOwzJeqAmvN9lkSpl5AWvE0O805FM57AYDV7fDAfv8EhmdxagjqA/TeD/6pgR9E7hD4OrpASflAAPPAC7fElpqcfiBrdf2Q4G23oKXR6yqsa7KW85aOzBkvLXhC8K4G14aR8SEmmJiwrrA7IN17wxknTOvHMfk62P0Q1F87LGR+VkrfY4umN3fTyNsftcSxg7SA9vmcXMfSfU0mOaM7ZWwU3tCJhMZYV9s0zC1bghraeoiqVRYWW2kjcJAeo6efZ8uPZ31teCz0uxQV4rMORKncNCFTx5XXHKXmUZbROWLClVCKpM2VppXcEZyYcvljG2Nm5TM+RnGdEqw3PEFlvOiaTLXKc0wal5QAtTpDEN/W8lYfTpG3VBKrvek8TroIY2XLpZ9E+RqkzXu5YjGpcn3jllmpn8yluH9pwXL0nZpXPbphn0GNKvss1Wzz7lUl6gsrHXILKcts/oqkMnLc2avWNyY9GiLIUQqMZQxDgOH+0VNokHgB9AgsuSsXy/YTKsrEvgdskWHLemi9LFOPssSXb7S1aqOVxSFyxVpWa44XZ4YjYbD0UjGkNEI431PFwxooQOMPhkAZfWJ8yr9KoNPl7TAbaOIUo2NPoQqk4HRUPlU5qSwhD1vPylpiu0Kn5RcmOFcWeEVNtcXOvdat1VQgG1FgFrNnE+zZUmhY9F8UyUFw1AspmH9edK1yIQryFEhaks4tYUG9BFkt4wrvcpuJPNrN1N+6/e9tgaWtKATuvT5/2V0kgnE00WfOuS/XtHHhIrjFFJa9LkxniHJC6RXmIjl0zwzEWvi26+SiMn41msihmQ+MAtxxh4JLX0/6RgIvgCsutrVVRPxr7zEwuRH4SzmzQlfMMLtbrqc4cSPBqJjEQZBdEq6VIleRfRycPBxBdxsJ0Tg+eDwZvFbmOxFs+IXRcj7Hw== -------------------------------------------------------------------------------- /notebooks/source/10_提升树训练.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_提升树训练.png -------------------------------------------------------------------------------- /notebooks/source/10_梯度提升分类树.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_梯度提升分类树.png -------------------------------------------------------------------------------- /notebooks/source/10_特征捆绑.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_特征捆绑.png -------------------------------------------------------------------------------- /notebooks/source/10_直方图算法.drawio: -------------------------------------------------------------------------------- 1 | 1ZnbjpswEIafhstWgAmH203Sk1qpUip1t3cODIctwaxjFtKnr1lMgOBErYSIkXLh+ccG/I01HjsaWh+qjxTn8TcSQKqZelBpaKOZpmGZplb/9ODUKI6NGiGiSSA6dcIu+QNC1IVaJAEcBx0ZISlL8qHokywDnw00TCkph91Ckg7fmuMIRsLOx+lY/ZkELG5U13Q6/RMkUdy+2bC9xnPAbWcxk2OMA1L2JLTV0JoSwprWoVpDWsNruTTjPlzxnj+MQsb+ZcBnB54LpLM4d4OX719+/Xgp1+/EU15xWrQTfm+IkB3ZqaVASZEFUD9I19BDGScMdjn2a2/J4861mB1Sbhm8GSZpuiYpoW9jUYDBDX2uHxklv6HnsX0X9iH3iI8AyqC6OjvjzIwvNiAHYPTEu7QDWsxinRm2sMsuakhIcS9grYbFOonOT+5Q8oag+R9kTQlZc7VEsivFyCIJWctdIllXMbKWhKzjLZDseftQhazhjChCwDcaYRLKYhKRDKfbTn0Ycu76fCUkF3SfgbGT2DVxwciQPVQJexTD6/ZTT99UPcfm1BoZn+1j33jqG92gN6sd1cytntDtePH5k4L6cAOUyJAM0wjYLaBXFgCFFLPkdfgh04dznG7UDqehejite4ZzJcl73hKrINNSLO/ZI7LmMqsg01GMrCMhi5a4ZpGuGFlXQnaRVRBCipH1JHl24tNmGILtS8kGjrfX9WnIGhdr9pwd7lZf6hK0EyfamdAi1dDK7kgmPm/OhNZWDa3skmTiVDsTWk81tLJbkolr2tD1QY52766s1URoLyuv+6MdX5NMXtTOg/ay9Lo/2vFJbPKqdia0ym1jsqPY1Ll2HrTzbWPc7P6gefP1/uZC278= -------------------------------------------------------------------------------- /notebooks/source/10_直方图算法.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_直方图算法.png -------------------------------------------------------------------------------- /notebooks/source/10_集成学习_极大似然估计.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/10_集成学习_极大似然估计.png -------------------------------------------------------------------------------- /notebooks/source/11_Z概率分布.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/11_Z概率分布.png -------------------------------------------------------------------------------- /notebooks/source/11_凹函数的性质.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/11_凹函数的性质.png -------------------------------------------------------------------------------- /notebooks/source/12_CRF_线性链.drawio: -------------------------------------------------------------------------------- 1 | 7Zpbb9owFMc/DY9DcZwLeVxp12nTLhKTWh7dxBBvIUbGLbBPP4fYudi0CaxpIjapDzkn9nF8/j8OJ6YjOF3tbhlax19ohJORbUW7Ebwe2TZwbHuU/VnRPvf4wMkdS0YiOah0zMhvLJ2W9D6SCG9qAzmlCSfrujOkaYpDXvMhxui2PmxBk/qqa7TEhmMWosT03pGIx7l3Yvul/yMmy1itDLwgv7NCarDcySZGEd1WXPBmBKeMUp5frXZTnGTJU3nJ53145m7xYAynvM0EEtx8DX/8vLt9+HQ1iaez5Ntn+E5GeULJo9zwXLo2fK9yIOKIdAvjahsTjmdrFGZ3tkJx4Yv5KhEWEJdos841WJAdFsteLUiSTGlC2SEQXCywF4bCv+GM/sKVO5EfPFhWNoOmfCaXBsrOqQBQ2PJ5MeN492wiQJFewSWmK8zZXgyRExwpyL5ubkt5lSuuKKt8SAK1LOKWORcXMu0nSABNCexLlwDYw9LAMTWAl66BbQ1LA9fQYDweX7wIk2GJ4JkfhPTSNYDesDTwDQ3uu5MAg8jF/jEJAs+HyOunFhXfD31pEBgamBKk0fuswRRWSlNcT7ueI7wj/F7ey67n4toau9K6ztJgKWOvjFTs5L5qVGZlZjntYKl5+ZPiyGhsNTXEbugjC3Fze8gRW2Le1MOY6lbUc4+op3wMJ4iTp/rjHpNUrvCdErGR8vMb1OEpOm4VIt+mnFXtkLVAjtYZAl8LlOfBCHQgrNj2+dCpd55/nTrYkjqnT+o06HRU2jIH4MtxukbOfAF8e+ReER2nJTpun+gA7dtuci47zstxumbHHgA7AyhXbkvmvF6ZA3VWgnOZc1+O0zVz5mnJ6zBXcjavENjEXInZvEbZceYitImzHvwQ/hUBbNul+b0CqB35FEXwZAK9hkBdI2geFv1HsG3L1i+CWq9V1MSTEfQbAnWNoHlW9vYIdoVSr1293lKBc98lbb3c6YG6RsQ8yesCEeusKgV6qlJ+yyrV79uB1mEVVevkKhU0BOoaQfMg8y8RfFucht3QNxaY1pVKP3vVD1XPxkSY5c/b+fDynwTgzR8= -------------------------------------------------------------------------------- /notebooks/source/12_CRF_线性链.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_CRF_线性链.png -------------------------------------------------------------------------------- /notebooks/source/12_HMM_NER.drawio: -------------------------------------------------------------------------------- 1 | 5Zpdb5swFIZ/DZed+DTmkqRpN2mVpkXTtksHDgGNxMw4hezXzwQTwscok9qCxFXs4+/3OTS8cRVjfcgfGUnCJ+pDrOiqnyvGvaLrmmpg8VFEzmXEMmVgzyJfdqoD2+gPVCNl9BT5kDY6ckpjHiXNoEePR/B4I0YYo1mzW0Dj5qoJ2UMnsPVI3I1+j3wellGs23X8I0T7sFpZQ07ZciBVZ3mSNCQ+zW5CxkYx1oxSXpYO+RriQrxKl3Lcwz9arxtjcORjBuBvn9nPANRNBl/ZJxzqjxm9k5t9JvFJHlhulp8rBRg9HX0oJlEVY5WFEYdtQryiNRPMRSzkh1jUNFEM6JFLiBoq6lEcr2lM2WUuIwgAeZ6Ip5zRX3DT4tvOTi0W6B5LnvQZGIf8JiSP+Qj0AJydRRfZaljliHOzmtX8UJVe4Q07S8aITJn9deJaVVGQwv6HyFqPyCgWq652DanR71ORDBcR79KLiq7ooKEkvwhTtYvS/vK5sRT8oODrdGJ3u2tjm6PYqHho4GWGJE3KJymI8oL7S1B9C7Bv9kHF+s5A6HWgmmoDalW9gWr2MDXfiqn+Ckx7gGLFtRcCVNPnRdQYIlrxMZWVq2C0BD66Oi8+5hg+4g/iWnHsRfDB8+JjjX5+3PUS+BhoXnzQuOfHNRXXWQIfc2ZvFPZIPhsFL4KPNbP3AzyGj1N8+bh4CXzQzN4PKkv3Xl4WNKGz3aeyg2yDvNa3SNPLaqir8jub2UE3W6b6duL8J4CD3l8ZkIdhF7yJIdVQ92eG983/QUu6IDBtYzk9mRHWchFk2pZyejIjTOVqEWTw3MiMsJNPSyDTtpHTkxlhJBdBpm0gpyczwkIugkzbOk5PZoR53CyBTNs0Tk9mxA1oGpKkKAYx5G5xpyy0gKMvi/deTNI08obNI+QR/yEq6gdL1n7KnkX5vpBHrSpnWWlfnmIP+i9Pd9gyrcHL05SemAcDOkgZOGF74EN6SRDgN+7Nu8BvgFoD/pRBTHj0DI3d9lGWK3yhkTha/a7vNPPJaCdKeXA5qs6VzkRtO6c7rYlKZToTXZLueuy+PBTV+pK/7F7/q4Sx+Qs= -------------------------------------------------------------------------------- /notebooks/source/12_HMM_NER.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_HMM_NER.png -------------------------------------------------------------------------------- /notebooks/source/12_HMM_中文分词1.drawio: -------------------------------------------------------------------------------- 1 | 5Zpbb5swGIZ/jS87cTTmkqRpN2mTplXT1ksHPgIaiZlxCtmvnwkmhENTKrUlElKl2p8P2O9jE79xkLncFvecptE3FkCCDC0okHmLDEO3DAOVf1pwqCIONqvAhseBqtQEHuJ/oIKaiu7jALJWRcFYIuK0HfTZbge+aMUo5yxvVwtZ0n5qSjfQCzz4NOlHf8WBiKooMZwm/hniTVQ/WcduVbKldWU1kyyiAcvPQuYKmUvOmKhS22IJSSlerUvV7u6Z0tPAOOzEmAbk51f+GIK2yuEH/0Ii4z5nN2qwTzTZqwmrwYpDrQBn+10AZScaMhd5FAt4SKlfluaSuYxFYpvInC6TIdsJBVHHZT5OkiVLGD/2ZYYhYN+X8Uxw9gfOSgLHXWvlA9SAgAsonp2pftJPLjxgWxD8IKuoBqZdtTi0s3nDD9fLKzpjZ6sYVUtmc+q4UVUmlLCvEFkfEBkn8qmLdUtq/HdfLoajiDfZUUVPVtBxWhyFqctlanP8v7IRuUPk1J0c3fpU2OUoByo3DbzMkGZptZPCuCi5vwQ1sIEE1hBUYqxNjN8GqqW1oNbZM6jWAFPrvZgab8B0AChBnjMToLpxXUTNS0RrPhZaeIjgOfAxtOviY43hI1+IS+Q6s+BDrouPPXr/eMs58DHxdfHB4/aPZyHPnQMf68pOFM5IPitEZsHHvrLzARnDxy0/fDwyBz74ys4HtaX7KC8LutTZGVLZxY5J3+pTpO1lddxX+YPN7EU32/iYhTSmU5/DKJBw8NsG7BNYh+9iTHXc/7rhY/fBRWt6dlAmMwHUNZrTExplNWdEqGs1pyc0ymzO6CXXNZvTExplN2e0h7p2c3pCowznjPZQ13BOT2iU5ZwRoa7lnJ7QKNM5o7dc13ROT2jEDWoW0bRMhgkUXnknLbWAXaCSt35Csyz2L5tPKGLxW2a0T7bKPaqaZfq2lEerMweV6V6+Eh+GL1/XxLbs5vIVgt6deIePnB7bcx8uCKN0EZRvQFwSUBsGfgbUvuBvOSRUxE/t8Q5RVk/4zmI5k8YbuO31ZHYXSjVP1apZK72OujbQcDsdVUL0OjouutO0h9ahzDY/EqiqNz+1MFf/AQ== -------------------------------------------------------------------------------- /notebooks/source/12_HMM_中文分词1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_HMM_中文分词1.png -------------------------------------------------------------------------------- /notebooks/source/12_HMM_中文分词2.drawio: -------------------------------------------------------------------------------- 1 | 5Zpdb5swFIZ/jS878WnMJfloN2mVpkXT1ksHDgGNxMw4DdmvnwkmBEJTJrUFCalS7WNj7Pc5JrxxkDnf5g+cptEjCyBBhhbkyFwgw9Atw0DFnxYcy4iDzTKw4XGgOtWBVfwXVFBT0X0cQNboKBhLRJw2gz7b7cAXjRjlnB2a3UKWNO+a0g1cBVY+Ta6jP+NARGWUGE4d/wzxJqrurGO3bNnSqrNaSRbRgB0uQuYSmXPOmChL23wOSSFepUt53f0LreeJcdiJPheQH1/5Uwja8gDf+RcSGQ8Hdqcm+0yTvVqwmqw4Vgpwtt8FUAyiIXN2iGIBq5T6RetBMpexSGwTWdNlMWQ7oSDquKjHSTJnCeOnscwwBOz7Mp4Jzn7DRUvguGutuIGaEHAB+Ysr1c/6ycQDtgXBj7KLusC0yyuOzeqh5oer9Iou2NkqRlXKbM4D16rKghL2P0TWO0TGibzrbN2QGv/ZF8lwEvEuO6noyQ46TvOTMFW7LG1O/5c2IveInIeTs1ufG9sc5UTlpoHXGdIsLXdSGOcF99egBjaQwOqCSoy1ifHbQLW0BtSqegHV6mBqvRdT4w2YdgAlyHMmAlQ3xkXUvEW04mOhmYcIngIfQxsXH6sPH/lAnCPXmQQfMi4+du/9482nwMfE4+KD++0fz0KeOwU+1sjeKJyefJaITIKPPbL3A9KHj1t8+HhkCnzwyN4PKkv3UV4WdKmz06Wyix2TvtWnSNPL6vha5Q82szfdbJnqs4HznwIJO79lwD6BdfguhlTH118zfGz+37SkJYPlFMC0jeXwZHpYy9UUyLQt5fBkepjKSTzM2mZyeDI97OQknmZtGzk8mR5GchJ7pm0ghyfTw0I+ToFM2zoOT6aHeZzE06xtGocn0+MENItoWhTDBHKvOFOWWsAuUMWFn9Asi/3b5hHyWPySFe2TrWpPqmdRXhTyaFXlqCrtw1PiQ/fh6ZrYll0fnkJwdabd4iOXx/bchxvCKF0E5RsQtwTUuoFfALVv+FMOCRXxc3O+XZTVHb6xWK6kftd3m/lkthOlXKe6qs6Vq4Hads5wWwOVQlwNdEq687K78lBW60P+snv9Uwlz+Q8= -------------------------------------------------------------------------------- /notebooks/source/12_HMM_中文分词2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_HMM_中文分词2.png -------------------------------------------------------------------------------- /notebooks/source/12_HMM前向后向.drawio: -------------------------------------------------------------------------------- 1 | 7VrZkps4FP0aHtsFYvWjF5ykalKdSWcq6XlTgzBMZMRguY3n60cYsYPtjs3S6a7yA7pakM65OvcKS5AXm+hDCAP3M7ERFoBoR4K8FACQFACE+Cfah8Sia3JiWIeezRvlhgfvP8SNIrfuPBttSw0pIZh6QdloEd9HFi3ZYBiSfbmZQ3D5rQFco5rhwYK4bv3u2dRNrAbQc/tH5K3d9M2SNk1qNjBtzFeydaFN9gWTbAryIiSEJk+baIFwDF6KS9Jv1VKbTSxEPr2kAzL+lhZYWf4133rg8Gk+c//8eifxYZ4h3vEV89nSQwpBSHa+jeJRREGe712PoocAWnHtnpHObC7dYFaS2KPjYbwgmITHvvJqtQKLBbNvaUh+oqYaG27d4+gSL3yBlKLQP1qAGFv5HFFIUdS6einDlDkjIhtEwwNrwjsAg9NwyIhKyvucVSmlyi0wqincmbgjrbOhc6zZA4f7BdAb3SJvSkvV1JuQz2oGQj4tn0Ve6wh5vVvkbRUZttKEPMPdWCq9Ia+I530+E9oi8pIsdgS9+kacXlfPIy+qHSH/YfXZfA7s+5/mU/TRucdfo5l7p9WARjaLdLxIQuqSNfEhNnPrvExF3uYPQgKO4D+I0gMP23BHSZkeFHn0R+H5MR5qovLSMuIjHwsHXmiBmk2d7EILnVhimh/AcI3oiXbc5+L1n+QyRBhS77mcCdycGGk6JDM5G4+FmqGYSdORsVDToFYaZguYP7GHNT1ikhgcwjApkqj9uyNpxd32SMMs1hcjiPLKdJRPUjoOm2cyVHl4Zi68suouGLN0GJ1XS7gNkhzZ8aLYeWqRCyLDsZrkU7MM9OSc8oGaXLZqYzUfAPV8QGkISkpXyqi/FmVkkIeHH8VCoVdczLsdS9fvW3DhvpVHtW2zE9foFTVnNCfxscThUIxKI6O0LyWeTCZvSIpB9YQwtBYb71rcm8byrl+Id9ww6aFRqbhEletEOnivCt3ZNK7Y6fUT4UjF+2oqz4uwOioRlntLh703pMEyGJkGA+W17MD+0yf10p1rDLVzGz+7Dcpof2G1gvbNKG7EFAwpxU3fUd/z4au1uJb8pEMMpcWN/891QvT9OD5BOQ7SrEaebX36JJ5U71/+BJUdgwajuT212gbQ/3VWa85S3s81H2K25IW/B8/V8+3wRLcL942JvvfeEM/VHHp4npv+3f+tA3Q/RFcDdPyHbV9EN1+gGTKz7u+s9NLMuj1jviCzlnvKrE9NsoePHN9GsW8vTqyvug+hasMl1u0+1ksc/va64vBNaR5cnrU6zaYqGFPBWMYPU1GY1hWcrZ+W0S2D5hMfVRDmJoi9dXyTyGJ4IWafx2h6FsQzXrHxbBu30VmOC6WLSskE0huoGvdGHiCAmpb5GtrC6kuolBR9Ur7E1HBvT2vgEnTF5bSRS0ahYb4qLpsvnXXNp6KL59jU+2RTagi0piHMV8KU0anFpM5UwdQFAwjz+hWpd17TI+70LK+SeBtiWTG/rp7835df+pfN/wE= -------------------------------------------------------------------------------- /notebooks/source/12_HMM前向后向.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_HMM前向后向.png -------------------------------------------------------------------------------- /notebooks/source/12_HMM前向后向2.drawio: -------------------------------------------------------------------------------- 1 | 7ZvZcqM4FIafhsukEDuXXnA6VZNyT9Iz05k7BYQhkS03lmM8Tz/CCGNAXrptlrRz40JHSKD/k46OhCypg2l8F8F58EA8hCVF9mJJHUqKYluA/SaGdWrQFSM1TKLQS00gNzyF/yFulLl1GXpoUbiREoJpOC8aXTKbIZcWbDCKyKp4m09w8alzOEEVw5MLcdX6T+jRILVaipnbv6BwEmRPBoad5kxhdjNvySKAHlntmFRHUgcRITS9msYDhBPtMl3ScqM9udsXi9CMnlLg6aH/9/TOf/6BDAe8xm+P2vTxBmhpNe8QL3mL+dvSdSZBRJYzDyW1yJLaXwUhRU9z6Ca5K8ac2QI6xSwF2KUfYjwgmESbsupoNFIGA2Zf0Ii8IVGOBxfBpnbAE18hpSiabSyKnFj5O6KIonhv68FWU9YXEZkiGq3ZLbyAJnMMvB8Ci6dXOVWgcluwQ9Tg/RXyjjTZVp1rzS643D8jvXKt0gPjROm186VH1r9ggLXhX/1FqKzv+73gTyY9+DjSV3QW0NgrvWKdIL3SpPRWvco7YKg7pkj5bU5Lyov8jVD5C/gbofJmvcp7OrI8TaQ8090aao0pr6vH+/w2vNhVfuuDLi69fiWd3tSPKy/rNSl/N3pw3ufe+M15ib/4Y/wY94IboyI08lh8x5MkogGZkBnETm7tF1Hk9/xByJwr+IooXfNgFS4pKeJBcUi/71w/J1Xd6jw1jHnNm8SaJ/aqvyDLyEUHmphFxTCaIHrgPt7nkvYfZBkhDGn4Xox/Lw4G2G2SyWk87+S0RSYLR7qCRuCtDMwa0H9hFxO60SQ1+IRpsgvR+LEkWcbNYoOhl/gXax7nmVkt9yCrh71nWlWxembeeWS5u2DMFoHouLeEi3m6MvTDOOk8lZkLIst3Re7TcC304l/GN5bjAaUaD2iCSUmryzOaH8UzMsmj9ffdxE6pJJkX26TOH7fKieNW7dSw3e4zdN6j5kRziM8Fhm0RBR1D2pQnvr29vSJXrJQ3JNr2xdanL27Mx/KiX0m4GTDZHpVW6hJl1qnr4KVKuLevccZIr64IO+q8z0Z53AnrnXLCamPhcHhFPlhVOuaDFe2jjMDmwyf91JFrtTVyhdturRLt9rQqICqUsFubRqJ91M94+Pwdc6Psi0G7vlj4fa4W0ONubEH5PjJcIWfPtF/kg2P9l7egtsug1jDvD60Wczj7daqVzlIcz5U+xGzpA38PzuX1bfug9zvuC4Meh1fEuRxDt89Z9HX/t56gmwFdnqCTD7ZNgRYfoGkzsu7MWulQxHw0sm5tp/nQWzewyfGtE+O2mcDaqHz0a3ncNhZwjb9d0Txcxty6ezaqmB1dsmzJGiYXtizZVQ/OGkuL6hZFm5EZKinMTRCHk+QkkcskRMzeT6QLXYh7PGMaeh7eh7M4LxQOKqUvkJ27Nnhv5BOEomdp3oZ9B1N/BiXQzNviISbBuT1DwFKpi6UtZMkQWs6HYik+dFY3T6O0FBLQNJukKTr07VhSfyTZDKeRQO3pkmNKliL1q0ekPrlmSx/zKFcg1wRWeJq/1U+4zUXEpXD1aIgs1Mquhsjif0jI4j5w8ZBY/PRWFznNfT6oC+n+Hfy2iAqm0nqi3/vXTkS/Jy9yzvt7T/noRIOLnANuo5FVTjc4n7zKuSjnGlc5LJn/LzI9YpP/uVR1/gc= -------------------------------------------------------------------------------- /notebooks/source/12_HMM前向后向2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_HMM前向后向2.png -------------------------------------------------------------------------------- /notebooks/source/12_HMM定义.drawio: -------------------------------------------------------------------------------- 1 | 7Vpbb9owFP41eSyK7RDSx5bSdtIqpPVh66NJnMtqcOYYCPv1cxLnBqXKJhqHUYkHn+/Yx5fP57NjYaDpMn3gOA6fmEeoAU0vNdCdASGwIDSyn+ntCmRiowIIeOSpSjXwHP0mCjQVuo48krQqCsaoiOI26LLViriihWHO2bZdzWe03WuMA3IAPLuYHqLfI0+EBerASY0/kigIy56BfV14lrisrGaShNhj2waEZgaacsZEUVqmU0KzxSvXpWh3f8RbDYyTlejS4OH+abaJvfnrbJE++nP6Lb0Jr2w1NrErJ0w8OX9lMi5CFrAVprMaveVsvfJIFtWUVl3nK2OxBIEEfxIhdopMvBZMQqFYUuUlaSR+NMovWajRWFl3qYqcGztlFOPMBnd0+gpK2Jq75J05l9sI84CId+rBiiS5uwlbEsF3sh0nFIto0x4HVtssqOrVTMiCIuMviAHXOpmp2XhpeAbDDABaqSnibjBdq54MaFM53tuFLAQiX5MC8JlcgiaJ9q81Kx1XSU7DjawAnDitnWWUL6CMI8dZhGqHl3Cjy/3tQqkUyWxbbMNIkOcY52u/lTrdJh0ncaGcfpRmm+fWjyidMsp4Hgh5mDi+K/FEcPZKGh7bdcjCrzrfEC5I+v4uOCRNNXCUTKpzApSyua1V11JQ2BDcEjs5y5NzUUa5vnz3o2k0WmVm3Sy3PiBvYce8RTrTtjqHB6+oNaM1iS8tDgfDKNBLaW9KDC9IiaE5MCl2PqW4c+Kijolrac1beC5SrIEZMNZJDepNUtEFSSqCA5NUrV+XZyapVsfE1Zq3EJ2LpA7gdtuVUTDRSal1VIqTGK/+XXkPBH00Gr0tuhVWdPh/aLFlDUyLofWZup1Td9w1dR2dqTvu7RYlLugWNbYHlrmgt6fg+TCegn2f2O6bPHuT64Vpnobnvafg6j1CG83HP4pOfBLP4SAO4n5o3n9n0s/zcdk+Nc/ognje//jVz/Pk0m7W/RC9f7PWT7TT2/k8jHtYPzzv38M+kGdp1v+QyX2N/xmh2R8= -------------------------------------------------------------------------------- /notebooks/source/12_HMM定义.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_HMM定义.png -------------------------------------------------------------------------------- /notebooks/source/12_HMM维特比1.drawio: -------------------------------------------------------------------------------- 1 | 7Zxbb5swFMc/DY+dwOb62KQ3res2qZPWPbrgJGwOzhynSfbpZ8BACGmVpsEmLVWl4IPN5fzs8zfHJAYcTlfXDM0mdzTCxABmtDLghQGAZQNgpP9mtM4tnmXnhjGLI1mpMtzH/7A0mtK6iCM8r1XklBIez+rGkCYJDnnNhhijy3q1ESX1s87QGDcM9yEiTevPOOKT3OoDr7Lf4Hg8Kc5suUG+Z4qKyvJO5hMU0eWGCV4acMgo5fnWdDXEJHVe4Ze83dUze8sLYzjh+zSwb3/cLj6jrzf8Gj0w17rDT9/OLCgvjq+LO2Z0kUQ4bWQacEAZn9AxTRD5QulMGC1h/I05X0tWaMGpME34lMi9eBXzh43tX+mhPjmydLGSR84Ka1mYc0b/4CEllGVXAa+uTPFX7il8L7w2yK8YRw1wlSekaU4XLMQv3X7JQXRgTKeYs7VoxzBBPH6qHx/JnjQu65VNv9NYnBmYstcDT/YB2efLLlAcgiM2xly2qpCJjY3LqEwZyFdALW+/gpp6614WE5qIj0E3OJum5HxsqiJOZF5+oZ79Rvo1bq+GBDoLSTicrR82Cxut0mLVLCt1Eq6rE6487hMiC3mmJmxChIaljJeTmOP7GcpueylktE4QzWe5sI3iVdoTBqOYkA0nRwj7o3CX+93Qx4+j0v1PmHG8ehlA02FFA9esxTSriGnLShVtaZpsCKJttuVitx8/h4wfcArB0fJ6uG3CdbTC9Xu4bcLVqnxAofKN/BCHO5Xv0Xdsp3L/UZUPmLqVL+jHzyHjB55CcCyzBd2D+zykQ7FrgPvW4PjcE389RkBT8RN/M43TlV7zLkKC1skwVCmpI+zultTICx7NtiTV1yypwO7HzyHjx95z/HhaJdXp4bYJ19cKt08DtQo30AnXVqh8atKopdJ1Jo36AZRPzVqicxJa2CdWW4WrVwv7xGqrcLVqoaNQC9UkVre1UHtiFXQ3sapkHLSTG7PNrSnPNr+Wc2OwuxnVTkdF9xQkD3b3Vad3AVer5LkqJU9J4rMheboTn9YHyI2pefzz9hxRFtAaL3cMoY7wVhL39qZkaqXUL+e1S/et732/ia6nUNbUZDUbU3zdWU34AbKabQwg/zTCY3cnLd2ipDXMwX5lrl26WieSvkIRU5OO3BYx7elI2K/VHDSAgtMQse4u1qijdPRkswO2BrHir17C7i4hvI8xq1X0ApWipyQh2RA97QlJU6GP1Twdb8ck7U/Hlsqvp6qZvDXivu7JmwXeXaxo+Fh3rLB3xIrLwAjOjUDscYk4/+CR1Xzu/l2kv7cxGNGEn80zeT0XFYA7W2VOKvaLrXH2eekZ/rkxcI92PNcQbvOby5aCC69Tr8OUU4dN8tKESDxORDEUHLGwD1LKcYjIudwxjaOIPNfN6nOR9D6KyYpVlPNZCHCP042234B09utFjdcG9uhGolj9vko+xat+pQZe/gc= -------------------------------------------------------------------------------- /notebooks/source/12_HMM维特比1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_HMM维特比1.png -------------------------------------------------------------------------------- /notebooks/source/12_HMM维特比2.drawio: -------------------------------------------------------------------------------- 1 | 7Vxbb9owGP01edzk2Lk+Dkpbres2iUnrHt3EIdlCjIy57dcvECchhHYUiB1oKqTiL3Eu5/j7jnMMaKg/Xt4xPAkfqU9iDQJ/qaEbDULdgFBbv4C/yiK2hbLAiEW+2KkMDKO/RASBiM4in0wrO3JKYx5NqkGPJgnxeCWGGaOL6m4BjatnneARqQWGHo7r0Z+Rz8Ms6kC7jN+TaBTmZ9YtN9syxvnO4k6mIfbpYiuEBhrqM0p59m687JN4DV6OS9bv9oWtxYUxkvBDOhgPPx5mn/HXe36Hn5ilP5L5tw+6YGPKV/kdMzpLfLLuBDTUo4yHdEQTHH+hdJIG9TT4m3C+ElzhGadpKOTjWGwly4g/bb3/tT7UR1O0bpbiyJvGSjSmnNE/pE9jyjZXgW5vQfpXbMmxT1HrZVdM/BpxJRIiNKUz5pHXbr/gIR3AhI4JZ6u0HyMx5tG8enwsRtKo2K/o+p1G6ZkhEKMe2mIMiDFfDIH8EByzEeGiV0lZ+mbrMsrQhsg3kFrcfknqGq2haCY0Sf/12sEzAILnc7Oa1okNyq/sZ5zIfoW3N5MEW0tSCjhbPW03tnqtm2W3TauV5FoqyRXHneN4Js5UJzuOUw1bc7wII06GE7y57UUqo1UG8XSSCVsQLdcjoRdEcbwFso+JE3j74Lc8hzwHBfxzwjhZvk5AHbC8gwUqNU3Pa9qiVEVDhMItQTRAUxBbXf4ckz/wEoqjbnfkNkmuqZRcpyO3SXKVKh+UqHyB4xFvr/I9O6ZhlvCfVfkgUK18bpc/x+QPuoTiWLgF7SP3ZZKOpV0BuacWx5ee+Ks1AgHJT/x1G6cto+YqSoLSyTCSKakBsfZLqm+7z6ApSXUUSyo0uvw5Jn+MA/PHViqpZkduk+Q6SsntbKBGyXVVkmtIVD45NmqhdK2xUd+B8slZSzQvQgs7Y7VRctVqYWesNkquUi00JWqhHGN1VwuVG6uwvcaqlDxoxhszwM6UZ5e/hr0x1F5HtdVV0boEyUPt/ajTVZCrVPIsmZInxfisSZ5q41N/B96YnMc/+8CM0qHSerknhVrCt5S6dzBLQClL3XJes+ye+rnvk9i1JcqaHFezNsVX7Wqid+BqNpFAzmWUx/ZOWtrFktIyh7qVuWbZVTqRdCSKmBw7clfElNuRqFurOSqB3MsQsfYu1shj6exmswl3kljyVy9Re5cQriNnlYqeK1P0pBiSNdFTbkgCiRjLeTrerUnKn451mV9PlTN5q9V91ZM3HV5drahhrLpWmHs+GT9wtV5Pc3VtYGufBlqvvj5YFd//IB/QhAsVhlYdeKL7JrH3Ae9aNsLWeYAvVszzAqLXgYfWHuRrs58DoE+b5W+SZNOi8pdd0OAf -------------------------------------------------------------------------------- /notebooks/source/12_HMM维特比2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_HMM维特比2.png -------------------------------------------------------------------------------- /notebooks/source/12_HMM维特比3.drawio: -------------------------------------------------------------------------------- 1 | 7Vxbb9owGP01edzk2Lk+Dkpbres2iUnrHt3EIdlCjIy57dcvECchhHYUiB1oKqTiL3Eu5/j7jnMMaKg/Xt4xPAkfqU9iDQJ/qaEbDULdgFBbv4C/yiK2hbLAiEW+2KkMDKO/RASBiM4in0wrO3JKYx5NqkGPJgnxeCWGGaOL6m4BjatnneARqQWGHo7r0Z+Rz8Ms6kC7jN+TaBTmZ9YtN9syxvnO4k6mIfbpYiuEBhrqM0p59m687JN4DV6OS9bv9oWtxYUxkvBDOhgPPx5mn/HXe36Hn5ilP5L5tw+6YGPKV/kdMzpLfLLuBDTUo4yHdEQTHH+hdJIG9TT4m3C+ElzhGadpKOTjWGwly4g/bb3/tT7UR1O0bpbiyJvGSjSmnNE/pE9jyjZXgW5vQfpXbMmxT1HrZVdM/BpxJRIiNKUz5pHXbr/gIR3AhI4JZ6u0HyMx5tG8enwsRtKo2K/o+p1G6ZkhEKMe2mIMiDFfDIH8EByzEeGiV0lZ+mbrMsrQhsg3kFrcfknqGq2haCY0Sf/12sEzAILnc7Oa1okNyq/sZ5zIfoW3N5MEW0tSCjhbPW03tnqtm2W3TauV5FoqyRXHneN4Js5UJzuOUw1bc7wII06GE7y57UUqo1UG8XSSCVsQLdcjoRdEcbwFso+JE3j74Lc8hzwHBfxzwjhZvk5AHbC8gwUqNU3Pa9qiVEVDhMItQTRAUxBbXf4ckz/wEoqjbnfkNkmuqZRcpyO3SXKVKh+UqHyB4xFvr/I9O6ZhlvCfVfkgUK18bpc/x+QPuoTiWLgF7SP3ZZKOpV0BuacWx5ee+Ks1AgHJT/x1G6cto+YqSoLSyTCSKakBsfZLqm+7z6ApSXUUSyo0uvw5Jn+MA/PHViqpZkduk+Q6SsntbKBGyXVVkmtIVD45NmqhdK2xUd+B8slZSzQvQgs7Y7VRctVqYWesNkquUi00JWqhHGN1VwuVG6uwvcaqlDxoxhszwM6UZ5e/hr0x1F5HtdVV0boEyUPt/ajTVZCrVPIsmZInxfisSZ5q41N/B96YnMc/+8CM0qHSerknhVrCt5S6dzBLQClL3XJes+ye+rnvk9i1JcqaHFezNsVX7Wqid+BqNpFAzmWUx/ZOWtrFktIyh7qVuWbZVTqRdCSKmBw7clfElNuRqFurOSqB3MsQsfYu1shj6exmswl3kljyVy9Re5cQriNnlYqeK1P0pBiSNdFTbkgCiRjLeTrerUnKn451mV9PlTN5q9V91ZM3HV5drahhrLpWmHs+GT9wtV5Pc3VtYGufBlqvvj5YFd//IB/QhAsVhlYdeKL7JrH3Ae9aNsLWeYAvVszzAqLXgdf3IV+b/RwAfdosf5MkmxaVv+yCBv8A -------------------------------------------------------------------------------- /notebooks/source/12_HMM维特比3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_HMM维特比3.png -------------------------------------------------------------------------------- /notebooks/source/12_MCMC总结.drawio: -------------------------------------------------------------------------------- 1 | 7VlLc5swEP41OrYDCIQ4Gpsk02mm0/GhyVEgYWiw5ZHl2O6vr4RFgEAc2vrVmcxw0H5aPdDut7sCAMfz7a0gy+yeU1YAx6JbACfAcWzXcYB+LLrbIz6Ce2AmcmqUamCa/2IGtAy6zilbtRQl54XMl20w4YsFS2QLI0LwTVst5UV71SWZsQ4wTUjRRX/kVGZ7FDt+jd+xfJZVK9so2PfMSaVs3mSVEco3DQhGAI4F53Lfmm/HrNCHV53LftzNG70vGxNsIQcNiPylR6bb77/ybw9fnsb5M2WfXLM3uatemFH1/kbkQmZ8xhekiGo0FHy9oEzPaimp1vnK+VKBtgJ/Mil3xphkLbmCMjkvTC/b5vKh0X7UU332jDTZmplLYVcJCyl2D02hMUqL9bBSqsalfCHNRmxXyd1TMwe54muRsANHVXkfETMmD+gZ/9bn2FjA2OSW8TlT+1MKghVE5s9tPyPGXWcverVFVcMY9Q8MbOZ9JsXarHQ/BlEAsK+fCIGRBUK/4wVtG2+yXLLpkpTns1FEb9szzYtizAsuyrEwTRlKEoWvpOBPrNFD/SC2+qxSymb1HseuXoIJybYHD9X0uoZzu7a4aTDYQFmDvJXa0Y3gfbBsKMuc/5JlzhssG6lHNTwQWiBwQeSDkQ8CWyMjF4xCrRNAEEZHJSD1GKZuHwGxE0OEzk9Ax7owA7sR7oOBh5n1LgPRVTEQ9jDw5JmOEobT3kyHEszi9AxEs1GbabZzYaYFl2Raza7HRk8/047HGDSQMfiqGIO6jLk7eWWYOm9UhihG3jkSE3SujC+23TniKyVMnZrqbPTYSkanTk14INGqjwdXwjTcYZouAPEEBF7ZwACjink3Jychs1V96PeRMEA+JJcg4cUvaJXDNEx0m8fx6tTxECesPx7G2HO9c9yUX5sCwkubolvJgcgFIdaE0fcmdZmaDLDLOYPo39T39r/V98P84P2PWkPv29dVu9hub0j98I2L+MaVpdtq3y3nQCAMwcjWjWBSNlyNhKj0EgSwo7/O4Kj0m5ec7JVxBx416CeUxTjuC/oQwQDSdw16oIIaHve91ykYnyzuK7H+y1H2Nf4Vweg3 -------------------------------------------------------------------------------- /notebooks/source/12_MCMC总结.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_MCMC总结.png -------------------------------------------------------------------------------- /notebooks/source/12_MC_天气demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_MC_天气demo.png -------------------------------------------------------------------------------- /notebooks/source/12_MC初探.drawio: -------------------------------------------------------------------------------- 1 | 7VhLj5swEP41HDcCDCQ5dpO0PbTdlaKqaW9eMI+tYagxAfrra8AO0CgRlVqSKjnF883D45n5LGINreLyHcNp+BE8QjVT90oNrTXTNHS0ED81UrWIbUkgYJEnjTpgG/0kylOieeSRbGDIASiP0iHoQpIQlw8wzBgUQzMf6HDXFAfkCNi6mB6jXyKPhy26MOcd/p5EQah2Npxlq4mxMpYnyULsQdGD0EZDKwbA21Vcrgiti6fq0vq9PaE9JMZIwsc4fHqOqzX3n5N98RmV8bdX7+npwZG58UodmHji/FIExkMIIMF006GPDPLEI3VUXUidzQeAVICGAF8J55VsJs45CCjkMZVaUkZ811t/rUPNbCmtSxm5ESoptHnWyZ08voQyyJlLzpxZjRFmAeFn7MxDk8R0E4gJZ5XwY4RiHu2HeWA5ZsHBruuEWMhm/EFjZNw9prncSTMdKtJ9fBGLgDclaQEfRAX6PXR+5KAUD1nThTfCwHDSslOqKDtDxRF5tqGG4QXc2/L3aaFUMLGeiiKMONmmuCl9IS6DYc9xlrb09KOynp1HP6J0BRRYEwj5PnFcV+AZZ/Cd9DTefPmidyOwJ4yT8vwQHDdNOSykS6VuGUnOouO2LaGwR2uF/fU2z/8X/okCs2rXF3petdi5NdI/4K05krfokrw1J+OteUO8RYqn18LbxZ23o3mLRvLWuiRv0WS8RTfEW8u4Mt4u77wdzVtrJG/tS/LWmoy31g3x1jbt6+KtPVWbZ7PZDfXZQVf2f8gwjkt6v6BPFUsfeUOfGIKJXjL0ya5o/ZaoO9mXlRC7V8pG13vrRZtf -------------------------------------------------------------------------------- /notebooks/source/12_MC初探.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_MC初探.png -------------------------------------------------------------------------------- /notebooks/source/12_MC并行计算.drawio: -------------------------------------------------------------------------------- 1 | 7VnLcpswFP0aL9NBEsh4GTtpu2ibzGQ6TbuTkXikArmyHON+fYURBhmc0hm/OvYqukdXF3HOPYLgAZqk+QdJZvFnQRkfQIfmA3Q3gBA4yNd/CmRVIp5rgEgm1CTVwFPym1UrDbpIKJtbiUoIrpKZDQYiy1igLIxIKZZ2Wii4fdUZiVgLeAoIb6PfEqriEvXhsMY/siSKqysDPCpnUlIlmzuZx4SKZQNC9wM0kUKocpTmE8YL8ipeynXvd8xuNiZZpvos+PKYru5U+Ji9Lr+iPP3xQh8eboBblnklfGHu2OxWrSoKpFhklBVVnAEaL+NEsacZCYrZpRZdY7FKuY6AHlIyj9e5RRAmnE8EF3JdCFGP+dTV+FxJ8ZM1Znw4RRjrGbMbJhXLd94n2LCn246JlCm50ilmgecYwk3HjeA7rwSWtYCwSoob4mGTR0zPRJvaNa16YJj9F5bREVlmQPM87GJ5hIeI7Ill6J8fy/CYvUyYHwZdLOPAZ9NwX718biTjFqWM6gPThEKqWEQiI/y+Rsc26XXOJyFmht0XptTKnP5koYQtBMsT9dwYfy9KaSrK6C43ldfBygTlPovNvc29vhexkAF7q7HMc4fIiKk38mC3lpJxopJXex/77/6O5sdcb3c81YNIrSkpgVBoBpoa4l8LUU3czNcq3OoEgGd5PVlVeQZVHb3PspRdXsONS253C+f60c3+bj4yn5XP8zDJi97ZNmAYMhx0GpAOR1PH2Y8BgQ8sB4LKbA0Deh3+q7C9yzz8X/ynCZar52bQWFWE9bJ1dADfwp6+Raf0bddD6zC+hRfkW7T1Enhy3/pX3/b2LerpW/eUvu16pT+Mb9EF+dYFZ+bb0dW3vX3r9vStd0rfdn3wOIxv3QvyrQe98/KtdzSZvQuSGaMz+3cIgDal1/N5F1lOzwN6RxMc6UOGczTrOpdk3TN7sQLtE/q2xbG+XWWTaXOUiYxtEWogwpMo02Gg6WEaHxfkJQHht2YiTSjlu9SzTwj7O7Duk+p42cTlQQH9/UgFoK0VbkvldkgFDyYVbknVtsNlSoXwmUk1bEk1uUpVnmT+saTSYf2z7nqu8eM4uv8D -------------------------------------------------------------------------------- /notebooks/source/12_MC并行计算.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_MC并行计算.png -------------------------------------------------------------------------------- /notebooks/source/12_beta分布的超参数.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_beta分布的超参数.png -------------------------------------------------------------------------------- /notebooks/source/12_gamma分布.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_gamma分布.png -------------------------------------------------------------------------------- /notebooks/source/12_pagerank_demo1.drawio: -------------------------------------------------------------------------------- 1 | 1Vldl5owEP01Pm4PEER83FXbPmzP8dSHbh8jDB9tJDREhf31DRIMiKJ2teCTzGUmJHMncwMO0GSVfmE4Dr5RF8jA0Nx0gKYDw9A1ZIufHMkKZKhpBeCz0JVOCliE71BGSnQdupDUHDmlhIdxHXRoFIHDaxhmjG7rbh4l9afG2IcGsHAwaaI/QpcHBWobI4V/hdAPyifr1ri4s8Kls1xJEmCXbisQmg3QhFHKi6tVOgGSJ6/MSxH3+cTd/cQYRPySgHdj7qdPSfA6XyzJ9202e/8zfhoWo2wwWcsFy8nyrMwAuCIh0qSMB9SnESYzhb4wuo5cyB+jCUv5vFIaC1AX4C/gPJPs4jWnAgr4isi7kIb8rXL9Mx/q01Ba01SOvDOy0og4y96qRiUqN1XYzirjPBpxOREjt4v15os8mVcJJXTNHGhJZlmfmPnAW/yMPfti2wBdgZifiGNAMA839XlgWb/+3k9RLC4ky1cwbp1n/C586v/CZv+5NLvkcnTd7o1odNPtqrboNQSrHfogFKMuKdYbFD83OSZEyGFO7TYIOSxivFv3VihynTicxIVGemGaF0CRXTmQfiTbXkjIhBLKdg9C7hBs1xR4whn9DZU7trFElrXnZwOMQ9rOUDOjZYAlFTMrNVXaWyXApoSCivaW2O050G6hks6abXa7Tu9cMvXeSKbxn9rsLvSZMZxVHGIaRjypjDzPAVWIpnlQiOjghHXGH+naQe0VM1CVuF/KB4qz2SF6U5x1gdAuK87+CMSlxXmind23OJFdL7ZRe23qre73KU2jUZkv3WmX54HlOMe0yx2Nl5oqnQ9pF+qddjVJ6E97eOzXPdTn9qAbrVp01v9A6+7TIFCjNicdNgjbgeMNYmkPzeGNGsTh4dYYd9wg7Ov6w83fIR/7/Gpe2AM6fYccd0zxDU6Bev8p7vSrntmgeNpdJ3Ux2N7RTmo5Niy9+xy17thJham+0xfip/7tQLO/ -------------------------------------------------------------------------------- /notebooks/source/12_pagerank_demo1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_pagerank_demo1.png -------------------------------------------------------------------------------- /notebooks/source/12_pagerank_demo2.drawio: -------------------------------------------------------------------------------- 1 | 1VhNl5owFP01LqcHCCIuZ9S2i+k5nrrodBnh8dFGQkMQmF/fIMGAqKMdLM5K3+W9kLx7k5vDCM02+ReG4+AbdYGMDM3NR2g+MgxdQ7b4KZGiQsamBHwWujJJAavwFepKiaahC0krkVNKeBi3QYdGETi8hWHGaNZO8yhpvzXGPnSAlYNJF/0RujyoUNuYKPwrhH5Qv1m3ptWTDa6T5UqSALs0a0BoMUIzRimv/m3yGZCyeXVfqrrPJ57uJ8Yg4pcUvBpLP39Iguflak2+Z8Xi9c/0YVyNssUklQuWk+VF3QFwRUNkSBkPqE8jTBYKfWI0jVwoX6OJSOU8UxoLUBfgL+C8kOzilFMBBXxD5FPIQ/7S+P+zHOrTWEbzXI68C4o6iDgrXppBo6oMVdkuqus8GnE5EaOMq/WWizzZVwklNGUOnGlmrU/MfOBn8ow9+2LbAN2AmJ+oY0AwD7fteWCpX3+fpygWfyTLVzBuvc34TfjU/4XN++fSHJLLyXW7N6JRr9tVbdFrCFY79INQjIakWO9Q/NjlmBBhhyW1WRByWMV4t+5MOHKbOJzElUd6YV4KoOquHEg/0m0vJGRGCWW7FyF3DLZrCjzhjP6GxhPbWCPL2vOzBcYhP89Qt6N1gSUds6g9VcaZMmBTQkHDe2usfw60PlzSSdl2t+v0wS1TvxvLNP7TMbsrfWQMF42EmIYRTxojL0tACdE0D4SIDm5Yb+QjXTvQXjUDpcT9Ut4hzu4JcTfibBuEdpk478cgLhXniePstuJEdltsk/Pa1M+m30aaRkeZT8N5l+eB5TjHvMudTNeaks67vAvdm3ehDgezATmwHTjOwdoem+OeODi8PxjTgTmwrzuhe7+mf+wrgnnhKTzoNX06MMU9GK1+/xQP+uHE7FA8H+4kdTHY3tGT1HJsWHu3cbMbnqQiVJ9CqwuI+qCMFn8B -------------------------------------------------------------------------------- /notebooks/source/12_pagerank_demo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_pagerank_demo2.png -------------------------------------------------------------------------------- /notebooks/source/12_sampling_经验分布demo.drawio: -------------------------------------------------------------------------------- 1 | 7Zxdj6I8FMc/DZe7oZTScjnOOrvZZDebzMVzzUBVsggGcdT99A8IKD34UlBAFDOZ4KEe4P87tj3HgoJf55vvobWY/Qoc7ima6mwU/E3RNGaQ+H9i2KYGndDUMA1dJzWhg+Hd/cczo5pZV67Dl0LDKAi8yF2IRjvwfW5Hgs0Kw2AtNpsEnnjUhTXlJcO7bXll63+uE82yy9Lowf6Du9NZfmRkmOmeuZU3zq5kObOcYF0w4bGCX8MgiNKt+eaVe4l2uS7p595O7N2fWMj9SOYD259EW/3++D2jn8GXP/52TCZvX7TUy6flrbILzk422uYKcN95SYSM39metVy6toJHs2juxQYUb6Yf4E5Jx8OJof3lxmHCgzmPwm3cZH0QlGQizQpa5raQe1bkforurYzrdO9uf4Q/gRsfWFOzENQzN1kAIoZFD8tgFdo8+1BRv/N+dOAmssIpj0pu4o3CNR9MOzgVQOEBlCwozbjgqGFUeiVUfuBzkVNC6D1r6y53p+Pa44NxNHE97zXwgnDnDHOCdFWN7csoDP7ywp63Nxy/eoCeiMRMtSZ5TfRj4K+MqoUXkQqEGI21LTRbJA2Wp88eYfGwTOiI443U4U2DjFwOsnjAWSSbE49vsmgbXeojSiHkUPNjF1wg6CYTbth2D0Kr1BtoNYMLkwuOGu5WjDLxsakwmvyNDeVFVUa0FAMR30TnCGe9TxFuZrI8d+onYRJj5LF99MnDyI0nRi/ZjrnrOLvuaD1zI/6+sOzkmOt4FhjbwmDlOzy5nF3sBH6U92gZwMQb31QNmZwpO95fFCJKPxJREFgxeARaVdHQxx+cMYODql7zawQc6cBPw98iNqCSRmWoFxw1zApdO5M6PSESe6S8ZZ7haT2grJfg1B3XTOAIzr6apiwxlXlayiU4dSmXwqVtykemLwPlHI7GbkQZsW4pV5sJPRllCKc2ZRgubVM2B8qnKWN8I8o67pRyni0NlGXg1KYMw6VtymigfJoyQTeibKBuKWsD5dOUIZzalGG4tE252o9Ez0bZvBFlanZLeaiKnKEM4dTvsTumPFRFTlMmt6p9EaR2S3moipyhDOHUptxx7UsbqiJnKGNyI8o66ZZytZ+WnowyhFObMgyXtikPta8zlKkuwslnY5UpM+CIma1Szhf0DJRl4NSmDMOlbcpD7esM5Xyh9tU9tgEctdxjY22gfJoyhFObMgyXtilL1L6GBZFJCwMskkWoZvdNYX4GHTWN/EghbEwVc6SM9GRFpGkoI1MZk51lXIqGx1waaYBV9PmQ2tXSSFytjtXLBXcUrgyuu+AOOmp5bSSuVo16blYm7XZxJL62qPSEt5lQcC+AiWqyBzd8GPpXRqXoV721hMHuHIFYauDWEixRyBIHsWPDXCHSQCA5hDNHPxZITPvAhrEPpKsGQmpoImtcGgjRkUiD09bbDYQShaMeqEqBquU7L46qihtSNf969FxVJqrK5FRFamOySlRGeiCrKcpKJWVFjckqUYq4f1mZem+ySuT+PZAV3VsnILHQpAeyavcmq0RG3ANZsShr5xMBidy1B6rqQNWOJ626RJLZA1WJqCpS5YIVPjfidrI+RIbFjHuT9SFSLEbvTFbyEDkWAzlW110reYgUi4EUq+tpQN7T91tVE2RYnc9ZyUNkWCbIsDpPXMnwqKir74ivW8Mn4LeA5mr4Ovil6Loafvz28MzAtPnhwYt4/D8= -------------------------------------------------------------------------------- /notebooks/source/12_sampling_经验分布demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_sampling_经验分布demo.png -------------------------------------------------------------------------------- /notebooks/source/12_半朴素贝叶斯SPODE.drawio: -------------------------------------------------------------------------------- 1 | 7Vldb5swFP01PDYKGBPy2CRt99Bp2bppzaMDDtA5mBmTwH79DJjw0dZKG5UiJVKk+B5ff91zLlxAA/NtesdQ5H+lLiaaMXZTDSw0w9BNw9Dy39jNSmRigRLwWOBKpxp4CP5hCY4lmgQujluOnFLCg6gNOjQMscNbGGKM7ttuG0raq0bIw8+ABweR5+jvwOV+idrGpMa/4MDzq5V1a1r2bFHlLE8S+8il+wYEbjQwZ5TysrVN55jkwaviUo67faX3sDGGQ37MAPjN/rlbwcWP5fQemoaNyfbXlS53G/OsOjF2RQCkGdJQ/M0YTUIX5/OMhUUZ96lHQ0TuKY0EqAvwCXOeSfpQwqmAfL4lshenAX/Mh4+gtFaNnkUqZy6MrDJCzrLGoNxcNfvqYYVVjXseFxmqmCbMwYpgSKlyxDzMFX5SwXmgGgvIqN9husViP8KBYYJ4sGsrCUlBege/mjPRkLS9gcKDvi4UHk+hDofFoT5YDnvgwhoWF8ZguRhwPk2GxSG4cPh2Du1hcVjOu0MkkStphkXEAWZxhMIWudbfJK+iZhsa8qu4YOtaOOgwSutO0fLkfzHLugJWFSK2ue56CaxcroK7siJElKK5mvZ+wPFDhAou9qIabmsFxVFZn26CNNfcbBMQMqeEsmIi4EJsu2Z+Os7oH9zosY01sCyVBHaYcZwqOZO9AMhqQZbjU2nu69rWlJDfKGsr7BSSwfXS+3Gve36UzUnyffG0hPxKfd+rU/KmRmdOwnZF1upDy2H9qBzu5Nf7khq8qdDpIYdfpFd9K73Qezq91on0FkOvGUNZwyGiQcjjxszLHKivItP2RaSyb19xB1DpLxrlBmqdHU7yfumpK4CL9E6X3mQA0gPdlx4dd3Oi9P8Y6ZkX6X2w9OwhSA+qpQfV/idLTxXBjy+aH/VBVM0b28GO81LVvLahCZUPTsdXzZ2LyOHldA9Vs/LtVQ80G2dEszEeGs9WbzyDM+K5+xD8+TxP+uJ5NBqdEdFm98Hj04m2e0vo8Ix4hv3doIVZf7Itq7b6wze4+Q8= -------------------------------------------------------------------------------- /notebooks/source/12_半朴素贝叶斯SPODE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_半朴素贝叶斯SPODE.png -------------------------------------------------------------------------------- /notebooks/source/12_半朴素贝叶斯TAN1.drawio: -------------------------------------------------------------------------------- 1 | 5VnZctsgFP0aPTojkPDy6CVNH9Kpm7TT+BFLWFIrC1XGW7++YIEWZEeOE9meeiYzgQNc4J6jywUb1nC+eUhw7H+hLgkNaLobwxoZEAIbQkP8me42RTrATgEvCVzZKQeeg79EgqZEl4FLFqWOjNKQBXEZdGgUEYeVMJwkdF3uNqNhedYYe6QCPDs4rKI/A5f5KdqFnRz/TALPVzODdi9tmWPVWe5k4WOXrguQdW9Yw4RSlpbmmyEJhfOUX9Jxnw60ZgtLSMSOGYC+dr+vJmj0NO49Iht2STj/0bJSKyscLuWGDdgOub3BIsaRWDXbSle0/yzFUgczGrHWYkdUn3cAKN7kjbzkyf87K1MFvAAF8RVO9W4cS+dTMCxNDflOOOG8Mlj7ASPPMXZEy5prjmM+m4e8BngRL+JUBbNgQ1yx2iAMhzSkyc6QNes6xHHE9lhCf5NCy7SLbGRmkxf9Kl29IgkjmwIk/fxA6JywZMu7yNaOpFxqviur61xAtoT8gnYUhqVkvcxuziovSGLfQDJAZ2MZ3hDLEF0Zze2z0WzdEM3axwytS9PcORvN9g3RrH/N5+TZ6o+9p0fg+fF2GC6/jX6NEWv1KjS3QNXJkdsXWQ6vRTQiZceSTcBeCuUJL5t3SNZGYuOmqmxVJeJrfylWCqNENR+2q6lx6cqIW8mmKpQs6DJxSH1OwnDiEVZ7rFUJLRCG9hCmsISEmAWr8nr3sShnGNOA7yTTi9XT9NLWhJDuU44q5mWaIdvWDAHNUOqIiqGdqLJtn64zNV0pnpwis1xak0JLncxyZU1KwroembUvKTMAy+oAujqOlVmmz0OGmpZZNQlt7REaZ/YRT/klsnxEhIEX8bLDWSY8+A9EXA/4La0vG+aB6wobg4TwYw5Pd/aEYGKxn90O0cBAo0xClYMhu0LKwUZ2cSuK65Uv6ODJYt7BntkrOV+J71RxqC50NluQZugCVbo6N3H8qDS6PjB0LhoYLO17hqcGhk6NoaYDA3x/onP500SFt2tXjZZsZCp6s2q6NYaaVs2+l6t3R6cTYhO4xsh00cwYaDcpoF+RjtZYr8ZQ0xqzG9DY63nxB6rl6Pz2sgFJT0vRiWKxzBpDTYul+vrWQhW58JyTlTVSftSQGiq+gEjo+BR433NLQpeRKx5XCirbkwJXdHYwqc2uD8rdR75+6snFhz2XgD3vJe3/2P/a+QvNxvzPq/mvVOnnkv/WZ93/Aw== -------------------------------------------------------------------------------- /notebooks/source/12_半朴素贝叶斯TAN1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_半朴素贝叶斯TAN1.png -------------------------------------------------------------------------------- /notebooks/source/12_半朴素贝叶斯TAN2.drawio: -------------------------------------------------------------------------------- 1 | 7Vffj5swDP5reOxUfrXc45XetodO666bdn1MwYXcAmYhtLC/fgGSAu2uuklVd1InIYE/24njz3aEYftJ+YGTLP6EITDDGoelYc8NyzIdyzLqZxxWLTI1nRaIOA2VUQes6C9Q4FihBQ0hHxgKRCZoNgQDTFMIxAAjnON+aLZFNtw1IxGcAKuAsFP0Ow1F3KKeNe3wj0CjWO9sTu5aTUK0sTpJHpMQ9z3IfjBsnyOK9ispfWB18nReWr/3L2gPgXFIxWsc7Ptl9LgwozirfFZ8mT8vXTHSy+wIK9SJR4qxXFQ6CRDKnCgxxVS+ZhyLNIR66bGUkIsYI0wJWyBmEjQl+AxCVIpRUgiUUCwSprRQUvFUu79zlbTuaealWrkRKi2kglc9p1pc93WdWyNpv/ZA9SlezJ2Ccix4oKzcz97X3dqdPy7vFq5jecCSbyNb1SDhEYgzdubkQLHsDcAEZEDSkQMjgu6GgRBVpNHBruNRfigq/4ZW65RW8z+tF6DVvRKt56LssWpYEybjneUZSQfkTn4W9WCZbTEVo7xh614amG5Wdkr5Fal3s8pGA0+mhmScm2MzibX7afi4rhiT47kup31MBawy0uR+L2+IYbGQPGtn9paWddHNtpQxHxnyZiF76wUQBPXxBMcf0NNsPNdxuxr4A+U74ALKHnRKmtZO1ITWV5QS9928dxQU90a9xi7fvKc0v+HmvWATts31mi6c/ssu1GFeoQ2tG2pDyx62oXnFPjxzhV+FZ/uGePbeGs3Tq9Hs3BDNx+184P3yPEux+41qdL2fUfvhNw== -------------------------------------------------------------------------------- /notebooks/source/12_半朴素贝叶斯TAN2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_半朴素贝叶斯TAN2.png -------------------------------------------------------------------------------- /notebooks/source/12_半朴素贝叶斯TAN3.drawio: -------------------------------------------------------------------------------- 1 | 7VlNc5swEP01HNMBCTA5xnbaHtKpm7TT5CiDDKQyokLY0F9fCSQDtuNxgmu7E89kJuzTrj72rVYr2YCjefGJoTT6QgNMDGAGhQHHBgCWDYAh/8ygrJGBC2sgZHGglBrgIf6DFWgqNI8DnHUUOaWEx2kX9GmSYJ93MMQYXXbVZpR0R01RiDeABx+RTfRnHPCoRj0waPDPOA4jPbLlXtctc6SV1UqyCAV02YLgrQFHjFJef82LESbSedovtd3HF1pXE2M44fsYwJtJeH9nhVFajkj+bfw8cfgVULPNeKlXjAPhACVSxiMa0gSR2wYdMponAZbdmkJqdO4oTQVoCfAZc14qNlHOqYAiPieqFRcxf5TmHxwlPbVaxoXquRJKLSSclS0jKa6spNCYVZK283O2qOYqFevFyhW+6EQFZTRnvtJyvnrfF0/O+H5yfefYwMNk/uNKhTFHLMR8h57lrLgWmwTTORazE4YME8TjRXciSEVruNJrCBUfitNX8AvNN/Dbcdl/RfYp+HV78luZ3jCGypZCSuOEZ62eJxIQCiqxeopXlVWBu7b319Qh2KkvPuoJNIG2WslesbfLgwtEcuUFA7hE+HKYpSjpBKX7O5dpcDijCb/Kqki6EQqWkxZNo/gK1f+ql6kGHi0NiXlO19UEVo+n4fX9QIg4TGTcL6OY44cUVXGxFOdZN5BRltYnzCwu5IYYzmJCRpRQVnUEZ56PfV8ujzP6C7dapp5jO018bgnHBWYcFy1oM6BUq9tl0tKRsGxOJ1tBUetg0tjhMwy8ZJi3ZZj6ZNgnxQxOkWKA+bocY9snyDHah0dIMuAdJRnLO12W2XHKHoVn+I54hua58Tw4Gs/2O+LZBmdWNQBnZ9WQ0OTsLppNLXDIymC7d+w9KwPYszDox6F74fAAHJ72gUA/ZF1I7Edi31eAfiR6FxIPQWLfe1Y/Eu1jlT5PZ1H5BA72Antb5eOBKXTd1eAbZc6WMHix8gFrt1L7nxU+Qmwe8+v7bfOTCLz9Cw== -------------------------------------------------------------------------------- /notebooks/source/12_半朴素贝叶斯TAN3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_半朴素贝叶斯TAN3.png -------------------------------------------------------------------------------- /notebooks/source/12_单纯形.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_单纯形.png -------------------------------------------------------------------------------- /notebooks/source/12_接受拒绝采样demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_接受拒绝采样demo.png -------------------------------------------------------------------------------- /notebooks/source/12_接受拒绝采样率低demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_接受拒绝采样率低demo.png -------------------------------------------------------------------------------- /notebooks/source/12_无向图demo.drawio: -------------------------------------------------------------------------------- 1 | 1VjBbqMwEP2aHLMCDIQc07Tb1UorVcph26ODh2AtYOQ4CezXrwkGDCRKm4LoSjl4nu0Zz5unmYgZWsfZM8dp+IsRiGaWQbIZepxZlmlb1qz4GSQvkYVpl8COU6IONcCG/gUFGgo9UAL71kHBWCRo2gZ9liTgixaGOWen9rGARe2oKd5BD9j4OOqjvykRYYl61qLBfwDdhVVk012WOzGuDqtM9iEm7KRB6GmG1pwxUa7ibA1RQV7FS3nv+5Xd+mEcEvGeC6t4+RNjyOPVPFvlx+cjSedz5eWIo4NK+FVBe5FXHEg/km5pPJxCKmCTYr/YOcmKSywUcSQtUy7xPi1rENAMZNiHgEbRmkWMnx2hIADX9yW+F5z9AW2HLJZbwyhusERsVOjqccAFZFezNmsupQiBxSB4Lo+oC55iX8nPVuapqWUFhVoZKwwr9exqvw3BcqE4/gDfVp9vazy+CQYvuMi363uwDUbgu6buixCO+oSjEQl3wCP2JcI9a4tcd3yBm9bEhNt9wu0RO4rnw+WOsvUc2xmjo3QVPjnjTo/xPt8JWRXDUFoJS6DNMWRUvGrrN7k2vjnKeizSNiojV0YZAUhveHZYlK9gB+7D7REkMN+BuNU6+1XRWHcusF5hHCIs6LH93EulUBFeGJWJ1EVH3TlidqpZpqlu6VO448hGbUfI7Tgqeeg5OiujTvt+sSwGEksjkDdt58uIBU0pFrNTY/NesdQPuuZoZLF4nxVLIuNrainMN32v0cvZynXrBTiVCQAfXkX2/9ByzGtz5sMqcm84GllFywnmU6M8XXeaDK8ob0CRoXeKzJ5UZGZHG869Iuv2vK6jkUVW5TGsyu7Q2K3eNoHCpm1jxkBtrOdosH9O0mw+w5THm49Z6Okf -------------------------------------------------------------------------------- /notebooks/source/12_无向图demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_无向图demo.png -------------------------------------------------------------------------------- /notebooks/source/12_朴素贝叶斯概率图.drawio: -------------------------------------------------------------------------------- 1 | 7Zhtj5pAEMc/DS/PwC4gvqx6vb64pk1t0/PlCiPQLixdFsV++i6wPOmdaVMDXrzEBOY/M/v0G3REw4sof+AkCT4yD6iGdC/X8FJDyDAR0oqP7h0qZWqYleDz0FNBrbAKf4MSdaVmoQdpL1AwRkWY9EWXxTG4oqcRztm+H7ZltD9rQnw4EVYuoafq99ATQaU6aNrqHyD0g3pmw55VnojUwWonaUA8tu9I+F7DC86YqO6ifAG0OLz6XKq89y94m4VxiMXfJFifnK+7tbX88nn2aJnIARp9uzPUalNxqHcMnjwAZcYslpc5Z1nsQTGOLi3GRcB8FhP6yFgiRUOKP0CIg8JHMsGkFIiIKi/koXgq0ieWstYdzzJXI5fGoTZiwQ+dpMJcd31tWmnVedWGil28eFBKSlnGXThzOqp2BeE+iDNxuMEpnwNgEcj1yDwOlIhw118HUQXpN3EtM3mjsP0Dwqa+3hD+B0LDGpWhcbUMx2Bhj8oCXS2L1/Q8TUdliN8YXoChMyrDatwdoZmaSUM2leudpwmJe3DtX1nRRc23LBZ3aUnrnQwwrCRvnfLOV9dylE0trGtFLnNzHCW1arpaPi4rSmUrWlTTPggFrBJSHv1edsP9WiFpUvWn2zAvam6+DSldMMp4ORD2LHA8s9id4OwndDwO2mDbbibfAReQny+CU2YqAWPVLah2fKbMfdvbmkoKOm1trV0cMh4K8pNxFZS3jguu+xzljWOZln4ZytM+5ObP1FiUq+ZqEMzohjAj/do424NxxjfE+fhLe3zO06E4TyaTGwJdDHRdoJ3BHuj4hjhbw/1AS7N9xVj6Oi9q8f0f -------------------------------------------------------------------------------- /notebooks/source/12_朴素贝叶斯概率图.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_朴素贝叶斯概率图.png -------------------------------------------------------------------------------- /notebooks/source/12_概率分布之间的关系.drawio: -------------------------------------------------------------------------------- 1 | 7Vnfb5swEP5r/NiJn8Y8QkpbaZ00qQ9r9+YEk7ARnDpOk+yv3xlMgELbdE1CNk2q1LuzD8zdfZ/PDrJH8821oIvZFx6zDFlGvEH2JbIs0zEw/FOWbWnxXKc0TEUa60m14S79xbTR0NZVGrNla6LkPJPpom2c8DxnE9myUSH4uj0t4Vn7rQs6ZR3D3YRmXeu3NJaz0kosr7bfsHQ6q95sYr8cmdNqsv6S5YzGfN0w2RGyR4JzWUrzzYhlKnhVXEq/qxdGdwsTLJf7OFzcGFPCk8db9n25uQ6vokc5v3DLpzzRbKU/OEcRRsEIBSaKPERCJYMQhEqOHBQGlRCiUCd3KbdVvFgM4dMqF3LGpzynWVRbQ8FXeczUogzQ6jm3nC/AaILxB5Nyq2uBriQH00zOMz3KNqm8b8gP6lGfXK1dbvSTC2VbKbkU2/um0vBSau1WaJVfwnNZfY6OuPrCF1OgTUu+EhP2StyrUqZiyuQr86xdoQDCGJ8zWBz4CZZRmT6110F1qU938+pqAEEXxDuKw+wUR5F3qI0rFLmIBCgICoGgwNcCwUoIQbA7tdHO/HqWSna3oEWc1sAe7Sy3Iw96mmUjnnFRPMtOEoYnE7AvpeA/WWMk9vyxoV6gV8+EZJvXE9YNsHZwNHY1eVmG1tcNKtCmWYMFKreDZ8TpZsTCGbw1HIMwVUKRB8gAwJeg8BIFl9oCyYl8lTEyqpxgDbXfgEiu0fvQGHkLyTV4H1rY7UfyAcFr7Qlee0jwem+XiqWIHQolDCsUV0JofO4ZO7OqOQX/D1A1+INVU7gGQtBtY8KCp7lcNp78VRlqorPJM6Z73ky8bz4I5Qrq+t19ygf2I3tP+rtCPlZtjB8hYirW8+HPOLf6bbOesWf9tlnPPL/6NQftWaz+nqXY9dT2B4x2+lYlsV5oVfAYu/gwrcrZ9SpdsI6ZpCeOfUwZSXpjjyeEjZPjxN62Bo69/79PPCDz4T2ZjwxJfLiH+FwUOMgPhiK+2GUkdvrAR6yxjQ9EfM65EV91dfUsFQAnQBcI0IkEw8LoTxrnXacx7MUJ2ROLg569SE8BFHdpxGnfl7jFXRo0pyEKvROjk5mAT68PnT72bHokdA6+NZrdK89/Ym/8y07D5t7HCWdIJFfLbEHZV/ztk+KEiavL0BOeJ2jsx24fcClzTNs6Tk9LhsZtz91nJxMX13Q+P/UZY2zGcWL05cM0PNtnxyHS4+UD1PpXqfLupv5tz45+Aw== -------------------------------------------------------------------------------- /notebooks/source/12_概率分布之间的关系.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_概率分布之间的关系.png -------------------------------------------------------------------------------- /notebooks/source/12_概率图初探demo.drawio: -------------------------------------------------------------------------------- 1 | 1VdLj9owEP41OW4FzoNwLI+2h61alVZdjiaePFonkzoOJP31dRKbJAuLWonushIHzzcPj+ebGcCyl2n1XtA8/ogMuEUmrLLslUXI1CHEaj4TVnfIzLM7IBIJ00Y9sEl+gwYnGi0TBsXIUCJymeRjMMAsg0COMCoEHsZmIfLxrTmN4ATYBJSfot8TJuMO9cmsxz9AEsXm5qk37zQpNcb6JUVMGR4GkL227KVAlN0prZbAm+KZunR+757QHhMTkMm/cXA/+V/3W3f15fP83nWIDzz9dufo3GRtHgxMvV+LKGSMEWaUr3t0IbDMGDRRJ0rqbe4RcwVOFfgDpKw1mbSUqKBYplxroUrkw+C8bUK9cbW0qnTkVqiNkElRPwyFgVcj9m6tZPy69zWPerJsGiqwFAFcqJVpPyoikBfsyJFcNRWAKah8lJ8ATmWyH+dBdXtGR7ueQXXQJP4Dod5FQjPMrspgz9p2oDnP4AswYb8kEzrunvJS32QRj6t0Fzt1iGRbkg4IUVVgSJr3q0SjuCtaFt4qg6mbV73SRKEmjEqzizSOruDBjY+7g3O1SpumOMSJhE1O28of1DYfU06LvNuvYVI1rbMIE86XyFG0gewwBC8IFF5IgT9hoGGz+W7Sd8AehITqcg+ccmYcXL1Na/M1oeVDv5wdDcWDvWywq7PsnlT0tQ8XeQ3DRZ5ruHY3MVzMBZ8554bLJzvb864zXGR2Y8NlPxfLwU2wHPoBnF+hO9913Cut0ONPa8Oy/99YVmL/+7bVDf4l2Os/ -------------------------------------------------------------------------------- /notebooks/source/12_概率图初探demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_概率图初探demo.png -------------------------------------------------------------------------------- /notebooks/source/12_概率图初探demo2.drawio: -------------------------------------------------------------------------------- 1 | 5VbbjpswEP0aHrdKzCXksbm0fdiqVdOqu48GD5fWMNSYBPr1NWAH2OxGWylKI1XiwXPmYnvOHAvLXmf1e0GL5CMy4BaZsdqyNxYhc4cQq/1mrOmRxdzpgVikTAcNwC79DRqcabRKGZSTQInIZVpMwRDzHEI5wagQeJiGRcinuxY0hhNgF1J+in5PmUx61CeLAf8AaZyYnefesvdk1ATrm5QJZXgYQfbWstcCUfarrF4Db5tn+tLnvXvBezyYgFy+JsH95H/dP7qbL5+X965DfODZtztNRikbc2Fg6v7aRCETjDGnfDugK4FVzqCtOlPWEHOPWChwrsAfIGWjyaSVRAUlMuPaC3UqH0brx7bUG1dbm1pX7ozGGLkUzcPYGGW15pDWWSavv197qRfbpqESKxHCmV6Z8aMiBnkmjhzJVaoAzECdR+UJ4FSm++k5qB7P+Bg3MKgWmsS/IFTX3VNe6Z0s4nF13FWgFrHsWtIDEaoOjLn3flVoHHdlx95bFTB3i3pwmirUlFHH7CtNqyt4tOPTIeNcCbgdpkOSStgVtOv8Qb0h01GhZdGrOkrrduRWUcr5GjmKrpAdReCFocJLKfAnjDxssQxmwwTsQUioz8/AKWcmwdUabszjpO3D8CQ4GkpGr4HBLs7y4qxsc8z/Z52SV+rU/pc6JdfSaXATOmUu+Mx5Tqc+CWzPu4xOyeLGdGpfi+XwJliO/BCef40D33XcC73G9vJqLCtz+EHrfKPfXHv7Bw== -------------------------------------------------------------------------------- /notebooks/source/12_概率图初探demo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_概率图初探demo2.png -------------------------------------------------------------------------------- /notebooks/source/12_概率图初探demo3.drawio: -------------------------------------------------------------------------------- 1 | 1VhLj5swEP41HLcC8wg5No+2h61aNa26ORo8PFoHU+MkpL++BuwAm4d2JTahUg6eb8Zje76ZsYlhzzflR47z5DMjQA1kktKwFwZCloOQUf1McmiQieU0QMxTooxaYJX+BQWaCt2mBIqeoWCMijTvgyHLMghFD8Ocs33fLGK0v2qOYzgBViGmp+jPlIikQX00afFPkMaJXtnypo1mg7WxOkmRYML2HcheGvacMyaa0aacA62Cp+PSzPtwQXvcGIdMvGSC+8X/vlu7i29fp4+ug3ygmx8PioxCHPSBgcjzK5FxkbCYZZguW3TG2TYjUHk1pdTaPDKWS9CS4C8Q4qDIxFvBJJSIDVVaKFPx1BmvK1fvXCUtSuW5Fg5ayAQ/PHWFzqxKbKfVkp7XnK861MWwKahgWx7ClVjp9MM8BnHFDh3JlVUBbANyP3IeB4pFuuvvA6v0jI92LYNyoEh8BaHTq4RmLBuUwZa1dUfzKgbHx599T/6U3x2mW7WSgTwqtzsL5CAWdUgaIGIyAl2qvT9bphUPRc3de2lguXnZKrUXrN3IbTae+t4l3FnxeU5RKhtwlUr7JBWwynEd+b28A/qJgou86cpRWlYJN4tSSueMMl47sqMIvDCUeCE4+w0dDZlMA7PNgB1wAeX1HDjlTE9wVQ8+6MtFyfu2pTsKSjrdXGPDs2yehnRsZWrdqUzRf1GmaPwEdi7HkVJoXSjb23CIbtVqg1G0WuKCT5xzrdZHge15w7RaNBlbq73+xL1npQ5YcfZLK+6uXdO+VcWFo6i4yA/h/OMm8F3HHehxc/xU1hU3vXfF3ewNS0ZBM1iytU7O0Tz1JjYeqLHao6P5ZtUMo6A5iiJ04VPFCzx3qPvzGc3IfzOapdj++1TrOv/h2ct/ -------------------------------------------------------------------------------- /notebooks/source/12_概率图初探demo3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_概率图初探demo3.png -------------------------------------------------------------------------------- /notebooks/source/12_狄利克雷分布.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_狄利克雷分布.png -------------------------------------------------------------------------------- /notebooks/source/12_重要采样效率分析demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_重要采样效率分析demo.png -------------------------------------------------------------------------------- /notebooks/source/12_高斯分布_均值的贝叶斯推断.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_高斯分布_均值的贝叶斯推断.png -------------------------------------------------------------------------------- /notebooks/source/12_高斯条件概率与边缘概率.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/12_高斯条件概率与边缘概率.png -------------------------------------------------------------------------------- /notebooks/source/15_EM中E步更新.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/15_EM中E步更新.png -------------------------------------------------------------------------------- /notebooks/source/15_EM中M步更新.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/15_EM中M步更新.png -------------------------------------------------------------------------------- /notebooks/source/15_EM中三者间的关系.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/15_EM中三者间的关系.png -------------------------------------------------------------------------------- /notebooks/source/15_VI_GMM1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/15_VI_GMM1.png -------------------------------------------------------------------------------- /notebooks/source/15_VI_GMM2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/15_VI_GMM2.png -------------------------------------------------------------------------------- /notebooks/source/15_VI_一元高斯分布的迭代优化.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/15_VI_一元高斯分布的迭代优化.png -------------------------------------------------------------------------------- /notebooks/source/15_VI_贝叶斯线性回归图模型.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/15_VI_贝叶斯线性回归图模型.png -------------------------------------------------------------------------------- /notebooks/source/15_VI介绍.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/15_VI介绍.png -------------------------------------------------------------------------------- /notebooks/source/15_t分布1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/15_t分布1.png -------------------------------------------------------------------------------- /notebooks/source/15_t分布2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/15_t分布2.png -------------------------------------------------------------------------------- /notebooks/source/15_对数似然_ELBO_KL距离之间的关系.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/15_对数似然_ELBO_KL距离之间的关系.png -------------------------------------------------------------------------------- /notebooks/source/16_LDA_文本生成过程.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/16_LDA_文本生成过程.png -------------------------------------------------------------------------------- /notebooks/source/16_LDA_盘子图.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/16_LDA_盘子图.png -------------------------------------------------------------------------------- /notebooks/source/16_变分EM的LDA模型.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/16_变分EM的LDA模型.png -------------------------------------------------------------------------------- /notebooks/source/16_变分EM的变分分布.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/16_变分EM的变分分布.png -------------------------------------------------------------------------------- /notebooks/source/17_FFM1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/17_FFM1.png -------------------------------------------------------------------------------- /notebooks/source/17_FFM2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/17_FFM2.png -------------------------------------------------------------------------------- /notebooks/source/17_FFM3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/17_FFM3.png -------------------------------------------------------------------------------- /notebooks/source/17_FM_one-hot1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/17_FM_one-hot1.png -------------------------------------------------------------------------------- /notebooks/source/17_FM_one-hot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/17_FM_one-hot2.png -------------------------------------------------------------------------------- /notebooks/source/18_聚类_AGNES.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/18_聚类_AGNES.png -------------------------------------------------------------------------------- /notebooks/source/18_聚类_DBSCAN1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/18_聚类_DBSCAN1.png -------------------------------------------------------------------------------- /notebooks/source/19_isomap1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/19_isomap1.png -------------------------------------------------------------------------------- /notebooks/source/19_isomap2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/19_isomap2.png -------------------------------------------------------------------------------- /notebooks/source/19_lda1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/19_lda1.png -------------------------------------------------------------------------------- /notebooks/source/19_lle1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/19_lle1.png -------------------------------------------------------------------------------- /notebooks/source/19_mds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/19_mds.png -------------------------------------------------------------------------------- /notebooks/source/19_pca1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/19_pca1.png -------------------------------------------------------------------------------- /notebooks/source/19_pca2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/19_pca2.png -------------------------------------------------------------------------------- /notebooks/source/19_降维_svd_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/19_降维_svd_demo.jpg -------------------------------------------------------------------------------- /notebooks/source/20_iforest.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/20_iforest.jpg -------------------------------------------------------------------------------- /notebooks/source/20_lof_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/20_lof_01.png -------------------------------------------------------------------------------- /notebooks/source/20_phbos_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/20_phbos_01.png -------------------------------------------------------------------------------- /notebooks/source/20_phbos_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhulei227/ML_Notes/df90c379828b7d4db0cee38c5eaa8f6a5301a81f/notebooks/source/20_phbos_02.png -------------------------------------------------------------------------------- /tests/adaboost_classifier_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | os.chdir('../') 4 | 5 | from sklearn import model_selection 6 | 7 | from sklearn.datasets import make_classification 8 | from ml_models import utils 9 | from ml_models.linear_model import LogisticRegression 10 | from ml_models.tree import CARTClassifier 11 | from ml_models.svm import SVC 12 | 13 | 14 | data, target = make_classification(n_samples=100, n_features=2, n_classes=2, n_informative=1, n_redundant=0, 15 | n_repeated=0, n_clusters_per_class=1, class_sep=0.5) 16 | X_train, X_test, y_train, y_test = model_selection.train_test_split(data, target, test_size=0.1) 17 | 18 | from ml_models.ensemble import AdaBoostClassifier 19 | 20 | classifier = AdaBoostClassifier(base_estimator=CARTClassifier(),n_estimators=10, 21 | learning_rate=0.5) 22 | classifier.fit(X_train, y_train) 23 | # # 计算F1 24 | from sklearn.metrics import f1_score 25 | print(f1_score(y_test, classifier.predict(X_test))) 26 | print(np.sum(np.abs(y_test - classifier.predict(X_test)))) 27 | # 28 | utils.plot_decision_function(X_train, y_train, classifier) 29 | utils.plt.show() -------------------------------------------------------------------------------- /tests/adaboost_regressor_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.chdir('../') 5 | import matplotlib.pyplot as plt 6 | from ml_models.tree import CARTRegressor 7 | from ml_models.linear_model import LinearRegression 8 | 9 | data = np.linspace(1, 10, num=100) 10 | target = np.sin(data) + np.random.random(size=100) # 添加噪声 11 | data = data.reshape((-1, 1)) 12 | 13 | from ml_models.ensemble import AdaBoostRegressor 14 | 15 | model = AdaBoostRegressor(base_estimator=[LinearRegression(), CARTRegressor()], n_estimators=10) 16 | model.fit(data, target) 17 | 18 | plt.scatter(data, target) 19 | plt.plot(data, model.predict(data), color='r') 20 | plt.show() 21 | -------------------------------------------------------------------------------- /tests/agnes_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets.samples_generator import make_blobs 2 | 3 | X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0) 4 | X = X[:, ::-1] 5 | 6 | from ml_models.cluster import AGNES 7 | 8 | agnes = AGNES(k=4) 9 | agnes.fit(X) 10 | 11 | from ml_models import utils 12 | utils.plot_decision_function(X, y, agnes) 13 | utils.plt.show() -------------------------------------------------------------------------------- /tests/bagging_classifier_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | os.chdir('../') 4 | 5 | from sklearn import model_selection 6 | 7 | from sklearn.datasets import make_classification 8 | from ml_models import utils 9 | from ml_models.linear_model import LogisticRegression 10 | from ml_models.tree import CARTClassifier 11 | from ml_models.svm import SVC 12 | 13 | 14 | data, target = make_classification(n_samples=100, n_features=2, n_classes=2, n_informative=1, n_redundant=0, 15 | n_repeated=0, n_clusters_per_class=1, class_sep=0.5) 16 | X_train, X_test, y_train, y_test = model_selection.train_test_split(data, target, test_size=0.1) 17 | 18 | from ml_models.ensemble import BaggingClassifier 19 | 20 | classifier = BaggingClassifier(base_estimator=CARTClassifier(),n_estimators=3) 21 | classifier.fit(X_train, y_train) 22 | # # 计算F1 23 | from sklearn.metrics import f1_score 24 | print(f1_score(y_test, classifier.predict(X_test))) 25 | print(np.sum(np.abs(y_test - classifier.predict(X_test)))) 26 | # 27 | utils.plot_decision_function(X_train, y_train, classifier) 28 | utils.plt.show() 29 | -------------------------------------------------------------------------------- /tests/cart_classifier_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from sklearn import model_selection 4 | 5 | os.chdir('../') 6 | 7 | from sklearn.datasets import make_classification 8 | from ml_models import utils 9 | 10 | data, target = make_classification(n_samples=100, n_features=2, n_classes=2, n_informative=1, n_redundant=0, 11 | n_repeated=0, n_clusters_per_class=1, class_sep=.3, random_state=44) 12 | X_train, X_test, y_train, y_test = model_selection.train_test_split(data, target, test_size=0.1, random_state=0) 13 | 14 | from ml_models.tree import CARTClassifier 15 | 16 | tree = CARTClassifier() 17 | tree.fit(X_train, y_train) 18 | tree.prune(5) 19 | 20 | # # 计算F1 21 | from sklearn.metrics import f1_score 22 | 23 | print(f1_score(y_test, tree.predict(X_test))) 24 | print(np.sum(np.abs(y_test - tree.predict(X_test)))) 25 | # 26 | utils.plot_decision_function(X_train, y_train, tree) 27 | utils.plt.show() 28 | -------------------------------------------------------------------------------- /tests/cart_regressor_test.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import os 4 | 5 | os.chdir('../') 6 | 7 | from sklearn.datasets import make_regression 8 | 9 | # data, target = make_regression(n_samples=100, n_features=1, random_state=44, bias=0.5, noise=2) 10 | data = np.linspace(1, 10, num=100) 11 | target = np.sin(data) + np.random.random(size=100) 12 | data = data.reshape((-1, 1)) 13 | 14 | # indices = np.argsort(target) 15 | # 16 | # data = data[indices] 17 | # target = target[indices] 18 | 19 | from ml_models.tree import CARTRegressor 20 | 21 | tree = CARTRegressor(max_bins=50) 22 | tree.fit(data, target) 23 | tree.prune(10000) 24 | 25 | plt.scatter(data, target) 26 | plt.plot(data, tree.predict(data), color='r') 27 | plt.show() 28 | -------------------------------------------------------------------------------- /tests/conditional_random_field_test.py: -------------------------------------------------------------------------------- 1 | from ml_models.pgm import CRF 2 | 3 | x = [ 4 | [1, 2, 3, 0, 1, 3, 4], 5 | [1, 2, 3], 6 | [0, 2, 4, 2], 7 | [4, 3, 2, 1], 8 | [3, 1, 1, 1, 1], 9 | [2, 1, 3, 2, 1, 3, 4] 10 | ] 11 | y = x 12 | 13 | crf = CRF(output_status_num=5, input_status_num=5) 14 | crf.fit(x, y) 15 | print(crf.predict(x[-1])) 16 | print(crf.predict([1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 3, 1, 2])) 17 | print(len(crf.FF.feature_funcs)) 18 | -------------------------------------------------------------------------------- /tests/dart_classifier_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.chdir('../') 5 | 6 | from sklearn import model_selection 7 | 8 | from sklearn.datasets import make_classification 9 | from ml_models import utils 10 | from ml_models.linear_model import LogisticRegression 11 | from ml_models.tree import CARTRegressor 12 | from ml_models.svm import SVC 13 | 14 | data, target = make_classification(n_samples=100, n_features=2, n_classes=2, n_informative=1, n_redundant=0, 15 | n_repeated=0, n_clusters_per_class=1, class_sep=0.5) 16 | X_train, X_test, y_train, y_test = model_selection.train_test_split(data, target, test_size=0.1) 17 | 18 | from ml_models.ensemble import DARTClassifier 19 | 20 | classifier = DARTClassifier(base_estimator=CARTRegressor(), n_estimators=10) 21 | classifier.fit(X_train, y_train) 22 | # # 计算F1 23 | from sklearn.metrics import f1_score 24 | 25 | print(f1_score(y_test, classifier.predict(X_test))) 26 | print(np.sum(np.abs(y_test - classifier.predict(X_test)))) 27 | # 28 | utils.plot_decision_function(X_train, y_train, classifier) 29 | utils.plt.show() 30 | -------------------------------------------------------------------------------- /tests/dart_regressor_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.chdir('../') 5 | import matplotlib.pyplot as plt 6 | from ml_models.tree import CARTRegressor 7 | from ml_models.linear_model import LinearRegression 8 | 9 | data = np.linspace(1, 10, num=100) 10 | target = np.sin(data) + np.random.random(size=100) # 添加噪声 11 | data = data.reshape((-1, 1)) 12 | 13 | from ml_models.ensemble import DARTRegressor 14 | 15 | model = DARTRegressor(base_estimator=CARTRegressor()) 16 | model.fit(data, target) 17 | 18 | plt.scatter(data, target) 19 | plt.plot(data, model.predict(data), color='r') 20 | plt.show() 21 | -------------------------------------------------------------------------------- /tests/dbscan_test.py: -------------------------------------------------------------------------------- 1 | from sklearn import datasets 2 | import matplotlib.pyplot as plt 3 | 4 | X, y = datasets.make_moons(noise=0.01) 5 | 6 | from ml_models.cluster import DBSCAN 7 | 8 | dbscan = DBSCAN(eps=0.2, min_sample=3) 9 | lable = dbscan.fit_predict(X) 10 | print(lable) 11 | plt.scatter(X[:, 0], X[:, 1], c=lable) 12 | plt.show() -------------------------------------------------------------------------------- /tests/decision_tree_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.chdir('../') 5 | 6 | from sklearn.datasets import make_classification 7 | from ml_models import utils 8 | 9 | data, target = make_classification(n_samples=100, n_features=2, n_classes=2, n_informative=1, n_redundant=0, 10 | n_repeated=0, n_clusters_per_class=1, class_sep=.3) 11 | 12 | from ml_models.tree import DecisionTreeClassifier 13 | 14 | tree = DecisionTreeClassifier(max_bins=20) 15 | tree.fit(data, target) 16 | tree.prune(alpha=1) 17 | 18 | # # 计算F1 19 | from sklearn.metrics import f1_score 20 | 21 | print(f1_score(target, tree.predict(data))) 22 | print(np.sum(np.abs(target - tree.predict(data)))) 23 | # 24 | utils.plot_decision_function(data, target, tree) 25 | utils.plt.show() 26 | -------------------------------------------------------------------------------- /tests/ffm_test1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.chdir('../') 5 | import matplotlib.pyplot as plt 6 | from ml_models.fm import FFM 7 | 8 | data1 = np.linspace(1, 10, num=100) 9 | data2 = np.linspace(1, 10, num=100) + np.random.random(size=100) 10 | data3 = np.linspace(10, 1, num=100) 11 | target = data1 * 2 + data3 * 0.1 + data2 * 1 + 10 * data2 * data3 + np.random.random(size=100) 12 | data = np.c_[data1, data2, data3] 13 | 14 | # data = np.random.random((50000, 25)) 15 | # data = np.c_[np.linspace(0, 1, 50000), data] 16 | # target = data[:, 0] * 1 + data[:, 1] * 2 + 2 * data[:, 8] * data[:, 9] 17 | 18 | # from ml_models.wrapper_models import DataBinWrapper 19 | # 20 | # binwrapper = DataBinWrapper() 21 | # binwrapper.fit(data) 22 | # new_data = binwrapper.transform(data) 23 | # 24 | # from sklearn.preprocessing import OneHotEncoder 25 | # 26 | # one_hot_encoder = OneHotEncoder() 27 | # new_data = one_hot_encoder.fit_transform(new_data).toarray() 28 | # print(new_data.shape) 29 | 30 | from sklearn.model_selection import train_test_split 31 | 32 | X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.4, random_state=0) 33 | 34 | model = FFM(batch_size=256, epochs=10, solver='adam') 35 | train_losses, eval_losses = model.fit(X_train, y_train, eval_set=(X_test, y_test), show_log=True) 36 | 37 | plt.scatter(data[:, 0], target) 38 | plt.plot(data[:, 0], model.predict(data), color='r') 39 | plt.show() 40 | plt.plot(range(0, len(train_losses)), train_losses, label='train loss') 41 | plt.plot(range(0, len(eval_losses)), eval_losses, label='eval loss') 42 | plt.legend() 43 | plt.show() 44 | print(model.V) 45 | print(model.w) 46 | -------------------------------------------------------------------------------- /tests/fm_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.chdir('../') 5 | import matplotlib.pyplot as plt 6 | from ml_models.fm import FFM, FM 7 | from ml_models.vi.linear_regression import * 8 | 9 | data1 = np.linspace(0, 1, num=1000) 10 | data2 = np.linspace(0, 1, num=1000) + np.random.random(size=1000) 11 | data3 = np.linspace(1, 0, num=1000) 12 | target = data1 * 2 + data3 * 0.1 + data2 * 1 + 100 * data1 * data2 + np.random.random(size=1000) 13 | data = np.c_[data1, data2, data3] 14 | 15 | # X = np.linspace(0, 100, 100) 16 | # X = np.c_[X, np.ones(100)] 17 | # w = np.asarray([3, 2]) 18 | # Y = X.dot(w) 19 | # X = X.astype('float') 20 | # Y = Y.astype('float') 21 | # X[:, 0] += np.random.normal(size=(X[:, 0].shape)) * 3 # 添加噪声 22 | # Y = Y.reshape(100, 1) 23 | # # 加噪声 24 | # X = np.concatenate([X, np.asanyarray([[100, 1], [101, 1], [102, 1], [103, 1], [104, 1]])]) 25 | # Y = np.concatenate([Y, np.asanyarray([[3000], [3300], [3600], [3800], [3900]])]) 26 | # 27 | # target = Y.reshape(105, 1) 28 | # data = X[:, 0].reshape((-1, 1)) 29 | 30 | # data = np.random.random((50000, 25)) 31 | # data = np.c_[np.linspace(0, 1, 50000), data] 32 | # target = data[:, 0] * 1 + data[:, 1] * 2 + 2 * data[:, 8] * data[:, 9] 33 | # 34 | # from ml_models.wrapper_models import DataBinWrapper 35 | # 36 | # binwrapper = DataBinWrapper() 37 | # binwrapper.fit(data) 38 | # new_data = binwrapper.transform(data) 39 | # 40 | # from sklearn.preprocessing import OneHotEncoder 41 | # 42 | # one_hot_encoder = OneHotEncoder() 43 | # new_data = one_hot_encoder.fit_transform(new_data).toarray() 44 | # print(new_data.shape) 45 | 46 | from sklearn.model_selection import train_test_split 47 | 48 | X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.4, random_state=0) 49 | 50 | # model = FM(batch_size=10, epochs=5, objective='poisson') 51 | # model = FM(batch_size=10, epochs=5) 52 | # train_losses, eval_losses = model.fit(X_train, y_train, eval_set=(X_test, y_test)) 53 | model = LinearRegression(normalized=False) 54 | model.fit(X_train, y_train) 55 | 56 | plt.scatter(data[:, 0], target) 57 | plt.plot(data[:, 0], model.predict(data), color='r') 58 | plt.show() 59 | # plt.plot(range(0, len(train_losses)), train_losses, label='train loss') 60 | # plt.plot(range(0, len(eval_losses)), eval_losses, label='eval loss') 61 | # plt.legend() 62 | # plt.show() 63 | # print(model.V) 64 | print(model.w) 65 | -------------------------------------------------------------------------------- /tests/fmm_test2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.chdir('../') 5 | import matplotlib.pyplot as plt 6 | from ml_models.fm import FFM 7 | 8 | # data1 = np.linspace(1, 10, num=100) 9 | # data2 = np.linspace(1, 10, num=100) + np.random.random(size=100) 10 | # data3 = np.linspace(10, 1, num=100) 11 | # target = data1 * 2 + data3 * 0.1 + data2 * 1 + 10 * data1 * data3 + np.random.random(size=100) 12 | # data = np.c_[data1, data2, data3] 13 | 14 | # data = np.random.random((50000, 25)) 15 | # data = np.c_[np.linspace(0, 1, 50000), data] 16 | # target = data[:, 0] * 1 + data[:, 1] * 2 + 2 * data[:, 8] * data[:, 9] 17 | 18 | # from ml_models.wrapper_models import DataBinWrapper 19 | # 20 | # binwrapper = DataBinWrapper() 21 | # binwrapper.fit(data) 22 | # new_data = binwrapper.transform(data) 23 | # 24 | # from sklearn.preprocessing import OneHotEncoder 25 | # 26 | # one_hot_encoder = OneHotEncoder() 27 | # new_data = one_hot_encoder.fit_transform(new_data).toarray() 28 | # print(new_data.shape) 29 | 30 | # from sklearn.model_selection import train_test_split 31 | # 32 | # X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.4, random_state=0) 33 | # 34 | # model = FFM(batch_size=1, epochs=20, solver='adam',objective='tweedie') 35 | # train_losses, eval_losses = model.fit(X_train, y_train, eval_set=(X_test, y_test), show_log=True) 36 | # 37 | # plt.scatter(data[:, 0], target) 38 | # plt.plot(data[:, 0], model.predict(data), color='r') 39 | # plt.show() 40 | # plt.plot(range(0, len(train_losses)), train_losses, label='train loss') 41 | # plt.plot(range(0, len(eval_losses)), eval_losses, label='eval loss') 42 | # plt.legend() 43 | # plt.show() 44 | # print(model.V) 45 | # print(model.w) 46 | 47 | """ 48 | 二分类 49 | """ 50 | from ml_models import utils 51 | 52 | from sklearn.datasets import make_classification 53 | 54 | data, target = make_classification(n_samples=200, n_features=2, n_classes=2, n_informative=1, n_redundant=0, 55 | n_repeated=0, n_clusters_per_class=1) 56 | 57 | # ffm=FFM(batch_size=1, epochs=20, solver='adam',objective='logistic') 58 | # ffm.fit(data,target,show_log=True) 59 | # utils.plot_decision_function(data,target,ffm) 60 | # utils.plt.show() 61 | 62 | 63 | """ 64 | 多分类 65 | """ 66 | from sklearn.datasets.samples_generator import make_blobs 67 | 68 | X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0) 69 | X = X[:, ::-1] 70 | from ml_models.wrapper_models import * 71 | # 72 | ffm = FFM(epochs=10, solver='adam', objective='logistic') 73 | ovo = MultiClassWrapper(ffm, mode='ovo') 74 | ovo.fit(X, y) 75 | utils.plot_decision_function(X, y, ovo) 76 | utils.plt.show() -------------------------------------------------------------------------------- /tests/gaussian_naive_bayes_classifier_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets.samples_generator import make_blobs 2 | from ml_models import utils 3 | 4 | X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0) 5 | X = X[:, ::-1] 6 | 7 | from ml_models.pgm import GaussianNBClassifier 8 | 9 | nb = GaussianNBClassifier() 10 | nb.fit(X, y) 11 | print(nb.predict(X)) 12 | utils.plot_decision_function(X, y, nb) 13 | utils.plt.show() 14 | -------------------------------------------------------------------------------- /tests/gaussian_naive_bayes_cluster_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets.samples_generator import make_blobs 2 | from ml_models import utils 3 | 4 | X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0) 5 | X = X[:, ::-1] 6 | 7 | from ml_models.pgm import GaussianNBCluster 8 | 9 | nb = GaussianNBCluster(n_iter=200, tol=1e-5, n_components=4,verbose=True) 10 | nb.fit(X) 11 | print(nb.predict(X)) 12 | utils.plot_decision_function(X, y, nb) 13 | utils.plt.show() 14 | -------------------------------------------------------------------------------- /tests/gmm_classifier_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets.samples_generator import make_blobs 2 | from ml_models import utils 3 | 4 | X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0) 5 | X = X[:, ::-1] 6 | 7 | from ml_models.em import GMMClassifier 8 | 9 | gmm = GMMClassifier(n_iter=100) 10 | gmm.fit(X, y) 11 | print(gmm.predict(X)) 12 | utils.plot_decision_function(X, y, gmm) 13 | utils.plt.show() 14 | -------------------------------------------------------------------------------- /tests/gmm_cluster_test(vi).py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets.samples_generator import make_blobs 2 | 3 | X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.55, random_state=0) 4 | X = X[:, ::-1] 5 | 6 | from ml_models.vi import GMMCluster 7 | 8 | gmm = GMMCluster(verbose=True, n_iter=100, n_components=10) 9 | gmm.fit(X) -------------------------------------------------------------------------------- /tests/gmm_cluster_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets.samples_generator import make_blobs 2 | 3 | X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0) 4 | X = X[:, ::-1] 5 | 6 | from ml_models.em import GMMCluster 7 | 8 | gmm = GMMCluster(verbose=True, n_iter=100, n_components=4) 9 | gmm.fit(X) 10 | -------------------------------------------------------------------------------- /tests/gradient_boosting_classifier_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | os.chdir('../') 4 | 5 | from sklearn import model_selection 6 | 7 | from sklearn.datasets import make_classification 8 | from ml_models import utils 9 | from ml_models.linear_model import LogisticRegression 10 | from ml_models.tree import CARTRegressor 11 | from ml_models.svm import SVC 12 | 13 | 14 | data, target = make_classification(n_samples=100, n_features=2, n_classes=2, n_informative=1, n_redundant=0, 15 | n_repeated=0, n_clusters_per_class=1, class_sep=0.5) 16 | X_train, X_test, y_train, y_test = model_selection.train_test_split(data, target, test_size=0.1) 17 | 18 | from ml_models.ensemble import GradientBoostingClassifier 19 | 20 | classifier = GradientBoostingClassifier(base_estimator=CARTRegressor(),n_estimators=10, 21 | learning_rate=0.5) 22 | classifier.fit(X_train, y_train) 23 | # # 计算F1 24 | from sklearn.metrics import f1_score 25 | print(f1_score(y_test, classifier.predict(X_test))) 26 | print(np.sum(np.abs(y_test - classifier.predict(X_test)))) 27 | # 28 | utils.plot_decision_function(X_train, y_train, classifier) 29 | utils.plt.show() -------------------------------------------------------------------------------- /tests/gradient_boosting_regressor_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.chdir('../') 5 | import matplotlib.pyplot as plt 6 | from ml_models.tree import CARTRegressor 7 | from ml_models.linear_model import LinearRegression 8 | 9 | data = np.linspace(1, 10, num=100) 10 | target = np.sin(data) + np.random.random(size=100) # 添加噪声 11 | data = data.reshape((-1, 1)) 12 | 13 | from ml_models.ensemble import GradientBoostingRegressor 14 | 15 | model = GradientBoostingRegressor(base_estimator=CARTRegressor(), loss='quantile', quantile_threshold=0.6, learning_rate=1.0) 16 | model.fit(data, target) 17 | 18 | plt.scatter(data, target) 19 | plt.plot(data, model.predict(data), color='r') 20 | plt.show() 21 | -------------------------------------------------------------------------------- /tests/hard_soft_margin_svm_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.chdir('../') 5 | 6 | from sklearn.datasets import make_classification 7 | from ml_models import utils 8 | 9 | data, target = make_classification(n_samples=100, n_features=2, n_classes=2, n_informative=1, n_redundant=0, 10 | n_repeated=0, n_clusters_per_class=1, class_sep=3.0) 11 | 12 | from ml_models.svm import HardMarginSVM,SoftMarginSVM 13 | 14 | svm = SoftMarginSVM() 15 | svm.fit(data, target, show_train_process=True) 16 | 17 | # 计算F1 18 | from sklearn.metrics import f1_score 19 | 20 | print(f1_score(target, svm.predict(data))) 21 | print(np.sum(np.abs(target - svm.predict(data)))) 22 | 23 | utils.plt.close() 24 | utils.plot_decision_function(data, target, svm, svm.support_vectors) 25 | utils.plt.show() 26 | print('support vector', svm.support_vectors) 27 | -------------------------------------------------------------------------------- /tests/hbos_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.chdir('../') 4 | import matplotlib.pyplot as plt 5 | 6 | from sklearn.datasets import make_classification 7 | 8 | X, _ = make_classification(n_samples=1000, n_features=2, 9 | n_informative=2, n_redundant=0, 10 | n_repeated=0, n_classes=3, 11 | n_clusters_per_class=1, weights=[0.01, 0.05, 0.94], 12 | class_sep=0.8, random_state=0) 13 | 14 | from ml_models.outlier_detect import HBOS 15 | 16 | hbos = HBOS() 17 | y = hbos.fit_transform(X) 18 | plt.scatter(x=X[:, 0], y=X[:, 1], c=y) 19 | plt.show() 20 | -------------------------------------------------------------------------------- /tests/hidden_markov_model_test1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ml_models.pgm import HMM 3 | 4 | pi = np.asarray([[0.2], [0.4], [0.4]]) 5 | A = np.asarray([[0.5, 0.2, 0.3], 6 | [0.3, 0.5, 0.2], 7 | [0.2, 0.3, 0.5]]) 8 | B = np.asarray([[0.5, 0.5], 9 | [0.4, 0.6], 10 | [0.7, 0.3]]) 11 | 12 | hmm = HMM() 13 | hmm.pi = pi 14 | hmm.A = A 15 | hmm.B = B 16 | print(hmm.predict_joint_visible_prob([0, 1, 0], forward_type="backward")) 17 | -------------------------------------------------------------------------------- /tests/hidden_markov_model_test2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ml_models.pgm import HMM 3 | 4 | O = [ 5 | [1, 2, 3, 0, 1, 3, 4], 6 | [1, 2, 3], 7 | [0, 2, 4, 2], 8 | [4, 3, 2, 1], 9 | [3, 1, 1, 1, 1], 10 | [2, 1, 3, 2, 1, 3, 4] 11 | ] 12 | I = O 13 | 14 | print("------------------有监督学习----------------------") 15 | hmm = HMM(hidden_status_num=5, visible_status_num=5) 16 | hmm.fit_with_hidden_status(visible_list=O, hidden_list=I) 17 | print(hmm.pi) 18 | print(hmm.A) 19 | print(hmm.B) 20 | 21 | print("\n------------------无监督学习----------------------") 22 | hmm = HMM(hidden_status_num=5, visible_status_num=5) 23 | hmm.fit_without_hidden_status(O[0] + O[1] + O[2] + O[3] + O[4] + O[5]) 24 | print(hmm.pi) 25 | print(hmm.A) 26 | print(hmm.B) 27 | -------------------------------------------------------------------------------- /tests/hidden_markov_model_test3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ml_models.pgm import HMM 3 | 4 | pi = np.asarray([[0.2], [0.4], [0.4]]) 5 | A = np.asarray([[0.5, 0.2, 0.3], 6 | [0.3, 0.5, 0.2], 7 | [0.2, 0.3, 0.5]]) 8 | B = np.asarray([[0.5, 0.5], 9 | [0.4, 0.6], 10 | [0.7, 0.3]]) 11 | 12 | hmm = HMM(hidden_status_num=3, visible_status_num=2) 13 | hmm.pi = pi 14 | hmm.A = A 15 | hmm.B = B 16 | print(hmm.predict_hidden_status([0, 1, 0])) 17 | -------------------------------------------------------------------------------- /tests/iforest_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.chdir('../') 4 | import matplotlib.pyplot as plt 5 | 6 | from sklearn.datasets import make_classification 7 | 8 | X, _ = make_classification(n_samples=1000, n_features=2, 9 | n_informative=2, n_redundant=0, 10 | n_repeated=0, n_classes=3, 11 | n_clusters_per_class=1, weights=[0.01, 0.05, 0.94], 12 | class_sep=0.8, random_state=0) 13 | 14 | from ml_models.outlier_detect import IForest 15 | 16 | iforest = IForest() 17 | iforest.fit(X) 18 | score = iforest.predict(X) 19 | import numpy as np 20 | thresh=np.percentile(score,90) 21 | plt.scatter(x=X[:, 0], y=X[:, 1], c=score > thresh) 22 | plt.show() 23 | -------------------------------------------------------------------------------- /tests/isomap_test.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import numpy as np 3 | 4 | n = 200 5 | r = np.linspace(0, 1, n) 6 | l = np.linspace(0, 1, n) 7 | 8 | t = (3 * np.pi) / 2 * (1 + 2 * r) 9 | x = t * np.cos(t) 10 | y = 10 * l 11 | z = t * np.sin(t) 12 | 13 | data = np.c_[x, y, z] 14 | 15 | from ml_models.decomposition import Isomap 16 | 17 | isomap = Isomap(n_components=2, epsilon=15) 18 | new_data = isomap.fit_transform(data) 19 | plt.scatter(new_data[:, 0], new_data[:, 1]) 20 | plt.show() -------------------------------------------------------------------------------- /tests/kmeans_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets.samples_generator import make_blobs 2 | 3 | X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0) 4 | X = X[:, ::-1] 5 | 6 | from ml_models.cluster import KMeans 7 | 8 | kmeans = KMeans(k=4) 9 | kmeans.fit(X) 10 | 11 | from ml_models import utils 12 | utils.plot_decision_function(X, y, kmeans) 13 | utils.plt.show() -------------------------------------------------------------------------------- /tests/knn_odd_detect_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.chdir('../') 4 | import matplotlib.pyplot as plt 5 | 6 | from sklearn.datasets import make_classification 7 | 8 | X, _ = make_classification(n_samples=1000, n_features=2, 9 | n_informative=2, n_redundant=0, 10 | n_repeated=0, n_classes=3, 11 | n_clusters_per_class=1, weights=[0.01, 0.05, 0.94], 12 | class_sep=0.8, random_state=0) 13 | 14 | from ml_models.outlier_detect import KNN 15 | 16 | knn = KNN() 17 | score=knn.fit_transform(X) 18 | import numpy as np 19 | thresh=np.percentile(score,99) 20 | plt.scatter(x=X[:, 0], y=X[:, 1], c=score > thresh) 21 | plt.show() 22 | -------------------------------------------------------------------------------- /tests/lda_test(线性判别分析).py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from sklearn.datasets import make_classification 3 | 4 | data, target = make_classification(n_samples=50, n_features=2, 5 | n_informative=2, n_redundant=0, 6 | n_repeated=0, n_classes=2, 7 | n_clusters_per_class=1, 8 | class_sep=3, random_state=32) 9 | plt.scatter(data[:, 0], data[:, 1], c=target, s=50) 10 | plt.show() 11 | 12 | # 开始转换 13 | from ml_models.decomposition import LDA 14 | 15 | lda = LDA() 16 | lda.fit(data, target) 17 | new_data = lda.transform(data) 18 | plt.scatter(new_data[:, 0], new_data[:, 1], c=target, s=50) 19 | plt.show() 20 | -------------------------------------------------------------------------------- /tests/lda_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # import os 3 | # 4 | # os.chdir('../') 5 | from ml_models.latent_dirichlet_allocation import LDA 6 | 7 | W = [] 8 | for _ in range(0, 10): 9 | W.append(np.random.choice(10, np.random.randint(low=8, high=20)).tolist()) 10 | 11 | lda = LDA(epochs=100,method='vi_em') 12 | lda.fit(W) 13 | print(lda.transform(W)) 14 | -------------------------------------------------------------------------------- /tests/linear_regresion_test(bayes).py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | # 造伪样本 5 | # 造伪样本 6 | X = np.linspace(0, 100, 100) 7 | X = np.c_[X, np.ones(100)] 8 | w = np.asarray([3, 2]) 9 | Y = X.dot(w) 10 | X = X.astype('float') 11 | Y = Y.astype('float') 12 | X[:, 0] += np.random.normal(size=(X[:, 0].shape)) * 3 # 添加噪声 13 | Y = Y.reshape(100, 1) 14 | X = np.concatenate([X, np.asanyarray([[100, 1], [101, 1], [102, 1], [103, 1], [104, 1]])]) 15 | Y = np.concatenate([Y, np.asanyarray([[3000], [3300], [3600], [3800], [3900]])]) 16 | 17 | from ml_models.bayes import LinearRegression 18 | 19 | # 测试 20 | lr = LinearRegression(beta=1e-8,alpha=1e-8) 21 | lr.fit(X[:, :-1], Y) 22 | predict = lr.predict(X[:, :-1]) 23 | # 查看标准差 24 | print(np.std(Y - predict)) 25 | print(lr.w) 26 | 27 | lr.plot_fit_boundary(X[:, :-1], Y) 28 | plt.show() 29 | -------------------------------------------------------------------------------- /tests/linear_regresion_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # 造伪样本 4 | X = np.linspace(0, 100, 100) 5 | X = np.c_[X, np.ones(100)] 6 | w = np.asarray([3, 2]) 7 | Y = X.dot(w) 8 | X = X.astype('float') 9 | Y = Y.astype('float') 10 | X[:, 0] += np.random.normal(size=(X[:, 0].shape)) * 3 # 添加噪声 11 | 12 | Y = Y.reshape(100, 1) 13 | 14 | from ml_models.linear_model import LinearRegression 15 | import matplotlib.pyplot as plt 16 | 17 | # 测试 18 | lr = LinearRegression(solver='sgd') 19 | lr.fit(X[:, :-1], Y) 20 | predict = lr.predict(X[:, :-1]) 21 | # 查看w 22 | print('w', lr.get_params()) 23 | # 查看标准差 24 | print(np.std(Y - predict)) 25 | 26 | lr.plot_fit_boundary(X[:, :-1], Y) 27 | plt.show() 28 | -------------------------------------------------------------------------------- /tests/linear_regression_test(vi).py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | # 造伪样本 5 | # 造伪样本 6 | X = np.linspace(0, 100, 100) 7 | X = np.c_[X, np.ones(100)] 8 | w = np.asarray([3, 2]) 9 | Y = X.dot(w) 10 | X = X.astype('float') 11 | Y = Y.astype('float') 12 | X[:, 0] += np.random.normal(size=(X[:, 0].shape)) * 3 # 添加噪声 13 | Y = Y.reshape(100, 1) 14 | X = np.concatenate([X, np.asanyarray([[100, 1], [101, 1], [102, 1], [103, 1], [104, 1]])]) 15 | Y = np.concatenate([Y, np.asanyarray([[3000], [3300], [3600], [3800], [3900]])]) 16 | 17 | from ml_models.vi import LinearRegression 18 | 19 | # 测试 20 | lr = LinearRegression(basis_func=['linear', 'fm']) 21 | lr.fit(X[:, :-1], Y) 22 | predict = lr.predict(X[:, :-1]) 23 | # 查看标准差 24 | print(np.std(Y - predict)) 25 | print(lr.w) 26 | lr.plot_fit_boundary(X[:, :-1], Y) 27 | plt.show() 28 | -------------------------------------------------------------------------------- /tests/lle_test.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import numpy as np 3 | 4 | n = 200 5 | r = np.linspace(0, 1, n) 6 | l = np.linspace(0, 1, n) 7 | 8 | t = (3 * np.pi) / 2 * (1 + 2 * r) 9 | x = t * np.cos(t) 10 | y = 10 * l 11 | z = t * np.sin(t) 12 | 13 | data = np.c_[x, y, z] 14 | 15 | from ml_models.decomposition import LLE 16 | 17 | lle = LLE(n_components=2) 18 | new_data = lle.fit_transform(data) 19 | plt.scatter(new_data[:, 0], new_data[:, 1]) 20 | plt.show() -------------------------------------------------------------------------------- /tests/lof_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.chdir('../') 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | X = np.c_[np.random.random(size=(100, 2)).T, np.random.random(size=(200, 2)).T * 5].T 8 | 9 | from ml_models.outlier_detect import LOF 10 | 11 | lof = LOF(n_neighbors=10) 12 | score = lof.fit_transform(X) 13 | import numpy as np 14 | 15 | thresh = np.percentile(score, 95) 16 | plt.scatter(x=X[:, 0], y=X[:, 1], c=score > thresh) 17 | plt.show() 18 | -------------------------------------------------------------------------------- /tests/logist_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.chdir('../') 5 | from ml_models import utils 6 | import matplotlib.pyplot as plt 7 | 8 | from sklearn.datasets import make_classification 9 | 10 | data, target = make_classification(n_samples=200, n_features=2, n_classes=2, n_informative=1, n_redundant=0, 11 | n_repeated=0, n_clusters_per_class=1) 12 | 13 | from ml_models.linear_model import LogisticRegression 14 | 15 | lr = LogisticRegression() 16 | lr.fit(data, target) 17 | 18 | # 计算F1 19 | from sklearn.metrics import f1_score 20 | 21 | print(f1_score(target, lr.predict(data))) 22 | print(len(data)) 23 | print(np.sum(np.abs(target - lr.predict(data)))) 24 | lr.plot_decision_boundary(data, target) 25 | lr.plot_losses() 26 | -------------------------------------------------------------------------------- /tests/lvq_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets.samples_generator import make_blobs 2 | import numpy as np 3 | 4 | X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0) 5 | X = X[:, ::-1] 6 | 7 | # 将0,2类归为0类 8 | y = np.where(y == 2, 0, y) 9 | # 将1,3类归为1类 10 | y = np.where(y == 3, 1, y) 11 | 12 | from ml_models.cluster import LVQ 13 | 14 | kmeans = LVQ(class_label=[0, 0, 1, 1]) 15 | kmeans.fit(X, y) 16 | 17 | from ml_models import utils 18 | 19 | utils.plot_decision_function(X, y, kmeans) 20 | utils.plt.show() 21 | -------------------------------------------------------------------------------- /tests/maxent_test.py: -------------------------------------------------------------------------------- 1 | from sklearn import datasets 2 | from sklearn import model_selection 3 | from sklearn.metrics import f1_score 4 | from ml_models.wrapper_models import DataBinWrapper 5 | from ml_models.linear_model import * 6 | 7 | digits = datasets.load_digits() 8 | data = digits['data'] 9 | target = digits['target'] 10 | X_train, X_test, y_train, y_test = model_selection.train_test_split(data, target, test_size=0.2, random_state=0) 11 | 12 | data_bin_wrapper = DataBinWrapper(max_bins=10) 13 | data_bin_wrapper.fit(X_train) 14 | X_train = data_bin_wrapper.transform(X_train) 15 | X_test = data_bin_wrapper.transform(X_test) 16 | 17 | # 构建特征函数类 18 | feature_func = SimpleFeatureFunction() 19 | feature_func.build_feature_funcs(X_train, y_train) 20 | 21 | maxEnt = MaxEnt(feature_func=feature_func) 22 | maxEnt.fit(X_train, y_train) 23 | y = maxEnt.predict(X_test) 24 | 25 | print('f1:', f1_score(y_test, y, average='macro')) 26 | print(maxEnt.w) -------------------------------------------------------------------------------- /tests/mds_test.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import numpy as np 3 | 4 | n = 200 5 | r = np.linspace(0, 1, n) 6 | l = np.linspace(0, 1, n) 7 | 8 | t = (3 * np.pi) / 2 * (1 + 2 * r) 9 | x = t * np.cos(t) 10 | y = 10 * l 11 | z = t * np.sin(t) 12 | 13 | data = np.c_[x, y, z] 14 | 15 | from ml_models.decomposition import MDS 16 | 17 | mds = MDS(n_components=2) 18 | new_data = mds.fit_transform(data) 19 | plt.scatter(new_data[:, 0], new_data[:, 1]) 20 | plt.show() -------------------------------------------------------------------------------- /tests/naive_bayes_classifier_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets.samples_generator import make_blobs 2 | from ml_models import utils 3 | 4 | X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0) 5 | X = X[:, ::-1] 6 | 7 | from ml_models.pgm import NaiveBayesClassifier 8 | 9 | nb = NaiveBayesClassifier(max_bins=20) 10 | nb.fit(X, y) 11 | print(nb.predict(X)) 12 | utils.plot_decision_function(X, y, nb) 13 | utils.plt.show() 14 | -------------------------------------------------------------------------------- /tests/naive_bayes_cluster_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets.samples_generator import make_blobs 2 | from ml_models import utils 3 | 4 | X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0) 5 | X = X[:, ::-1] 6 | 7 | from ml_models.pgm import NaiveBayesCluster 8 | 9 | nb = NaiveBayesCluster(n_iter=500, tol=1e-5, n_components=4, max_bins=20, verbose=False) 10 | nb.fit(X) 11 | print(nb.predict(X)) 12 | utils.plot_decision_function(X, y, nb) 13 | utils.plt.show() 14 | -------------------------------------------------------------------------------- /tests/nmf_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.datasets.samples_generator import make_blobs 3 | import matplotlib.pyplot as plt 4 | 5 | # 造伪样本 6 | docs = [ 7 | ["有", "微信", "红包", "的", "软件"], 8 | ["微信", "支付", "不行", "的"], 9 | ["我们", "需要", "稳定的", "微信", "支付", "接口"], 10 | ["申请", "公众号", "认证"], 11 | ["这个", "还有", "几天", "放", "垃圾", "流量"], 12 | ["可以", "提供", "聚合", "支付", "系统"] 13 | ] 14 | word2id = {} 15 | idx = 0 16 | W = [] 17 | for doc in docs: 18 | tmp = [] 19 | for word in doc: 20 | if word in word2id: 21 | tmp.append(word2id[word]) 22 | else: 23 | word2id[word] = idx 24 | idx += 1 25 | tmp.append(word2id[word]) 26 | W.append(tmp) 27 | 28 | data = np.zeros(shape=(len(docs), len(word2id))) 29 | for idx, w in enumerate(W): 30 | for i in w: 31 | data[idx][i] = 1 32 | 33 | from ml_models.decomposition import NMF 34 | 35 | nmf = NMF(n_components=4) 36 | trans = nmf.fit_transform(data) 37 | 38 | 39 | def cosine(x1, x2): 40 | return x1.dot(x2) / (np.sqrt(np.sum(np.power(x1, 2))) * np.sqrt(np.sum(np.power(x2, 2)))) 41 | 42 | 43 | print(cosine(trans[1], trans[2])) 44 | print(cosine(trans[1], trans[3])) 45 | print(cosine(trans[1], trans[4])) 46 | print(cosine(trans[1], trans[0])) -------------------------------------------------------------------------------- /tests/pca_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | # 造伪样本 5 | X = np.linspace(0, 10, 100) 6 | Y = 3 * X + 2 7 | X += np.random.normal(size=(X.shape)) * 0.3 # 添加噪声 8 | data = np.c_[X, Y] 9 | 10 | from ml_models.decomposition import PCA 11 | 12 | pca = PCA() 13 | pca.fit(data) 14 | new_data = pca.transform(data) 15 | 16 | plt.scatter(new_data[:, 0], new_data[:, 1]) 17 | plt.xlabel('x') 18 | plt.ylabel('y') 19 | plt.xlim(-20, 20) 20 | plt.ylim(-20, 20) 21 | plt.show() -------------------------------------------------------------------------------- /tests/perceptron_test.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from sklearn.datasets import make_classification 3 | 4 | data, target = make_classification(n_samples=200, n_features=2, n_classes=2, n_informative=1, n_redundant=0, 5 | n_repeated=0, n_clusters_per_class=1) 6 | from ml_models.linear_model import Perceptron 7 | 8 | perceptron = Perceptron() 9 | perceptron.fit(data, target) 10 | perceptron.plot_decision_boundary(data, target) 11 | -------------------------------------------------------------------------------- /tests/phbos_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.chdir('../') 4 | import matplotlib.pyplot as plt 5 | 6 | from sklearn.datasets import make_classification 7 | 8 | X, _ = make_classification(n_samples=5000, n_features=2, 9 | n_informative=2, n_redundant=0, 10 | n_repeated=0, n_classes=3, 11 | n_clusters_per_class=1, weights=[0.01, 0.05, 0.94], 12 | class_sep=0.8, random_state=0) 13 | 14 | from ml_models.outlier_detect import pHbos 15 | 16 | hbos = pHbos() 17 | y = hbos.fit_transform(X) 18 | plt.scatter(x=X[:, 0], y=X[:, 1], c=y) 19 | plt.show() -------------------------------------------------------------------------------- /tests/random_forest_regressor_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.chdir('../') 5 | import matplotlib.pyplot as plt 6 | from ml_models.tree import CARTRegressor 7 | from ml_models.linear_model import LinearRegression 8 | 9 | data = np.linspace(1, 10, num=100) 10 | target = np.sin(data) + np.random.random(size=100) # 添加噪声 11 | data = data.reshape((-1, 1)) 12 | 13 | from ml_models.ensemble import RandomForestRegressor 14 | 15 | model = RandomForestRegressor(base_estimator=[LinearRegression(), CARTRegressor()], n_estimators=10) 16 | model.fit(data, target) 17 | 18 | plt.scatter(data, target) 19 | plt.plot(data, model.predict(data), color='r') 20 | plt.show() 21 | -------------------------------------------------------------------------------- /tests/random_forest_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | os.chdir('../') 4 | 5 | from sklearn import model_selection 6 | 7 | from sklearn.datasets import make_classification 8 | from ml_models import utils 9 | from ml_models.linear_model import LogisticRegression 10 | from ml_models.tree import CARTClassifier 11 | from ml_models.svm import SVC 12 | 13 | 14 | data, target = make_classification(n_samples=100, n_features=2, n_classes=2, n_informative=1, n_redundant=0, 15 | n_repeated=0, n_clusters_per_class=1, class_sep=0.5) 16 | X_train, X_test, y_train, y_test = model_selection.train_test_split(data, target, test_size=0.1) 17 | 18 | from ml_models.ensemble import RandomForestClassifier 19 | 20 | classifier = RandomForestClassifier(base_estimator=CARTClassifier(),n_estimators=10) 21 | classifier.fit(X_train, y_train) 22 | # # 计算F1 23 | from sklearn.metrics import f1_score 24 | print(f1_score(y_test, classifier.predict(X_test))) 25 | print(np.sum(np.abs(y_test - classifier.predict(X_test)))) 26 | # 27 | utils.plot_decision_function(X_train, y_train, classifier) 28 | utils.plt.show() 29 | -------------------------------------------------------------------------------- /tests/sample_weight_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import make_classification, make_moons 2 | import numpy as np 3 | from ml_models import utils 4 | from ml_models.svm import SVC 5 | 6 | X, y = make_classification(n_samples=500, n_features=2, 7 | n_informative=2, n_redundant=0, 8 | n_repeated=0, n_classes=2, 9 | n_clusters_per_class=1, weights=[0.05, 0.95], 10 | class_sep=3, flip_y=0.05, random_state=0) 11 | # X, y = make_moons(noise=0.01) 12 | 13 | weights = np.where(y == 0, 50, 1) 14 | svc_with_sample_weight = SVC(kernel='rbf', gamma=2.0) 15 | svc_with_sample_weight.fit(X, y, sample_weight=weights, show_train_process=True) 16 | utils.plot_decision_function(X=X, y=y, clf=svc_with_sample_weight) 17 | -------------------------------------------------------------------------------- /tests/semi_gaussian_naive_bayes_classifier_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets.samples_generator import make_blobs 2 | from ml_models import utils 3 | 4 | X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0) 5 | X = X[:, ::-1] 6 | 7 | from ml_models.pgm import SemiGaussianNBClassifier 8 | 9 | nb = SemiGaussianNBClassifier(link_rulers=[(0, 1)]) 10 | nb.fit(X, y) 11 | print(nb.predict_proba(X).shape) 12 | utils.plot_decision_function(X, y, nb) 13 | utils.plt.show() 14 | -------------------------------------------------------------------------------- /tests/shap_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets import load_boston 2 | import lightgbm as lgb 3 | from ml_models.explain import Shap, TreeShap 4 | 5 | dataset = load_boston() 6 | x_data = dataset.data # 导入所有特征变量 7 | y_data = dataset.target # 导入目标值(房价) 8 | 9 | lgb_train = lgb.Dataset(x_data, y_data.tolist()) 10 | 11 | params = { 12 | 'boosting_type': 'gbdt', 13 | 'objective': 'regression', 14 | 'max_depth': 3, 15 | 'num_leaves': 10, 16 | 'num_iterations': 10, 17 | 'verbose': 0 18 | } 19 | 20 | # train 21 | gbm = lgb.train(params, lgb_train, valid_sets=lgb_train) 22 | model_json = gbm.dump_model() 23 | print(gbm.predict(x_data, pred_contrib=True)[0]) 24 | shap = Shap(model_json) 25 | print(shap.pred_one_contrib(x_data[0])) 26 | tree_shape = TreeShap(model_json) 27 | print(tree_shape.pred_one_contrib(x_data[0])) 28 | -------------------------------------------------------------------------------- /tests/simple_markov_model_test.py: -------------------------------------------------------------------------------- 1 | from ml_models.pgm.simple_markov_model import SimpleMarkovModel 2 | 3 | train_data = [0, 1, 1, 1, 2, 2, 0, 4, 3, 2] 4 | smm = SimpleMarkovModel(status_num=5) 5 | smm.fit(train_data) 6 | print(smm.predict_log_joint_prob([1, 2])) 7 | print("\n") 8 | print(smm.predict_prob_distribution(time_steps=2)) 9 | print("\n") 10 | print(smm.predict_next_step_prob_distribution(current_status=2)) 11 | print("\n") 12 | print(smm.predict_next_step_status(current_status=2)) 13 | print("\n") 14 | print(smm.generate_status(search_type="beam")) 15 | -------------------------------------------------------------------------------- /tests/spectral_test.py: -------------------------------------------------------------------------------- 1 | from sklearn.datasets.samples_generator import make_blobs 2 | from matplotlib import pyplot as plt 3 | 4 | X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0) 5 | X = X[:, ::-1] 6 | 7 | from ml_models.cluster import Spectral 8 | 9 | spectral = Spectral(n_clusters=4) 10 | 11 | plt.scatter(X[:, 0], X[:, 1], c=spectral.fit_predict(X)) 12 | plt.show() -------------------------------------------------------------------------------- /tests/svc_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.chdir('../') 5 | 6 | from sklearn.datasets import make_classification 7 | from ml_models import utils 8 | 9 | # data, target = make_classification(n_samples=100, n_features=2, n_classes=2, n_informative=1, n_redundant=0, 10 | # n_repeated=0, n_clusters_per_class=1, class_sep=3.0) 11 | from sklearn import datasets 12 | 13 | data, target = datasets.make_moons(noise=0.01) 14 | 15 | from ml_models.svm import SVC 16 | 17 | svm = SVC(C=3.0, kernel='rbf',gamma=0.1, epochs=10, tol=0.2) 18 | # svm = SVC(tol=0.01) 19 | svm.fit(data, target, show_train_process=True) 20 | 21 | # 计算F1 22 | from sklearn.metrics import f1_score 23 | 24 | print(f1_score(target, svm.predict(data))) 25 | print(np.sum(np.abs(target - svm.predict(data)))) 26 | 27 | utils.plt.close() 28 | utils.plot_decision_function(data, target, svm, svm.support_vectors) 29 | utils.plt.show() 30 | print('support vector', svm.support_vectors) 31 | -------------------------------------------------------------------------------- /tests/xgboost_classifier_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | os.chdir('../') 4 | 5 | from sklearn import model_selection 6 | 7 | from sklearn.datasets import make_classification 8 | from ml_models import utils 9 | 10 | 11 | data, target = make_classification(n_samples=100, n_features=2, n_classes=2, n_informative=1, n_redundant=0, 12 | n_repeated=0, n_clusters_per_class=1, class_sep=0.5) 13 | X_train, X_test, y_train, y_test = model_selection.train_test_split(data, target, test_size=0.1) 14 | 15 | from ml_models.ensemble import XGBoostClassifier 16 | 17 | classifier = XGBoostClassifier() 18 | classifier.fit(X_train, y_train) 19 | # # 计算F1 20 | from sklearn.metrics import f1_score 21 | print(f1_score(y_test, classifier.predict(X_test))) 22 | print(np.sum(np.abs(y_test - classifier.predict(X_test)))) 23 | # 24 | utils.plot_decision_function(X_train, y_train, classifier) 25 | utils.plt.show() -------------------------------------------------------------------------------- /tests/xgboost_regressor_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | os.chdir('../') 5 | import matplotlib.pyplot as plt 6 | 7 | data = np.linspace(1, 10, num=100) 8 | target = np.sin(data) + np.random.random(size=100) + 1 # 添加噪声 9 | data = data.reshape((-1, 1)) 10 | 11 | from ml_models.ensemble import XGBoostRegressor 12 | 13 | model = XGBoostRegressor(loss='tweedie', p=1.5) 14 | model.fit(data, target) 15 | 16 | plt.scatter(data, target) 17 | plt.plot(data, model.predict(data), color='r') 18 | plt.show() 19 | --------------------------------------------------------------------------------