├── .gitignore ├── LeetCode ├── 2017-11-24.ipynb ├── 2017-12-05.ipynb └── README.md ├── NLP ├── basic_nlp.ipynb ├── gensim_study.ipynb ├── key_words_extract.ipynb ├── stopwords.txt ├── text_classification.ipynb └── text_summarization.ipynb ├── README.md ├── _config.yml ├── algorithms ├── LSTMs-TensorFlow │ ├── TensorFlow - LSTM.ipynb │ ├── data │ │ ├── README │ │ ├── ptb.char.test.txt │ │ ├── ptb.char.train.txt │ │ ├── ptb.char.valid.txt │ │ ├── ptb.test.txt │ │ ├── ptb.train.txt │ │ └── ptb.valid.txt │ ├── ptb_word_lm.py │ └── reader.py ├── LSTMs.ipynb ├── Time Series Analysis - ARIMA.ipynb ├── Time Series Analysis - Basic.ipynb ├── Time Series Analysis - LSTM.ipynb ├── data │ └── shampoo-sales.csv ├── decision_tree.ipynb ├── distant_compute.ipynb ├── images │ ├── LSTM2-notation.png │ ├── LSTM3-C-line.png │ ├── LSTM3-SimpleRNN.png │ ├── LSTM3-chain.png │ └── RNN-unrolled.png ├── lstm-test │ ├── hist │ │ └── 个体工商户+ARIMA预测.zip │ ├── lstm │ │ ├── predicted-1.csv │ │ ├── predicted-2017-11-13.xlsx │ │ └── predicted.csv │ ├── 周-商事主体新注册总量.xlsx │ ├── 商事主体 - 预警 - ARIMA.ipynb │ └── 月-商事主体新注册总量.xlsx ├── machine_learning.ipynb ├── probabilistic_graph_model.ipynb └── viterbi.ipynb ├── data_analysis ├── EDA - 数据探索性分析.ipynb └── images │ ├── box-plot.png │ └── box-plot1.png ├── html ├── c4w1_notes.html ├── images │ ├── Conv2D.gif │ ├── LogReg_kiank.png │ ├── NlayerNN.png │ ├── activation.jpg │ ├── basic_recipe_for_ML.png │ ├── bias_variance1.png │ ├── cg1.jpg │ ├── classification_kiank.png │ ├── conv3d.png │ ├── convolutions.gif │ ├── dc1.jpg │ ├── dc2.jpg │ ├── dc3.jpg │ ├── dc4.jpg │ ├── dd1.jpg │ ├── dropout1_kiank.mp4 │ ├── dropout2_kiank.mp4 │ ├── edge_detect.png │ ├── final outline.png │ ├── grad_summary1.png │ ├── gradient_descent.jpg │ ├── leaky_relu.jpg │ ├── multi_filters.png │ ├── nn1.jpg │ ├── nn2.jpg │ ├── nn4.jpg │ ├── normalize.png │ ├── normalize2.png │ ├── padding_strides.gif │ ├── pooling.jpg │ ├── relu.jpg │ ├── sgd.gif │ ├── sgd_bad.gif │ ├── sigmoid.jpg │ ├── split.png │ ├── summary_conv.png │ ├── tanh.jpg │ └── 流程图.vsdx ├── ng_第一课_第三周学习笔记.html └── ng_第二课_第一周学习笔记.html ├── manuscripts ├── 22 第二课_第二周学习笔记.ipynb ├── 23 第二课_第三周学习笔记.ipynb ├── Models │ └── model_2 │ │ ├── checkpoint │ │ ├── events.out.tfevents.1509279888.MIKE │ │ ├── events.out.tfevents.1509279977.MIKE │ │ ├── graph.pbtxt │ │ ├── model.ckpt-1.data-00000-of-00001 │ │ ├── model.ckpt-1.index │ │ ├── model.ckpt-1.meta │ │ ├── model.ckpt-10000.data-00000-of-00001 │ │ ├── model.ckpt-10000.index │ │ ├── model.ckpt-10000.meta │ │ ├── model.ckpt-10001.data-00000-of-00001 │ │ ├── model.ckpt-10001.index │ │ ├── model.ckpt-10001.meta │ │ ├── model.ckpt-20000.data-00000-of-00001 │ │ ├── model.ckpt-20000.index │ │ └── model.ckpt-20000.meta ├── datas │ └── shampoo_sales.csv ├── ng_course_Draft.ipynb └── 第三课学习笔记.ipynb ├── notes_deeplearning.ai ├── 11 第一课_第一周学习笔记.ipynb ├── 12 第一课_第二周学习笔记.ipynb ├── 13 第一课_第三周学习笔记.ipynb ├── 14 第一课_第四周学习笔记.ipynb ├── 21 第二课_第一周学习笔记.ipynb ├── 22 第二课_第二周学习笔记-未完成.ipynb ├── 23 第二课_第三周学习笔记-未完成.ipynb ├── 31 第三课_第一周学习笔记-未完成.ipynb ├── 42 第四课_第二周学习笔记-未完成.ipynb ├── 43 第四课_第三周学习笔记-未完成.ipynb ├── C4W1_CNN.ipynb ├── README.md ├── datasets │ ├── test_catvnoncat.h5 │ └── train_catvnoncat.h5 ├── images │ ├── Conv2D.gif │ ├── LogReg_kiank.png │ ├── NlayerNN.png │ ├── activation.jpg │ ├── basic_recipe_for_ML.png │ ├── bias_variance1.png │ ├── cg1.jpg │ ├── classification_kiank.png │ ├── conv3d.png │ ├── convolutions.gif │ ├── dc1.jpg │ ├── dc2.jpg │ ├── dc3.jpg │ ├── dc4.jpg │ ├── dd1.jpg │ ├── dropout1_kiank.mp4 │ ├── 
dropout2_kiank.mp4 │ ├── edge_detect.png │ ├── final outline.png │ ├── grad_summary1.png │ ├── gradient_descent.jpg │ ├── leaky_relu.jpg │ ├── multi_filters.png │ ├── nn1.jpg │ ├── nn2.jpg │ ├── nn4.jpg │ ├── normalize.png │ ├── normalize2.png │ ├── padding_strides.gif │ ├── pooling.jpg │ ├── relu.jpg │ ├── sgd.gif │ ├── sgd_bad.gif │ ├── sigmoid.jpg │ ├── split.png │ ├── summary_conv.png │ ├── tanh.jpg │ └── 流程图.vsdx └── pdf │ ├── 11 第一课_第一周.pdf │ ├── 12 第一课_第二周.pdf │ ├── 13 第一课_第三周.pdf │ ├── 14 第一课_第四周.pdf │ ├── 21 第二课_第一周.pdf │ ├── 22 第二课_第二周.pdf │ ├── 23 第二课_第三周.pdf │ ├── 31 第三课_第一周.pdf │ ├── 32 第三课_第二周.pdf │ ├── 41 第四课_第一周.pdf │ ├── 42 第四课_第二周.pdf │ ├── 43 第四课_第三周.pdf │ ├── 44 第四课_第四周.pdf │ ├── 51 第五课第一周讲义.pdf │ └── 53 第五课第三周讲义.pdf ├── notes_paper ├── Clustering by fast search and find of density peaks │ ├── fast_clustering │ │ ├── Aggregation.txt │ │ ├── Aggregation结果_v3.tif │ │ ├── Clustering_v3.py │ │ ├── Compound.txt │ │ ├── Compound结果_v3.tif │ │ ├── D31.txt │ │ ├── D31结果_v3.tif │ │ ├── Flame.txt │ │ ├── Flame结果_v3.tif │ │ ├── Jain.txt │ │ ├── Jain结果_v3.tif │ │ ├── Pathbased.txt │ │ ├── Pathbased结果_v3.tif │ │ ├── R15.txt │ │ ├── R15结果_v3.tif │ │ ├── Spiral.txt │ │ └── Spiral结果_v3.tif │ └── 文献阅读 - Clustering by Fast Search and Find of Density Peaks.ipynb ├── Personalized News Recommendation Based on Click Behavior │ ├── 文献阅读 — Personalized News Recommendation Based on Click Behavior.ipynb │ └── 研究路线.xmind ├── TextRank-Bring Order into Texts │ ├── Note :TextRank Bringing Order into Texts.ipynb │ └── textrank_sample.data └── papers_for_ dialogue_system.ipynb ├── others ├── cs229 │ ├── cs229-notes4.pdf │ └── cs229-notes5.pdf ├── dokcer_study_notes │ ├── Docker学习笔记.ipynb │ └── images │ │ ├── p001.png │ │ └── p002.png ├── interview.ipynb ├── markdown_on_ipynb │ ├── Markdown 语法示例.ipynb │ └── images │ │ ├── k-means_1.png │ │ └── k-means_2.png ├── mongodb.ipynb └── resume.ipynb └── python ├── Keras 学习笔记 - 以TensorFlow为后端.ipynb ├── MNIST_data ├── t10k-images-idx3-ubyte.gz ├── t10k-labels-idx1-ubyte.gz ├── train-images-idx3-ubyte.gz └── train-labels-idx1-ubyte.gz ├── TensorFlow_学习笔记.ipynb ├── list_学习笔记.ipynb ├── pymongo.ipynb ├── tsfresh_时间序列数据特征提取.ipynb ├── xgboost_study.ipynb ├── 利用百度地图坐标转换API实现gcj02转bd09.ipynb └── 日期和时间的处理.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # self define 2 | .idea/ 3 | .ipynb_checkpoints/ 4 | log/ 5 | *.docx 6 | *.doc 7 | 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *,cover 54 | .hypothesis/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # IPython Notebook 78 | .ipynb_checkpoints 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # dotenv 87 | .env 88 | 89 | # virtualenv 90 | venv/ 91 | ENV/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | -------------------------------------------------------------------------------- /LeetCode/README.md: -------------------------------------------------------------------------------- 1 | # LeetCode刷题记录 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /NLP/basic_nlp.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NLP基础\n", 8 | "\n", 9 | "---\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "def load_data():\n", 21 | " from sklearn.datasets import fetch_20newsgroups\n", 22 | "\n", 23 | " categories = ['alt.atheism', 'soc.religion.christian',\n", 24 | " 'comp.graphics', 'sci.med']\n", 25 | " twenty_train = fetch_20newsgroups(subset='train',\n", 26 | " categories=categories,\n", 27 | " shuffle=True, random_state=68)\n", 28 | " twenty_test = fetch_20newsgroups(subset='test',\n", 29 | " categories=categories,\n", 30 | " shuffle=True,\n", 31 | " random_state=68)\n", 32 | " return twenty_train, twenty_test\n", 33 | "\n", 34 | "# 加载数据集\n", 35 | "train, test = load_data()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## 给定一个语料库,创建词袋模型\n", 43 | "\n", 44 | "---\n", 45 | "\n" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "from gensim.corpora import Dictionary\n", 57 | "\n", 58 | "corpus = [x.split() for x in train['data']]\n", 59 | "\n", 60 | "dct = Dictionary(corpus)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 6, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "[6,\n", 72 | " 39,\n", 73 | " 0,\n", 74 | " 4,\n", 75 | " 7,\n", 76 | " 14,\n", 77 | " 13,\n", 78 | " 32,\n", 79 | " 15,\n", 80 | " 13,\n", 81 | " 32,\n", 82 | " 9,\n", 83 | " 50,\n", 84 | " 11,\n", 85 | " 27,\n", 86 | " 12,\n", 87 | " 17,\n", 88 | " 42,\n", 89 | " 3,\n", 90 | " 10,\n", 91 | " 1,\n", 92 | " 8,\n", 93 | " 20,\n", 94 | " 35,\n", 95 | " 33,\n", 96 | " 22,\n", 97 | " 34,\n", 98 | " 43,\n", 99 | " 50,\n", 100 | " 32,\n", 101 | " 43,\n", 102 | " 18,\n", 103 | " 53,\n", 104 | " 59,\n", 105 | " 5,\n", 106 | " 55,\n", 107 | " 40,\n", 108 | " 44,\n", 109 | " 8,\n", 110 | " 41,\n", 111 | " 57,\n", 112 | " 38,\n", 113 | " 30,\n", 114 | " 56,\n", 115 | " 37,\n", 116 | " 22,\n", 117 | " 29,\n", 118 
| " 25,\n", 119 | " 57,\n", 120 | " 36,\n", 121 | " 55,\n", 122 | " 54,\n", 123 | " 21,\n", 124 | " 58,\n", 125 | " 55,\n", 126 | " 48,\n", 127 | " 47,\n", 128 | " 24,\n", 129 | " 52,\n", 130 | " 57,\n", 131 | " 45,\n", 132 | " 46,\n", 133 | " 31,\n", 134 | " 49,\n", 135 | " 2,\n", 136 | " 34,\n", 137 | " 19,\n", 138 | " 55,\n", 139 | " 51,\n", 140 | " 60,\n", 141 | " 26,\n", 142 | " 28,\n", 143 | " 23,\n", 144 | " 16]" 145 | ] 146 | }, 147 | "execution_count": 6, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "dct.doc2idx(train['data'][0].split())" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": true 161 | }, 162 | "outputs": [], 163 | "source": [] 164 | } 165 | ], 166 | "metadata": { 167 | "kernelspec": { 168 | "display_name": "Python 3", 169 | "language": "python", 170 | "name": "python3" 171 | }, 172 | "language_info": { 173 | "codemirror_mode": { 174 | "name": "ipython", 175 | "version": 3 176 | }, 177 | "file_extension": ".py", 178 | "mimetype": "text/x-python", 179 | "name": "python", 180 | "nbconvert_exporter": "python", 181 | "pygments_lexer": "ipython3", 182 | "version": "3.6.2" 183 | } 184 | }, 185 | "nbformat": 4, 186 | "nbformat_minor": 2 187 | } 188 | -------------------------------------------------------------------------------- /NLP/gensim_study.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# gensim学习笔记\n", 9 | "---\n", 10 | "\n", 11 | "gensim是一个主题模型(topic model)工具包\n", 12 | "\n", 13 | "\n", 14 | "* https://github.com/zengbin93/blog/issues/36\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## 构建词袋 - Dictionary\n", 22 | "\n", 23 | "---\n", 24 | "\n", 25 | "所谓词袋,就是词与数字的一一映射。" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "from gensim.corpora import Dictionary\n", 37 | "\n", 38 | "corpus = [\"máma mele maso\".split(), \"ema má máma\".split()]\n", 39 | "\n", 40 | "dct = Dictionary(corpus)\n", 41 | "dct.add_documents([[\"this\", \"is\", \"sparta\"], [\"just\", \"joking\"]])" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 8, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "ValuesView()" 53 | ] 54 | }, 55 | "execution_count": 8, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": true 67 | }, 68 | "outputs": [], 69 | "source": [] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "Python 3", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.6.2" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 2 93 | } 94 | -------------------------------------------------------------------------------- /NLP/key_words_extract.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | 
"cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 关键词提取\n", 8 | "\n", 9 | "---\n", 10 | "\n", 11 | "关键词提取是指从文本中确定一些能够描述文档含义的术语的过程。\n", 12 | "\n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## TextRank\n", 20 | "\n", 21 | "---\n", 22 | "\n", 23 | "\n", 24 | "\n", 25 | "\n", 26 | "\n", 27 | "TextRank - A Graph-Based Ranking Model\n", 28 | "\n", 29 | "TextRank是一个无监督算法,本质上是一个用于文本处理的图排序模型。对于图排序模型来说,它需要做的就是找到一种方法来计算图中任一顶点(Vertex)的重要性,这个方法需要依赖整个图的全局信息(global information)进行递归计算,而不仅仅是任一顶点的局部信息(local vertex-specific information)。\n", 30 | "\n", 31 | "PageRank中的顶点重要性计算方法:\n", 32 | "\n", 33 | "G=(V, E)\n", 34 | "V - 顶点集\n", 35 | "E - 边集\n", 36 | "\n", 37 | "顶点Vi的计算公式为:\n", 38 | "\n", 39 | "$$S(V_i)=(1-d)+d\\cdot\\sum_{j\\in In(V_i)}\\frac{1}{|Out(V_j)|}S(V_j)$$\n", 40 | "\n", 41 | "其中,\n", 42 | "d - 阻尼系数,取值范围[0, 1];这个系数的作用是引入一个从给定顶点到任意顶点的概率;通常设置为0.85\n", 43 | "Out(Vi) - 从顶点Vi出发的边的集合\n", 44 | "In(Vi) - 到达顶点Vi的边的集合\n", 45 | "\n", 46 | "为图中的任一顶点设置一个任意值,迭代计算,直到小于一个给定的阈值(threshold)。计算结束以后,每一个顶点都会有一个对应的得分,这个就是顶点在图中的重要性。\n", 47 | "\n", 48 | "PageRank算法的最终结果与初始值无关,但是迭代次数的不同会对结果有一定的影响。\n", 49 | "\n", 50 | "特别需要注意的是:除了PageRank之外,其他的基于图的排序算法也可以与TextRank模型进行整合,如:HITS、Positional Function等。\n", 51 | "\n", 52 | "\n", 53 | "使用TextRank进行关键词提取\n", 54 | "\n", 55 | "输出:一组能够代表给定文本的词或短语的集合。\n", 56 | "\n", 57 | "语法过滤器(syntactic filter):根据语法成分对词进行筛选,比如,仅保留名词和动词用于构建图。\n", 58 | "\n", 59 | "仅使用单个词作为顶点,\n", 60 | "\n", 61 | "\n", 62 | "\n", 63 | "\n", 64 | "\n", 65 | "### 参考资料\n", 66 | "\n", 67 | "\n", 68 | "* https://github.com/letiantian/TextRank4ZH\n", 69 | "* [pdf - TextRank: Bringing Order into Texts](https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf)\n", 70 | "* [TextRank算法提取关键词和摘要](http://xiaosheng.me/2017/04/08/article49/)\n", 71 | "* [文献阅读 — TextRank: Bringing Order into Texts](https://www.jianshu.com/p/e9d251d1bcdc)\n" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## RAKE\n", 79 | "---\n", 80 | "\n", 81 | "全称:Rapid Automatic Keyword Extraction\n", 82 | "\n", 83 | "https://github.com/aneesha/RAKE" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "collapsed": true 91 | }, 92 | "outputs": [], 93 | "source": [] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "Python 3", 99 | "language": "python", 100 | "name": "python3" 101 | }, 102 | "language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 3 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython3", 112 | "version": "3.6.2" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 2 117 | } 118 | -------------------------------------------------------------------------------- /NLP/text_summarization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 文本摘要\n", 8 | "\n", 9 | "---\n", 10 | "\n", 11 | "目前,自动文档摘要技术主要分为抽取式(extractive)和摘要式(又叫生成式)(abstractive)两种。\n", 12 | "\n", 13 | " \n", 14 | "\n", 15 | "抽取式摘要相对较为成熟。这种方法利用如 text rank 这样的排序算法,对处理后的文章语句进行排序。不过抽取式摘要在语义理解方面考虑较少,无法建立文本段落中的完整的语义信息。\n", 16 | "\n", 17 | " \n", 18 | "\n", 19 | "相较而言,生成式技术需要让模型理解文章语义后总结出摘要,更类似人类的做法。不过这种技术需要使用机器学习技术,长期以来并不成熟。转折点出现在 2014 年。这一年,Bengio 等人发表论文 Learning Phrase Representations using 
RNN Encoder-Decoder for Statistical Machine Translation,正式引入了 sequence-to-sequence 模型。这一模型通过两个循环神经网络,分别把输入文本转化成向量,再把向量转成输出序列。这种模型在论文中主要用来完成机器翻译任务,并且后来被应用在谷歌翻译中,但后续在文摘生成任务中也产生了广泛的应用。此后,这种利用深度学习的 sequence-to-sequence 方法不断被改进,在一些标准的评测数据集(如 DUC-2004)上,已经超过了传统的抽取式方法。\n", 20 | "\n", 21 | " \n", 22 | "\n", 23 | "例如,2016 年,Facebook AI 实验室(FAIR)的学者发表论文 A Convolutional Encoder Model for Neural Machine Translation,在编码的时候用 CNN 取代 RNN,获得不错的效果。Salesforce 的研究人员 2017 年发表的论文 A Deep Reinforced Model for Abstractive Summarization 中,使用了增强学习,在 CNN/Daily Mail 数据集上的 ROUGE-1 分数达到 41.16 分。同年,又是 FAIR 发表了论文 Convolutional Sequence to Sequence Learning,引入 attention 机制,不仅提高了评测分数,还极大地提升了速度。\n", 24 | "\n", 25 | " \n", 26 | "\n", 27 | "2016 年,来自 IBM 沃森的研究人员发表论文 Abstractive Text Summarization Using Sequence-to-Sequence RNNs and Beyond,和之前的论文不同,这篇论文把机器翻译和文本摘要任务完全分开,专门针对文本摘要提出了更合适的模型,除此之外,文章还发布了两个新的数据集。\n", 28 | "\n", 29 | " \n", 30 | "\n", 31 | "中国也有许多学者在从事相关工作,例如北大的万小军老师。他和姚金戈的综述《自动文摘研究进展与趋势》把摘要技术框架总结成 4 个步骤:内容表示 → 权重计算 → 内容选择 → 内容组织,并对每个步骤都作了介绍。" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## 参考资料\n", 39 | "\n", 40 | "---\n", 41 | "\n", 42 | "* [万小军、姚金戈 - 自动文摘研究进展与趋势](http://qngw2014.bj.bcebos.com/zhuankan/12/%E8%87%AA%E5%8A%A8%E6%96%87%E6%91%98%E7%A0%94%E7%A9%B6%E8%BF%9B%E5%B1%95%E4%B8%8E%E8%B6%8B%E5%8A%BF.pdf)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": true 50 | }, 51 | "outputs": [], 52 | "source": [] 53 | } 54 | ], 55 | "metadata": { 56 | "kernelspec": { 57 | "display_name": "Python 3", 58 | "language": "python", 59 | "name": "python3" 60 | }, 61 | "language_info": { 62 | "codemirror_mode": { 63 | "name": "ipython", 64 | "version": 3 65 | }, 66 | "file_extension": ".py", 67 | "mimetype": "text/x-python", 68 | "name": "python", 69 | "nbconvert_exporter": "python", 70 | "pygments_lexer": "ipython3", 71 | "version": "3.6.2" 72 | } 73 | }, 74 | "nbformat": 4, 75 | "nbformat_minor": 2 76 | } 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # blog - Z 2 | >blog是我的博客仓库,主要是存放一些博客写作想法和草稿,当然,写好的博客也会在这里有一个备份,以便持续完善。 3 | >blog正式发布地址: 4 | >* [zengbin-简书](http://www.jianshu.com/u/0cd3889d64bb) 5 | >* [github pages](https://zengbin93.github.io/blog/) 6 | 7 | --- 8 | 概要 9 | * algorithms 10 | * notes_deeplearning.ai 11 | * notes_paper 12 | * python 13 | 14 | --- 15 | ## 1 - algorithms 16 | 17 | 算法学习笔记,这个是我的blog中最主要的内容。 18 | 19 | ## 2 - notes_deeplearning.ai 20 | 21 | 吴恩达最新机器学习课程的学习笔记、作业等。 22 | * [第二课第一周学习笔记](https://zengbin93.github.io/blog/html/ng_%E7%AC%AC%E4%BA%8C%E8%AF%BE_%E7%AC%AC%E4%B8%80%E5%91%A8%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0.html) 23 | * [第一课第三周学习笔记](https://zengbin93.github.io/blog/html/ng_%E7%AC%AC%E4%B8%80%E8%AF%BE_%E7%AC%AC%E4%B8%89%E5%91%A8%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0.html) 24 | 25 | 26 | ## 3 - notes_paper 27 | 28 | 一些经典文献的阅读笔记。 29 | 30 | ## 4 - python 31 | 32 | python是我的主要开发语言,边用边学,写点笔记帮助自己系统化相关知识。 33 | 34 | ## 5 - others 35 | 36 | 一些其他技术学习笔记,如Docker、Git等。 37 | 38 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /algorithms/LSTMs-TensorFlow/TensorFlow - LSTM.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# sample: TensorFlow - LSTM\n", 8 | "---\n", 9 | "Trains the model described in:\n", 10 | "(Zaremba, et. al.) Recurrent Neural Network Regularization\n", 11 | "http://arxiv.org/abs/1409.2329\n", 12 | "\n", 13 | "数据集下载:\n", 14 | "\n", 15 | "$ wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz\n", 16 | "\n", 17 | "$ tar xvf simple-examples.tgz\n", 18 | " \n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "import inspect\n", 30 | "import time\n", 31 | "import numpy as np\n", 32 | "import tensorflow as tf\n", 33 | "# 导入自定义库\n", 34 | "import reader" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [] 45 | } 46 | ], 47 | "metadata": { 48 | "kernelspec": { 49 | "display_name": "Python 3", 50 | "language": "python", 51 | "name": "python3" 52 | }, 53 | "language_info": { 54 | "codemirror_mode": { 55 | "name": "ipython", 56 | "version": 3 57 | }, 58 | "file_extension": ".py", 59 | "mimetype": "text/x-python", 60 | "name": "python", 61 | "nbconvert_exporter": "python", 62 | "pygments_lexer": "ipython3", 63 | "version": "3.6.1" 64 | } 65 | }, 66 | "nbformat": 4, 67 | "nbformat_minor": 2 68 | } 69 | -------------------------------------------------------------------------------- /algorithms/LSTMs-TensorFlow/data/README: -------------------------------------------------------------------------------- 1 | Data description: 2 | 3 | Penn Treebank Corpus 4 | - should be free for research purposes 5 | - the same processing of data as used in many LM papers, including "Empirical Evaluation and Combination of Advanced Language Modeling Techniques" 6 | - ptb.train.txt: train set 7 | - ptb.valid.txt: development set (should be used just for tuning hyper-parameters, but not for training) 8 | - ptb.test.txt: test set for reporting perplexity 9 | 10 | - ptb.char.*: the same data, just rewritten as sequences of characters, with spaces rewritten as '_' - useful for training character based models, as is shown in example 9 11 | -------------------------------------------------------------------------------- /algorithms/LSTMs-TensorFlow/reader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | 17 | """Utilities for parsing PTB text files.""" 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import collections 23 | import os 24 | import sys 25 | 26 | import tensorflow as tf 27 | 28 | 29 | def _read_words(filename): 30 | with tf.gfile.GFile(filename, "r") as f: 31 | if sys.version_info[0] >= 3: 32 | return f.read().replace("\n", "").split() 33 | else: 34 | return f.read().decode("utf-8").replace("\n", "").split() 35 | 36 | 37 | def _build_vocab(filename): 38 | data = _read_words(filename) 39 | 40 | counter = collections.Counter(data) 41 | count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0])) 42 | 43 | words, _ = list(zip(*count_pairs)) 44 | word_to_id = dict(zip(words, range(len(words)))) 45 | 46 | return word_to_id 47 | 48 | 49 | def _file_to_word_ids(filename, word_to_id): 50 | data = _read_words(filename) 51 | return [word_to_id[word] for word in data if word in word_to_id] 52 | 53 | 54 | def ptb_raw_data(data_path=None): 55 | """Load PTB raw data from data directory "data_path". 56 | 57 | Reads PTB text files, converts strings to integer ids, 58 | and performs mini-batching of the inputs. 59 | 60 | The PTB dataset comes from Tomas Mikolov's webpage: 61 | 62 | http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz 63 | 64 | Args: 65 | data_path: string path to the directory where simple-examples.tgz has 66 | been extracted. 67 | 68 | Returns: 69 | tuple (train_data, valid_data, test_data, vocabulary) 70 | where each of the data objects can be passed to PTBIterator. 71 | """ 72 | 73 | train_path = os.path.join(data_path, "ptb.train.txt") 74 | valid_path = os.path.join(data_path, "ptb.valid.txt") 75 | test_path = os.path.join(data_path, "ptb.test.txt") 76 | 77 | word_to_id = _build_vocab(train_path) 78 | train_data = _file_to_word_ids(train_path, word_to_id) 79 | valid_data = _file_to_word_ids(valid_path, word_to_id) 80 | test_data = _file_to_word_ids(test_path, word_to_id) 81 | vocabulary = len(word_to_id) 82 | return train_data, valid_data, test_data, vocabulary 83 | 84 | 85 | def ptb_producer(raw_data, batch_size, num_steps, name=None): 86 | """Iterate on the raw PTB data. 87 | 88 | This chunks up raw_data into batches of examples and returns Tensors that 89 | are drawn from these batches. 90 | 91 | Args: 92 | raw_data: one of the raw data outputs from ptb_raw_data. 93 | batch_size: int, the batch size. 94 | num_steps: int, the number of unrolls. 95 | name: the name of this operation (optional). 96 | 97 | Returns: 98 | A pair of Tensors, each shaped [batch_size, num_steps]. The second element 99 | of the tuple is the same data time-shifted to the right by one. 100 | 101 | Raises: 102 | tf.errors.InvalidArgumentError: if batch_size or num_steps are too high. 
103 | """ 104 | with tf.name_scope(name, "PTBProducer", [raw_data, batch_size, num_steps]): 105 | raw_data = tf.convert_to_tensor(raw_data, name="raw_data", dtype=tf.int32) 106 | 107 | data_len = tf.size(raw_data) 108 | batch_len = data_len // batch_size 109 | data = tf.reshape(raw_data[0 : batch_size * batch_len], 110 | [batch_size, batch_len]) 111 | 112 | epoch_size = (batch_len - 1) // num_steps 113 | assertion = tf.assert_positive( 114 | epoch_size, 115 | message="epoch_size == 0, decrease batch_size or num_steps") 116 | with tf.control_dependencies([assertion]): 117 | epoch_size = tf.identity(epoch_size, name="epoch_size") 118 | 119 | i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue() 120 | x = tf.strided_slice(data, [0, i * num_steps], 121 | [batch_size, (i + 1) * num_steps]) 122 | x.set_shape([batch_size, num_steps]) 123 | y = tf.strided_slice(data, [0, i * num_steps + 1], 124 | [batch_size, (i + 1) * num_steps + 1]) 125 | y.set_shape([batch_size, num_steps]) 126 | return x, y 127 | -------------------------------------------------------------------------------- /algorithms/Time Series Analysis - Basic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 时间序列分析基础\n", 8 | "---\n", 9 | "\n", 10 | "内容概要:\n" 11 | ] 12 | } 13 | ], 14 | "metadata": { 15 | "kernelspec": { 16 | "display_name": "Python 3", 17 | "language": "python", 18 | "name": "python3" 19 | }, 20 | "language_info": { 21 | "codemirror_mode": { 22 | "name": "ipython", 23 | "version": 3 24 | }, 25 | "file_extension": ".py", 26 | "mimetype": "text/x-python", 27 | "name": "python", 28 | "nbconvert_exporter": "python", 29 | "pygments_lexer": "ipython3", 30 | "version": "3.6.1" 31 | } 32 | }, 33 | "nbformat": 4, 34 | "nbformat_minor": 2 35 | } 36 | -------------------------------------------------------------------------------- /algorithms/data/shampoo-sales.csv: -------------------------------------------------------------------------------- 1 | "Month","Sales" 2 | "1-01",266.0 3 | "1-02",145.9 4 | "1-03",183.1 5 | "1-04",119.3 6 | "1-05",180.3 7 | "1-06",168.5 8 | "1-07",231.8 9 | "1-08",224.5 10 | "1-09",192.8 11 | "1-10",122.9 12 | "1-11",336.5 13 | "1-12",185.9 14 | "2-01",194.3 15 | "2-02",149.5 16 | "2-03",210.1 17 | "2-04",273.3 18 | "2-05",191.4 19 | "2-06",287.0 20 | "2-07",226.0 21 | "2-08",303.6 22 | "2-09",289.9 23 | "2-10",421.6 24 | "2-11",264.5 25 | "2-12",342.3 26 | "3-01",339.7 27 | "3-02",440.4 28 | "3-03",315.9 29 | "3-04",439.3 30 | "3-05",401.3 31 | "3-06",437.4 32 | "3-07",575.5 33 | "3-08",407.6 34 | "3-09",682.0 35 | "3-10",475.3 36 | "3-11",581.3 37 | "3-12",646.9 -------------------------------------------------------------------------------- /algorithms/distant_compute.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 相似性计算方法汇总\n", 8 | "\n", 9 | "---\n", 10 | "\n", 11 | "常用的相似性计算方法有:1)欧式距离(Euclidean Distance); 2)曼哈顿距离(Manhattan Distance); 3)闵式距离(Minkowski Distance); 4)余弦距离; 5)动态时间规整(DTW);" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import numpy as np\n", 23 | "\n", 24 | "vector1 = [8.92, 8.71, 8.77, 8.75, 8.74, 8.71, 8.66, 8.71, 8.74, 8.8, 8.8, \n", 25 | " 8.79, 8.64, 8.56, 8.56, 8.43, 8.38, 
8.42]\n", 26 | "vector2 = [32.99, 32.27, 32.16, 31.93, 32.8, 33.16, 32.59, 32.61, 29.35, \n", 27 | " 28.41, 27.85, 28.62, 28.62, 29.35, 30.14, 29.34, 28.88, 29.05]\n", 28 | "\n", 29 | "# vector1 = [1, 1, 1, 1]\n", 30 | "# vector2 = [1, 1, 1, 1]\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "93.19353786609884\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "def euclidean_distant(vector1, vector2):\n", 48 | " \"\"\"欧式距离\"\"\"\n", 49 | " vector1 = np.mat(vector1)\n", 50 | " vector2 = np.mat(vector2)\n", 51 | " return np.sqrt((vector1-vector2)*((vector1-vector2).T)).item()\n", 52 | " \n", 53 | "print(euclidean_distant(vector1, vector2))" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 76, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "name": "stdout", 63 | "output_type": "stream", 64 | "text": [ 65 | "394.03\n" 66 | ] 67 | } 68 | ], 69 | "source": [ 70 | "def manhattan_distant(vector1, vector2):\n", 71 | " \"\"\"曼哈顿距离\"\"\"\n", 72 | " vector1 = np.mat(vector1)\n", 73 | " vector2 = np.mat(vector2)\n", 74 | " return np.sum(np.abs(vector1-vector2))\n", 75 | "\n", 76 | "print(manhattan_distant(vector1, vector2))" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 77, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "0.9983665339530308\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "def cosine_distant(vector1, vector2):\n", 94 | " \"\"\"余弦距离\"\"\"\n", 95 | " vector1 = np.mat(vector1)\n", 96 | " vector2 = np.mat(vector2)\n", 97 | " vector1_norm = np.linalg.norm(vector1)\n", 98 | " vector2_norm = np.linalg.norm(vector2)\n", 99 | " dot_norm = vector1_norm * vector2_norm\n", 100 | " dot_vs = np.dot(vector1, vector2)\n", 101 | " return np.divide(dot_vs, dot_norm).item()\n", 102 | "\n", 103 | "print(cosine_distant(vector1, vector2)) " 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 85, 109 | "metadata": {}, 110 | "outputs": [ 111 | { 112 | "data": { 113 | "text/plain": [ 114 | "0.6778430167108138" 115 | ] 116 | }, 117 | "execution_count": 85, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "def pearson_similar(vector1,vector2): \n", 124 | " \"\"\"皮尔逊相关系数\"\"\"\n", 125 | " if len(vector1)<3: \n", 126 | " return 1.0 \n", 127 | " return 0.5+0.5*np.corrcoef(vector1,vector2,rowvar=0)[0][1] \n", 128 | "\n", 129 | "pearson_similar(vector1,vector2)" 130 | ] 131 | } 132 | ], 133 | "metadata": { 134 | "kernelspec": { 135 | "display_name": "Python 3", 136 | "language": "python", 137 | "name": "python3" 138 | }, 139 | "language_info": { 140 | "codemirror_mode": { 141 | "name": "ipython", 142 | "version": 3 143 | }, 144 | "file_extension": ".py", 145 | "mimetype": "text/x-python", 146 | "name": "python", 147 | "nbconvert_exporter": "python", 148 | "pygments_lexer": "ipython3", 149 | "version": "3.6.2" 150 | } 151 | }, 152 | "nbformat": 4, 153 | "nbformat_minor": 2 154 | } 155 | -------------------------------------------------------------------------------- /algorithms/images/LSTM2-notation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/algorithms/images/LSTM2-notation.png 
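Note on the similarity measures above: the distant_compute.ipynb notebook lists 动态时间规整 (DTW, dynamic time warping) among the methods, but only the Euclidean, Manhattan, cosine and Pearson versions are implemented in its cells. Below is a minimal sketch of the classic O(n·m) DTW distance, added here for illustration only; it assumes plain numeric sequences such as the vector1/vector2 defined in that notebook and is not part of the repository code.

```python
import numpy as np

def dtw_distance(s1, s2):
    """Dynamic time warping distance between two numeric sequences.

    Unlike the point-wise Euclidean distance, DTW lets the two sequences be
    locally stretched or compressed before the point costs are accumulated.
    """
    n, m = len(s1), len(s2)
    cost = np.full((n + 1, m + 1), np.inf)  # cost[i, j]: best alignment of s1[:i] with s2[:j]
    cost[0, 0] = 0.0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            d = abs(s1[i - 1] - s2[j - 1])
            # cheapest of the three admissible moves
            cost[i, j] = d + min(cost[i - 1, j],      # step in s1 only
                                 cost[i, j - 1],      # step in s2 only
                                 cost[i - 1, j - 1])  # step in both
    return cost[n, m]

# e.g. dtw_distance(vector1, vector2) with the two series from the notebook
```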
-------------------------------------------------------------------------------- /algorithms/images/LSTM3-C-line.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/algorithms/images/LSTM3-C-line.png -------------------------------------------------------------------------------- /algorithms/images/LSTM3-SimpleRNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/algorithms/images/LSTM3-SimpleRNN.png -------------------------------------------------------------------------------- /algorithms/images/LSTM3-chain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/algorithms/images/LSTM3-chain.png -------------------------------------------------------------------------------- /algorithms/images/RNN-unrolled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/algorithms/images/RNN-unrolled.png -------------------------------------------------------------------------------- /algorithms/lstm-test/hist/个体工商户+ARIMA预测.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/algorithms/lstm-test/hist/个体工商户+ARIMA预测.zip -------------------------------------------------------------------------------- /algorithms/lstm-test/lstm/predicted-1.csv: -------------------------------------------------------------------------------- 1 | y,y_hat 2 | 2258,3002.7829213142395 3 | 2951,2499.669635295868 4 | 2575,2819.228071860969 5 | 1732,2717.240808278322 6 | 2225,2133.5727856457233 7 | 2537,2241.7485314309597 8 | 2331,2468.909019768238 9 | 2887,2400.5339878350496 10 | 1601,2706.740379050374 11 | 3262,2103.9932866692543 12 | 2833,3012.4221998229623 13 | 2712,3004.1911225020885 14 | 2650,2817.361780717969 15 | 2362,2727.0890596210957 16 | 2397,2515.782590970397 17 | 2455,2452.8630428165197 18 | 2220,2466.608828276396 19 | 1976,2318.8960163742304 20 | 2536,2104.817786619067 21 | 2418,2392.9548252671957 22 | 2852,2414.862387701869 23 | 3405,2667.5498677566648 24 | 2180,3104.66360770911 25 | 4739,2579.298448383808 26 | 4546,4253.989244610071 27 | 681,4581.208477795124 28 | 2787,2447.3865847587585 29 | 1878,3402.5261708498 30 | 2206,2789.5401161909103 31 | 2184,2677.1628938913345 32 | 2330,2603.6611322164536 33 | 2217,2608.128871589899 34 | 2236,2508.8604688048363 35 | 2259,2446.5942722857 36 | 2181,2416.60184776783 37 | 2440,2335.4636971205473 38 | 2449,2448.5571070089936 39 | 2847,2481.5291820317507 40 | 1636,2721.132608219981 41 | 1577,2121.838203251362 42 | 1013,1801.8560657799244 43 | 346,1367.9965828359127 44 | 54,794.7063881456852 45 | 886,413.0101368427276 46 | 1641,835.1548262611032 47 | 2335,1407.806670062244 48 | 2592,1997.0018365308642 49 | 2945,2344.728709772229 50 | 3082,2660.4765961393714 51 | 2430,2864.0609827041626 52 | 4030,2561.456067085266 53 | 1761,3534.283471375704 54 | 3059,2713.798982977867 55 | 3304,3122.47153057158 56 | 3153,3318.7065191045403 57 | 2295,3253.1878931447864 58 | 3274,2713.03217035532 59 | 3980,3159.049605730921 60 | 2932,3722.472348295152 61 | 1096,3322.4121809899807 62 | 
1850,2032.256426692009 63 | 1848,2110.786667138338 64 | 2025,2083.6460915207863 65 | 2671,2134.4841483682394 66 | 2769,2540.0530918613076 67 | 2394,2720.6573148667812 68 | 2147,2530.9141642451286 69 | 1677,2290.8320877104998 70 | 2275,1926.5091118365526 71 | 1858,2186.4813525117934 72 | 2137,2021.1142916977406 73 | 2313,2097.4547583200037 74 | 1223,2237.6190911605954 75 | 1257,1648.9488433301449 76 | 1259,1418.889237433672 77 | 1440,1355.1993656754494 78 | 2093,1432.176352467388 79 | 137,1864.9420645087957 80 | 666,948.2514091730118 81 | 785,874.1015270650387 82 | 1196,897.6612575426698 83 | 507,1133.2863474190235 84 | -------------------------------------------------------------------------------- /algorithms/lstm-test/lstm/predicted-2017-11-13.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/algorithms/lstm-test/lstm/predicted-2017-11-13.xlsx -------------------------------------------------------------------------------- /algorithms/lstm-test/lstm/predicted.csv: -------------------------------------------------------------------------------- 1 | y,y_hat 2 | 2258,3881.06780064106 3 | 2951,2198.545772433281 4 | 2575,2891.545772433281 5 | 1732,2519.4455285072327 6 | 2225,2113.7214418649673 7 | 2537,2165.545772433281 8 | 2331,2477.545772433281 9 | 2887,2271.545772433281 10 | 1601,2827.545772433281 11 | 3262,1949.913287639618 12 | 2833,3202.545772433281 13 | 2712,2878.4894454479218 14 | 2650,2652.545772433281 15 | 2362,2590.545772433281 16 | 2397,2302.545772433281 17 | 2455,2337.545772433281 18 | 2220,2395.545772433281 19 | 1976,2160.545772433281 20 | 2536,1916.545772433281 21 | 2418,2476.545772433281 22 | 2852,2358.545772433281 23 | 3405,2792.545772433281 24 | 2180,3345.545772433281 25 | 4739,2533.9091069698334 26 | 4546,4679.545772433281 27 | 681,4486.545772433281 28 | 2787,4555.994182109833 29 | 1878,2727.545772433281 30 | 2206,2257.7899396419525 31 | 2184,2146.545772433281 32 | 2330,2124.545772433281 33 | 2217,2270.545772433281 34 | 2236,2157.545772433281 35 | 2259,2176.545772433281 36 | 2181,2199.545772433281 37 | 2440,2121.545772433281 38 | 2449,2380.545772433281 39 | 2847,2389.545772433281 40 | 1636,2787.545772433281 41 | 1577,1991.0556136369705 42 | 1013,1517.545772433281 43 | 346,1315.8658999204636 44 | 54,422.8371428251266 45 | 886,-5.454227566719105 46 | 1641,826.545772433281 47 | 2335,1581.545772433281 48 | 2592,2275.545772433281 49 | 2945,2532.545772433281 50 | 3082,2885.545772433281 51 | 2430,3022.545772433281 52 | 4030,2830.838533759117 53 | 1761,3970.545772433281 54 | 3059,3792.86280977726 55 | 3304,2999.545772433281 56 | 3153,3244.545772433281 57 | 2295,3093.545772433281 58 | 3274,2678.9668942689896 59 | 3980,3214.545772433281 60 | 2932,3920.545772433281 61 | 1096,3300.405822634697 62 | 1850,-485.02903294563293 63 | 1848,1790.545772433281 64 | 2025,1788.545772433281 65 | 2671,1965.545772433281 66 | 2769,2611.545772433281 67 | 2394,2709.545772433281 68 | 2147,2336.5387675762177 69 | 1677,2087.545772433281 70 | 2275,1800.655594110489 71 | 1858,2215.545772433281 72 | 2137,1880.6116771697998 73 | 2313,2077.545772433281 74 | 1223,2253.545772433281 75 | 1257,1587.9658222198486 76 | 1259,1197.545772433281 77 | 1440,1199.545772433281 78 | 2093,1380.545772433281 79 | 137,2033.545772433281 80 | 666,1572.1299901008606 81 | 785,606.545772433281 82 | 1196,725.545772433281 83 | 507,1136.545772433281 84 | 
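The predicted.csv and predicted-1.csv files above pair the actual values (y) with the LSTM predictions (y_hat). A small self-contained way to score such a file is sketched below; it is illustrative only (not part of the repository) and uses the repository path of predicted-1.csv shown above.

```python
import numpy as np
import pandas as pd

def score_predictions(path):
    """Return (RMSE, MAE) for a CSV file with columns y and y_hat."""
    df = pd.read_csv(path)
    err = df["y"] - df["y_hat"]
    rmse = float(np.sqrt(np.mean(err ** 2)))  # root mean squared error
    mae = float(np.mean(np.abs(err)))         # mean absolute error
    return rmse, mae

rmse, mae = score_predictions("algorithms/lstm-test/lstm/predicted-1.csv")
print("RMSE = %.1f, MAE = %.1f" % (rmse, mae))
```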
-------------------------------------------------------------------------------- /algorithms/lstm-test/周-商事主体新注册总量.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/algorithms/lstm-test/周-商事主体新注册总量.xlsx -------------------------------------------------------------------------------- /algorithms/lstm-test/月-商事主体新注册总量.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/algorithms/lstm-test/月-商事主体新注册总量.xlsx -------------------------------------------------------------------------------- /algorithms/machine_learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 机器学习基础\n", 8 | "\n", 9 | "---" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### Cross Validation & N-fold Cross Validation\n", 17 | "---\n", 18 | "\n", 19 | "<>\n", 20 | "\n", 21 | "![cross validation](https://upload-images.jianshu.io/upload_images/4604328-a6843d1f80327de8.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)\n", 22 | "\n", 23 | "![N-fold cross validation](https://upload-images.jianshu.io/upload_images/4604328-f13da4bad18ceb52.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [] 34 | } 35 | ], 36 | "metadata": { 37 | "kernelspec": { 38 | "display_name": "Python 3", 39 | "language": "python", 40 | "name": "python3" 41 | }, 42 | "language_info": { 43 | "codemirror_mode": { 44 | "name": "ipython", 45 | "version": 3 46 | }, 47 | "file_extension": ".py", 48 | "mimetype": "text/x-python", 49 | "name": "python", 50 | "nbconvert_exporter": "python", 51 | "pygments_lexer": "ipython3", 52 | "version": "3.6.2" 53 | } 54 | }, 55 | "nbformat": 4, 56 | "nbformat_minor": 2 57 | } 58 | -------------------------------------------------------------------------------- /algorithms/probabilistic_graph_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 概率图模型\n", 8 | "\n", 9 | "---\n", 10 | "\n", 11 | "马尔可夫性\n", 12 | "\n", 13 | "\n", 14 | "\n", 15 | "**团 & 最大团**\n", 16 | "* 团(clique):无向图G中任意两个结点均有边连接的结点子集;\n", 17 | "* 最大团(maximal clique):不能再加入任何一个结点的团。" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "### 隐马尔可夫链" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "## HMM\n", 32 | "\n", 33 | "---\n", 34 | "隐马尔可夫模型(Hidden Markov Model, HMM)属于生成模型,可用于标注问题。标注问题是给定观测序列,预测其对应的标记序列,即状态序列。序列的每一个位置可以看作是一个时刻。\n", 35 | "\n", 36 | "HMM是关于时序的概率模型,描述由一个隐藏的马尔可夫链随机生成不可观测的状态随机序列,再由各个状态生成一个观测而产生观测随机序列的过程。\n", 37 | "\n", 38 | "HMM在语音识别、自然语言处理等领域应用广泛。\n", 39 | "\n", 40 | "**状态序列 & 观测序列**\n", 41 | "* **状态序列(state sequence)**:隐藏的马尔可夫链随机乘车的状态的序列,通常是不可见的\n", 42 | "\n", 43 | "* **观测序列(observation sequence)**:状态序列中每个状态生成一个观测,由此产生的随机序列便是观测序列。\n", 44 | "\n", 45 | "\n", 46 | "**HMM的两个基本假设**\n", 47 | "* 1)齐次马尔科夫性假设:隐藏的马尔可夫链在任意时刻的状态只与其前一时刻的状态有关;\n", 48 | "* 2)观测独立性假设:任一时刻的观测仅依赖于该时刻的马尔科夫链的状态。\n", 49 | "\n", 50 | "\n", 51 | 
"**HMM的三个基本问题**\n", 52 | "* 1、概率计算问题:计算给定模型下一个观测序列出现的概率;\n", 53 | "* 2、学习问题:已知观测序列,估计模型的三个参数,使得观测序列出现的概率最大;\n", 54 | "* 3、预测问题:模型和观测序列已知,预测最有可能的状态序列。\n", 55 | "\n", 56 | "HMM模型示意图\n", 57 | "\n", 58 | "\n", 59 | "\n", 60 | "HMM通常表示为\n", 61 | "λ=(A,B, π)\n", 62 | "其中:\n", 63 | "* A - 状态转移概率矩阵\n", 64 | "* B - 观测概率矩阵\n", 65 | "* π - 初始状态概率向量\n", 66 | "\n", 67 | "由此可见,A,B, π是决定一个HMM模型的三要素。A 和 π确定了隐藏马尔可夫链,以及生成state sequence;B确定了如何从状态生成观测,与状态序列综合确定了如何产生observation sequence。\n", 68 | "\n", 69 | "观测序列的生成过程:\n", 70 | "\n", 71 | "\n" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## CRF\n", 79 | "\n", 80 | "---\n", 81 | "* 全称:Conditional Random Field,条件随机场\n", 82 | "\n", 83 | "在输入一组随机变量的条件下,输出另一组随机变量的条件概率分布模型,假设输出随机变量构成马尔可夫随机场。\n", 84 | "\n", 85 | "CRF的3个基本问题:概率计算、学习、预测。\n", 86 | "\n", 87 | "**马尔可夫随机场 - Markov Random Field:**\n", 88 | "一个可以用无向图表示的联合概率分布,又称概率无向图模型(Probabilistic Undirected Graphical Model),满足成对、局部、或全局马尔可夫性\n", 89 | "\n", 90 | "无向图表示的随机变量之间存在的成对马尔可夫性(pairwise markov property)、局部马尔可夫性(local markov property)、全局马尔可夫性(global markov property)\n", 91 | "\n", 92 | "CRF是给定随机变量X条件下,随机变量Y的马尔可夫随机场。\n", 93 | "\n", 94 | "**线性链条件随机场(linear chain conditional random field)**是定义在线性链上的特殊的条件随机场,即条件概率模型P(Y|X),Y是输出变量,表示状态序列,X是输入变量,表示需要标注的观测序列。一般假设X和Y具有相同的图结构。\n", 95 | "\n", 96 | "对于CRF而言,学习是指利用训练集通过极大似然估计(或正则化)得到条件概率模型,预测时,对于给定的输入序列x,求出条件概率最大的输出序列。\n", 97 | "\n", 98 | "* CRF的概率计算问题:向前向后算法\n", 99 | "* CRF的学习算法:极大似然估计、正则化的极大似然估计;具体的优化实现算法有“改进的迭代尺度法IIS”、“梯度下降法”、“拟牛顿法”等\n", 100 | "* CRF的预测算法:维特比算法\n", 101 | "\n", 102 | "\n", 103 | "\n", 104 | "\n", 105 | "\n" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "## 参考资料\n", 113 | "\n", 114 | "---\n", 115 | "\n", 116 | "* [李航. 统计学习方法[M]. 清华大学出版社, 2012.](https://pan.baidu.com/s/1-ELrcH74FwGZKGS-lQaXyA) - 第10、11章\n", 117 | "* [周志华. 机器学习[M]. 
清华大学出版社, 2016.](https://pan.baidu.com/s/1b_f_QQfyK3R4uU79rooPFg) - 第14章" 118 | ] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "Python 3", 124 | "language": "python", 125 | "name": "python3" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 3 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython3", 137 | "version": "3.6.2" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 2 142 | } 143 | -------------------------------------------------------------------------------- /algorithms/viterbi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# viterbi算法\n", 8 | "---\n", 9 | "\n", 10 | "\n", 11 | "## 输入参数\n", 12 | "\n", 13 | "* 隐含状态空间\n", 14 | "* 观测状态空间\n", 15 | "* 初始概率\n", 16 | "* 转移概率(隐含状态转移概率矩阵)\n", 17 | "* 发射概率\n", 18 | "\n", 19 | "## 输出\n", 20 | "* 最可能的观测序列\n", 21 | "\n", 22 | "\n", 23 | "## 参考\n", 24 | "https://www.cnblogs.com/zhenlingcn/p/8409576.html" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 4, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import numpy as np\n", 36 | "\n", 37 | "\n", 38 | "def viterbi(hidden_states, observations, start_p, trans_p, emit_p):\n", 39 | " \"\"\"维特比算法\n", 40 | " ref: https://blog.csdn.net/youfefi/article/details/74276546\n", 41 | " \"\"\"\n", 42 | " # max_p 每一列存储第一列不同隐状态的最大概率\n", 43 | " max_p = np.zeros((len(observations), len(hidden_states)))\n", 44 | "\n", 45 | " # path 每一行存储上max_p对应列的路径\n", 46 | " path = np.zeros((len(hidden_states), len(observations)))\n", 47 | "\n", 48 | " # 初始化\n", 49 | " for i in range(len(hidden_states)):\n", 50 | " max_p[0][i] = start_p[i] * emit_p[i][observations[0]]\n", 51 | " path[i][0] = i\n", 52 | " \n", 53 | " # 递推计算\n", 54 | " for t in range(1, len(observations)):\n", 55 | " newpath = np.zeros((len(hidden_states), len(observations)))\n", 56 | " for y in range(len(hidden_states)):\n", 57 | " prob = -1\n", 58 | " for y0 in range(len(hidden_states)):\n", 59 | " nprob = max_p[t-1][y0] * trans_p[y0][y] * emit_p[y][observations[t]]\n", 60 | " if nprob > prob:\n", 61 | " prob = nprob\n", 62 | " state = y0\n", 63 | " # 记录路径\n", 64 | " max_p[t][y] = prob\n", 65 | " for m in range(t):\n", 66 | " newpath[y][m] = path[state][m]\n", 67 | " newpath[y][t] = y\n", 68 | "\n", 69 | " path = newpath\n", 70 | "\n", 71 | " max_prob = -1\n", 72 | " path_state = 0\n", 73 | " # 返回最大概率的路径\n", 74 | " for y in range(len(hidden_states)):\n", 75 | " if max_p[len(observations)-1][y] > max_prob:\n", 76 | " max_prob = max_p[len(observations)-1][y]\n", 77 | " path_state = y\n", 78 | "\n", 79 | " return max_prob, path[path_state]" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "name": "stdout", 89 | "output_type": "stream", 90 | "text": [ 91 | "rainy\n", 92 | "sunny\n", 93 | "sunny\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "# 隐状态\n", 99 | "hidden_state = ['sunny', 'rainy']\n", 100 | "state_s = [0, 1]\n", 101 | "\n", 102 | "# 观测序列\n", 103 | "obsevition = ['walk', 'shop', 'clean']\n", 104 | "obser = [0, 1, 2]\n", 105 | "\n", 106 | "# 初始状态,测试集中,0.6概率观测序列以sunny开始\n", 107 | "start_probability = [0.6, 0.4]\n", 108 | "\n", 109 | "# 转移概率,0.7:sunny下一天sunny的概率\n", 110 
| "transititon_probability = np.array([[0.7, 0.3], [0.4, 0.6]])\n", 111 | "\n", 112 | "# 发射概率,0.4:sunny在0.4概率下为shop\n", 113 | "emission_probability = np.array([[0.1, 0.4, 0.5], [0.6, 0.3, 0.1]])\n", 114 | "\n", 115 | "prob, result = viterbi(state_s, obser, start_probability, transititon_probability, emission_probability)\n", 116 | "\n", 117 | "for k in range(len(result)):\n", 118 | " print(hidden_state[int(result[k])])" 119 | ] 120 | } 121 | ], 122 | "metadata": { 123 | "kernelspec": { 124 | "display_name": "Python 3", 125 | "language": "python", 126 | "name": "python3" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | "mimetype": "text/x-python", 135 | "name": "python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.6.2" 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 2 143 | } 144 | -------------------------------------------------------------------------------- /data_analysis/EDA - 数据探索性分析.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# EDA - 数据探索性分析\n", 8 | "---\n", 9 | "\n", 10 | "\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "### box-plot (箱线图)\n", 18 | "---\n", 19 | "\n", 20 | "箱线图(Boxplot)利用数据中的五个统计量:最小非异常值、第一四分位数Q1、中位数、第三四分位数Q3与最大非异常值来描述数据的一种方法,可以粗略地看出数据是否具有有对称性,离散程度等信息。典型的箱线图如下:\n", 21 | "\n", 22 | "\n", 23 | "
**典型箱线图**
\n", 24 | "\n", 25 | "\n", 26 | "
**箱子结构**
\n", 27 | "\n", 28 | "\n", 29 | "**四分位数(Quartile)**\n", 30 | "\n", 31 | "根据数列等分的形式不同,常用的分位数有中位数、四分位数、十分位数、百分位数等。箱线图中使用四分位数进行统计。\n", 32 | "\n", 33 | "四分位数的定义:将数据划分为4个部分,每一个部分大约包含1/4的数据项,这种划分的临界点称为四分位数。\n", 34 | "\n", 35 | "假设数据序列有n个数据,那么第一四分位数(Q1)、第二四分位数(Q2)、第三四分位数(Q3)的计算公式如下:\n", 36 | "$$Q1=\\frac{n+1}{4}$$\n", 37 | "$$Q2=\\frac{2(n+1)}{4}$$\n", 38 | "$$Q3=\\frac{3(n+1)}{4}$$\n", 39 | "\n", 40 | "注:计算四分位数之前,需要将序列按大小排序!\n", 41 | "\n", 42 | "**四分位距(Quartile Deviation)**\n", 43 | "\n", 44 | "四分位距是指将数据序列按大小顺序排列后,将此数列分成四等份,所得第三个四分位上的值与第一个四分位上的值的差。\n", 45 | "$$IQR = Q3 - Q1$$\n", 46 | "\n", 47 | "**内限**\n", 48 | "\n", 49 | "内限(又称“上下限”)是数据序列中正常值的范围,其计算方法是[Q1-1.5IQR, Q3+1.5IQR]\n", 50 | "\n", 51 | "\n", 52 | "#### 绘制箱线图的步骤\n", 53 | "---\n", 54 | "step 1. 将数据序列按大小排序,计算序列的四分位数和中位数;\n", 55 | "\n", 56 | "step 2. 计算四分位距,以及上下限(内限),将数据序列中与上下限最接近的数据作为上下边缘;\n", 57 | "\n", 58 | "step 3. 画数轴,由中位数、Q1、Q3以及上下边缘,作图;\n", 59 | "\n", 60 | "step 4. 将数据点在图中表示。\n", 61 | "\n", 62 | "\n", 63 | "\n", 64 | "#### 箱线图的作用\n", 65 | "---\n", 66 | "1. 识别数据异常值\n", 67 | ">在箱线图中,异常值被定义为小于Q1-1.5IQR或大于Q3+1.5IQR的值。使用箱线图进行异常值识别的优势是:1)依靠实际数据,不需要事先假定数据服从特定的分布形式,没有对数据作任何限制性要求;2)四分位数具有一定的耐抗性,多达25%的数据可以变得任意远而不会很大地扰动四分位数,所以异常值不能对这个标准施加影响。\n", 68 | "\n", 69 | "2. 比较几批数据的形状\n", 70 | ">同一数轴上,几批数据的箱线图并行排列,几批数据的中位数、尾长、异常值、分布区间等形状信息便一目了然。\n", 71 | "\n", 72 | "\n", 73 | "#### 使用python绘制箱线图\n", 74 | "---" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [] 85 | } 86 | ], 87 | "metadata": { 88 | "kernelspec": { 89 | "display_name": "Python 3", 90 | "language": "python", 91 | "name": "python3" 92 | }, 93 | "language_info": { 94 | "codemirror_mode": { 95 | "name": "ipython", 96 | "version": 3 97 | }, 98 | "file_extension": ".py", 99 | "mimetype": "text/x-python", 100 | "name": "python", 101 | "nbconvert_exporter": "python", 102 | "pygments_lexer": "ipython3", 103 | "version": "3.6.1" 104 | } 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 2 108 | } 109 | -------------------------------------------------------------------------------- /data_analysis/images/box-plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/data_analysis/images/box-plot.png -------------------------------------------------------------------------------- /data_analysis/images/box-plot1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/data_analysis/images/box-plot1.png -------------------------------------------------------------------------------- /html/images/Conv2D.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/Conv2D.gif -------------------------------------------------------------------------------- /html/images/LogReg_kiank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/LogReg_kiank.png -------------------------------------------------------------------------------- /html/images/NlayerNN.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/NlayerNN.png -------------------------------------------------------------------------------- /html/images/activation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/activation.jpg -------------------------------------------------------------------------------- /html/images/basic_recipe_for_ML.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/basic_recipe_for_ML.png -------------------------------------------------------------------------------- /html/images/bias_variance1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/bias_variance1.png -------------------------------------------------------------------------------- /html/images/cg1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/cg1.jpg -------------------------------------------------------------------------------- /html/images/classification_kiank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/classification_kiank.png -------------------------------------------------------------------------------- /html/images/conv3d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/conv3d.png -------------------------------------------------------------------------------- /html/images/convolutions.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/convolutions.gif -------------------------------------------------------------------------------- /html/images/dc1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/dc1.jpg -------------------------------------------------------------------------------- /html/images/dc2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/dc2.jpg -------------------------------------------------------------------------------- /html/images/dc3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/dc3.jpg -------------------------------------------------------------------------------- /html/images/dc4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/dc4.jpg 
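A note on the EDA notebook above (data_analysis/EDA - 数据探索性分析.ipynb): it defines the quartiles, the IQR and the inner fences [Q1 - 1.5·IQR, Q3 + 1.5·IQR], and closes with the heading 使用python绘制箱线图 over an empty cell. A minimal sketch of that last step follows; it is illustrative only and uses randomly generated data rather than anything from the repository.

```python
import numpy as np
import matplotlib.pyplot as plt

rng = np.random.RandomState(68)
data = np.concatenate([rng.normal(50, 10, 200), [5, 110, 120]])  # add a few outliers

# the statistics the notebook describes
q1, q2, q3 = np.percentile(data, [25, 50, 75])
iqr = q3 - q1
lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr  # inner fences(内限)
print("Q1=%.1f Q2=%.1f Q3=%.1f IQR=%.1f fences=[%.1f, %.1f]"
      % (q1, q2, q3, iqr, lower, upper))

# matplotlib's boxplot uses the same 1.5*IQR whisker rule by default
plt.boxplot(data, showfliers=True)
plt.title("box plot of the sample data")
plt.show()
```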
-------------------------------------------------------------------------------- /html/images/dd1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/dd1.jpg -------------------------------------------------------------------------------- /html/images/dropout1_kiank.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/dropout1_kiank.mp4 -------------------------------------------------------------------------------- /html/images/dropout2_kiank.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/dropout2_kiank.mp4 -------------------------------------------------------------------------------- /html/images/edge_detect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/edge_detect.png -------------------------------------------------------------------------------- /html/images/final outline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/final outline.png -------------------------------------------------------------------------------- /html/images/grad_summary1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/grad_summary1.png -------------------------------------------------------------------------------- /html/images/gradient_descent.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/gradient_descent.jpg -------------------------------------------------------------------------------- /html/images/leaky_relu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/leaky_relu.jpg -------------------------------------------------------------------------------- /html/images/multi_filters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/multi_filters.png -------------------------------------------------------------------------------- /html/images/nn1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/nn1.jpg -------------------------------------------------------------------------------- /html/images/nn2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/nn2.jpg -------------------------------------------------------------------------------- /html/images/nn4.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/nn4.jpg -------------------------------------------------------------------------------- /html/images/normalize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/normalize.png -------------------------------------------------------------------------------- /html/images/normalize2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/normalize2.png -------------------------------------------------------------------------------- /html/images/padding_strides.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/padding_strides.gif -------------------------------------------------------------------------------- /html/images/pooling.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/pooling.jpg -------------------------------------------------------------------------------- /html/images/relu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/relu.jpg -------------------------------------------------------------------------------- /html/images/sgd.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/sgd.gif -------------------------------------------------------------------------------- /html/images/sgd_bad.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/sgd_bad.gif -------------------------------------------------------------------------------- /html/images/sigmoid.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/sigmoid.jpg -------------------------------------------------------------------------------- /html/images/split.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/split.png -------------------------------------------------------------------------------- /html/images/summary_conv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/summary_conv.png -------------------------------------------------------------------------------- /html/images/tanh.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/tanh.jpg 
-------------------------------------------------------------------------------- /html/images/流程图.vsdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/html/images/流程图.vsdx -------------------------------------------------------------------------------- /manuscripts/22 第二课_第二周学习笔记.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 022 学习笔记\n", 8 | "---\n", 9 | "\n", 10 | "## mini-batch gradient descent\n", 11 | "---\n", 12 | "mini-batch:从数据中选取的子集,如1000个样本。\n", 13 | "\n", 14 | "epoch:a single pass through the training set(一次遍历数据集)\n", 15 | "\n", 16 | "mini-batch的实现流程:\n", 17 | "1. 选择mini-batch大小,经验:2的n次方,从64到512不等\n", 18 | "2. 在每个epoch内依次取出各个mini-batch,分别执行前向传播、计算代价、反向传播并更新参数\n", 19 | "\n", 20 | "如果mini-batch大小为m(即所有样本),就是batch gradient descent\n", 21 | "如果mini-batch大小为1,就是SGD(stochastic gradient descent,随机梯度下降)\n", 22 | "\n", 23 | "SGD永远不会收敛,而是在最小值附近波动。一大缺点:不能使用向量化加速计算\n", 24 | "\n", 25 | "mini-batch的大小应该与CPU和GPU的内存相匹配,这对算法的运行效率影响很大。\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## exponentially weighted averages (指数加权平均)\n", 33 | "---\n", 34 | "指数加权移动平均\n", 35 | "\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## bias correction(偏差修正)\n", 43 | "---\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Gradient descent with momentum(动量梯度下降法)\n", 51 | "---\n", 52 | "\n", 53 | "几乎总是比标准GD法快;\n", 54 | "基本思想:计算梯度的指数加权平均数,并应用这个梯度更新参数。使纵轴方向摆动更小,横轴方向移动更快。" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## RMSprop(root mean square prop)\n", 62 | "---\n", 63 | "参数更新公式改变了\n" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "## Adam(Adaptive Moment Estimation)\n", 71 | "---\n", 72 | "将momentum和RMSprop融合在一起的一个方法\n", 73 | "\n", 74 | "\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## learning rate decay\n", 82 | "---\n", 83 | "在epoch层面逐步降低learning rate大小;\n" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "## local optima\n", 91 | "---\n", 92 | "\n", 93 | "当训练一个参数非常多的、大的神经网络的时候,不太可能陷入局部最优。\n", 94 | "\n", 95 | "平稳端(Plateaus)会使学习时间增长。" 96 | ] 97 | } 98 | ], 99 | "metadata": { 100 | "kernelspec": { 101 | "display_name": "Python 3", 102 | "language": "python", 103 | "name": "python3" 104 | }, 105 | "language_info": { 106 | "codemirror_mode": { 107 | "name": "ipython", 108 | "version": 3 109 | }, 110 | "file_extension": ".py", 111 | "mimetype": "text/x-python", 112 | "name": "python", 113 | "nbconvert_exporter": "python", 114 | "pygments_lexer": "ipython3", 115 | "version": "3.6.1" 116 | } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 2 120 | } 121 | -------------------------------------------------------------------------------- /manuscripts/23 第二课_第三周学习笔记.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 023 course2 week3 学习笔记\n", 8 | "---\n", 9 | "\n", 10 | "超参优化\n", 11 | "\n", 12 | "1. Try random values: don't use a grid\n", 13 | "2. 由粗到细(coarse to fine)的搜索
\n", 14 | "\n", 15 | "使用对数标尺,而不是线性标尺。\n", 16 | "\n", 17 | "跨领域找灵感\n", 18 | "\n", 19 | "\n", 20 | "### 如何组织超参优化?\n", 21 | "---\n", 22 | "babysitting one model\n", 23 | "training many models in parallel\n", 24 | "\n", 25 | "\n", 26 | "### batch normarlization\n", 27 | "----\n", 28 | "每次只能处理一个mini-batch。\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Softmax回归\n", 36 | "----\n", 37 | "LR的一般形式。\n", 38 | "\n", 39 | "\n", 40 | "最大似然估计\n", 41 | "\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "这周内容主要分四块:第一部分是超参数,第二部分是batchnorm,第三部分是softmax做多类分类,第四部分是深度学习框架和tensorflow\n", 49 | "\n", 50 | "第一部分首先讲了超参数的优先级,也就是哪些重要、哪些不那么重要\n", 51 | "ng认为,超参数的重要性排序如下:\n", 52 | "1. learning rate;\n", 53 | "2. momentum参数(0.9是不错的默认值和初始值)、mini-batch的batch size、各层的隐藏单元数;\n", 54 | "3. 层数和learning rate decay\n", 55 | "在使用ADAM优化算法时,ng总是把beta1、beta2、epsilon这三个值直接设置为固定的0.9、0.999、10^(-8)而不去调整它们,也就是说,ng认为调这几个超参数意义不大\n", 56 | "\n", 57 | "ng讲了两种搜索超参数的方法:grid search和random search\n", 58 | "grid search在超参数数量较少时比较实用,但random search在超参数数量较多时更实用。原因是,当我们不知道哪些超参数不重要、对结果没什么影响的时候,用grid search能够试验的重要参数上的值要比random search少得多\n", 59 | "还有一种搜索超参数的策略是由粗到细(coarse to fine),顾名思义,就是先在大范围内的少数超参数上测试模型性能,然后再在性能比较好的几个超参数的附近区域中采更多超参数进行测试,逐步递进\n", 60 | "计算资源多的话就可以多random search一些超参数,后面ng也提到测试更多超参数总是好的\n", 61 | "\n", 62 | "有一些超参数需要做尺度变换,不能直接做均匀随机采样,比如learning rate。因为比如说在0.0001到1之间搜索learning rate,如果均匀随机采样,那么0.1到1之间会占据90%的点,所以这里要用对数尺度下的搜索,先对超参数取值范围的最大最小值取log,然后在log范围内均匀随机采样\n", 63 | "\n", 64 | "指数加权平均里的beta是另一个需要考虑尺度变换的超参数,这个值不能直接取log,因为用的是1-beta而不是beta,所以这里需要对1-beta取log\n", 65 | "\n", 66 | "不过ng也提到,即使没有在合适的尺度上采样,只要采样数足够多,也能获得还算过得去的效果,特别是结合前面讲的由粗到细的搜索策略,在后面较细范围的搜索中还是有机会聚焦到合适的超参数范围上、找到不错的超参数\n", 67 | "\n", 68 | "第二部分是batchnorm,这是非常重要的方法,它让超参数搜索变得更简单,让神经网络选择超参数的过程变得更robust或不那么敏感,工作效果好的超参数范围会变得比之前大了很多,训练非常深的网络也变得容易很多\n", 69 | "方法类似于前面讲逻辑回归时做的对每个维度的特征求均值和方差,然后对每个特征做归一化,batchnorm是对mini-batch每一层神经元输出的z做归一化\n", 70 | "注意,为了保持数值稳定性,分母根号内需要加上epsilon以防方差为0\n", 71 | "但同时,我们并不想让神经元总是均值为0和标准差为1,它们具有不同的分布可能更有意义,所以可以再乘上gamma并加上beta,这两个参数是可学习的,可以在反向传播时更新它们\n", 72 | "batchnorm起作用的原因是:可以让网络中较后或较深的层中的权重对前面层的权重变化更鲁棒或不敏感\n", 73 | "从后面的层看来,因为前面层的权重在不断更新,所以对于同样的输入,它接收到的输入或前面层给的激活输出都在不断变化,这样就出现了covariate shift问题\n", 74 | "\n", 75 | "batchnorm的作用,就是减小了隐层输出的变化范围,使得不管前面层的权值如何变化,后面层接收到的输入的均值和方差都保持不变\n", 76 | "所以即使它们的数值有变化,但由gamma和beta控制的均值和方差却没有变化,限制了前面层参数变化对后面层输入分布的影响,所以后面层就可以在相对固定的分布上进行训练\n", 77 | "\n", 78 | "同时,batchnorm也有轻微的正则化效果\n", 79 | "dropout是通过将激活随机设置为0或1来引入噪声,而bn则是因为mini-batch估计均值和方差时带有噪声,这样给前面神经元引入噪声,使得后面神经元训练时不过于依赖于任何一个前面层的神经元\n", 80 | "但因为添加的噪声比较微小,所以正则化效果也比较小,实践中可以将bn和dropout一起使用\n", 81 | "\n", 82 | "测试阶段没有mini-batch去估计均值和方差,所以使用指数加权平均在训练阶段的每个batch上计算的均值和方差,作为测试阶段的均值和方差\n", 83 | "\n", 84 | "第三部分是用softmax训练多类分类模型,它实际上是逻辑回归的sigmoid+cross entropy在多类上的推广\n", 85 | "前向和反向过程都和逻辑回归比较像,这里就不再赘述了\n", 86 | "\n", 87 | "最后一部分是深度学习框架和tensorflow\n", 88 | "ng认为,好的深度学习框架有三个标准:\n", 89 | "1. 便于编程,包括神经网络的开发和迭代,以及训练好网络之后对产品做部署\n", 90 | "2. 运行速度要快\n", 91 | "3. 
真正的开放,这指的不仅是开源,而且要有良好的管理,有些公司会逐渐闭源并且把功能转移到他们自己的产品中比如云服务,所以能长时间保持开源、并且不在单一一家公司的掌控之下是很重要的,因为这家公司可能在未来出于某个原因而停止开源\n", 92 | "\n", 93 | "最后ng用一个编程实例来解释了如何用tensorflow定义cost function、如何设置变量和placeholder、如何把输入数据和placeholder绑定起来、如何使用optimizer来做优化,以及构建计算图的概念\n", 94 | "\n", 95 | "covariate shift 怎么理解呢?\n", 96 | "那个其实我也不是很理解,说是输入分布变化对训练模型造成的影响\n", 97 | "\n", 98 | "\n", 99 | "我看了一些batchnorm的解释,包括https://www.zhihu.com/question/38102762 ,说是主要还是解决反向传播时的梯度爆炸和梯度消失问题,说那个什么covariate shift是论文作者瞎解释的,哈哈。。\n", 100 | "\n", 101 | "\n", 102 | "我漏了一个视频的内容。。补一下,第一部分超参数的最后一小部分,那个熊猫模式和鱼子酱模式\n", 103 | "\n", 104 | "训练模型的过程有两类方法:第一种是在没有足够的计算资源或计算能力时,不能同时试验大量模型,只能照看单一模型,每天都调整这个模型的超参数,让性能变得更好;第二种是同时试验很多不同超参数的模型,最后选择学习曲线看起来效果最好的那个模型\n", 105 | "ng形象地把这两种方法称作熊猫模式和鱼子酱模式\n", 106 | "ng建议,在计算资源充足的情况下,绝对应该选择鱼子酱模式,充分尝试各种超参数\n", 107 | "但他同时也说,在某些领域,比如在线广告和计算机视觉领域,数据很多而且模型很大(层数、节点都很多),很难同时训练很多模型,所以用熊猫模式还是鱼子酱模式也和应用领域相关" 108 | ] 109 | } 110 | ], 111 | "metadata": { 112 | "kernelspec": { 113 | "display_name": "Python 3", 114 | "language": "python", 115 | "name": "python3" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": { 119 | "name": "ipython", 120 | "version": 3 121 | }, 122 | "file_extension": ".py", 123 | "mimetype": "text/x-python", 124 | "name": "python", 125 | "nbconvert_exporter": "python", 126 | "pygments_lexer": "ipython3", 127 | "version": "3.6.1" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 2 132 | } 133 | -------------------------------------------------------------------------------- /manuscripts/Models/model_2/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "model.ckpt-20000" 2 | all_model_checkpoint_paths: "model.ckpt-1" 3 | all_model_checkpoint_paths: "model.ckpt-10000" 4 | all_model_checkpoint_paths: "model.ckpt-10001" 5 | all_model_checkpoint_paths: "model.ckpt-20000" 6 | -------------------------------------------------------------------------------- /manuscripts/Models/model_2/events.out.tfevents.1509279888.MIKE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/events.out.tfevents.1509279888.MIKE -------------------------------------------------------------------------------- /manuscripts/Models/model_2/events.out.tfevents.1509279977.MIKE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/events.out.tfevents.1509279977.MIKE -------------------------------------------------------------------------------- /manuscripts/Models/model_2/model.ckpt-1.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/model.ckpt-1.data-00000-of-00001 -------------------------------------------------------------------------------- /manuscripts/Models/model_2/model.ckpt-1.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/model.ckpt-1.index -------------------------------------------------------------------------------- /manuscripts/Models/model_2/model.ckpt-1.meta: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/model.ckpt-1.meta -------------------------------------------------------------------------------- /manuscripts/Models/model_2/model.ckpt-10000.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/model.ckpt-10000.data-00000-of-00001 -------------------------------------------------------------------------------- /manuscripts/Models/model_2/model.ckpt-10000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/model.ckpt-10000.index -------------------------------------------------------------------------------- /manuscripts/Models/model_2/model.ckpt-10000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/model.ckpt-10000.meta -------------------------------------------------------------------------------- /manuscripts/Models/model_2/model.ckpt-10001.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/model.ckpt-10001.data-00000-of-00001 -------------------------------------------------------------------------------- /manuscripts/Models/model_2/model.ckpt-10001.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/model.ckpt-10001.index -------------------------------------------------------------------------------- /manuscripts/Models/model_2/model.ckpt-10001.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/model.ckpt-10001.meta -------------------------------------------------------------------------------- /manuscripts/Models/model_2/model.ckpt-20000.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/model.ckpt-20000.data-00000-of-00001 -------------------------------------------------------------------------------- /manuscripts/Models/model_2/model.ckpt-20000.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/model.ckpt-20000.index -------------------------------------------------------------------------------- /manuscripts/Models/model_2/model.ckpt-20000.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/manuscripts/Models/model_2/model.ckpt-20000.meta 
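The course-2 week-3 notes above stress that the learning rate (and 1 - beta for exponentially weighted averages) should be sampled on a log scale rather than uniformly. A small numpy sketch of that idea, using the illustrative ranges from the notes (learning rate between 1e-4 and 1, beta between 0.9 and 0.999); the helper name is made up for this example.

```python
import numpy as np

np.random.seed(0)

def sample_log_uniform(low, high, size):
    """Sample uniformly on a log10 scale between low and high."""
    return 10 ** np.random.uniform(np.log10(low), np.log10(high), size)

# Plain uniform sampling of a learning rate in [1e-4, 1] puts ~90% of the
# draws in [0.1, 1]; log-uniform sampling spreads them evenly over the four
# decades 1e-4..1e-3, 1e-3..1e-2, 1e-2..1e-1 and 1e-1..1.
uniform_lr = np.random.uniform(1e-4, 1.0, 100000)
log_lr = sample_log_uniform(1e-4, 1.0, 100000)
print((uniform_lr >= 0.1).mean())   # ~0.90
print((log_lr >= 0.1).mean())       # ~0.25

# For the beta of an exponentially weighted average, sample 1 - beta on the
# log scale instead of beta itself, e.g. beta in [0.9, 0.999]:
beta = 1 - sample_log_uniform(1e-3, 1e-1, 5)
print(beta)
```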
-------------------------------------------------------------------------------- /manuscripts/datas/shampoo_sales.csv: -------------------------------------------------------------------------------- 1 | "Month","Sales of shampoo over a three year period" 2 | "1-01",266.0 3 | "1-02",145.9 4 | "1-03",183.1 5 | "1-04",119.3 6 | "1-05",180.3 7 | "1-06",168.5 8 | "1-07",231.8 9 | "1-08",224.5 10 | "1-09",192.8 11 | "1-10",122.9 12 | "1-11",336.5 13 | "1-12",185.9 14 | "2-01",194.3 15 | "2-02",149.5 16 | "2-03",210.1 17 | "2-04",273.3 18 | "2-05",191.4 19 | "2-06",287.0 20 | "2-07",226.0 21 | "2-08",303.6 22 | "2-09",289.9 23 | "2-10",421.6 24 | "2-11",264.5 25 | "2-12",342.3 26 | "3-01",339.7 27 | "3-02",440.4 28 | "3-03",315.9 29 | "3-04",439.3 30 | "3-05",401.3 31 | "3-06",437.4 32 | "3-07",575.5 33 | "3-08",407.6 34 | "3-09",682.0 35 | "3-10",475.3 36 | "3-11",581.3 37 | "3-12",646.9 -------------------------------------------------------------------------------- /manuscripts/ng_course_Draft.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "写这份学习笔记,最难的一部分是跳出ng在课程中讲解的知识框架,根据自己的理解,添加新的知识。\n", 10 | "\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "collapsed": true 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "import numpy as np\n", 22 | "\n", 23 | "\n", 24 | "def sigmoid(x):\n", 25 | " \"\"\"\n", 26 | " Compute the sigmoid of x\n", 27 | "\n", 28 | " Arguments:\n", 29 | " x -- A scalar or numpy array of any size\n", 30 | "\n", 31 | " Return:\n", 32 | " s -- sigmoid(x)\n", 33 | " \"\"\" \n", 34 | " s = 1.0 / (1.0 + np.exp(-x)) \n", 35 | " return s\n", 36 | "\n", 37 | "\n", 38 | "def sigmoid_derivative(x):\n", 39 | " \"\"\"\n", 40 | " Compute the derivative of the sigmoid function with respect to its input x.\n", 41 | " \n", 42 | " Arguments:\n", 43 | " x -- A scalar or numpy array\n", 44 | "\n", 45 | " Return:\n", 46 | " ds -- Your computed gradient.\n", 47 | " \"\"\"\n", 48 | " s = sigmoid(x)\n", 49 | " ds = s * (1 - s) \n", 50 | " return ds\n", 51 | "\n", 52 | "\n", 53 | "def softmax(x):\n", 54 | " \"\"\"Calculates the softmax for each row of the input x.\n", 55 | "\n", 56 | " Argument:\n", 57 | " x -- A numpy matrix of shape (n,m)\n", 58 | "\n", 59 | " Returns:\n", 60 | " s -- A numpy matrix equal to the softmax of x, of shape (n,m)\n", 61 | " \"\"\"\n", 62 | " x_exp = np.exp(x)\n", 63 | " x_sum = np.sum(x_exp, axis=1, keepdims=True)\n", 64 | " \n", 65 | " # Compute softmax(x) by dividing x_exp by x_sum.\n", 66 | " s = x_exp/x_sum\n", 67 | " return s" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "#### softmax函数\n", 75 | "---\n", 76 | "\n", 77 | "softmax函数表达式:\n", 78 | "\n", 79 | "- $ \\text{对于向量 } x \\in \\mathbb{R}^{1\\times n} \\text{, } softmax(x) = softmax(\\begin{bmatrix}\n", 80 | " x_1 &&\n", 81 | " x_2 &&\n", 82 | " ... &&\n", 83 | " x_n \n", 84 | "\\end{bmatrix}) = \\begin{bmatrix}\n", 85 | " \\frac{e^{x_1}}{\\sum_{j}e^{x_j}} &&\n", 86 | " \\frac{e^{x_2}}{\\sum_{j}e^{x_j}} &&\n", 87 | " ... 
&&\n", 88 | " \\frac{e^{x_n}}{\\sum_{j}e^{x_j}} \n", 89 | "\\end{bmatrix} $ \n", 90 | "\n", 91 | "- $\\text{对于矩阵 } x \\in \\mathbb{R}^{m \\times n} \\text{, $x_{ij}$ 表示矩阵$x$中第 $i$ 行、第 $j$ 列的元素 }$ $$softmax(x) = softmax\\begin{bmatrix}\n", 92 | " x_{11} & x_{12} & x_{13} & \\dots & x_{1n} \\\\\n", 93 | " x_{21} & x_{22} & x_{23} & \\dots & x_{2n} \\\\\n", 94 | " \\vdots & \\vdots & \\vdots & \\ddots & \\vdots \\\\\n", 95 | " x_{m1} & x_{m2} & x_{m3} & \\dots & x_{mn}\n", 96 | "\\end{bmatrix} = \\begin{bmatrix}\n", 97 | " \\frac{e^{x_{11}}}{\\sum_{j}e^{x_{1j}}} & \\frac{e^{x_{12}}}{\\sum_{j}e^{x_{1j}}} & \\frac{e^{x_{13}}}{\\sum_{j}e^{x_{1j}}} & \\dots & \\frac{e^{x_{1n}}}{\\sum_{j}e^{x_{1j}}} \\\\\n", 98 | " \\frac{e^{x_{21}}}{\\sum_{j}e^{x_{2j}}} & \\frac{e^{x_{22}}}{\\sum_{j}e^{x_{2j}}} & \\frac{e^{x_{23}}}{\\sum_{j}e^{x_{2j}}} & \\dots & \\frac{e^{x_{2n}}}{\\sum_{j}e^{x_{2j}}} \\\\\n", 99 | " \\vdots & \\vdots & \\vdots & \\ddots & \\vdots \\\\\n", 100 | " \\frac{e^{x_{m1}}}{\\sum_{j}e^{x_{mj}}} & \\frac{e^{x_{m2}}}{\\sum_{j}e^{x_{mj}}} & \\frac{e^{x_{m3}}}{\\sum_{j}e^{x_{mj}}} & \\dots & \\frac{e^{x_{mn}}}{\\sum_{j}e^{x_{mj}}}\n", 101 | "\\end{bmatrix} = \\begin{pmatrix}\n", 102 | " softmax\\text{(first row of x)} \\\\\n", 103 | " softmax\\text{(second row of x)} \\\\\n", 104 | " ... \\\\\n", 105 | " softmax\\text{(last row of x)} \\\\\n", 106 | "\\end{pmatrix} $$" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "#### L1 和 L2 损失函数(loss functions)\n", 114 | "----\n", 115 | "- L1 函数表达式:\n", 116 | "$$\\begin{align*} & L_1(\\hat{y}, y) = \\sum_{i=0}^m|y^{(i)} - \\hat{y}^{(i)}| \\end{align*}$$\n", 117 | "\n", 118 | "- L2 函数表达式:\n", 119 | "$$\\begin{align*} & L_2(\\hat{y},y) = \\sum_{i=0}^m(y^{(i)} - \\hat{y}^{(i)})^2 \\end{align*}$$\n", 120 | "其中,$\\hat{y}$表示预测值向量,$y$表示真实值向量。" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": { 127 | "collapsed": true 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "import numpy as np\n", 132 | "\n", 133 | "\n", 134 | "def L1(yhat, y):\n", 135 | " \"\"\"\n", 136 | " Compute L1 loss\n", 137 | " \n", 138 | " Arguments:\n", 139 | " yhat -- vector of size m (predicted labels)\n", 140 | " y -- vector of size m (true labels)\n", 141 | " \n", 142 | " Returns:\n", 143 | " loss -- the value of the L1 loss function defined above\n", 144 | " \"\"\"\n", 145 | " loss = np.sum(np.abs((y-yhat))) \n", 146 | " return loss\n", 147 | "\n", 148 | "\n", 149 | "def L2(yhat, y):\n", 150 | " \"\"\"\n", 151 | " Compute L2 loss\n", 152 | " \n", 153 | " Arguments:\n", 154 | " yhat -- vector of size m (predicted labels)\n", 155 | " y -- vector of size m (true labels)\n", 156 | " \n", 157 | " Returns:\n", 158 | " loss -- the value of the L2 loss\n", 159 | " \"\"\"\n", 160 | " loss = np.sum(np.power((y-yhat),2)) \n", 161 | " return loss" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "### Common Technique used in ML(Machine Learning) and DL(Deep Learning)\n", 169 | "\n", 170 | "\n", 171 | "#### normalize\n", 172 | "\n", 173 | "It often leads to a better performance because gradient descent converges faster after normalization. 
Here, by normalization we mean changing x to $ \\frac{x}{\\| x\\|} $ (dividing each row vector of x by its norm).\n", 174 | "\n", 175 | "For example, if $$x = \n", 176 | "\\begin{bmatrix}\n", 177 | " 0 & 3 & 4 \\\\\n", 178 | " 2 & 6 & 4 \\\\\n", 179 | "\\end{bmatrix}\\tag{3}$$ then $$\\| x\\| = np.linalg.norm(x, axis = 1, keepdims = True) = \\begin{bmatrix}\n", 180 | " 5 \\\\\n", 181 | " \\sqrt{56} \\\\\n", 182 | "\\end{bmatrix}\\tag{4} $$and $$ x\\_normalized = \\frac{x}{\\| x\\|} = \\begin{bmatrix}\n", 183 | " 0 & \\frac{3}{5} & \\frac{4}{5} \\\\\n", 184 | " \\frac{2}{\\sqrt{56}} & \\frac{6}{\\sqrt{56}} & \\frac{4}{\\sqrt{56}} \\\\\n", 185 | "\\end{bmatrix}\\tag{5}$$ Note that you can divide matrices of different sizes and it works fine: this is called broadcasting and you're going to learn about it in part 5.\n", 186 | "\n", 187 | "\n", 188 | "\n", 189 | "Common steps for pre-processing a new dataset are:\n", 190 | "- Figure out the dimensions and shapes of the problem (m_train, m_test, num_px, ...)\n", 191 | "- Reshape the datasets such that each example is now a vector of size (num_px \\* num_px \\* 3, 1)\n", 192 | "- \"Standardize\" the data\n", 193 | "\n", 194 | "**Exercise**: Implement normalizeRows() to normalize the rows of a matrix. After applying this function to an input matrix x, each row of x should be a vector of unit length (meaning length 1)." 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "collapsed": true 202 | }, 203 | "outputs": [], 204 | "source": [] 205 | } 206 | ], 207 | "metadata": { 208 | "kernelspec": { 209 | "display_name": "Python 3", 210 | "language": "python", 211 | "name": "python3" 212 | }, 213 | "language_info": { 214 | "codemirror_mode": { 215 | "name": "ipython", 216 | "version": 3 217 | }, 218 | "file_extension": ".py", 219 | "mimetype": "text/x-python", 220 | "name": "python", 221 | "nbconvert_exporter": "python", 222 | "pygments_lexer": "ipython3", 223 | "version": "3.6.1" 224 | } 225 | }, 226 | "nbformat": 4, 227 | "nbformat_minor": 2 228 | } 229 | -------------------------------------------------------------------------------- /manuscripts/第三课学习笔记.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 第三课\n", 8 | "\n", 9 | "---" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## using single number evaluation metric\n", 17 | "---\n", 18 | "\n", 19 | "不要使用precision或者recall来评估性能,而是使用F1来评估性能。\n", 20 | "\n", 21 | "结合性能评估指标与运行时间。tradeoff accuracy and running time : cost = accuracy - 0.5 * running time" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## set up train/dev/test sets\n", 29 | "---\n", 30 | "数据集的划分,对开发效率会有很大的影响。train sets 和 test sets 必须来自同一分布。\n", 31 | "\n", 32 | "choose a dev set and test set to reflect data you expect to get in the future and consider important to do well on.\n", 33 | "\n", 34 | "That way, you are putting the target where you actually want to hit.\n", 35 | "\n", 36 | "训练的目标体现在dev set 和 evaluation metric中。train set的设置会影响逼近目标的速度。\n", 37 | "\n", 38 | "传统的划分方法:70%/30% or 60%/20%/20%" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## error analysis\n", 46 | "---\n", 47 | "确定某一个努力的方向是否值得。\n", 48 | "分析训练集和测试集的数据分布是否一致。\n" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | 
"source": [ 55 | "## incorrectly labeled examples\n", 56 | "---\n", 57 | "\n", 58 | "深度学习算法对随机误差的鲁棒性很好,但是对系统误差的鲁棒性很差。" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "快速搭建第一个系统,并进行迭代。\n", 66 | "在不同的划分上进行训练并测试。\n", 67 | "\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "## addressing data mismatch\n", 75 | "---\n" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "## Transfer Learning\n", 83 | "----\n" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "## Multi-task learning\n", 91 | "---\n" 92 | ] 93 | } 94 | ], 95 | "metadata": { 96 | "kernelspec": { 97 | "display_name": "Python 3", 98 | "language": "python", 99 | "name": "python3" 100 | }, 101 | "language_info": { 102 | "codemirror_mode": { 103 | "name": "ipython", 104 | "version": 3 105 | }, 106 | "file_extension": ".py", 107 | "mimetype": "text/x-python", 108 | "name": "python", 109 | "nbconvert_exporter": "python", 110 | "pygments_lexer": "ipython3", 111 | "version": "3.6.1" 112 | } 113 | }, 114 | "nbformat": 4, 115 | "nbformat_minor": 2 116 | } 117 | -------------------------------------------------------------------------------- /notes_deeplearning.ai/11 第一课_第一周学习笔记.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true 7 | }, 8 | "source": [ 9 | "# 第一周 深度学习概论\n", 10 | "\n", 11 | "---\n", 12 | "\n", 13 | "吴恩达最新发布的深度学习课程包含以下五门课:\n", 14 | "\n", 15 | "神经网络是一种高效的机器学习算法,它模仿人脑的工作模式(最近,hinton反对这种说法)。\n", 16 | "\n", 17 | "standard NN\n", 18 | "Convolutional NN\n", 19 | "Recurrent NN\n", 20 | "\n", 21 | "监督学习(supervised learning)\n", 22 | "\n", 23 | "\n", 24 | "Structured Data & Unstuctured Data\n", 25 | "\n", 26 | "\n", 27 | "深度学习兴起的原因:\n", 28 | "\n", 29 | "https://mp.weixin.qq.com/s/rrgSh8OuEB_jk0Pv5_W5fg\n", 30 | "\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## 术语 - 中英对照表\n", 38 | "----\n", 39 | "\n", 40 | "|EN|CH|\n", 41 | "|---:|:---|\n", 42 | "|bacward propagation|向后传播|\n", 43 | "|forward propagation|向前传播|\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## 约定\n", 51 | "---\n", 52 | "\n", 53 | "算法 - 一种生成模型的数学逻辑\n", 54 | "模型 - 算法在数据集上训练得到,可以直接应用于新数据" 55 | ] 56 | } 57 | ], 58 | "metadata": { 59 | "kernelspec": { 60 | "display_name": "Python 3", 61 | "language": "python", 62 | "name": "python3" 63 | }, 64 | "language_info": { 65 | "codemirror_mode": { 66 | "name": "ipython", 67 | "version": 3 68 | }, 69 | "file_extension": ".py", 70 | "mimetype": "text/x-python", 71 | "name": "python", 72 | "nbconvert_exporter": "python", 73 | "pygments_lexer": "ipython3", 74 | "version": "3.6.2" 75 | } 76 | }, 77 | "nbformat": 4, 78 | "nbformat_minor": 2 79 | } 80 | -------------------------------------------------------------------------------- /notes_deeplearning.ai/22 第二课_第二周学习笔记-未完成.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 022 学习笔记\n", 8 | "---\n", 9 | "\n", 10 | "## mini-batch gradient descent\n", 11 | "---\n", 12 | "mini-batch:从数据中选取的子集,如1000个样本。\n", 13 | "\n", 14 | "epoch:a single pass through the training set(一次遍历数据集)\n", 15 | "\n", 16 | "mini-batch的实现流程:\n", 17 | "1. 
选择mini-batch大小,经验:2的n次方,从64到512不等\n", 18 | "2. 在每个epoch内依次取出各个mini-batch,分别执行前向传播、计算代价、反向传播并更新参数\n", 19 | "\n", 20 | "如果mini-batch大小为m(即所有样本),就是batch gradient descent\n", 21 | "如果mini-batch大小为1,就是SGD(stochastic gradient descent,随机梯度下降)\n", 22 | "\n", 23 | "SGD永远不会收敛,而是在最小值附近波动。一大缺点:不能使用向量化加速计算\n", 24 | "\n", 25 | "mini-batch的大小应该与CPU和GPU的内存相匹配,这对算法的运行效率影响很大。\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## exponentially weighted averages (指数加权平均)\n", 33 | "---\n", 34 | "指数加权移动平均\n", 35 | "\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## bias correction(偏差修正)\n", 43 | "---\n" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Gradient descent with momentum(动量梯度下降法)\n", 51 | "---\n", 52 | "\n", 53 | "几乎总是比标准GD法快;\n", 54 | "基本思想:计算梯度的指数加权平均数,并应用这个梯度更新参数。使纵轴方向摆动更小,横轴方向移动更快。" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## RMSprop(root mean square prop)\n", 62 | "---\n", 63 | "参数更新公式改变了\n" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "## Adam(Adaptive Moment Estimation)\n", 71 | "---\n", 72 | "将momentum和RMSprop融合在一起的一个方法\n", 73 | "\n", 74 | "\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## learning rate decay\n", 82 | "---\n", 83 | "在epoch层面逐步降低learning rate大小;\n" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "## local optima\n", 91 | "---\n", 92 | "\n", 93 | "当训练一个参数非常多的、大的神经网络的时候,不太可能陷入局部最优。\n", 94 | "\n", 95 | "平稳端(Plateaus)会使学习时间增长。" 96 | ] 97 | } 98 | ], 99 | "metadata": { 100 | "kernelspec": { 101 | "display_name": "Python 3", 102 | "language": "python", 103 | "name": "python3" 104 | }, 105 | "language_info": { 106 | "codemirror_mode": { 107 | "name": "ipython", 108 | "version": 3 109 | }, 110 | "file_extension": ".py", 111 | "mimetype": "text/x-python", 112 | "name": "python", 113 | "nbconvert_exporter": "python", 114 | "pygments_lexer": "ipython3", 115 | "version": "3.6.1" 116 | } 117 | }, 118 | "nbformat": 4, 119 | "nbformat_minor": 2 120 | } 121 | -------------------------------------------------------------------------------- /notes_deeplearning.ai/23 第二课_第三周学习笔记-未完成.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 023 course2 week3 学习笔记\n", 8 | "---\n", 9 | "\n", 10 | "超参优化\n", 11 | "\n", 12 | "1. Try random values: don't use a grid\n", 13 | "2. 由粗到细(coarse to fine)的搜索\n", 14 | "\n", 15 | "使用对数标尺,而不是线性标尺。\n", 16 | "\n", 17 | "跨领域找灵感\n", 18 | "\n", 19 | "\n", 20 | "### 如何组织超参优化?\n", 21 | "---\n", 22 | "babysitting one model\n", 23 | "training many models in parallel\n", 24 | "\n", 25 | "\n", 26 | "### batch normalization\n", 27 | "----\n", 28 | "每次只能处理一个mini-batch。\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Softmax回归\n", 36 | "----\n", 37 | "逻辑回归(LR)的一般形式。\n", 38 | "\n", 39 | "\n", 40 | "最大似然估计\n", 41 | "\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "这周内容主要分四块:第一部分是超参数,第二部分是batchnorm,第三部分是softmax做多类分类,第四部分是深度学习框架和tensorflow\n", 49 | "\n", 50 | "第一部分首先讲了超参数的优先级,也就是哪些重要、哪些不那么重要\n", 51 | "ng认为,超参数的重要性排序如下:\n", 52 | "1. learning rate;\n", 53 | "2. momentum参数(0.9是不错的默认值和初始值)、mini-batch的batch size、各层的隐藏单元数;\n", 54 | "3. 
层数和learning rate decay\n", 55 | "在使用ADAM优化算法时,ng总是把beta1、beta2、epsilon这三个值直接设置为固定的0.9、0.999、10^(-8)而不去调整它们,也就是说,ng认为调这几个超参数意义不大\n", 56 | "\n", 57 | "ng讲了两种搜索超参数的方法:grid search和random search\n", 58 | "grid search在超参数数量较少时比较实用,但random search在超参数数量较多时更实用。原因是,当我们不知道哪些超参数不重要、对结果没什么影响的时候,用grid search能够试验的重要参数上的值要比random search少得多\n", 59 | "还有一种搜索超参数的策略是由粗到细(coarse to fine),顾名思义,就是先在大范围内的少数超参数上测试模型性能,然后再在性能比较好的几个超参数的附近区域中采更多超参数进行测试,逐步递进\n", 60 | "计算资源多的话就可以多random search一些超参数,后面ng也提到测试更多超参数总是好的\n", 61 | "\n", 62 | "有一些超参数需要做尺度变换,不能直接做均匀随机采样,比如learning rate。因为比如说在0.0001到1之间搜索learning rate,如果均匀随机采样,那么0.1到1之间会占据90%的点,所以这里要用对数尺度下的搜索,先对超参数取值范围的最大最小值取log,然后在log范围内均匀随机采样\n", 63 | "\n", 64 | "指数加权平均里的beta是另一个需要考虑尺度变换的超参数,这个值不能直接取log,因为用的是1-beta而不是beta,所以这里需要对1-beta取log\n", 65 | "\n", 66 | "不过ng也提到,即使没有在合适的尺度上采样,只要采样数足够多,也能获得还算过得去的效果,特别是结合前面讲的由粗到细的搜索策略,在后面较细范围的搜索中还是有机会聚焦到合适的超参数范围上、找到不错的超参数\n", 67 | "\n", 68 | "第二部分是batchnorm,这是非常重要的方法,它让超参数搜索变得更简单,让神经网络选择超参数的过程变得更robust或不那么敏感,工作效果好的超参数范围会变得比之前大了很多,训练非常深的网络也变得容易很多\n", 69 | "方法类似于前面讲逻辑回归时做的对每个维度的特征求均值和方差,然后对每个特征做归一化,batchnorm是对mini-batch每一层神经元输出的z做归一化\n", 70 | "注意,为了保持数值稳定性,分母根号内需要加上epsilon以防方差为0\n", 71 | "但同时,我们并不想让神经元总是均值为0和标准差为1,它们具有不同的分布可能更有意义,所以可以再乘上gamma并加上beta,这两个参数是可学习的,可以在反向传播时更新它们\n", 72 | "batchnorm起作用的原因是:可以让网络中较后或较深的层中的权重对前面层的权重变化更鲁棒或不敏感\n", 73 | "从后面的层看来,因为前面层的权重在不断更新,所以对于同样的输入,它接收到的输入或前面层给的激活输出都在不断变化,这样就出现了covariate shift问题\n", 74 | "\n", 75 | "batchnorm的作用,就是减小了隐层输出的变化范围,使得不管前面层的权值如何变化,后面层接收到的输入的均值和方差都保持不变\n", 76 | "所以即使它们的数值有变化,但由gamma和beta控制的均值和方差却没有变化,限制了前面层参数变化对后面层输入分布的影响,所以后面层就可以在相对固定的分布上进行训练\n", 77 | "\n", 78 | "同时,batchnorm也有轻微的正则化效果\n", 79 | "dropout是通过将激活随机设置为0或1来引入噪声,而bn则是因为mini-batch估计均值和方差时带有噪声,这样给前面神经元引入噪声,使得后面神经元训练时不过于依赖于任何一个前面层的神经元\n", 80 | "但因为添加的噪声比较微小,所以正则化效果也比较小,实践中可以将bn和dropout一起使用\n", 81 | "\n", 82 | "测试阶段没有mini-batch去估计均值和方差,所以使用指数加权平均在训练阶段的每个batch上计算的均值和方差,作为测试阶段的均值和方差\n", 83 | "\n", 84 | "第三部分是用softmax训练多类分类模型,它实际上是逻辑回归的sigmoid+cross entropy在多类上的推广\n", 85 | "前向和反向过程都和逻辑回归比较像,这里就不再赘述了\n", 86 | "\n", 87 | "最后一部分是深度学习框架和tensorflow\n", 88 | "ng认为,好的深度学习框架有三个标准:\n", 89 | "1. 便于编程,包括神经网络的开发和迭代,以及训练好网络之后对产品做部署\n", 90 | "2. 运行速度要快\n", 91 | "3. 
真正的开放,这指的不仅是开源,而且要有良好的管理,有些公司会逐渐闭源并且把功能转移到他们自己的产品中比如云服务,所以能长时间保持开源、并且不在单一一家公司的掌控之下是很重要的,因为这家公司可能在未来出于某个原因而停止开源\n", 92 | "\n", 93 | "最后ng用一个编程实例来解释了如何用tensorflow定义cost function、如何设置变量和placeholder、如何把输入数据和placeholder绑定起来、如何使用optimizer来做优化,以及构建计算图的概念\n", 94 | "\n", 95 | "covariate shift 怎么理解呢?\n", 96 | "那个其实我也不是很理解,说是输入分布变化对训练模型造成的影响\n", 97 | "\n", 98 | "\n", 99 | "我看了一些batchnorm的解释,包括https://www.zhihu.com/question/38102762 ,说是主要还是解决反向传播时的梯度爆炸和梯度消失问题,说那个什么covariate shift是论文作者瞎解释的,哈哈。。\n", 100 | "\n", 101 | "\n", 102 | "我漏了一个视频的内容。。补一下,第一部分超参数的最后一小部分,那个熊猫模式和鱼子酱模式\n", 103 | "\n", 104 | "训练模型的过程有两类方法:第一种是在没有足够的计算资源或计算能力时,不能同时试验大量模型,只能照看单一模型,每天都调整这个模型的超参数,让性能变得更好;第二种是同时试验很多不同超参数的模型,最后选择学习曲线看起来效果最好的那个模型\n", 105 | "ng形象地把这两种方法称作熊猫模式和鱼子酱模式\n", 106 | "ng建议,在计算资源充足的情况下,绝对应该选择鱼子酱模式,充分尝试各种超参数\n", 107 | "但他同时也说,在某些领域,比如在线广告和计算机视觉领域,数据很多而且模型很大(层数、节点都很多),很难同时训练很多模型,所以用熊猫模式还是鱼子酱模式也和应用领域相关" 108 | ] 109 | } 110 | ], 111 | "metadata": { 112 | "kernelspec": { 113 | "display_name": "Python 3", 114 | "language": "python", 115 | "name": "python3" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": { 119 | "name": "ipython", 120 | "version": 3 121 | }, 122 | "file_extension": ".py", 123 | "mimetype": "text/x-python", 124 | "name": "python", 125 | "nbconvert_exporter": "python", 126 | "pygments_lexer": "ipython3", 127 | "version": "3.6.1" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 2 132 | } 133 | -------------------------------------------------------------------------------- /notes_deeplearning.ai/31 第三课_第一周学习笔记-未完成.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# " 8 | ] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Python 3", 14 | "language": "python", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.6.1" 28 | } 29 | }, 30 | "nbformat": 4, 31 | "nbformat_minor": 2 32 | } 33 | -------------------------------------------------------------------------------- /notes_deeplearning.ai/42 第四课_第二周学习笔记-未完成.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 第四课 第二周 学习笔记\n", 8 | "---\n", 9 | "要想构建一个好的网络,学习别人构建的经典网络是一条捷径;这就像学习python的快速方法是大量阅读优秀的代码一样。\n", 10 | "LeNet-5\n", 11 | "AlexNet\n", 12 | "VGG-16, VGG-19\n", 13 | "\n", 14 | "paper阅读路线:AlexNet -> VGG-16/19 -> LeNet-5" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## 为什么残差网络有效?\n", 22 | "---\n", 23 | "\n" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## 1by1 convolution\n", 31 | "---\n", 32 | "信道(channels)压缩" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Inception Network\n", 40 | "---\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## transfer learning\n", 48 | "---\n", 49 | "迁移学习值的考虑,特别是当数据集很小,计算资源有限的时候" 50 | ] 51 | } 52 | ], 53 | "metadata": { 54 | "kernelspec": { 55 | "display_name": "Python 3", 56 | "language": "python", 57 | "name": "python3" 58 | }, 59 | "language_info": { 60 | 
"codemirror_mode": { 61 | "name": "ipython", 62 | "version": 3 63 | }, 64 | "file_extension": ".py", 65 | "mimetype": "text/x-python", 66 | "name": "python", 67 | "nbconvert_exporter": "python", 68 | "pygments_lexer": "ipython3", 69 | "version": "3.6.1" 70 | } 71 | }, 72 | "nbformat": 4, 73 | "nbformat_minor": 2 74 | } 75 | -------------------------------------------------------------------------------- /notes_deeplearning.ai/43 第四课_第三周学习笔记-未完成.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Course4 Week3 学习笔记\n", 8 | "\n", 9 | "---\n", 10 | "\n", 11 | "内容概要\n", 12 | "* what are localization and detection?\n", 13 | "* landmark detection\n", 14 | "* object detection -- sliding windows detection\n", 15 | "* turning FC layer into convolutional layers\n" 16 | ] 17 | } 18 | ], 19 | "metadata": { 20 | "kernelspec": { 21 | "display_name": "Python 3", 22 | "language": "python", 23 | "name": "python3" 24 | }, 25 | "language_info": { 26 | "codemirror_mode": { 27 | "name": "ipython", 28 | "version": 3 29 | }, 30 | "file_extension": ".py", 31 | "mimetype": "text/x-python", 32 | "name": "python", 33 | "nbconvert_exporter": "python", 34 | "pygments_lexer": "ipython3", 35 | "version": "3.6.1" 36 | } 37 | }, 38 | "nbformat": 4, 39 | "nbformat_minor": 2 40 | } 41 | -------------------------------------------------------------------------------- /notes_deeplearning.ai/C4W1_CNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# C4W1 - 卷积神经网络\n", 8 | "---\n", 9 | "内容概要\n", 10 | "\n", 11 | "1. 为什么需要卷积神经网络?\n", 12 | "2. 卷积操作:filter、padding、stride等\n", 13 | "3. 边缘检测\n", 14 | "4. 池化:Max-Pooling 和 Average-Pooling" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## 1 - 为什么需要卷积神经网络?\n", 22 | "\n", 23 | "---\n", 24 | "\n", 25 | "在Course1中,ng已经向我们展示了用标准神经网络(又称全连接神经网络)来处理图像分类任务,即识别图片中是否有猫。那么,为什么还需要卷积神经网络呢?\n", 26 | "\n", 27 | "需要注意的是,Course1中处理的图片大小为 $64 \\times64 \\times 3$,使用全连接神经网络,其第一个隐含层的权重参数只有12288个;如果图片大小为 $1000\\times1000\\times3$,那么权重参数将会是1百万个。也就是说,随着图片的增大,全连接神经网络的参数数量会急剧增多,这造成两个问题:1)神经网络的训练时长大幅上升;2)容易过拟合。\n", 28 | "\n", 29 | "由此可见,当处理图像变大之后,使用全连接神经网络对其进行处理是行不通的,最主要的原因就是参数太多了。**在卷积神经网络中,有两种方法可以大幅降低参数数量,分别是:1、权值共享(parameter sharing);2、稀疏连接(sparsity of connections),又称局部感知。**\n", 30 | "\n", 31 | "Parameter sharing 中隐含的假设是图像中任一部分的统计特性与其他部分是一样的,这意味着,对图像中某一部分有效的特征检测器(feature detector)在图像的其他部分同样有效。\n", 32 | "\n", 33 | "Sparsity of connections 的意思是,在网络中的每一层的每一个输出值仅依赖于图像输入的很小一部分区域。\n", 34 | "\n", 35 | "---" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## 2 - 卷积(convolutions)\n", 43 | "\n", 44 | "---\n", 45 | "\n", 46 | "典型的卷积过程如下图所示:\n", 47 | "\n", 48 | "\n", 49 | "上图中,黄色的$3\\times3$矩阵为filter(过滤器,也有人称之为卷积核)。所谓卷积,就是filter在image矩阵上按照一定的规律移动、计算点积的过程。\n", 50 | "\n", 51 | "关于点积,请查看:https://en.wikipedia.org/wiki/Dot_product\n", 52 | "\n", 53 | "卷积运算的实现:\n", 54 | "* tensorflow: tf.nn.conv2d\n", 55 | "* keras: Conv2D\n", 56 | "\n", 57 | "filter的大小f通常是奇数,这样做,有两个优势:\n", 58 | "1. 方便padding操作\n", 59 | "2. 
f是奇数的filter有一个中心点,便于计算,指出filter位置\n", 60 | "\n", 61 | "---\n", 62 | "\n", 63 | "### 2.1 - padding(填充)\n", 64 | "\n", 65 | "---\n", 66 | "\n", 67 | "使用 $f\\times f$ 的filter在大小为 $n\\times n$ 的矩阵上进行卷积操作,得到的结果矩阵会比原始矩阵小。但是,有的时候,我们并不希望结果矩阵比原始矩阵更小。此时,我们就需要对原始矩阵进行padding(填充)操作。\n", 68 | "\n", 69 | "padding操作,其实就是在原始矩阵的四周填上合适数量的数值(通常是0),使得 $f\\times f$ 的filter在矩阵上进行卷积操作之后能够得到与原始矩阵大小一样的结果矩阵。\n", 70 | "\n", 71 | "使用padding的另外一个原因是:在没有经过padding的输入矩阵上进行卷积操作,矩阵边缘仅被卷积一次,这有可能造成信息丢失。\n", 72 | "\n", 73 | "根据卷积过程是否进行了padding操作,卷积操作分为Valid convolutions(无padding)和Same convolutions(有padding)。\n", 74 | "\n", 75 | "\n", 76 | "---\n", 77 | "\n", 78 | "\n", 79 | "### 2.2 - stride(步幅)\n", 80 | "\n", 81 | "---\n", 82 | "\n", 83 | "步幅是控制filter在图像矩阵中某一维度移动跨度的度量。fliter可以在不同的维度有不同的stride,但是,通常将所有维度的步幅都设置成同样的大小。\n", 84 | "\n", 85 | "### 2.3 - 边缘检测(edge detection)\n", 86 | "\n", 87 | "---\n", 88 | "\n", 89 | "图片中的边缘,最主要的一个特点是颜色骤变,在RGB矩阵的中体现是边缘两侧的数值相差较大。基于边缘的这个特性,设计一个合适的filter,将图像RGB矩阵中边缘两侧的数值转换成同样大小(通常转化为0),同时突出边缘两侧中间的数值,就能实现图像中的边缘检测。\n", 90 | "\n", 91 | "\n", 92 | "\n", 93 | "上图是垂直边缘检测的一个简单例子,观察可以发现,原始输入矩阵有一条非常明显的垂直边缘,经过中间的filter卷积之后,在输出矩阵中就能看到边缘的位置了。\n", 94 | "\n", 95 | "\n", 96 | "---\n", 97 | "\n", 98 | "### 2.4 - Convolutions over volumes(立体卷积)\n", 99 | "\n", 100 | "---\n", 101 | "\n", 102 | "图片一般有RGB三个channels(通道),那么如何在图片上进行卷积操作呢?一个可行的方法是,将filter也设计成三通道,这样的话,RGB每个通道都有对应的filter矩阵,各通道计算点积之后,对应求和,就得到一个立体卷积的结果。示意图如下:\n", 103 | "\n", 104 | "\n", 105 | "\n", 106 | "请注意,立体卷积之后,得到的结果是一个二维矩阵,也就是仅有一个channel,这很显然会丢失很多信息。为了使尽可能多的信息被保留下来,需要使用多个filter来进行卷积计算,需要多少个channels就使用多少个filter。使用多个filters的示意图如下:\n", 107 | "\n", 108 | "\n", 109 | "\n", 110 | "\n", 111 | "---\n", 112 | "\n", 113 | "### 2.5 - 卷积过程总结\n", 114 | "\n", 115 | "---\n", 116 | "\n", 117 | "对于大小为 $n \\times n$ 的图片,假设filter大小为 $f \\times f$,padding参数为p,stride为s,则输出矩阵可以按如下公式计算:\n", 118 | "\n", 119 | "\n", 120 | "下面是从网上找到的一张gif图,展现了两个filter在图像(RGB三个通道)上的卷积过程,试着理解这张图:\n", 121 | "" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "## 3 - Pooling(池化)\n", 129 | "\n", 130 | "---\n", 131 | "\n", 132 | "图像经过卷积层之后,输出矩阵往往比输入矩阵有所减小,但是依然很大。为了大幅减少参数的数量(也就是将权重系数矩阵减小),防止过拟合,通常会在卷积层后面加一个Pooling(池化)层。具体来说,池化层主要有两种:Max-Pooling(最大池化) 和 Average-Pooling(平均池化)。\n", 133 | "\n", 134 | "**池化过程和卷积过程非常相似,同样可以有 padding、stride等参数,不同的是,池化过程中的filter没有参数,只有大小。**\n", 135 | "\n", 136 | "以Max-Pooling为例,其过程如下图所示:\n", 137 | "\n", 138 | "\n", 139 | "\n", 140 | "从图中可以看出,其计算过程为:大小为 $ 2 \\times 2$ 的filter以2个步幅在输入矩阵上进行移动,每次计算出filter范围内的最大值,作为输出。\n", 141 | "\n", 142 | "Average-Pooling与Max-Pooling类似,不过,平均池化过程将filter范围内的平均值作为输出。\n" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "## 4 - 群内讨论总结\n", 150 | "\n", 151 | "---\n", 152 | "\n", 153 | "* 定义:分类=“图片里有什么”或”属于什么”,检测=“标定出特定的目标”;那么,原始CNN结构的分类能力强过检测能力\n", 154 | "* 卷积操作对”平移不变性”有了良好的支持,但是对”旋转不变性”就无能为力了;“平移”指不旋转之外的上下左右移动,沿着斜线移动也可" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "## 参考资料\n", 162 | "\n", 163 | "---\n", 164 | "\n", 165 | "1. [梯度上升可视化卷积神经网络](https://mp.weixin.qq.com/s/tpfmh4PTMjFaMHKZLZw3Pg)\n", 166 | "2. filter的解释:https://www.zhihu.com/question/52237725/answer/172479030\n", 167 | "3. 
[在定义卷积时为什么要对其中一个函数进行翻转?](https://www.zhihu.com/question/20500497)" 168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "kernelspec": { 173 | "display_name": "Python 3", 174 | "language": "python", 175 | "name": "python3" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.6.2" 188 | } 189 | }, 190 | "nbformat": 4, 191 | "nbformat_minor": 2 192 | } 193 | -------------------------------------------------------------------------------- /notes_deeplearning.ai/README.md: -------------------------------------------------------------------------------- 1 | # DeepLearing.ai -- 学习笔记 2 | 3 | 学习的过程,我做了一些笔记,用jupyter notebook写的,有兴趣的可以下载看看。 4 | 5 | homework 文件夹里面是 课程作业 6 | pdf 文件夹里面是 课程ppt 7 | 8 | -------------------------------------------------------------------------------- /notes_deeplearning.ai/datasets/test_catvnoncat.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/datasets/test_catvnoncat.h5 -------------------------------------------------------------------------------- /notes_deeplearning.ai/datasets/train_catvnoncat.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/datasets/train_catvnoncat.h5 -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/Conv2D.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/Conv2D.gif -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/LogReg_kiank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/LogReg_kiank.png -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/NlayerNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/NlayerNN.png -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/activation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/activation.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/basic_recipe_for_ML.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/basic_recipe_for_ML.png -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/bias_variance1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/bias_variance1.png -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/cg1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/cg1.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/classification_kiank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/classification_kiank.png -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/conv3d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/conv3d.png -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/convolutions.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/convolutions.gif -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/dc1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/dc1.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/dc2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/dc2.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/dc3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/dc3.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/dc4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/dc4.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/dd1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/dd1.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/dropout1_kiank.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/dropout1_kiank.mp4 -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/dropout2_kiank.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/dropout2_kiank.mp4 -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/edge_detect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/edge_detect.png -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/final outline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/final outline.png -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/grad_summary1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/grad_summary1.png -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/gradient_descent.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/gradient_descent.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/leaky_relu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/leaky_relu.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/multi_filters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/multi_filters.png -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/nn1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/nn1.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/nn2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/nn2.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/nn4.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/nn4.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/normalize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/normalize.png -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/normalize2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/normalize2.png -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/padding_strides.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/padding_strides.gif -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/pooling.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/pooling.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/relu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/relu.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/sgd.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/sgd.gif -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/sgd_bad.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/sgd_bad.gif -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/sigmoid.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/sigmoid.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/split.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/split.png -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/summary_conv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/summary_conv.png 
-------------------------------------------------------------------------------- /notes_deeplearning.ai/images/tanh.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/tanh.jpg -------------------------------------------------------------------------------- /notes_deeplearning.ai/images/流程图.vsdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/images/流程图.vsdx -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/11 第一课_第一周.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/11 第一课_第一周.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/12 第一课_第二周.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/12 第一课_第二周.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/13 第一课_第三周.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/13 第一课_第三周.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/14 第一课_第四周.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/14 第一课_第四周.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/21 第二课_第一周.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/21 第二课_第一周.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/22 第二课_第二周.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/22 第二课_第二周.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/23 第二课_第三周.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/23 第二课_第三周.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/31 第三课_第一周.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/31 第三课_第一周.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/32 第三课_第二周.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/32 第三课_第二周.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/41 第四课_第一周.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/41 第四课_第一周.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/42 第四课_第二周.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/42 第四课_第二周.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/43 第四课_第三周.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/43 第四课_第三周.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/44 第四课_第四周.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/44 第四课_第四周.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/51 第五课第一周讲义.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/51 第五课第一周讲义.pdf -------------------------------------------------------------------------------- /notes_deeplearning.ai/pdf/53 第五课第三周讲义.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_deeplearning.ai/pdf/53 第五课第三周讲义.pdf -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Aggregation结果_v3.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Aggregation结果_v3.tif -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Clustering_v3.py: -------------------------------------------------------------------------------- 1 | #encoding:utf-8 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import random 5 | 6 | MAX = 1000000 7 | 8 | # 计算最近邻类簇中心 9 | def nearestNeighbor(index): 10 | dd = MAX 11 | neighbor = -1 12 | for i in range(length): 13 | if dist[index, i] < dd and rho[index] < rho[i]: 14 | dd = dist[index, i] 15 | neighbor = i 16 | if result[neighbor] == -1: 17 | result[neighbor] = nearestNeighbor(neighbor) 18 | return result[neighbor] 19 | 20 | # Read data 21 | fileName = input("Enter the file's name: ") 22 | location = [] 23 | label = [] 24 | for line in open(fileName, "r"): 25 | items = line.strip("\n").split(",") 26 | label.append(int(items.pop())) # pop 从列表中移除并返回最后一个对象 27 
| tmp = [] 28 | for item in items: 29 | tmp.append(float(item)) 30 | location.append(tmp) 31 | location = np.array(location) # n*2 32 | label = np.array(label) # n*1 33 | length = len(location) # length 位置(对象)总数 34 | 35 | # Calculate distance 36 | dist = np.zeros((length, length)) # 存储各点之间的距离 37 | ll = [] 38 | begin = 0 39 | while begin < length-1: # begin控制外循环 40 | end = begin + 1 41 | while end < length: # end控制内循环 42 | dd = np.linalg.norm(location[begin]-location[end]) # ?距离计算公式 43 | dist[begin][end] = dd 44 | dist[end][begin] = dd 45 | ll.append(dd) 46 | end = end + 1 47 | begin = begin + 1 48 | ll = np.array(ll) 49 | 50 | # 阈值确定思路:对所有距离进行排序,取2%分位数 51 | # percent = float(raw_input("Enter the average percentage of neighbors: ")) 52 | percent = 2.0 53 | position = int(len(ll) * percent / 100) 54 | sortedll = np.sort(ll) 55 | dc = sortedll[position] #阈值 56 | 57 | # local density 58 | rho = np.zeros((length, 1)) # rho n*1 记录每个点的局部密度 59 | begin = 0 60 | while begin < length-1: 61 | end = begin + 1 62 | while end < length: 63 | rho[begin] = rho[begin] + np.exp(-(dist[begin][end]/dc) ** 2) 64 | rho[end] = rho[end] + np.exp(-(dist[begin][end]/dc) ** 2) 65 | #if dist[begin][end] < dc: 66 | # rho[begin] = rho[begin] + 1 67 | # rho[end] = rho[end] + 1 68 | end = end + 1 69 | begin = begin + 1 70 | 71 | # 求比点的局部密度大的点到该点的最小距离 72 | delta = np.ones((length, 1)) * MAX 73 | maxDensity = np.max(rho) 74 | begin = 0 75 | while begin < length: 76 | if rho[begin] < maxDensity: 77 | end = 0 78 | while end < length: 79 | if rho[end] > rho[begin] and dist[begin][end] < delta[begin]: 80 | delta[begin] = dist[begin][end] 81 | end = end + 1 82 | else: 83 | delta[begin] = 0.0 84 | end = 0 85 | while end < length: 86 | if dist[begin][end] > delta[begin]: 87 | delta[begin] = dist[begin][end] 88 | end = end + 1 89 | begin = begin + 1 90 | 91 | rate1 = 0.5 92 | #Aggregation Spiral 0.6 93 | #Jain Flame 0.8 94 | #D31 0.75 95 | #R15 0.6 96 | #Compound 0.5 97 | #Pathbased 0.2 98 | thRho = rate1 * (np.max(rho) - np.min(rho)) + np.min(rho) 99 | 100 | rate2 = 0.3 101 | #Aggregation Spiral 0.2 102 | #Jain Flame 0.2 103 | #D31 0.05 104 | #R15 0.1 105 | #Compound 0.08 106 | #Pathbased 0.4 107 | thDel = rate2 * (np.max(delta) - np.min(delta)) + np.min(delta) 108 | 109 | 110 | result = np.ones(length, dtype=np.int) * (-1) 111 | center = 0 112 | for i in range(length): #items: 113 | if rho[i] > thRho and delta[i] > thDel: 114 | result[i] = center 115 | center = center + 1 116 | 117 | for i in range(length): 118 | dist[i][i] = MAX 119 | 120 | for i in range(length): 121 | if result[i] == -1: 122 | result[i] = nearestNeighbor(i) 123 | else: 124 | continue 125 | 126 | # 作决策图,rho 局部密度,delta 到具有更高局部密度对象的距离的最小值 127 | plt.plot(rho, delta, '.') 128 | plt.xlabel('rho'), plt.ylabel('delta') 129 | plt.show() 130 | 131 | # 设定colors 132 | R = list(range(256)) 133 | random.shuffle(R) 134 | R = np.array(R)/255.0 135 | G = list(range(256)) 136 | random.shuffle(G) 137 | G = np.array(G)/255.0 138 | B = list(range(256)) 139 | random.shuffle(B) 140 | B = np.array(B)/255.0 141 | colors = [] 142 | for i in range(256): 143 | colors.append((R[i], G[i], B[i])) 144 | 145 | # 聚类结果作图 146 | plt.figure() 147 | for i in range(length): 148 | index = result[i] 149 | plt.plot(location[i][0], location[i][1], color = colors[index], marker = '.') 150 | plt.xlabel('x'), plt.ylabel('y') 151 | plt.show() 152 | 153 | # 利用原始类标签作图,验证聚类结果的准确性 154 | plt.figure() 155 | for i in range(length): # 利用循环将每一个点画到图上 156 | index = label[i] 157 | plt.plot(location[i][0], 
location[i][1], color = colors[index], marker = '.') 158 | plt.xlabel('x'), plt.ylabel('y') 159 | plt.show() -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Compound.txt: -------------------------------------------------------------------------------- 1 | 26.75,22.15,1 2 | 29.8,22.15,1 3 | 31.55,21.1,1 4 | 27.7,20.85,1 5 | 29.9,19.95,1 6 | 26.8,19.05,1 7 | 28.35,18.25,1 8 | 30.4,17.85,1 9 | 27.25,16.7,1 10 | 29.05,16,1 11 | 27.15,14.85,1 12 | 28.2,13.95,1 13 | 30.35,13.85,1 14 | 27.25,11.95,1 15 | 29.45,12.05,1 16 | 31.55,12.2,1 17 | 33.05,10.65,1 18 | 29.95,9.85,1 19 | 28,9.75,1 20 | 27.15,7.85,1 21 | 29.15,8.1,1 22 | 31.95,8.6,1 23 | 34.7,8.55,1 24 | 34.8,12.25,1 25 | 36.3,15.25,1 26 | 36.6,13.2,1 27 | 38.7,14.25,1 28 | 40.3,15.5,1 29 | 42.25,14.25,1 30 | 40.7,12.8,1 31 | 38.6,12.1,1 32 | 36.1,10.5,1 33 | 38.35,10.4,1 34 | 37.65,8.4,1 35 | 40.15,8.55,1 36 | 40.8,10.65,1 37 | 42.9,11.25,1 38 | 41.95,8.5,1 39 | 42.45,17.45,1 40 | 40.25,18.45,1 41 | 42.55,19.45,1 42 | 40.95,20.65,1 43 | 42.25,22.15,1 44 | 38.85,22.4,1 45 | 38.4,20,1 46 | 35.25,20.2,1 47 | 33.25,21,1 48 | 34.15,22.35,1 49 | 35.55,22.5,1 50 | 36.55,21.4,1 51 | 33.35,19.6,2 52 | 32.85,19.55,2 53 | 32.4,19.15,2 54 | 32.45,18.7,2 55 | 32.8,18.9,2 56 | 33.2,19.2,2 57 | 33.7,19.05,2 58 | 33.4,18.75,2 59 | 33.05,18.5,2 60 | 32.8,18.2,2 61 | 34,18.7,2 62 | 33.85,18.25,2 63 | 33.35,18.15,2 64 | 32.8,17.7,2 65 | 33.15,17.55,2 66 | 33.75,17.75,2 67 | 34.15,17.85,2 68 | 34.35,18.35,2 69 | 34.95,18.5,2 70 | 34.75,18.05,2 71 | 35.15,18.05,2 72 | 35.65,18.15,2 73 | 35.45,18.7,2 74 | 36.05,18.75,2 75 | 36.25,18.2,2 76 | 36.6,18.7,2 77 | 37.1,18.5,2 78 | 36.75,18.1,2 79 | 37.65,18.3,2 80 | 37.15,17.85,2 81 | 37.65,17.75,2 82 | 38.05,18.1,2 83 | 38.45,17.7,2 84 | 38.8,17.3,2 85 | 38.2,17.25,2 86 | 38.6,16.8,2 87 | 38.25,16.35,2 88 | 37.9,16.85,2 89 | 37.5,17.3,2 90 | 37.65,16.4,2 91 | 37.15,16.7,2 92 | 37,17.15,2 93 | 36.6,17.4,2 94 | 36.15,17.55,2 95 | 35.75,17.65,2 96 | 36.6,16.9,2 97 | 36.05,16.95,2 98 | 35.45,17,2 99 | 35.3,17.55,2 100 | 34.9,17,2 101 | 34.75,17.45,2 102 | 34.3,17.35,2 103 | 34.3,16.8,2 104 | 33.9,17.2,2 105 | 33.35,17.05,2 106 | 32.85,16.95,2 107 | 33.55,16.6,2 108 | 34,16.4,2 109 | 32.45,17.2,2 110 | 32.1,16.85,2 111 | 31.7,16.65,2 112 | 31.2,16.35,2 113 | 30.95,15.75,2 114 | 31.15,15.35,2 115 | 31.45,15.1,2 116 | 31.75,14.7,2 117 | 32.15,14.35,2 118 | 32.65,14.15,2 119 | 33.15,14.05,2 120 | 33.8,13.9,2 121 | 34.35,14.2,2 122 | 34.3,14.85,2 123 | 34.05,15.35,2 124 | 33.9,15.95,2 125 | 33.35,16.05,2 126 | 33,16.5,2 127 | 32.45,16.6,2 128 | 31.95,16.25,2 129 | 31.5,15.85,2 130 | 31.75,15.4,2 131 | 32.15,15.8,2 132 | 32.55,16.1,2 133 | 32.9,15.7,2 134 | 32.55,15.4,2 135 | 32.05,15.2,2 136 | 32.5,14.8,2 137 | 33,15.25,2 138 | 33.5,15.6,2 139 | 33.6,15.05,2 140 | 32.9,14.7,2 141 | 33.3,14.5,2 142 | 33.8,14.5,2 143 | 9.2,22.35,3 144 | 10.9,22.35,3 145 | 12.45,22.3,3 146 | 13.95,22.05,3 147 | 14.65,20.3,3 148 | 13.15,20.8,3 149 | 11.6,20.95,3 150 | 10.25,21.25,3 151 | 9.2,20.8,3 152 | 8.05,21.55,3 153 | 7.15,19.9,3 154 | 8.55,20,3 155 | 8.5,19.2,3 156 | 7.35,18.3,3 157 | 8.25,16.65,3 158 | 8.95,18,3 159 | 9.6,18.85,3 160 | 9.65,19.75,3 161 | 10.2,20.25,3 162 | 10.9,20.3,3 163 | 12.15,20,3 164 | 11.25,19.75,3 165 | 10.8,19.6,3 166 | 10.4,19.55,3 167 | 10.65,19.35,3 168 | 10.3,19.15,3 169 | 10.95,19.1,3 170 | 10.6,18.85,3 171 | 10.05,18.1,3 172 | 10.35,16.9,3 173 | 10.05,15.9,3 174 | 11.15,18.1,3 175 | 12.1,18.75,3 
176 | 13.2,19.2,3 177 | 11.5,17.1,3 178 | 12.65,17.65,3 179 | 14.45,18.35,4 180 | 13.9,16.7,3 181 | 12.6,15.8,3 182 | 15.95,20.75,4 183 | 16.95,21.6,4 184 | 17.9,21.95,4 185 | 19,22.7,4 186 | 20.45,22.75,4 187 | 19.1,21.7,4 188 | 20.4,21.4,4 189 | 21.95,21.9,4 190 | 18.65,20.7,4 191 | 17.75,20.55,4 192 | 17.05,19.85,4 193 | 15.75,19.45,4 194 | 15.75,18.25,4 195 | 16.35,16.9,4 196 | 17.2,15.9,4 197 | 17.9,17,4 198 | 17.3,17.75,4 199 | 17,18.9,4 200 | 17.8,18.65,4 201 | 17.85,19.5,4 202 | 18.5,19.9,4 203 | 19.1,19.95,4 204 | 19.55,20.55,4 205 | 20.1,19.9,4 206 | 19.55,19.3,4 207 | 18.95,19.3,4 208 | 18.55,19.2,4 209 | 18.45,18.85,4 210 | 18.85,18.9,4 211 | 19.2,18.8,4 212 | 18.75,18.55,4 213 | 18.3,18.1,4 214 | 19.1,17.8,4 215 | 19,16.75,4 216 | 18.75,15.5,4 217 | 19.65,18.2,4 218 | 20.1,18.95,4 219 | 21.25,20.4,4 220 | 21.45,19,4 221 | 20.9,17.9,4 222 | 20.25,17.2,4 223 | 20.1,15.4,4 224 | 21.4,15.95,4 225 | 22.2,17.15,4 226 | 11.4,12.55,5 227 | 12.05,12.75,5 228 | 12.7,13,5 229 | 13.35,13.05,5 230 | 14.2,12.95,5 231 | 15.05,12.95,5 232 | 15.6,12.95,5 233 | 16.1,13.1,5 234 | 15.95,12.6,5 235 | 15.4,12.45,5 236 | 14.65,12.4,5 237 | 13.85,12.4,5 238 | 13.15,12.2,5 239 | 12.65,12.4,5 240 | 11.9,12.1,5 241 | 12,11.5,5 242 | 12.65,11.65,5 243 | 13.4,11.65,5 244 | 14.1,11.7,5 245 | 14.6,11.8,5 246 | 15.2,11.95,5 247 | 15.05,11.55,5 248 | 14.45,11.2,5 249 | 13.95,10.9,5 250 | 13.05,11.1,5 251 | 13.55,10.65,5 252 | 12.45,10.9,5 253 | 13.2,10.25,5 254 | 11.25,11.1,5 255 | 11.25,11.85,5 256 | 10.7,12.25,5 257 | 10.05,11.85,5 258 | 10.6,11.6,5 259 | 9.75,11.35,5 260 | 10.4,10.9,5 261 | 9.75,10.6,5 262 | 9.75,9.8,5 263 | 10.35,10.2,5 264 | 10.9,10.4,5 265 | 11.7,10.55,5 266 | 12.4,10.1,5 267 | 12.9,9.7,5 268 | 12.35,9.65,5 269 | 11.85,10,5 270 | 11.15,9.8,5 271 | 10.65,9.55,5 272 | 10.1,9.25,5 273 | 10.75,9,5 274 | 11.1,9.3,5 275 | 11.7,9.4,5 276 | 12.15,9.1,5 277 | 12.85,9.05,5 278 | 12.45,8.7,5 279 | 11.95,8.25,5 280 | 11.7,8.85,5 281 | 11.3,8.5,5 282 | 11.55,7.95,5 283 | 12.9,8.5,5 284 | 13.25,8.05,5 285 | 12.65,7.95,5 286 | 12.1,7.6,5 287 | 11.65,7.35,5 288 | 12.2,7,5 289 | 11.8,6.65,5 290 | 12.65,7.3,5 291 | 13.2,7.55,5 292 | 13.65,7.75,5 293 | 14.35,7.55,5 294 | 13.8,7.3,5 295 | 13.35,6.85,5 296 | 12.7,6.7,5 297 | 12.45,6.25,5 298 | 13.2,5.85,5 299 | 13.65,6.25,5 300 | 14.1,6.75,5 301 | 14.7,6.9,5 302 | 15,7.5,5 303 | 15.85,7.3,5 304 | 15.35,7.05,5 305 | 15.1,6.35,5 306 | 14.45,6.3,5 307 | 14.75,5.75,5 308 | 13.95,5.8,5 309 | 15.5,5.9,5 310 | 15.8,6.4,5 311 | 16.05,6.85,5 312 | 16.55,7.1,5 313 | 16.7,6.5,5 314 | 16.25,6.1,5 315 | 17.05,6.25,5 316 | 15.85,11.55,5 317 | 15.9,12.1,5 318 | 16.3,11.65,5 319 | 16.55,12.05,5 320 | 16.5,12.6,5 321 | 16.75,13.1,5 322 | 17.5,13,5 323 | 17.15,12.65,5 324 | 17.1,12.1,5 325 | 16.9,11.7,5 326 | 17.4,11.65,5 327 | 17.55,12.1,5 328 | 17.75,12.65,5 329 | 18.3,12.75,5 330 | 18.25,12.25,5 331 | 18,11.95,5 332 | 17.85,11.5,5 333 | 18.3,11.65,5 334 | 18.6,12,5 335 | 18.85,12.45,5 336 | 19.1,11.8,5 337 | 18.85,11.45,5 338 | 18.5,11.15,5 339 | 18.95,10.8,5 340 | 19.3,11.15,5 341 | 19.4,10.7,5 342 | 19.25,10.35,5 343 | 19.9,10.6,5 344 | 19.65,10.15,5 345 | 19.45,9.75,5 346 | 19.9,9.45,5 347 | 20.3,10.05,5 348 | 20.65,10.35,5 349 | 21.25,10.1,5 350 | 20.9,9.9,5 351 | 21.65,9.65,5 352 | 21.15,9.35,5 353 | 20.5,9.4,5 354 | 19.5,9.2,5 355 | 19.95,8.85,5 356 | 20.65,8.8,5 357 | 21.2,8.7,5 358 | 21.9,8.85,5 359 | 21.75,8.25,5 360 | 21.65,7.8,5 361 | 21.05,8,5 362 | 20.3,8.2,5 363 | 19.4,8.7,5 364 | 19.6,8.05,5 365 | 18.95,8.1,5 366 | 20,7.6,5 367 | 20.55,7.55,5 368 | 
21.25,7.25,5 369 | 20.85,6.85,5 370 | 20.25,7.05,5 371 | 19.55,7.05,5 372 | 19.05,7.45,5 373 | 18.35,7.6,5 374 | 17.85,7.3,5 375 | 18.3,7.1,5 376 | 18.95,6.85,5 377 | 19.6,6.25,5 378 | 20.15,6.45,5 379 | 18.8,6.25,5 380 | 18.35,6.55,5 381 | 17.65,6.55,5 382 | 17.25,6.9,5 383 | 17.95,6.2,5 384 | 17.45,9.85,6 385 | 17.2,9.25,6 386 | 17,9.6,6 387 | 17,10.05,6 388 | 16.45,10.1,6 389 | 16.5,9.8,6 390 | 16.6,9.45,6 391 | 16.6,9.05,6 392 | 15.9,9,6 393 | 16.05,9.35,6 394 | 16.05,9.65,6 395 | 15.85,9.95,6 396 | 15.35,9.9,6 397 | 15.6,9.45,6 398 | 15.3,9.15,6 399 | 15.1,9.55,6 400 | -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Compound结果_v3.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Compound结果_v3.tif -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/D31结果_v3.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_paper/Clustering by fast search and find of density peaks/fast_clustering/D31结果_v3.tif -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Flame.txt: -------------------------------------------------------------------------------- 1 | 1.85,27.8,1 2 | 1.35,26.65,1 3 | 1.4,23.25,2 4 | 0.85,23.05,2 5 | 0.5,22.35,2 6 | 0.65,21.35,2 7 | 1.1,22.05,2 8 | 1.35,22.65,2 9 | 1.95,22.8,2 10 | 2.4,22.45,2 11 | 1.8,22,2 12 | 2.5,21.85,2 13 | 2.95,21.4,2 14 | 1.9,21.25,2 15 | 1.35,21.45,2 16 | 1.35,20.9,2 17 | 1.25,20.35,2 18 | 1.75,20.05,2 19 | 2,20.6,2 20 | 2.5,21,2 21 | 1.7,19.05,2 22 | 2.4,20.05,2 23 | 3.05,20.45,2 24 | 3.7,20.45,2 25 | 3.45,19.9,2 26 | 2.95,19.5,2 27 | 2.4,19.4,2 28 | 2.4,18.25,2 29 | 2.85,18.75,2 30 | 3.25,19.05,2 31 | 3.95,19.6,2 32 | 2.7,17.8,2 33 | 3.45,18.05,2 34 | 3.8,18.55,2 35 | 4,19.1,2 36 | 4.45,19.9,2 37 | 4.65,19.15,2 38 | 4.85,18.45,2 39 | 4.3,18.05,2 40 | 3.35,17.3,2 41 | 3.7,16.3,2 42 | 4.4,16.95,2 43 | 4.25,17.4,2 44 | 4.8,17.65,2 45 | 5.25,18.25,2 46 | 5.75,18.55,2 47 | 5.3,19.25,2 48 | 6.05,19.55,2 49 | 6.5,18.9,2 50 | 6.05,18.2,2 51 | 5.6,17.8,2 52 | 5.45,17.15,2 53 | 5.05,16.55,2 54 | 4.55,16.05,2 55 | 4.95,15.45,2 56 | 5.85,14.8,2 57 | 5.6,15.3,2 58 | 5.65,16,2 59 | 5.95,16.8,2 60 | 6.25,16.4,2 61 | 6.1,17.45,2 62 | 6.6,17.65,2 63 | 6.65,18.3,2 64 | 7.3,18.35,2 65 | 7.85,18.3,2 66 | 7.15,17.8,2 67 | 7.6,17.7,2 68 | 6.7,17.25,2 69 | 7.3,17.25,2 70 | 6.7,16.8,2 71 | 7.3,16.65,2 72 | 6.75,16.3,2 73 | 7.4,16.2,2 74 | 6.55,15.75,2 75 | 7.35,15.8,2 76 | 6.8,14.95,2 77 | 7.45,15.1,2 78 | 6.85,14.45,2 79 | 7.6,14.6,2 80 | 8.55,14.65,2 81 | 8.2,15.5,2 82 | 7.9,16.1,2 83 | 8.05,16.5,2 84 | 7.8,17,2 85 | 8,17.45,2 86 | 8.4,18.1,2 87 | 8.65,17.75,2 88 | 8.9,17.1,2 89 | 8.4,17.1,2 90 | 8.65,16.65,2 91 | 8.45,16.05,2 92 | 8.85,15.35,2 93 | 9.6,15.3,2 94 | 9.15,16,2 95 | 10.2,16,2 96 | 9.5,16.65,2 97 | 10.75,16.6,2 98 | 10.45,17.2,2 99 | 9.85,17.1,2 100 | 9.4,17.6,2 101 | 10.15,17.7,2 102 | 9.85,18.15,2 103 | 9.05,18.25,2 104 | 9.3,18.7,2 105 | 9.15,19.15,2 106 | 8.5,18.8,2 107 | 11.65,17.45,2 108 | 11.1,17.65,2 
109 | 10.4,18.25,2 110 | 10,18.95,2 111 | 11.95,18.25,2 112 | 11.25,18.4,2 113 | 10.6,18.9,2 114 | 11.15,19,2 115 | 11.9,18.85,2 116 | 12.6,18.9,2 117 | 11.8,19.45,2 118 | 11.05,19.45,2 119 | 10.3,19.4,2 120 | 9.9,19.75,2 121 | 10.45,20,2 122 | 13.05,19.9,2 123 | 12.5,19.75,2 124 | 11.9,20.05,2 125 | 11.2,20.25,2 126 | 10.85,20.85,2 127 | 11.4,21.25,2 128 | 11.7,20.6,2 129 | 12.3,20.45,2 130 | 12.95,20.55,2 131 | 12.55,20.95,2 132 | 12.05,21.25,2 133 | 11.75,22.1,2 134 | 12.25,21.85,2 135 | 12.8,21.5,2 136 | 13.55,21,2 137 | 13.6,21.6,2 138 | 12.95,22,2 139 | 12.5,22.25,2 140 | 12.2,22.85,2 141 | 12.7,23.35,2 142 | 13,22.7,2 143 | 13.55,22.2,2 144 | 14.05,22.25,2 145 | 14.2,23.05,2 146 | 14.1,23.6,2 147 | 13.5,22.8,2 148 | 13.35,23.5,2 149 | 13.3,24,2 150 | 7.3,19.15,2 151 | 7.95,19.35,2 152 | 7.7,20.05,2 153 | 6.75,19.9,2 154 | 5.25,20.35,2 155 | 6.15,20.7,1 156 | 7,20.7,1 157 | 7.6,21.2,1 158 | 8.55,20.6,1 159 | 9.35,20.5,1 160 | 8.3,21.45,1 161 | 7.9,21.6,1 162 | 7.15,21.75,1 163 | 6.7,21.3,1 164 | 5.2,21.1,2 165 | 6.2,21.95,1 166 | 6.75,22.4,1 167 | 6.15,22.5,1 168 | 5.65,22.2,1 169 | 4.65,22.55,1 170 | 4.1,23.45,1 171 | 5.35,22.8,1 172 | 7.4,22.6,1 173 | 7.75,22.1,1 174 | 8.5,22.3,1 175 | 9.3,22,1 176 | 9.7,22.95,1 177 | 8.8,22.95,1 178 | 8.05,22.9,1 179 | 7.6,23.15,1 180 | 6.85,23,1 181 | 6.2,23.25,1 182 | 5.7,23.4,1 183 | 5.1,23.55,1 184 | 4.55,24.15,1 185 | 5.5,24,1 186 | 6.1,24.05,1 187 | 6.5,23.6,1 188 | 6.75,23.95,1 189 | 7.3,23.75,1 190 | 8.3,23.4,1 191 | 8.9,23.7,1 192 | 9.55,23.65,1 193 | 10.35,24.1,1 194 | 7.95,24.05,1 195 | 3.95,24.4,1 196 | 3.75,25.25,1 197 | 3.9,25.95,1 198 | 4.55,26.65,1 199 | 5.25,26.75,1 200 | 6.5,27.6,1 201 | 7.45,27.6,1 202 | 8.35,27.35,1 203 | 9.25,27.2,1 204 | 9.95,26.5,1 205 | 10.55,25.6,1 206 | 9.9,24.95,1 207 | 9.2,24.5,1 208 | 8.55,24.2,1 209 | 8.8,24.8,1 210 | 9.2,25.35,1 211 | 9.55,26.05,1 212 | 9.05,26.6,1 213 | 8.8,25.8,1 214 | 8.15,26.35,1 215 | 8.05,25.8,1 216 | 8.35,25.2,1 217 | 7.9,25.3,1 218 | 8.05,24.7,1 219 | 7.3,24.4,1 220 | 7.55,24.85,1 221 | 6.85,24.45,1 222 | 6.25,24.65,1 223 | 5.55,24.5,1 224 | 4.65,25.1,1 225 | 5,25.55,1 226 | 5.55,26.1,1 227 | 5.55,25.25,1 228 | 6.2,25.2,1 229 | 6.8,25.05,1 230 | 7.4,25.25,1 231 | 6.65,25.45,1 232 | 6.15,25.8,1 233 | 6.5,26.1,1 234 | 6.6,26.6,1 235 | 7.7,26.65,1 236 | 7.5,26.2,1 237 | 7.5,25.65,1 238 | 7.05,25.85,1 239 | 6.9,27.15,1 240 | 6.15,26.9,1 241 | -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Flame结果_v3.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Flame结果_v3.tif -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Jain.txt: -------------------------------------------------------------------------------- 1 | 0.85,17.45,2 2 | 0.75,15.6,2 3 | 3.3,15.45,2 4 | 5.25,14.2,2 5 | 4.9,15.65,2 6 | 5.35,15.85,2 7 | 5.1,17.9,2 8 | 4.6,18.25,2 9 | 4.05,18.75,2 10 | 3.4,19.7,2 11 | 2.9,21.15,2 12 | 3.1,21.85,2 13 | 3.9,21.85,2 14 | 4.4,20.05,2 15 | 7.2,14.5,2 16 | 7.65,16.5,2 17 | 7.1,18.65,2 18 | 7.05,19.9,2 19 | 5.85,20.55,2 20 | 5.5,21.8,2 21 | 6.55,21.8,2 22 | 6.05,22.3,2 23 | 5.2,23.4,2 24 | 4.55,23.9,2 25 | 5.1,24.4,2 26 | 8.1,26.35,2 27 | 10.15,27.7,2 28 | 
9.75,25.5,2 29 | 9.2,21.1,2 30 | 11.2,22.8,2 31 | 12.6,23.1,2 32 | 13.25,23.5,2 33 | 11.65,26.85,2 34 | 12.45,27.55,2 35 | 13.3,27.85,2 36 | 13.7,27.75,2 37 | 14.15,26.9,2 38 | 14.05,26.55,2 39 | 15.15,24.2,2 40 | 15.2,24.75,2 41 | 12.2,20.9,2 42 | 12.15,21.45,2 43 | 12.75,22.05,2 44 | 13.15,21.85,2 45 | 13.75,22,2 46 | 13.95,22.7,2 47 | 14.4,22.65,2 48 | 14.2,22.15,2 49 | 14.1,21.75,2 50 | 14.05,21.4,2 51 | 17.2,24.8,2 52 | 17.7,24.85,2 53 | 17.55,25.2,2 54 | 17,26.85,2 55 | 16.55,27.1,2 56 | 19.15,25.35,2 57 | 18.8,24.7,2 58 | 21.4,25.85,2 59 | 15.8,21.35,2 60 | 16.6,21.15,2 61 | 17.45,20.75,2 62 | 18,20.95,2 63 | 18.25,20.2,2 64 | 18,22.3,2 65 | 18.6,22.25,2 66 | 19.2,21.95,2 67 | 19.45,22.1,2 68 | 20.1,21.6,2 69 | 20.1,20.9,2 70 | 19.9,20.35,2 71 | 19.45,19.05,2 72 | 19.25,18.7,2 73 | 21.3,22.3,2 74 | 22.9,23.65,2 75 | 23.15,24.1,2 76 | 24.25,22.85,2 77 | 22.05,20.25,2 78 | 20.95,18.25,2 79 | 21.65,17.25,2 80 | 21.55,16.7,2 81 | 21.6,16.3,2 82 | 21.5,15.5,2 83 | 22.4,16.5,2 84 | 22.25,18.1,2 85 | 23.15,19.05,2 86 | 23.5,19.8,2 87 | 23.75,20.2,2 88 | 25.15,19.8,2 89 | 25.5,19.45,2 90 | 23,18,2 91 | 23.95,17.75,2 92 | 25.9,17.55,2 93 | 27.65,15.65,2 94 | 23.1,14.6,2 95 | 23.5,15.2,2 96 | 24.05,14.9,2 97 | 24.5,14.7,2 98 | 14.15,17.35,1 99 | 14.3,16.8,1 100 | 14.3,15.75,1 101 | 14.75,15.1,1 102 | 15.35,15.5,1 103 | 15.95,16.45,1 104 | 16.5,17.05,1 105 | 17.35,17.05,1 106 | 17.15,16.3,1 107 | 16.65,16.1,1 108 | 16.5,15.15,1 109 | 16.25,14.95,1 110 | 16,14.25,1 111 | 15.9,13.2,1 112 | 15.15,12.05,1 113 | 15.2,11.7,1 114 | 17,15.65,1 115 | 16.9,15.35,1 116 | 17.35,15.45,1 117 | 17.15,15.1,1 118 | 17.3,14.9,1 119 | 17.7,15,1 120 | 17,14.6,1 121 | 16.85,14.3,1 122 | 16.6,14.05,1 123 | 17.1,14,1 124 | 17.45,14.15,1 125 | 17.8,14.2,1 126 | 17.6,13.85,1 127 | 17.2,13.5,1 128 | 17.25,13.15,1 129 | 17.1,12.75,1 130 | 16.95,12.35,1 131 | 16.5,12.2,1 132 | 16.25,12.5,1 133 | 16.05,11.9,1 134 | 16.65,10.9,1 135 | 16.7,11.4,1 136 | 16.95,11.25,1 137 | 17.3,11.2,1 138 | 18.05,11.9,1 139 | 18.6,12.5,1 140 | 18.9,12.05,1 141 | 18.7,11.25,1 142 | 17.95,10.9,1 143 | 18.4,10.05,1 144 | 17.45,10.4,1 145 | 17.6,10.15,1 146 | 17.7,9.85,1 147 | 17.3,9.7,1 148 | 16.95,9.7,1 149 | 16.75,9.65,1 150 | 19.8,9.95,1 151 | 19.1,9.55,1 152 | 17.5,8.3,1 153 | 17.55,8.1,1 154 | 17.85,7.55,1 155 | 18.2,8.35,1 156 | 19.3,9.1,1 157 | 19.4,8.85,1 158 | 19.05,8.85,1 159 | 18.9,8.5,1 160 | 18.6,7.85,1 161 | 18.7,7.65,1 162 | 19.35,8.2,1 163 | 19.95,8.3,1 164 | 20,8.9,1 165 | 20.3,8.9,1 166 | 20.55,8.8,1 167 | 18.35,6.95,1 168 | 18.65,6.9,1 169 | 19.3,7,1 170 | 19.1,6.85,1 171 | 19.15,6.65,1 172 | 21.2,8.8,1 173 | 21.4,8.8,1 174 | 21.1,8,1 175 | 20.4,7,1 176 | 20.5,6.35,1 177 | 20.1,6.05,1 178 | 20.45,5.15,1 179 | 20.95,5.55,1 180 | 20.95,6.2,1 181 | 20.9,6.6,1 182 | 21.05,7,1 183 | 21.85,8.5,1 184 | 21.9,8.2,1 185 | 22.3,7.7,1 186 | 21.85,6.65,1 187 | 21.3,5.05,1 188 | 22.6,6.7,1 189 | 22.5,6.15,1 190 | 23.65,7.2,1 191 | 24.1,7,1 192 | 21.95,4.8,1 193 | 22.15,5.05,1 194 | 22.45,5.3,1 195 | 22.45,4.9,1 196 | 22.7,5.5,1 197 | 23,5.6,1 198 | 23.2,5.3,1 199 | 23.45,5.95,1 200 | 23.75,5.95,1 201 | 24.45,6.15,1 202 | 24.6,6.45,1 203 | 25.2,6.55,1 204 | 26.05,6.4,1 205 | 25.3,5.75,1 206 | 24.35,5.35,1 207 | 23.3,4.9,1 208 | 22.95,4.75,1 209 | 22.4,4.55,1 210 | 22.8,4.1,1 211 | 22.9,4,1 212 | 23.25,3.85,1 213 | 23.45,3.6,1 214 | 23.55,4.2,1 215 | 23.8,3.65,1 216 | 23.8,4.75,1 217 | 24.2,4,1 218 | 24.55,4,1 219 | 24.7,3.85,1 220 | 24.7,4.3,1 221 | 24.9,4.75,1 222 | 26.4,5.7,1 223 | 27.15,5.95,1 224 | 27.3,5.45,1 225 | 27.5,5.45,1 226 | 
27.55,5.1,1 227 | 26.85,4.95,1 228 | 26.6,4.9,1 229 | 26.85,4.4,1 230 | 26.2,4.4,1 231 | 26,4.25,1 232 | 25.15,4.1,1 233 | 25.6,3.9,1 234 | 25.85,3.6,1 235 | 24.95,3.35,1 236 | 25.1,3.25,1 237 | 25.45,3.15,1 238 | 26.85,2.95,1 239 | 27.15,3.15,1 240 | 27.2,3,1 241 | 27.95,3.25,1 242 | 27.95,3.5,1 243 | 28.8,4.05,1 244 | 28.8,4.7,1 245 | 28.75,5.45,1 246 | 28.6,5.75,1 247 | 29.25,6.3,1 248 | 30,6.55,1 249 | 30.6,3.4,1 250 | 30.05,3.45,1 251 | 29.75,3.45,1 252 | 29.2,4,1 253 | 29.45,4.05,1 254 | 29.05,4.55,1 255 | 29.4,4.85,1 256 | 29.5,4.7,1 257 | 29.9,4.45,1 258 | 30.75,4.45,1 259 | 30.4,4.05,1 260 | 30.8,3.95,1 261 | 31.05,3.95,1 262 | 30.9,5.2,1 263 | 30.65,5.85,1 264 | 30.7,6.15,1 265 | 31.5,6.25,1 266 | 31.65,6.55,1 267 | 32,7,1 268 | 32.5,7.95,1 269 | 33.35,7.45,1 270 | 32.6,6.95,1 271 | 32.65,6.6,1 272 | 32.55,6.35,1 273 | 32.35,6.1,1 274 | 32.55,5.8,1 275 | 32.2,5.05,1 276 | 32.35,4.25,1 277 | 32.9,4.15,1 278 | 32.7,4.6,1 279 | 32.75,4.85,1 280 | 34.1,4.6,1 281 | 34.1,5,1 282 | 33.6,5.25,1 283 | 33.35,5.65,1 284 | 33.75,5.95,1 285 | 33.4,6.2,1 286 | 34.45,5.8,1 287 | 34.65,5.65,1 288 | 34.65,6.25,1 289 | 35.25,6.25,1 290 | 34.35,6.8,1 291 | 34.1,7.15,1 292 | 34.45,7.3,1 293 | 34.7,7.2,1 294 | 34.85,7,1 295 | 34.35,7.75,1 296 | 34.55,7.85,1 297 | 35.05,8,1 298 | 35.5,8.05,1 299 | 35.8,7.1,1 300 | 36.6,6.7,1 301 | 36.75,7.25,1 302 | 36.5,7.4,1 303 | 35.95,7.9,1 304 | 36.1,8.1,1 305 | 36.15,8.4,1 306 | 37.6,7.35,1 307 | 37.9,7.65,1 308 | 29.15,4.4,1 309 | 34.9,9,1 310 | 35.3,9.4,1 311 | 35.9,9.35,1 312 | 36,9.65,1 313 | 35.75,10,1 314 | 36.7,9.15,1 315 | 36.6,9.8,1 316 | 36.9,9.75,1 317 | 37.25,10.15,1 318 | 36.4,10.15,1 319 | 36.3,10.7,1 320 | 36.75,10.85,1 321 | 38.15,9.7,1 322 | 38.4,9.45,1 323 | 38.35,10.5,1 324 | 37.7,10.8,1 325 | 37.45,11.15,1 326 | 37.35,11.4,1 327 | 37,11.75,1 328 | 36.8,12.2,1 329 | 37.15,12.55,1 330 | 37.25,12.15,1 331 | 37.65,11.95,1 332 | 37.95,11.85,1 333 | 38.6,11.75,1 334 | 38.5,12.2,1 335 | 38,12.95,1 336 | 37.3,13,1 337 | 37.5,13.4,1 338 | 37.85,14.5,1 339 | 38.3,14.6,1 340 | 38.05,14.45,1 341 | 38.35,14.35,1 342 | 38.5,14.25,1 343 | 39.3,14.2,1 344 | 39,13.2,1 345 | 38.95,12.9,1 346 | 39.2,12.35,1 347 | 39.5,11.8,1 348 | 39.55,12.3,1 349 | 39.75,12.75,1 350 | 40.2,12.8,1 351 | 40.4,12.05,1 352 | 40.45,12.5,1 353 | 40.55,13.15,1 354 | 40.45,14.5,1 355 | 40.2,14.8,1 356 | 40.65,14.9,1 357 | 40.6,15.25,1 358 | 41.3,15.3,1 359 | 40.95,15.7,1 360 | 41.25,16.8,1 361 | 40.95,17.05,1 362 | 40.7,16.45,1 363 | 40.45,16.3,1 364 | 39.9,16.2,1 365 | 39.65,16.2,1 366 | 39.25,15.5,1 367 | 38.85,15.5,1 368 | 38.3,16.5,1 369 | 38.75,16.85,1 370 | 39,16.6,1 371 | 38.25,17.35,1 372 | 39.5,16.95,1 373 | 39.9,17.05,1 374 | -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Jain结果_v3.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Jain结果_v3.tif -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Pathbased.txt: -------------------------------------------------------------------------------- 1 | 11.25,5.05,1 2 | 10.95,4.7,1 3 | 9.85,5.8,1 4 | 9.8,5.75,1 5 | 9.15,6.8,1 6 | 8.65,6.6,1 7 | 8.4,7.5,1 8 | 7.9,7.75,1 9 | 6.95,8.7,1 10 | 7.25,9.75,1 11 | 
7.3,10.25,1 12 | 5.9,10.7,1 13 | 5.85,11.8,1 14 | 6.45,12.05,1 15 | 5.7,12.95,1 16 | 5.35,13.45,1 17 | 5.4,14.65,1 18 | 4.7,14.85,1 19 | 5.4,15.4,1 20 | 5.1,16.25,1 21 | 5.75,16.7,1 22 | 4.85,17.65,1 23 | 5,18,1 24 | 6.05,18,1 25 | 5.7,19.45,1 26 | 5.3,19.55,1 27 | 5.85,21.25,1 28 | 6.1,21.35,1 29 | 6.55,22.15,1 30 | 5.9,22.8,1 31 | 7.15,23.7,1 32 | 6.75,24.25,1 33 | 7.95,24.65,1 34 | 7.75,25.3,1 35 | 8.8,26.05,1 36 | 8.85,26.95,1 37 | 9.35,27.45,1 38 | 9.95,27.1,1 39 | 11.25,28.2,1 40 | 10.7,28.55,1 41 | 11.95,29.45,1 42 | 11.95,28.65,1 43 | 13.1,30.05,1 44 | 13.4,29.3,1 45 | 14.7,30.2,1 46 | 14.7,30.6,1 47 | 16.1,30.4,1 48 | 16.1,31.05,1 49 | 17.55,30.8,1 50 | 17.65,31.75,1 51 | 18.55,31.6,1 52 | 18.85,30.6,1 53 | 19.85,30.9,1 54 | 20.1,31.3,1 55 | 21.5,31.35,1 56 | 20.85,30.4,1 57 | 22.95,30.05,1 58 | 23.4,30.3,1 59 | 24.2,29.9,1 60 | 24.75,30,1 61 | 25.55,29.3,1 62 | 25.55,28.45,1 63 | 26.7,28.3,1 64 | 26.85,28.75,1 65 | 27.6,27.15,1 66 | 28.25,27.4,1 67 | 29.05,27,1 68 | 29.05,26.2,1 69 | 29.45,25.55,1 70 | 30.05,25.55,1 71 | 30.3,23.3,1 72 | 30.6,23.95,1 73 | 30.9,22.75,1 74 | 31,22.3,1 75 | 30.65,21.3,1 76 | 31.3,20.8,1 77 | 31.85,21.2,1 78 | 31.45,19.3,1 79 | 32.7,19.3,1 80 | 31.9,17.9,1 81 | 33.05,18.05,1 82 | 32.8,16.6,1 83 | 32.2,16.3,1 84 | 32.4,15.15,1 85 | 31.8,14.75,1 86 | 32.35,13.25,1 87 | 31.65,13.35,1 88 | 31.15,12.05,1 89 | 32,11.9,1 90 | 31.05,10.3,1 91 | 31.95,10.4,1 92 | 30.05,9.55,1 93 | 30.5,8.25,1 94 | 29.6,8.25,1 95 | 29.8,7.6,1 96 | 29,7.05,1 97 | 29,6.7,1 98 | 27.6,5.95,1 99 | 28.15,5.45,1 100 | 26.5,4.8,1 101 | 26.4,4.4,1 102 | 25.8,3.65,1 103 | 25.5,4.1,1 104 | 31.6,16.6,1 105 | 30.7,17.4,1 106 | 29.65,17.95,1 107 | 29.15,16.5,1 108 | 30.5,15.55,1 109 | 29.95,13.55,3 110 | 30,11.85,1 111 | 28.65,14.1,3 112 | 27.45,10.85,3 113 | 26.4,10.75,3 114 | 25.05,10.1,3 115 | 26.2,11.55,3 116 | 27.2,13.3,3 117 | 28.3,14.45,3 118 | 27.95,14.65,3 119 | 27.95,14.7,3 120 | 23.15,11,3 121 | 22.9,11.6,3 122 | 23.9,11.6,3 123 | 24.55,11.6,3 124 | 23.85,12.45,3 125 | 23.35,13.1,3 126 | 24.85,13.2,3 127 | 25.1,12.25,3 128 | 25.15,12.5,3 129 | 25.65,12.9,3 130 | 25.7,13.5,3 131 | 26.3,13.3,3 132 | 27.1,14.55,3 133 | 27.15,14.6,3 134 | 26.4,14.35,3 135 | 26.4,14.35,3 136 | 25.75,14.55,3 137 | 25.75,14.9,3 138 | 25.35,14.65,3 139 | 23.7,14.55,3 140 | 24.05,14.9,3 141 | 23.65,15.3,3 142 | 22.75,14.5,3 143 | 22,14,3 144 | 20.9,12.95,3 145 | 20.3,13.1,3 146 | 22.2,16.45,3 147 | 22.15,16.65,3 148 | 22.4,15.15,3 149 | 22.15,15.2,3 150 | 23.95,15.95,3 151 | 24.25,16.1,3 152 | 24.8,16.1,3 153 | 25.15,16.15,3 154 | 25.5,16.7,3 155 | 25.75,16.85,3 156 | 26.2,16.85,3 157 | 26.25,16.9,3 158 | 26.25,16.35,3 159 | 26.75,16.2,3 160 | 27.4,16.15,3 161 | 27.6,16.85,3 162 | 26.95,17.2,3 163 | 26.3,18.1,3 164 | 27.55,17.95,3 165 | 27.7,17.6,3 166 | 28.25,18.25,3 167 | 28.8,19.15,3 168 | 28.5,19.15,3 169 | 28.1,19.35,3 170 | 28.05,20.3,3 171 | 27.3,20.5,3 172 | 27.1,21.6,3 173 | 26.75,19.5,3 174 | 26.5,20,3 175 | 25.9,19.8,3 176 | 25.1,19.8,3 177 | 24.75,20.7,3 178 | 24.35,20.55,3 179 | 23.55,20.35,3 180 | 24.3,19.7,3 181 | 24.9,19,3 182 | 24.7,16.8,3 183 | 24.35,16.8,3 184 | 24.4,17.15,3 185 | 24.9,17.3,3 186 | 24.35,17.7,3 187 | 24.95,17.8,3 188 | 24.95,18.05,3 189 | 24.4,18.35,3 190 | 23.65,18.6,3 191 | 22.85,18.9,3 192 | 22.4,20.65,3 193 | 22.5,17.8,3 194 | 22.45,18.25,3 195 | 21.6,17.7,3 196 | 21.35,18.05,3 197 | 21.3,18.25,3 198 | 19.95,19.8,3 199 | 20.45,20.45,3 200 | 20.35,16.95,3 201 | 19.7,17.45,3 202 | 19.35,17.45,3 203 | 12.45,9.15,2 204 | 10.1,10.05,2 205 | 11.75,12.2,2 206 | 9.55,12.4,2 
207 | 8.65,13.35,2 208 | 7.75,13.55,2 209 | 8.55,15.15,2 210 | 8.05,15.9,2 211 | 8.45,15.9,2 212 | 8.6,16.85,2 213 | 9,17.05,2 214 | 9,16.3,2 215 | 9.35,16.3,2 216 | 9.55,15.3,2 217 | 9.65,14.85,2 218 | 10.15,15.05,2 219 | 10.05,15.6,2 220 | 10.4,16,2 221 | 10.65,16,2 222 | 10.9,15.95,2 223 | 10.7,15.35,2 224 | 11.35,15.05,2 225 | 11.15,14.75,2 226 | 11.05,14.6,2 227 | 11.15,14.2,2 228 | 11.1,13.6,2 229 | 12.5,13,2 230 | 13.3,12.45,2 231 | 13.5,12.4,2 232 | 13.95,11.75,2 233 | 14.4,12.2,2 234 | 15.4,12.2,2 235 | 15.25,12.45,2 236 | 14.6,12.75,2 237 | 14.1,13.05,2 238 | 14.2,13.25,2 239 | 14.75,13.45,2 240 | 13.15,13.4,2 241 | 13.05,13.7,2 242 | 12.65,13.65,2 243 | 15.45,13.75,2 244 | 14.65,14.2,2 245 | 13.75,14.05,2 246 | 13.75,14.5,2 247 | 12.95,14.8,2 248 | 13.2,14.9,2 249 | 13.25,15.5,2 250 | 12.1,15.35,2 251 | 12.15,15.5,2 252 | 11.25,16.4,2 253 | 12.7,15.6,2 254 | 12.5,16.15,2 255 | 12.7,16.6,2 256 | 12.15,16.2,2 257 | 11.95,16.5,2 258 | 11.45,16.8,2 259 | 11.05,17.2,2 260 | 11.3,17.6,2 261 | 11.65,17.6,2 262 | 11.25,18.25,2 263 | 11.05,18.45,2 264 | 11.05,18.55,2 265 | 10.55,18.55,2 266 | 10.8,19.2,2 267 | 7.45,19,1 268 | 10.05,20.1,2 269 | 9.95,20.5,2 270 | 10.65,20.45,2 271 | 10.3,22.75,2 272 | 11.7,19.6,2 273 | 12.2,19.65,2 274 | 13.2,20.1,2 275 | 13.55,20.05,2 276 | 14.15,20.05,2 277 | 14.25,21.5,2 278 | 13.25,21.4,2 279 | 12.85,18.1,2 280 | 13.75,18.3,2 281 | 14.2,18.35,2 282 | 14.25,18.8,2 283 | 13.75,16,2 284 | 13.75,16.7,2 285 | 13.75,17.05,2 286 | 14.05,16.8,2 287 | 14.5,16.95,2 288 | 14.75,16.65,2 289 | 15.25,16.05,2 290 | 15.4,16.2,2 291 | 15.85,16.2,2 292 | 15.5,15.55,2 293 | 15,14.95,2 294 | 16.6,16.15,2 295 | 17.9,15.6,2 296 | 17.5,18.05,2 297 | 16.65,17.5,2 298 | 15.45,17.3,2 299 | 15.45,17.8,2 300 | 15.7,18.4,2 301 | -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Pathbased结果_v3.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Pathbased结果_v3.tif -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/R15结果_v3.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_paper/Clustering by fast search and find of density peaks/fast_clustering/R15结果_v3.tif -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Spiral.txt: -------------------------------------------------------------------------------- 1 | 31.95,7.95,3 2 | 31.15,7.3,3 3 | 30.45,6.65,3 4 | 29.7,6,3 5 | 28.9,5.55,3 6 | 28.05,5,3 7 | 27.2,4.55,3 8 | 26.35,4.15,3 9 | 25.4,3.85,3 10 | 24.6,3.6,3 11 | 23.6,3.3,3 12 | 22.75,3.15,3 13 | 21.85,3.05,3 14 | 20.9,3,3 15 | 20,2.9,3 16 | 19.1,3,3 17 | 18.2,3.2,3 18 | 17.3,3.25,3 19 | 16.55,3.5,3 20 | 15.7,3.7,3 21 | 14.85,4.1,3 22 | 14.15,4.4,3 23 | 13.4,4.75,3 24 | 12.7,5.2,3 25 | 12.05,5.65,3 26 | 11.45,6.15,3 27 | 10.9,6.65,3 28 | 10.3,7.25,3 29 | 9.7,7.85,3 30 | 9.35,8.35,3 31 | 8.9,9.05,3 32 | 8.55,9.65,3 33 | 8.15,10.35,3 34 | 7.95,10.95,3 35 | 7.75,11.7,3 36 | 7.55,12.35,3 37 | 7.45,13,3 38 | 7.35,13.75,3 39 | 
7.3,14.35,3 40 | 7.35,14.95,3 41 | 7.35,15.75,3 42 | 7.55,16.35,3 43 | 7.7,16.95,3 44 | 7.8,17.55,3 45 | 8.05,18.15,3 46 | 8.3,18.75,3 47 | 8.65,19.3,3 48 | 8.9,19.85,3 49 | 9.3,20.3,3 50 | 9.65,20.8,3 51 | 10.2,21.25,3 52 | 10.6,21.65,3 53 | 11.1,22.15,3 54 | 11.55,22.45,3 55 | 11.95,22.7,3 56 | 12.55,23,3 57 | 13.05,23.2,3 58 | 13.45,23.4,3 59 | 14,23.55,3 60 | 14.55,23.6,3 61 | 15.1,23.75,3 62 | 15.7,23.75,3 63 | 16.15,23.85,3 64 | 16.7,23.8,3 65 | 17.15,23.75,3 66 | 17.75,23.75,3 67 | 18.2,23.6,3 68 | 18.65,23.5,3 69 | 19.1,23.35,3 70 | 19.6,23.15,3 71 | 20,22.95,3 72 | 20.4,22.7,3 73 | 20.7,22.55,3 74 | 21,22.15,3 75 | 21.45,21.95,3 76 | 21.75,21.55,3 77 | 22,21.25,3 78 | 22.25,21,3 79 | 22.5,20.7,3 80 | 22.65,20.35,3 81 | 22.75,20.05,3 82 | 22.9,19.65,3 83 | 23,19.35,3 84 | 23.1,19,3 85 | 23.15,18.65,3 86 | 23.2,18.25,3 87 | 23.2,18.05,3 88 | 23.2,17.8,3 89 | 23.1,17.45,3 90 | 23.05,17.15,3 91 | 22.9,16.9,3 92 | 22.85,16.6,3 93 | 22.7,16.4,3 94 | 22.6,16.2,3 95 | 22.55,16.05,3 96 | 22.4,15.95,3 97 | 22.35,15.8,3 98 | 22.2,15.65,3 99 | 22.15,15.55,3 100 | 22,15.4,3 101 | 21.9,15.3,3 102 | 21.85,15.25,3 103 | 21.75,15.15,3 104 | 21.65,15.05,3 105 | 21.55,15,3 106 | 21.5,14.9,3 107 | 19.35,31.65,1 108 | 20.35,31.45,1 109 | 21.35,31.1,1 110 | 22.25,30.9,1 111 | 23.2,30.45,1 112 | 23.95,30.05,1 113 | 24.9,29.65,1 114 | 25.6,29.05,1 115 | 26.35,28.5,1 116 | 27.15,27.9,1 117 | 27.75,27.35,1 118 | 28.3,26.6,1 119 | 28.95,25.85,1 120 | 29.5,25.15,1 121 | 29.95,24.45,1 122 | 30.4,23.7,1 123 | 30.6,22.9,1 124 | 30.9,22.1,1 125 | 31.25,21.3,1 126 | 31.35,20.55,1 127 | 31.5,19.7,1 128 | 31.55,18.9,1 129 | 31.65,18.15,1 130 | 31.6,17.35,1 131 | 31.45,16.55,1 132 | 31.3,15.8,1 133 | 31.15,15.05,1 134 | 30.9,14.35,1 135 | 30.6,13.65,1 136 | 30.3,13,1 137 | 29.9,12.3,1 138 | 29.5,11.75,1 139 | 29,11.15,1 140 | 28.5,10.6,1 141 | 28,10.1,1 142 | 27.55,9.65,1 143 | 26.9,9.1,1 144 | 26.25,8.8,1 145 | 25.7,8.4,1 146 | 25.15,8.05,1 147 | 24.5,7.75,1 148 | 23.9,7.65,1 149 | 23.15,7.4,1 150 | 22.5,7.3,1 151 | 21.9,7.1,1 152 | 21.25,7.05,1 153 | 20.5,7,1 154 | 19.9,6.95,1 155 | 19.25,7.05,1 156 | 18.75,7.1,1 157 | 18.05,7.25,1 158 | 17.5,7.35,1 159 | 16.9,7.6,1 160 | 16.35,7.8,1 161 | 15.8,8.05,1 162 | 15.4,8.35,1 163 | 14.9,8.7,1 164 | 14.45,8.9,1 165 | 13.95,9.3,1 166 | 13.6,9.65,1 167 | 13.25,10.1,1 168 | 12.95,10.55,1 169 | 12.65,10.9,1 170 | 12.35,11.4,1 171 | 12.2,11.75,1 172 | 11.95,12.2,1 173 | 11.8,12.65,1 174 | 11.75,13.05,1 175 | 11.55,13.6,1 176 | 11.55,14,1 177 | 11.55,14.35,1 178 | 11.55,14.7,1 179 | 11.6,15.25,1 180 | 11.65,15.7,1 181 | 11.8,16.05,1 182 | 11.85,16.5,1 183 | 12,16.75,1 184 | 12.15,17.2,1 185 | 12.3,17.6,1 186 | 12.55,17.85,1 187 | 12.8,18.05,1 188 | 13.1,18.4,1 189 | 13.3,18.6,1 190 | 13.55,18.85,1 191 | 13.8,19.05,1 192 | 14.15,19.25,1 193 | 14.45,19.5,1 194 | 14.85,19.55,1 195 | 15,19.7,1 196 | 15.25,19.7,1 197 | 15.55,19.85,1 198 | 15.95,19.9,1 199 | 16.2,19.9,1 200 | 16.55,19.9,1 201 | 16.85,19.9,1 202 | 17.2,19.9,1 203 | 17.4,19.8,1 204 | 17.65,19.75,1 205 | 17.8,19.7,1 206 | 18,19.6,1 207 | 18.2,19.55,1 208 | 3.9,9.6,2 209 | 3.55,10.65,2 210 | 3.35,11.4,2 211 | 3.1,12.35,2 212 | 3.1,13.25,2 213 | 3.05,14.15,2 214 | 3,15.1,2 215 | 3.1,16,2 216 | 3.2,16.85,2 217 | 3.45,17.75,2 218 | 3.7,18.7,2 219 | 3.95,19.55,2 220 | 4.35,20.25,2 221 | 4.7,21.1,2 222 | 5.15,21.8,2 223 | 5.6,22.5,2 224 | 6.2,23.3,2 225 | 6.8,23.85,2 226 | 7.35,24.45,2 227 | 8.05,24.95,2 228 | 8.8,25.45,2 229 | 9.5,26,2 230 | 10.2,26.35,2 231 | 10.9,26.75,2 232 | 11.7,27,2 233 | 12.45,27.25,2 234 | 
13.3,27.6,2 235 | 14.05,27.6,2 236 | 14.7,27.75,2 237 | 15.55,27.75,2 238 | 16.4,27.75,2 239 | 17.1,27.75,2 240 | 17.9,27.75,2 241 | 18.55,27.7,2 242 | 19.35,27.6,2 243 | 20.1,27.35,2 244 | 20.7,27.1,2 245 | 21.45,26.8,2 246 | 22.05,26.5,2 247 | 22.7,26.15,2 248 | 23.35,25.65,2 249 | 23.8,25.3,2 250 | 24.3,24.85,2 251 | 24.75,24.35,2 252 | 25.25,23.95,2 253 | 25.65,23.45,2 254 | 26.05,23,2 255 | 26.2,22.3,2 256 | 26.6,21.8,2 257 | 26.75,21.25,2 258 | 27,20.7,2 259 | 27.15,20.15,2 260 | 27.15,19.6,2 261 | 27.35,19.1,2 262 | 27.35,18.45,2 263 | 27.4,18,2 264 | 27.3,17.4,2 265 | 27.15,16.9,2 266 | 27,16.4,2 267 | 27,15.9,2 268 | 26.75,15.35,2 269 | 26.55,14.85,2 270 | 26.3,14.45,2 271 | 25.95,14.1,2 272 | 25.75,13.7,2 273 | 25.35,13.3,2 274 | 25.05,12.95,2 275 | 24.8,12.7,2 276 | 24.4,12.45,2 277 | 24.05,12.2,2 278 | 23.55,11.85,2 279 | 23.2,11.65,2 280 | 22.75,11.4,2 281 | 22.3,11.3,2 282 | 21.9,11.1,2 283 | 21.45,11.05,2 284 | 21.1,11,2 285 | 20.7,10.95,2 286 | 20.35,10.95,2 287 | 19.95,11,2 288 | 19.55,11,2 289 | 19.15,11.05,2 290 | 18.85,11.1,2 291 | 18.45,11.25,2 292 | 18.15,11.35,2 293 | 17.85,11.5,2 294 | 17.5,11.7,2 295 | 17.2,11.95,2 296 | 17,12.05,2 297 | 16.75,12.2,2 298 | 16.65,12.35,2 299 | 16.5,12.5,2 300 | 16.35,12.7,2 301 | 16.2,12.8,2 302 | 16.15,12.95,2 303 | 16,13.1,2 304 | 15.95,13.25,2 305 | 15.9,13.4,2 306 | 15.8,13.5,2 307 | 15.8,13.65,2 308 | 15.75,13.85,2 309 | 15.65,14.05,2 310 | 15.65,14.25,2 311 | 15.65,14.5,2 312 | 15.65,14.6,2 313 | -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Spiral结果_v3.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_paper/Clustering by fast search and find of density peaks/fast_clustering/Spiral结果_v3.tif -------------------------------------------------------------------------------- /notes_paper/Clustering by fast search and find of density peaks/文献阅读 - Clustering by Fast Search and Find of Density Peaks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## 文献阅读 - Clustering by Fast Search and Find of Density Peaks\n", 8 | "---\n", 9 | "聚类(clustering)是一类典型的无监督学习(unsupervised learning)方法,它通过对无标记训练样本的学习将数据集中的样本划分为若干个通常是不想交的子集(簇/cluster)。聚类分析的目标是基于元素的相似性进行归类,在生物信息学和模式识别等领域有着广泛的应用,常用的聚类算法有:knn、K-means、K-medoids、DBSCAN等。\n", 10 | "\n", 11 | "---\n", 12 | "### 作者主要做了什么工作?\n", 13 | "\n", 14 | "---\n", 15 | "首先,作者提出了一个核心假设:类簇中心(cluster center)周围都是低局部密度(low local density)的点,并且与任何一个局部密度较高(a higher local density)的点保持相对较远的距离;基于这个假设,作者提出了一种新的、不需要迭代的快速聚类方法(paper中没有给这个方法命名,这里暂且称之为*Fast-Cluster*)。\n", 16 | "\n", 17 | "---\n", 18 | "### Fast-Cluster算法有哪些优点?\n", 19 | "\n", 20 | "---\n", 21 | "1. 不需要进行迭代计算,求解速度快,不存在不收敛的情况\n", 22 | "2. 可以对非球形数据进行聚类\n", 23 | "\n", 24 | "---\n", 25 | "\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "\n", 33 | "### Fast-Cluster算法描述\n", 34 | "\n", 35 | "---\n", 36 | "#### 1. 
符号列表\n", 37 | "---\n", 38 | "| 符号 | 含义 | \n", 39 | "| :--------: | :--------| \n", 40 | "| ![][f01] | 截断距离 |\n", 41 | "| ![][f02] | 点$i$与点$j$之间的距离|\n", 42 | "| ![][f03] | 局部密度 |\n", 43 | "| ![][f04] | 与其他具有更高局部密度点集的最小距离 |\n", 44 | "\n", 45 | "\n", 46 | "[f01]: http://latex.codecogs.com/svg.latex?d_{c}\n", 47 | "[f02]: http://latex.codecogs.com/svg.latex?d_{ij}\n", 48 | "[f03]: http://latex.codecogs.com/svg.latex?\\rho_{i}\n", 49 | "[f04]: http://latex.codecogs.com/svg.latex?\\delta_{i}\n", 50 | "\n", 51 | "---\n", 52 | "\n", 53 | "#### 2. 基本概念\n", 54 | "---\n", 55 | "##### 局部密度(local density)$\\rho_{i}$\n", 56 | "\n", 57 | "直观的理解,某点的局部密度等于其周围相距小于截断距离$d_{c}$的点的数量。\n", 58 | "\n", 59 | "局部密度计算方法:\n", 60 | "\n", 61 | "$\\rho_{i}=\\sum_{j}\\chi(d_{ij}-d_{c})$\n", 62 | "\n", 63 | "其中,\n", 64 | "\n", 65 | "$\\chi(x)=\\left\\{\\begin{matrix}1,x<0\\\\0,x=0\\end{matrix}\\right.$\n", 66 | "\n", 67 | "$d_{c}$ 表示截断距离,是一个可调的参数\n", 68 | "\n", 69 | "---\n", 70 | "\n", 71 | "##### 最小距离$\\delta_{i}$\n", 72 | "\n", 73 | "这里的最小距离指的是点$i$与其他具有更高局部密度点集的最小距离(原文:The minimum distance between the point i and any other point with higher density)。\n", 74 | "\n", 75 | "最小距离计算方法:\n", 76 | "\n", 77 | "$\\delta_{i}=min_{j:\\rho_{j}>\\rho_{i}}(d_{ij})$\n", 78 | "\n", 79 | "对于具有最高局部密度的点,通常定义: $\\delta_{i}=max_{j}(d_{ij})$\n", 80 | "\n", 81 | "---\n", 82 | "##### 决策图(decision graph)\n", 83 | "\n", 84 | "辅助识别类簇中心,它是一张以$\\rho_{i}$作为横轴,$\\delta_{i}$作为纵轴的图形。\n", 85 | "\n", 86 | "注:当类簇的数量特别多的时候,可以将$\\rho_{i}$和$\\delta_{i}$的乘积$\\gamma$作为纵轴,$n$作为横轴作图,这种方式可以更直观的看到数据集中有多少个类簇中心(cluster center)。\n", 87 | "\n", 88 | "\n", 89 | "---\n", 90 | "###### 类簇中心(cluster center)\n", 91 | "基于$\\rho_{i}$和$\\delta_{i}$,Fast-Cluster算法中将类簇中心(cluster center)定义为:同时具有较大的$\\rho_{i}$和$\\delta_{i}$的点(原文:the only points of high $\\delta_{i}$ and relatively high $\\rho_{i}$ arethe cluster centers)。\n", 92 | "\n", 93 | "\n", 94 | "---\n", 95 | "\n", 96 | "\n", 97 | "#### 3. 算法流程\n", 98 | "---\n", 99 | "给定数据集$D=\\begin{Bmatrix}x_{1},x_{2},\\cdots ,x_{m}\\end{Bmatrix}$,使用Fast-Cluster进行聚类的过程如下:\n", 100 | "\n", 101 | "step 1. 根据距离计算公式(针对不同的数据集,距离计算公式不一样),计算每一个数据点$x_{i}$与其他数据点$\\begin{Bmatrix}x_{1},x_{2},x_{i-1},\\cdots,x_{i+1},x_{m}\\end{Bmatrix}$的距离,得到整个数据集的距离矩阵(矩阵中第$i$行第$j$列的元素表示$x_{i}$与$x_{j}$的距离$d_{ij}$);\n", 102 | "\n", 103 | "step 2. 由距离矩阵,分别计算每个点的局部密度$\\rho_{i}$(local density)以及与具有更高局部密度点集的最小距离$\\delta_{i}$;\n", 104 | "\n", 105 | "step 3. 以所有数据点的$\\rho_{i}$为横轴、$\\delta_{i}$为纵轴作决策图(decision graph),确定cluster的数量以及类簇中心(cluster center);\n", 106 | "\n", 107 | "step 4. 
将每一个数据点$x_{i}$归入距离其最近的一个类簇中心(cluster center)所在的cluster中,聚类完成。\n", 108 | "\n", 109 | "---" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "source": [ 118 | "### 相关资源整理\n", 119 | "---\n", 120 | "1、JDPlus博客(含python实现):[Science论文\"Clustering by fast search and find of density peaks\"学习笔记][1]\n", 121 | "\n", 122 | "2、[Paper专属页面][2],包含一些样例数据和matlab代码,[原始matlab代码的python实现在这里][5]\n", 123 | "\n", 124 | "3、[jasonwbw做的python实现][3],star数量129,参考价值较高\n", 125 | "\n", 126 | "4、[cwehmeyer做的python实现][4]\n", 127 | "\n", 128 | "\n", 129 | "[1]: http://blog.csdn.net/jdplus/article/details/40351541\n", 130 | "[2]: http://people.sissa.it/~laio/Research/Res_clustering.php\n", 131 | "[3]: https://github.com/jasonwbw/DensityPeakCluster\n", 132 | "[4]: https://github.com/cwehmeyer/pydpc\n", 133 | "[5]: http://nbviewer.jupyter.org/gist/tclarke/54ed4c12e8344e4b5ddb" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [] 140 | } 141 | ], 142 | "metadata": { 143 | "kernelspec": { 144 | "display_name": "Python 3", 145 | "language": "python", 146 | "name": "python3" 147 | }, 148 | "language_info": { 149 | "codemirror_mode": { 150 | "name": "ipython", 151 | "version": 3 152 | }, 153 | "file_extension": ".py", 154 | "mimetype": "text/x-python", 155 | "name": "python", 156 | "nbconvert_exporter": "python", 157 | "pygments_lexer": "ipython3", 158 | "version": "3.6.1" 159 | } 160 | }, 161 | "nbformat": 4, 162 | "nbformat_minor": 2 163 | } 164 | -------------------------------------------------------------------------------- /notes_paper/Personalized News Recommendation Based on Click Behavior/文献阅读 — Personalized News Recommendation Based on Click Behavior.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 文献阅读 — Personalized News Recommendation Based on Click Behavior\n", 8 | "\n", 9 | "---\n", 10 | "\n", 11 | "文献:Personalized News Recommendation Based on Click Behavior\n", 12 | "\n", 13 | "作者:{Jiahui Liu, Peter Dolan, Elin Rønby Pedersen} @ GOOGLE\n", 14 | "\n", 15 | "地址:http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.308.3087&rep=rep1&type=pdf\n", 16 | "\n", 17 | "http://www.jianshu.com/p/f3d147fbce3f\n", 18 | "\n", 19 | "---\n", 20 | "\n", 21 | "## 作者主要做了什么?\n", 22 | "---\n", 23 | "在这篇paper中,作者提出了一种针对大规模新闻推荐的信息过滤机制。首先,作者通过分析用户日志发现,用户的兴趣受其所在地区新闻趋势的影响。基于这个发现,作者将用户阅读新闻的兴趣分成两个部分:真实兴趣和受当地新闻趋势影响的兴趣。作者基于贝叶斯框架对用户的真实兴趣进行建模,使用用户的历史点击数据来预测她当前阅读新闻的兴趣,预测时会考虑当地的新闻趋势。将这个用户兴趣预测的方法用在新闻信息过滤(news information filtering)中,结合已有的协同过滤方法(collaborative filtering),就能生成个性化的新闻推荐信息。Google News的线上测试显示,新方法改善了推荐信息质量,提高了用户访问频次。\n", 24 | "\n", 25 | "\n", 26 | "\n", 27 | "\n", 28 | "\n", 29 | "做新闻推荐,现实中存在两个约束:1) 用户的兴趣可能会随着时间发生改变;2)不同用户之间的点击记录数量存在很大的差别。\n", 30 | "\n", 31 | "推荐系统领域主要有两种不同的技术:信息过滤(Information Filtering)和协同过滤(Collaborative Filtering)。信息过滤技术基于档案(profiles)来推荐内容,档案的构建可以通过分析用户历史数据来完成。协同过滤技术则基于同类用户的数据来推荐内容。\n", 32 | "\n", 33 | "在2009年之前,Google News的新闻推荐系统是基于协同过滤(Collaborative Filtering)方法开发的。这种方法主要存在两个问题:\n", 34 | "\n", 35 | "1) 系统不会推荐没有被其他用户读过的新闻;\n", 36 | "2) 忽略了用户之间的差异性。\n", 37 | "\n", 38 | "解决这两个问题的一种可行的方法是构建用户的真实兴趣档案(profiles of user's genuine interests),基于此档案进行新闻推荐。用户的兴趣档案可以有效的帮助系统过滤掉用户不感兴趣的内容,此外,与用户兴趣相匹配的新内容也有可能推荐给用户。\n", 39 | "\n", 40 | "\n", 41 | "\n", 42 | "\n", 43 | "\n", 44 | "这篇paper中,作者基于用户在新闻站点的点击行为推测用户的兴趣。\n", 45 | "对于用户建模(user modeling)而言,如何计算用户兴趣随时间的变化是一个非常重要的问题。\n", 46 | "\n", 47 | 
"用户阅读新闻的兴趣分长短期。短期兴趣通常是热门新闻时间,随时间变化很快;长期兴趣则可以反映用户的真实兴趣。\n", 48 | "\n", 49 | "作者提出的新方法可以捕捉用户兴趣在大环境中的动态变化。\n", 50 | "\n", 51 | "\n", 52 | "在2010年前后,Google News使用这篇paper中提出的新信息过滤方法与协同过滤方法结合体进行新闻个性化推荐。\n", 53 | "\n", 54 | "个性化推荐系统的基本假设是:用户的兴趣具有合理性和一致性。\n", 55 | "\n", 56 | "用户访问新闻网站往往希望看到一些有趣的事情,并且,用户的兴趣会受到大新闻事件的影响。\n", 57 | "\n", 58 | "\n", 59 | "为了深入理解用户兴趣随时间发生变化的规律,作者对Google News中的大量点击行为日志进行了分析。日志数据来自不同国家和区域的16848位用户的点击历史记录,时间范围是2007/7/1 - 2008/6/30。在这个时间范围内,每个用户每个月至少有十次点击行为被记录下来。\n", 60 | "通过对这些日志进行分析,作者得到以下结论:\n", 61 | "\n", 62 | "1. 用户阅读新闻的兴趣确实会随着时间发生改变;\n", 63 | "2. “全体用户”的点击行为分布能够反映新闻趋势,并与重大新闻事件相符;\n", 64 | "3. 不同区域的新闻趋势不同;\n", 65 | "4. 在一定程度上,用户阅读新闻的兴趣与他所在的地区新闻趋势相符。\n", 66 | "\n", 67 | "\n", 68 | "\n", 69 | "\n", 70 | "在这篇paper中,作者提出了一个用于推荐系统的信息过滤(Information Filter)方法,该方法基于主题分类(topic categories)。\n", 71 | "Google News的系统会记录用户的页面点击事件和时间。每一个点击都可以认为是用户对该类别的一个投票。\n" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "## 贝叶斯方法理解\n", 79 | "---\n", 80 | "\n", 81 | "\n" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "collapsed": true 89 | }, 90 | "outputs": [], 91 | "source": [] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "## 使用贝叶斯框架(Bayesian Framework)预测用户兴趣\n", 98 | "\n", 99 | "---\n", 100 | "\n", 101 | "基于用户自己的点击模式(click pattern)和她所在地区所有用户的点击模式,作者提出了一种使用贝叶斯框架对用户当前阅读新闻进行预测的算法。\n", 102 | "\n", 103 | "\n", 104 | "\n", 105 | "\n", 106 | "\n", 107 | "\n", 108 | "\n", 109 | "\n", 110 | "## 符号列表\n", 111 | "---\n", 112 | "| 符号 | 含义 | \n", 113 | "| :--------------:| :--------| \n", 114 | "| $D\\left ( u, t \\right )$ | 用户点击分布 |\n", 115 | "| $D\\left ( t \\right )$ | 点$i$与点$j$之间的距离|\n", 116 | "| $c_i$ | 局部密度 |\n", 117 | "|$p^{t}\\left(click | category=c_i \\right )$ | 与其他具有更高局部密度点集的最小距离 |\n", 118 | "|$p^{t}\\left(click \\right )$||\n", 119 | "|||\n", 120 | "|||\n", 121 | "|||\n", 122 | "\n", 123 | "\n" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "\n", 131 | "\n", 132 | "\n" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": true 140 | }, 141 | "outputs": [], 142 | "source": [] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": { 148 | "collapsed": true 149 | }, 150 | "outputs": [], 151 | "source": [] 152 | } 153 | ], 154 | "metadata": { 155 | "kernelspec": { 156 | "display_name": "Python 3", 157 | "language": "python", 158 | "name": "python3" 159 | }, 160 | "language_info": { 161 | "codemirror_mode": { 162 | "name": "ipython", 163 | "version": 3 164 | }, 165 | "file_extension": ".py", 166 | "mimetype": "text/x-python", 167 | "name": "python", 168 | "nbconvert_exporter": "python", 169 | "pygments_lexer": "ipython3", 170 | "version": "3.6.1" 171 | } 172 | }, 173 | "nbformat": 4, 174 | "nbformat_minor": 2 175 | } 176 | -------------------------------------------------------------------------------- /notes_paper/Personalized News Recommendation Based on Click Behavior/研究路线.xmind: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_paper/Personalized News Recommendation Based on Click Behavior/研究路线.xmind -------------------------------------------------------------------------------- /notes_paper/TextRank-Bring Order into Texts/textrank_sample.data: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/notes_paper/TextRank-Bring Order into Texts/textrank_sample.data -------------------------------------------------------------------------------- /notes_paper/papers_for_ dialogue_system.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# 对话系统相关文献阅读笔记\n", 9 | "\n", 10 | "---\n", 11 | "\n", 12 | "* Sequence-to-Sequence Learning for Task-oriented Dialogue with Dialogue State Representation[J]. arXiv:1806.04441 [cs], 2018." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [] 23 | } 24 | ], 25 | "metadata": { 26 | "kernelspec": { 27 | "display_name": "Python 3", 28 | "language": "python", 29 | "name": "python3" 30 | }, 31 | "language_info": { 32 | "codemirror_mode": { 33 | "name": "ipython", 34 | "version": 3 35 | }, 36 | "file_extension": ".py", 37 | "mimetype": "text/x-python", 38 | "name": "python", 39 | "nbconvert_exporter": "python", 40 | "pygments_lexer": "ipython3", 41 | "version": "3.6.2" 42 | } 43 | }, 44 | "nbformat": 4, 45 | "nbformat_minor": 2 46 | } 47 | -------------------------------------------------------------------------------- /others/cs229/cs229-notes4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/others/cs229/cs229-notes4.pdf -------------------------------------------------------------------------------- /others/cs229/cs229-notes5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/others/cs229/cs229-notes5.pdf -------------------------------------------------------------------------------- /others/dokcer_study_notes/images/p001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/others/dokcer_study_notes/images/p001.png -------------------------------------------------------------------------------- /others/dokcer_study_notes/images/p002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/others/dokcer_study_notes/images/p002.png -------------------------------------------------------------------------------- /others/interview.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# ZB - 面试总结\n", 8 | "\n", 9 | "---\n", 10 | "\n", 11 | "面试是一个双方相互了解的机会,作为应聘者,没有必要刻意降低自己的身位;抓住一切机会去了解自己正在申请的岗位和公司的情况,以便于自己做出一个更为明智、理性的决定。\n", 12 | "\n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## 面试思考\n", 20 | "---\n", 21 | "\n", 22 | "### 1 - 面试快结束的时候,可以向招聘人员询问哪些信息?\n", 23 | "\n", 24 | "1. 公司、部门介绍\n", 25 | "2. 面试岗位的主要工作内容\n", 26 | "3. 
请招聘人员对自己做一个评价\n" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "## 2018-04-05 | 腾讯 - 实习生笔试\n", 34 | "---\n", 35 | "\n", 36 | "正式考-技术研究类和数据分析-2018实习生招聘(正式卷)\n", 37 | "\n", 38 | "题型:25道不定项选择;3道问答题;没有编程题。\n", 39 | "\n", 40 | "不定项选择考点:1)切比雪夫不等式;2)冒泡排序;3)中心极限定理;4)求极限;5)不定积分;6)方程组非零解的情况;7)克莱姆法则;8)秩统计量。\n", 41 | "\n", 42 | "问答题考点:1)偏差、方差的理解,以及如何解决对应的问题;2)描述假设检验的基本步骤;3)假设微信用户每分钟上传一次位置信息,这些数据可以用来做一些什么对社会有价值的事情?" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## 2018-08-23 | 加推科技一面\n", 50 | "---\n", 51 | "\n", 52 | "面试官对我的评价:\n", 53 | "**1、机器学习基础知识不扎实**\n", 54 | "2、思路清晰\n", 55 | "3、项目经验上缺乏亮眼的成绩\n", 56 | "\n", 57 | "项目介绍:几乎每个项目都问了一下,评价是这些项目与算法的相关性都不是很强;重点介绍了JDDC比赛、招标信息采集与挖掘、宝安区城市运行指数项目、TMA。\n", 58 | "\n", 59 | "知识点:\n", 60 | "1、LSTM三个门结构的原理\n", 61 | "2、SVM常用核函数\n", 62 | "3、决策树的三个划分属性选择标准的数学公式\n", 63 | "4、维特比算法解决CRF的预测问题\n", 64 | "5、bagging、boosting、模型融合\n", 65 | "6、RNN中的步长、层数\n", 66 | "7、2014 science发表文章中的聚类算法\n", 67 | "8、卡方检验\n", 68 | "9、LDA算法\n", 69 | "\n", 70 | "问题:\n", 71 | "1、异常值检测的常用方法\n", 72 | "2、特征筛选的常用方法\n", 73 | "3、如何判断一个数据集是否满足正态分布,不满足的话,怎样进行转换\n", 74 | "\n", 75 | "\n", 76 | "反思:\n", 77 | "这个面试非常深入,历时约1个半小时。面试官是大数据架构师,两年前开始了解机器学习方面的知识。整个面试过程中,项目方面的内容都描述得还行,算法思想和实现流程方面也答得还行,但是算法方面更为细节性的知识,如计算公式等,表现非常差。\n", 78 | "\n" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "metadata": {}, 84 | "source": [ 85 | "## 2018-08-17 | 苹果树科技一面\n", 86 | "\n", 87 | "---\n", 88 | "\n", 89 | "python垃圾回收机制\n", 90 | "\n", 91 | "编程题:给出一篇文档和一组关键词,如果某个关键词在文档中出现,在对应的位置标红。\n", 92 | "解答:先对关键词按长度排序,然后,逐一匹配,并将匹配到的关键词所在的位置标记为0,最后将所有0对应的位置标红。\n", 93 | "\n", 94 | "最大正向匹配\n", 95 | "seq2seq模型原理\n", 96 | "LSTM原理\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "## 2018-08-20 | 追一科技 | 电话面试\n", 104 | "\n", 105 | "---\n", 106 | "\n", 107 | "电话面试:1)seq2seq;2)LSTM原理;3)对话系统的主要分类\n", 108 | "\n", 109 | "\n", 110 | "---\n", 111 | "**给出一组关键词,如何判断哪些词在一段长文本中?**\n", 112 | "\n", 113 | "step 1. 将长文本分词,加上所有关键词,构建词典,并给出每个词的one-hot编码;\n", 114 | "\n", 115 | "step 2. 将长文本转换成由每个词组成的one-hot编码矩阵,维度为 (total_words, n_words_in_text);\n", 116 | "\n", 117 | "step 3. 
由这组关键词构建一个one-hot向量,维度为(1,total_words),与长文本的one-hot矩阵相乘,得到一个维度为(1,n_words_in_text)的结果,其中元素等于1的位置对应的词就是存在于长文本中的关键词。" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "## 2018-08-22 | 一满乐面试总结 \n", 125 | "\n", 126 | "总共三轮面试,花了很长的时间聊项目,由此看来,公司招人,除了看能力之外,项目经验也是一个非常重要的考量。\n", 127 | "\n", 128 | "知识点考察:\n", 129 | "K-Means\n", 130 | "决策树\n", 131 | "\n", 132 | "编程考察:\n", 133 | "用sklearn完成minst多分类\n", 134 | "\n", 135 | "项目介绍:\n", 136 | "1、招标信息爬取与挖掘\n", 137 | "2、宝安区城市运行指数项目\n", 138 | "3、TMA" 139 | ] 140 | } 141 | ], 142 | "metadata": { 143 | "kernelspec": { 144 | "display_name": "Python 3", 145 | "language": "python", 146 | "name": "python3" 147 | }, 148 | "language_info": { 149 | "codemirror_mode": { 150 | "name": "ipython", 151 | "version": 3 152 | }, 153 | "file_extension": ".py", 154 | "mimetype": "text/x-python", 155 | "name": "python", 156 | "nbconvert_exporter": "python", 157 | "pygments_lexer": "ipython3", 158 | "version": "3.6.2" 159 | } 160 | }, 161 | "nbformat": 4, 162 | "nbformat_minor": 2 163 | } 164 | -------------------------------------------------------------------------------- /others/markdown_on_ipynb/Markdown 语法示例.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Markdown 语法示例\n", 8 | "---\n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "### 调整字体颜色\n", 16 | "\n", 17 | "\n", 18 | "**What you need to remember**\n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "\n", 26 | "### 添加图片\n", 27 | "\n", 28 | "---\n", 29 | "\n", 30 | "
**k-means**

" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### 添加表格\n", 38 | "---\n", 39 | "\n", 40 | "| 符号 | 含义 | \n", 41 | "| :--------: | :--------| \n", 42 | "| ![][f01] | 截断距离 |\n", 43 | "| ![][f02] | 点$i$与点$j$之间的距离|\n", 44 | "| ![][f03] | 局部密度 |\n", 45 | "| ![][f04] | 与其他具有更高局部密度点集的最小距离 |\n", 46 | "\n", 47 | "\n", 48 | "[f01]: http://latex.codecogs.com/svg.latex?d_{c}\n", 49 | "[f02]: http://latex.codecogs.com/svg.latex?d_{ij}\n", 50 | "[f03]: http://latex.codecogs.com/svg.latex?\\rho_{i}\n", 51 | "[f04]: http://latex.codecogs.com/svg.latex?\\delta_{i}" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## html表格\n", 59 | "---\n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | "
设备 设备文件名 文件描述符 类型
1 /dev/stdin 0 标准输入
2 /dev/stdout 1 标准输出
3 /dev/stderr 2 标准错误输出
" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "### 数学公式编写\n", 73 | "---\n", 74 | "\n", 75 | "$$J_{regularized} = \\small \\underbrace{-\\frac{1}{m} \\sum\\limits_{i = 1}^{m} \\large{(}\\small y^{(i)}\\log\\left(a^{[L](i)}\\right) + (1-y^{(i)})\\log\\left(1- a^{[L](i)}\\right) \\large{)} }_\\text{cross-entropy cost} + \\underbrace{\\frac{1}{m} \\frac{\\lambda}{2} \\sum\\limits_l\\sum\\limits_k\\sum\\limits_j W_{k,j}^{[l]2} }_\\text{L2 regularization cost} \\tag{2}$$\n", 76 | "\n" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 1, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "# Autoreload of modules in IPython\n", 88 | "\n", 89 | "%load_ext autoreload\n", 90 | "%autoreload 2\n", 91 | "\n", 92 | "import autoreload\n", 93 | "?autoreload" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": true 101 | }, 102 | "outputs": [], 103 | "source": [] 104 | } 105 | ], 106 | "metadata": { 107 | "kernelspec": { 108 | "display_name": "Python 3", 109 | "language": "python", 110 | "name": "python3" 111 | }, 112 | "language_info": { 113 | "codemirror_mode": { 114 | "name": "ipython", 115 | "version": 3 116 | }, 117 | "file_extension": ".py", 118 | "mimetype": "text/x-python", 119 | "name": "python", 120 | "nbconvert_exporter": "python", 121 | "pygments_lexer": "ipython3", 122 | "version": "3.6.2" 123 | } 124 | }, 125 | "nbformat": 4, 126 | "nbformat_minor": 2 127 | } 128 | -------------------------------------------------------------------------------- /others/markdown_on_ipynb/images/k-means_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/others/markdown_on_ipynb/images/k-means_1.png -------------------------------------------------------------------------------- /others/markdown_on_ipynb/images/k-means_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/others/markdown_on_ipynb/images/k-means_2.png -------------------------------------------------------------------------------- /others/mongodb.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# MongoDB学习笔记\n", 8 | "---\n" 9 | ] 10 | } 11 | ], 12 | "metadata": { 13 | "kernelspec": { 14 | "display_name": "Python 3", 15 | "language": "python", 16 | "name": "python3" 17 | }, 18 | "language_info": { 19 | "codemirror_mode": { 20 | "name": "ipython", 21 | "version": 3 22 | }, 23 | "file_extension": ".py", 24 | "mimetype": "text/x-python", 25 | "name": "python", 26 | "nbconvert_exporter": "python", 27 | "pygments_lexer": "ipython3", 28 | "version": "3.6.2" 29 | } 30 | }, 31 | "nbformat": 4, 32 | "nbformat_minor": 2 33 | } 34 | -------------------------------------------------------------------------------- /others/resume.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 个人简历 - 曾斌\n", 8 | "\n", 9 | "---" 10 | ] 11 | } 12 | ], 13 | "metadata": { 14 | "kernelspec": { 15 | "display_name": "Python 3", 16 | "language": "python", 17 | "name": "python3" 18 | }, 19 | 
"language_info": { 20 | "codemirror_mode": { 21 | "name": "ipython", 22 | "version": 3 23 | }, 24 | "file_extension": ".py", 25 | "mimetype": "text/x-python", 26 | "name": "python", 27 | "nbconvert_exporter": "python", 28 | "pygments_lexer": "ipython3", 29 | "version": "3.6.2" 30 | } 31 | }, 32 | "nbformat": 4, 33 | "nbformat_minor": 2 34 | } 35 | -------------------------------------------------------------------------------- /python/Keras 学习笔记 - 以TensorFlow为后端.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Keras 学习笔记 - 以TensorFlow为后端\n", 8 | "---\n", 9 | "\n", 10 | "Keras是一个高层神经网络API,由纯Python编写而成并基Tensorflow、Theano以及CNTK后端。\n", 11 | "\n", 12 | "Keras的核心数据结构是“模型”,模型是一种组织网络层的方式。Keras中主要的模型是Sequential模型,Sequential是一系列网络层按顺序构成的栈。" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## 1- 序贯(Sequential)模型\n", 20 | "---\n", 21 | "\n", 22 | "http://keras-cn.readthedocs.io/en/latest/getting_started/sequential_model/\n", 23 | "\n", 24 | "序贯模型是多个网络层的线性堆叠。\n", 25 | "\n", 26 | "创建Sequential模型有两种方法:1)传递layers的list;2)使用.add()方法逐个添加layer\n", 27 | "\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "from keras.models import Sequential\n", 39 | "from keras.layers import Dense, Activation\n", 40 | "\n", 41 | "# 传递layers的list 来创建 Sequential 模型\n", 42 | "model = Sequential([Dense(32, units=784),\n", 43 | " Activation('relu'),\n", 44 | " Dense(10),\n", 45 | " Activation('softmax')])\n", 46 | "\n", 47 | "# 使用 .add() 方法 逐个添加layer\n", 48 | "model = Sequential()\n", 49 | "model.add(Dense(32, input_shape=(784,)))\n", 50 | "model.add(Activation('relu'))" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "## 使用Keras建模的典型过程" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": true 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "from keras.models import Sequential\n", 69 | "from keras.layers import Dense, Activation\n", 70 | "from keras.optimizers import SGD\n", 71 | "\n", 72 | "# 模型搭建\n", 73 | "model = Sequential()\n", 74 | "model.add(Dense(units=64, input_dim=100))\n", 75 | "model.add(Activation(\"relu\"))\n", 76 | "model.add(Dense(units=10))\n", 77 | "model.add(Activation(\"softmax\"))\n", 78 | "\n", 79 | "# 编译模型\n", 80 | "# model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])\n", 81 | "model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.01, momentum=0.9, nesterov=True))\n", 82 | "\n", 83 | "# 训练模型\n", 84 | "# model.train_on_batch(x_batch, y_batch)\n", 85 | "model.fit(x_train, y_train, epochs=5, batch_size=32)\n", 86 | "\n", 87 | "# 评估模型\n", 88 | "loss_and_metrics = model.evaluate(x_test, y_test, batch_size=128)\n", 89 | "\n", 90 | "# 预测新数据\n", 91 | "classes = model.predict(x_test, batch_size=128)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## 参考资料\n", 99 | "---\n", 100 | "* [keras中文文档](http://keras-cn.readthedocs.io/en/latest/)\n" 101 | ] 102 | } 103 | ], 104 | "metadata": { 105 | "kernelspec": { 106 | "display_name": "Python 3", 107 | "language": "python", 108 | "name": "python3" 109 | }, 110 | "language_info": { 111 | "codemirror_mode": { 112 | "name": "ipython", 113 | "version": 3 114 | }, 115 | 
"file_extension": ".py", 116 | "mimetype": "text/x-python", 117 | "name": "python", 118 | "nbconvert_exporter": "python", 119 | "pygments_lexer": "ipython3", 120 | "version": "3.6.2" 121 | } 122 | }, 123 | "nbformat": 4, 124 | "nbformat_minor": 2 125 | } 126 | -------------------------------------------------------------------------------- /python/MNIST_data/t10k-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/python/MNIST_data/t10k-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /python/MNIST_data/t10k-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/python/MNIST_data/t10k-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /python/MNIST_data/train-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/python/MNIST_data/train-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /python/MNIST_data/train-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengbin93/blog/217c51ac456ad1cc893cdc9602ecaa6b390ebaf4/python/MNIST_data/train-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /python/TensorFlow_学习笔记.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# TensorFlow 学习笔记\n", 8 | "---\n", 9 | "内容概要\n", 10 | "\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "\n", 18 | "TF的系统构架分为两部分:\n", 19 | "\n", 20 | " 前端:提供编程模型,负责构造计算图,提供Python,C++,Java,Go等多种语言支持。\n", 21 | " 后端:提供运行时环境,负责执行计算图,采用C++实现。" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "### 写日志\n", 29 | "---\n", 30 | "\n", 31 | "writer = tf.train.SummaryWriter()\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "## TensorBoard\n", 39 | "---\n", 40 | "日志文件可视化;独立进程;展示TensorFlow运行过程中的计算图、各种指标随时间的变化趋势以及训练中使用的图像等信息。\n", 41 | "\n", 42 | "使用tensorboard分两步:\n", 43 | "\n", 44 | "1. 写入日志到logdir\n", 45 | "2. 
命令行执行 tensorboard --logdir=logdir\n", 46 | "\n", 47 | "\n", 48 | "### 使用命名空间整理可视化效果图\n", 49 | "---\n", 50 | "* 同一个命名空间下的所有节点会被缩略成一个节点\n", 51 | "* 命名空间管理:tf.variable_scope/tf.name_scope\n", 52 | "\n", 53 | "### 添加节点信息\n", 54 | "---\n", 55 | "\n" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "logdir = \"c:/\"\n", 67 | "writer = tf.train.SummaryWriter(logdir, tf.get_dafault_graph())\n", 68 | "wirter.close()\n" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "### tensorboard usage\n", 76 | "---\n", 77 | "usage: tensorboard.exe [-h] [--logdir LOGDIR] [--host HOST] [--port PORT]\n", 78 | " [--purge_orphaned_data [PURGE_ORPHANED_DATA]]\n", 79 | " [--nopurge_orphaned_data]\n", 80 | " [--reload_interval RELOAD_INTERVAL]\n", 81 | " [--inspect [INSPECT]] [--noinspect] [--tag TAG]\n", 82 | " [--event_file EVENT_FILE]\n", 83 | "\n", 84 | "optional arguments:\n", 85 | " -h, --help show this help message and exit\n", 86 | " --logdir LOGDIR logdir specifies the directory where TensorBoard will\n", 87 | " look to find TensorFlow event files that it can\n", 88 | " display. TensorBoard will recursively walk the\n", 89 | " directory structure rooted at logdir, looking for\n", 90 | " .*tfevents.* files. You may also pass a comma\n", 91 | " separated list of log directories, and TensorBoard\n", 92 | " will watch each directory. You can also assign names\n", 93 | " to individual log directories by putting a colon\n", 94 | " between the name and the path, as in tensorboard\n", 95 | " --logdir=name1:/path/to/logs/1,name2:/path/to/logs/2\n", 96 | " --host HOST What host to listen to. Defaults to serving on all\n", 97 | " interfaces, set to 127.0.0.1 (localhost) todisable\n", 98 | " remote access (also quiets security warnings).\n", 99 | " --port PORT What port to serve TensorBoard on.\n", 100 | " --purge_orphaned_data [PURGE_ORPHANED_DATA]\n", 101 | " Whether to purge data that may have been orphaned due\n", 102 | " to TensorBoard restarts. Disabling purge_orphaned_data\n", 103 | " can be used to debug data disappearance.\n", 104 | " --nopurge_orphaned_data\n", 105 | " --reload_interval RELOAD_INTERVAL\n", 106 | " How often the backend should load more data.\n", 107 | " --inspect [INSPECT] Use this flag to print out a digest of your event\n", 108 | " files to the command line, when no data is shown on\n", 109 | " TensorBoard or the data shown looks weird. Example\n", 110 | " usages: tensorboard --inspect\n", 111 | " --event_file=myevents.out tensorboard --inspect\n", 112 | " --event_file=myevents.out --tag=loss tensorboard\n", 113 | " --inspect --logdir=mylogdir tensorboard --inspect\n", 114 | " --logdir=mylogdir --tag=loss See\n", 115 | " tensorflow/python/summary/event_file_inspector.py for\n", 116 | " more info and detailed usage.\n", 117 | " --noinspect\n", 118 | " --tag TAG The particular tag to query for. Only used if\n", 119 | " --inspect is present\n", 120 | " --event_file EVENT_FILE\n", 121 | " The particular event file to query for. Only used if\n", 122 | " --inspect is present and --logdir is not specified." 
123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "## 使用Softmax回归进行MNIST字符识别\n", 130 | "\n", 131 | "[独家 | 一文读懂TensorFlow(附代码、学习资料) ](https://mp.weixin.qq.com/s?__biz=MzI1MjQ2OTQ3Ng==&mid=2247486310&idx=1&sn=a2181dc3957fb8ff91b739200f066dc3&chksm=e9e202edde958bfb449eb319a61056174c22a62c7cd0f68913351eeb9f7a4a50d089b1e8dee6&scene=0##)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 11, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "Extracting MNIST_data/train-images-idx3-ubyte.gz\n", 144 | "Extracting MNIST_data/train-labels-idx1-ubyte.gz\n", 145 | "Extracting MNIST_data/t10k-images-idx3-ubyte.gz\n", 146 | "Extracting MNIST_data/t10k-labels-idx1-ubyte.gz\n", 147 | "0.098\n" 148 | ] 149 | } 150 | ], 151 | "source": [ 152 | "# 导入 mnist 数据集\n", 153 | "from tensorflow.examples.tutorials.mnist import input_data\n", 154 | "mnist = input_data.read_data_sets(\"MNIST_data/\", one_hot=True)\n", 155 | "\n", 156 | "import tensorflow as tf\n", 157 | "\n", 158 | "# 输入图\n", 159 | "x = tf.placeholder('float', [None, 784]) # None 表示该维度大小任意\n", 160 | "y_ = tf.placeholder('float', [None, 10])\n", 161 | "\n", 162 | "# 推断图\n", 163 | "W = tf.Variable(tf.zeros([784, 10]))\n", 164 | "b = tf.Variable(tf.zeros([10]))\n", 165 | "y = tf.nn.softmax(tf.matmul(x, W) + b)\n", 166 | "\n", 167 | "# 训练图\n", 168 | "cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), axis=1)) # 计算交叉熵\n", 169 | "train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)\n", 170 | "\n", 171 | "# 创建session,运行模型\n", 172 | "sess = tf.InteractiveSession()\n", 173 | "tf.global_variables_initializer().run()\n", 174 | "for _ in range(1000):\n", 175 | " batch_xs, batch_ys = mnist.train.next_batch(100)\n", 176 | " sess.run(train_step, feed_dict={x:batch_xs, y_:batch_ys})\n", 177 | " \n", 178 | "# 评估图\n", 179 | "correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))\n", 180 | "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 181 | "\n", 182 | "print(sess.run(accuracy, feed_dict={x:mnist.test.images, y_:mnist.test.labels}))" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": {}, 188 | "source": [ 189 | "## 参考资料\n", 190 | "---\n", 191 | "* https://github.com/tensorflow/tensorflow" 192 | ] 193 | } 194 | ], 195 | "metadata": { 196 | "kernelspec": { 197 | "display_name": "Python 3", 198 | "language": "python", 199 | "name": "python3" 200 | }, 201 | "language_info": { 202 | "codemirror_mode": { 203 | "name": "ipython", 204 | "version": 3 205 | }, 206 | "file_extension": ".py", 207 | "mimetype": "text/x-python", 208 | "name": "python", 209 | "nbconvert_exporter": "python", 210 | "pygments_lexer": "ipython3", 211 | "version": "3.6.2" 212 | } 213 | }, 214 | "nbformat": 4, 215 | "nbformat_minor": 2 216 | } 217 | -------------------------------------------------------------------------------- /python/list_学习笔记.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Python 数据结构 —— list\n", 8 | "********\n", 9 | "## list数据结构要点\n", 10 | "1. list中的每个项目之间用逗号隔开;\n", 11 | "2. list是可以修改的数据结构,也就是说,数据的增加、删除、查找都是允许的;\n", 12 | "3. list对数据类型的容忍度很高,任何类型的数据都可以添加到同一个list中,但这将给数据处理带来麻烦,通常同一个list中存储同一类型的数据\n", 13 | "4. list支持切片操作,返回新的list\n", 14 | " \n", 15 | "## 创建list\n", 16 | "1. 使用中括号[]定义,如 [],[a], [a, b, c]\n", 17 | "2. 
从列表推导式得到 [x for x in iterable], iterable表示可迭代对象\n", 18 | "3. 使用list()函数, list() or list(iterable)" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/plain": [ 29 | "([],\n", 30 | " ['a'],\n", 31 | " ['a', 'b', 'c'],\n", 32 | " [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],\n", 33 | " [],\n", 34 | " [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" 35 | ] 36 | }, 37 | "execution_count": 1, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | } 41 | ], 42 | "source": [ 43 | "# 创建一个空的list\n", 44 | "ls1 = []; ls2 = ['a']; ls3 = ['a','b','c'] \n", 45 | "# 从列表推导式得到一个list\n", 46 | "iterable = range(10)\n", 47 | "ls4 = [x for x in iterable]\n", 48 | "# 使用list函数\n", 49 | "ls5 = list(); ls6 = list(range(10)) \n", 50 | "\n", 51 | "# 显示结果\n", 52 | "ls1, ls2, ls3, ls4, ls5, ls6" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "## list中的切片操作\n", 60 | "\n", 61 | "---\n", 62 | " \n", 63 | "1. 正序切片\n", 64 | " * ls[i] -- 返回第i+1个位置的元素\n", 65 | " * ls[i:j] -- 返回第i+1到第j个位置的元素\n", 66 | " * ls[i:j:s] -- 返回第i+1到第j个位置中间隔为s的元素\n", 67 | "\n", 68 | "2. 反序切片\n", 69 | " * ls[-i] -- 返回第-i个位置的元素\n", 70 | " * ls[-i:-j] -- -i所表示的位置必须在-j所表示的位置前面,否则返回空list\n", 71 | " * ls[-i:-j:s] -- s必须是正整数\n", 72 | "\n", 73 | "3. 混合切片\n", 74 | " * ls[-i:j] -- -i表示的位置必须在j所表示的位置前面,否则返回空list\n", 75 | "\n", 76 | "*注:尽可能使用正序切片,避免使用反序切片和混合切片,因为反序和混合的方法不方便理解,代码可读性低!*" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "# 正序切片\n", 86 | "ls4[0] # 0\n", 87 | "ls4[0:4] # [0, 1, 2, 3]\n", 88 | "ls4[0:8:2] # [0, 2, 4, 6]\n", 89 | "\n", 90 | "# 反序切片\n", 91 | "ls4[-1] # 9\n", 92 | "ls4[-4:-1] # [6, 7, 8]\n", 93 | "ls4[-8:-1:2] # [2, 4, 6, 8]\n", 94 | "\n", 95 | "# 混合切片\n", 96 | "ls4[-9:6] # [1, 2, 3, 4, 5]" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "## list中的增删改查\n", 104 | "**********\n", 105 | "### 增加数据\n", 106 | "1. ls.append(x) -- 将元素x增加到list尾部,每次只能增加一个项目\n", 107 | "2. ls.extend(iterable) -- 增加多个项目到list尾部\n", 108 | "3. ls.insert(i,x) -- 在位置i+1处插入新元素x" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# 将'a'增加到ls1尾部\n", 118 | "ls1.append('a') # ['a']\n", 119 | "# 将[0,1,2]增加到ls3尾部\n", 120 | "ls3.extend(range(3)) # ['a', 'b', 'c', 0, 1, 2]\n", 121 | "# 在ls6的第4个位置插入10\n", 122 | "ls6.insert(3,10) # [0, 1, 2, 10, 3, 4, 5, 6, 7, 8, 9]" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "### 删除数据\n", 130 | "1. ls.remove(x) -- 删除list中第一次出现的元素x\n", 131 | "2. del ls[i] -- 删除list中的第i+1个元素\n", 132 | "3. del ls[i:j] -- 按切片删除,即删除list中的第i+1到第j个元素\n", 133 | "4. 
ls.pop(i) -- 取出list中位置i+1的元素,并删除它" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": {}, 140 | "outputs": [], 141 | "source": [ 142 | "ls3.remove('a')\n", 143 | "del ls3[2]\n", 144 | "del ls3[0:2]\n", 145 | "x = ls3.pop(2) # 取出ls3中第3个元素,并赋给x" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "### 修改数据\n", 153 | "* list中的元素可以修改,找到元素的索引,将该索引对应的值更新即可。\n" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "ls1[0] = 'b'\n", 163 | "ls3[2:4] = ['b'] * 2 # 切片大小必须和新元素的大小一致\n", 164 | "ls1, ls3" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "### 查找数据\n", 172 | "* ls.index(value,[start,[stop]]) -- 返回列表中指定元素所在的索引位置,可以通过start和stop参数设置搜索范围\n" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "# 找出ls6中元素8第一次出现的位置\n", 182 | "ls6.index(8)\n", 183 | "\n", 184 | "# 在位置7到10之间查找元素8\n", 185 | "ls6.index(8,7,10)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "## list中的其他操作\n", 193 | "1. ls.sort() -- 将列表元素排序\n", 194 | "2. ls.reverse() -- 将序列元素反转\n", 195 | "3. ls.count(x) -- 返回元素x在列表中的数量\n", 196 | "4. ls1 + ls2 -- list中的加法运算,将ls1和ls2合并到同一个list中" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": { 203 | "collapsed": true 204 | }, 205 | "outputs": [], 206 | "source": [ 207 | "ls = [5,7,1,6,2,7,4,5,8]\n", 208 | "ls.sort() # [1, 2, 4, 5, 5, 6, 7, 7, 8]\n", 209 | "ls.reverse() # [8, 5, 4, 7, 2, 6, 1, 7, 5]\n", 210 | "ls.count(7) # 2\n", 211 | "\n", 212 | "# list中的加法运算\n", 213 | "ls1 = ls[0:3]; ls2 = ls[3:6]\n", 214 | "ls3 = ls1 + ls2 # [5, 7, 1, 6, 2, 7]" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "## 综合运用\n", 222 | "1. 找出某元素在list中出现的所有位置\n", 223 | "2. 找出两个list中的相同元素\n", 224 | "3. 
将英文句子中的aeiou去除" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 2, 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "data": { 234 | "text/plain": [ 235 | "[0, 3, 16]" 236 | ] 237 | }, 238 | "execution_count": 2, 239 | "metadata": {}, 240 | "output_type": "execute_result" 241 | } 242 | ], 243 | "source": [ 244 | "# 找出某元素在list中出现的所有位置\n", 245 | "ls = list('ajdajdlkfhgksjdkahgsdhk')\n", 246 | "find = 'a'\n", 247 | "[i for i,v in enumerate(ls) if v == find]" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 3, 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/plain": [ 258 | "[3, 4, 5, 6]" 259 | ] 260 | }, 261 | "execution_count": 3, 262 | "metadata": {}, 263 | "output_type": "execute_result" 264 | } 265 | ], 266 | "source": [ 267 | "# 找出两个list中的相同元素\n", 268 | "ls1 = [1,2,3,4,5,6]\n", 269 | "ls2 = [3,4,5,6,7,8]\n", 270 | "[i for i in ls1 if i in ls2]" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 4, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "data": { 280 | "text/plain": [ 281 | "'my nm s rshy jn!'" 282 | ] 283 | }, 284 | "execution_count": 4, 285 | "metadata": {}, 286 | "output_type": "execute_result" 287 | } 288 | ], 289 | "source": [ 290 | "# 将英文句子中的aeiou去除\n", 291 | "sentence = 'My name is Aarshay Jain!'.lower()\n", 292 | "''.join([i for i in sentence if i not in 'aeiou'])" 293 | ] 294 | } 295 | ], 296 | "metadata": { 297 | "kernelspec": { 298 | "display_name": "Python 3", 299 | "language": "python", 300 | "name": "python3" 301 | }, 302 | "language_info": { 303 | "codemirror_mode": { 304 | "name": "ipython", 305 | "version": 3 306 | }, 307 | "file_extension": ".py", 308 | "mimetype": "text/x-python", 309 | "name": "python", 310 | "nbconvert_exporter": "python", 311 | "pygments_lexer": "ipython3", 312 | "version": "3.6.2" 313 | } 314 | }, 315 | "nbformat": 4, 316 | "nbformat_minor": 1 317 | } 318 | -------------------------------------------------------------------------------- /python/pymongo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# PyMongo\n", 8 | "\n", 9 | "---\n", 10 | "\n", 11 | "Data in MongoDB is represented (and stored) using JSON-style documents. In PyMongo we use dictionaries to represent documents. 
\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "## Native and cross-platform MongoDB manager\n", 19 | "* https://robomongo.org/" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import pymongo\n", 31 | "from pymongo import MongoClient\n", 32 | "from datetime import datetime" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "client = MongoClient('47.100.12.99', 27017)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## 数据查询\n", 51 | "\n", 52 | "---" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "data": { 62 | "text/plain": [ 63 | "['system.indexes', 'profiles']" 64 | ] 65 | }, 66 | "execution_count": 3, 67 | "metadata": {}, 68 | "output_type": "execute_result" 69 | } 70 | ], 71 | "source": [ 72 | "# 连接数据库\n", 73 | "db = client.test_database\n", 74 | "\n", 75 | "# 获取一个Collection\n", 76 | "collection = db.test_collection\n", 77 | "\n", 78 | "# 查看数据库中的所有Collection名称\n", 79 | "db.list_collection_names()" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "collapsed": true 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "# 创建一个名为 posts 的 Collection\n", 91 | "posts = db.posts\n", 92 | "\n", 93 | "# documents\n", 94 | "post = {\"author\": \"Mike\",\n", 95 | " \"text\": \"My first blog post!\",\n", 96 | " \"tags\": [\"mongodb\", \"python\", \"pymongo\"],\n", 97 | " \"date\": datetime.utcnow()}\n", 98 | "\n", 99 | "# 插入一篇文档,获得一个对应的独立id\n", 100 | "post_id = posts.insert_one(post).inserted_id\n", 101 | "\n", 102 | "# 从集合中获取一篇文档\n", 103 | "# posts.find_one()\n", 104 | "posts.find_one({\"author\": \"Mike\"})\n", 105 | "# posts.find_one({\"_id\": post_id})\n", 106 | "\n", 107 | "# post_id应该是一个ObjectId对象,不是str\n", 108 | "from bson.objectid import ObjectId\n", 109 | "\n", 110 | "# The web framework gets post_id from the URL and passes it as a string\n", 111 | "def get(post_id):\n", 112 | " # Convert from string to ObjectId:\n", 113 | " document = client.db.collection.find_one({'_id': ObjectId(post_id)})\n", 114 | " return document" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": true, 122 | "scrolled": true 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "# 插入多篇文档\n", 127 | "new_posts = [{\"author\": \"Mike\",\n", 128 | "\"text\": \"Another post!\",\n", 129 | " \"tags\": [\"bulk\", \"insert\"],\n", 130 | "\"date\": datetime(2009, 11, 12, 11, 14)},\n", 131 | "{\"author\": \"Eliot\",\n", 132 | "\"title\": \"MongoDB is fun\",\n", 133 | "\"text\": \"and pretty easy too!\",\n", 134 | "\"date\": datetime(2009, 11, 10, 10, 45)}]\n", 135 | "\n", 136 | "result = posts.insert_many(new_posts)\n", 137 | "result.inserted_ids" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "## 索引操作\n", 145 | "---" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "collapsed": true, 153 | "scrolled": true 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "# 创建索引, _id_ 是插入文档是自动创建的索引\n", 158 | "# 在user_id字段上创建索引之后\n", 159 | "result = db.profiles.create_index([('user_id', 
pymongo.ASCENDING)], unique=True)\n", 160 | "sorted(list(db.profiles.index_information()))" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "# 删除集合\n", 172 | "db.posts.drop() #是用来从数据库中删除一个集合" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": { 178 | "collapsed": true 179 | }, 180 | "source": [ 181 | "## 参考资料\n", 182 | "\n", 183 | "---\n", 184 | "[MongoDB3.2中插入数据前如何去重](https://segmentfault.com/q/1010000005615722/a-1020000005625988)\n", 185 | "\n", 186 | "[MongoDB distinct too big问题](http://forum.foxera.com/mongodb/topic/776/mongodb-distinct-too-big%E9%97%AE%E9%A2%98?un=jerry)" 187 | ] 188 | } 189 | ], 190 | "metadata": { 191 | "kernelspec": { 192 | "display_name": "Python 3", 193 | "language": "python", 194 | "name": "python3" 195 | }, 196 | "language_info": { 197 | "codemirror_mode": { 198 | "name": "ipython", 199 | "version": 3 200 | }, 201 | "file_extension": ".py", 202 | "mimetype": "text/x-python", 203 | "name": "python", 204 | "nbconvert_exporter": "python", 205 | "pygments_lexer": "ipython3", 206 | "version": "3.6.2" 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 2 211 | } 212 | -------------------------------------------------------------------------------- /python/xgboost_study.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Xgboost 原理与应用\n", 8 | "\n", 9 | "---\n", 10 | "\n", 11 | "\n", 12 | "**Xgboost的一些特性:**\n", 13 | "\n", 14 | "* 支持GPU加速\n", 15 | "* 支持并行处理\n", 16 | "\n", 17 | "\n", 18 | "\n", 19 | "## 参考资料\n", 20 | "\n", 21 | "* https://xgboost.readthedocs.io/en/latest/" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "\n", 29 | "## 参数配置\n", 30 | "\n", 31 | "---\n", 32 | "\n", 33 | "* https://xgboost.readthedocs.io/en/latest/parameter.html\n", 34 | "\n", 35 | "\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "Python 3", 47 | "language": "python", 48 | "name": "python3" 49 | }, 50 | "language_info": { 51 | "codemirror_mode": { 52 | "name": "ipython", 53 | "version": 3 54 | }, 55 | "file_extension": ".py", 56 | "mimetype": "text/x-python", 57 | "name": "python", 58 | "nbconvert_exporter": "python", 59 | "pygments_lexer": "ipython3", 60 | "version": "3.6.2" 61 | } 62 | }, 63 | "nbformat": 4, 64 | "nbformat_minor": 2 65 | } 66 | -------------------------------------------------------------------------------- /python/利用百度地图坐标转换API实现gcj02转bd09.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 利用百度地图坐标转换API实现gcj02转bd09\n", 8 | "\n", 9 | "## 百度地图坐标转换API\n", 10 | "### 功能\n", 11 | "将常用的非百度坐标转换成百度地图中使用的坐标,如:GPS设备获取的坐标、google地图坐标、soso地图坐标、amap地图坐标、mapbar地图坐标等\n", 12 | "\n", 13 | "### 特点\n", 14 | "1. 以HTTP形式提供坐标转换服务\n", 15 | "2. 支持批量坐标转化,一次最多可转换100个坐标点。\n", 16 | "3. 
转化后的坐标可以在百度地图JavaScript API、车联网API、静态图API、web服务API等产品中使用\n", 17 | "\n", 18 | "### 服务地址\n", 19 | "* http://api.map.baidu.com/geoconv/v1/?\n", 20 | "* 组成说明: \n", 21 | "\t* 域名:api.map.baidu.com \n", 22 | "\t* 服务名:geoconv \n", 23 | " * 服务版本号:v1 \n", 24 | "\n", 25 | "### 服务参数说明\n", 26 | "\n", 27 | "\n", 28 | "### 返回值说明\n", 29 | "\n", 30 | "### 状态码说明\n", 31 | "\n", 32 | "### 示例\n", 33 | "http://api.map.baidu.com/geoconv/v1/?coords=114.21892734521,29.575429778924;114.21892734521,29.575429778924&ak=E4805d16520de693a3fe707cdc962045&output=json\n", 34 | "\n", 35 | "\n", 36 | "## 使用Python3.5编写脚本调用API实现坐标转换" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 14, 42 | "metadata": { 43 | "collapsed": false, 44 | "scrolled": true 45 | }, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "需要转换的gcj02经纬度数据如下:\n" 52 | ] 53 | }, 54 | { 55 | "data": { 56 | "text/html": [ 57 | "
\n", 58 | "\n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | "
lng_gcj02lat_gcj02
0120.67087028.017590
1120.65780328.010946
2120.65989828.035593
3120.68261527.994313
\n", 89 | "
" 90 | ], 91 | "text/plain": [ 92 | " lng_gcj02 lat_gcj02\n", 93 | "0 120.670870 28.017590\n", 94 | "1 120.657803 28.010946\n", 95 | "2 120.659898 28.035593\n", 96 | "3 120.682615 27.994313" 97 | ] 98 | }, 99 | "execution_count": 14, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "import pandas as pd\n", 106 | "from urllib.request import urlopen\n", 107 | "import json\n", 108 | "\n", 109 | "# 需要转换的坐标\n", 110 | "datas = pd.DataFrame({'lng_gcj02':[120.67087,120.657803,120.659898,120.682615],\n", 111 | " 'lat_gcj02':[28.01759,28.010946,28.035593,27.994313]},\n", 112 | " columns=['lng_gcj02','lat_gcj02'])\n", 113 | "print('需要转换的gcj02经纬度数据如下:')\n", 114 | "datas" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 16, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [ 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "转换结果如下:\n" 129 | ] 130 | }, 131 | { 132 | "data": { 133 | "text/html": [ 134 | "
\n", 135 | "\n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | "
lng_gcj02lat_gcj02lng_bd09lat_bd09
0120.67087028.017590120.68144928.020004
1120.65780328.010946120.66838728.013235
2120.65989828.035593120.67046328.037881
3120.68261527.994313120.69318327.996945
\n", 176 | "
" 177 | ], 178 | "text/plain": [ 179 | " lng_gcj02 lat_gcj02 lng_bd09 lat_bd09\n", 180 | "0 120.670870 28.017590 120.681449 28.020004\n", 181 | "1 120.657803 28.010946 120.668387 28.013235\n", 182 | "2 120.659898 28.035593 120.670463 28.037881\n", 183 | "3 120.682615 27.994313 120.693183 27.996945" 184 | ] 185 | }, 186 | "execution_count": 16, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "def gcj02_to_bd09(datas):\n", 193 | " \n", 194 | " # 在数据集中添加[lng_bd09 ,lat_bd09] 保存转换结果\n", 195 | " datas['lng_bd09'] = 0 \n", 196 | " datas['lat_bd09'] = 0\n", 197 | " \n", 198 | " for i in range(len(gcj02_lng_lat)):\n", 199 | " coords = str(datas['lng_gcj02'][i])+','+str(datas['lat_gcj02'][i])\n", 200 | " url = 'http://api.map.baidu.com/geoconv/v1/?coords='+coords+'&ak=9KtraRRDZT8xnAoVe3o41wGGgy4X3rMW'\n", 201 | " try: \n", 202 | " with urlopen(url,timeout=3) as html:\n", 203 | " http_info = html.info()\n", 204 | " raw_data = html.read().decode(http_info.get_content_charset())\n", 205 | " data = json.loads(raw_data)\n", 206 | " datas.iloc[i,2] = data['result'][0]['x']\n", 207 | " datas.iloc[i,3] = data['result'][0]['y']\n", 208 | " except:\n", 209 | " print('转换失败:'+coords)\n", 210 | " return datas\n", 211 | "\n", 212 | "print('转换结果如下:')\n", 213 | "gcj02_to_bd09(datas)" 214 | ] 215 | } 216 | ], 217 | "metadata": { 218 | "kernelspec": { 219 | "display_name": "Python [Root]", 220 | "language": "python", 221 | "name": "Python [Root]" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 3 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython3", 233 | "version": "3.5.2" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 0 238 | } 239 | -------------------------------------------------------------------------------- /python/日期和时间的处理.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pyhon中日期和时间数据的处理\n", 8 | "\n", 9 | "日期和时间是非常常见的数据类型,Python标准库中有多个模块可以用来处理这些数据,比如:[time](https://docs.python.org/3/library/time.html#module-time)、[calendar](https://docs.python.org/3/library/calendar.html#module-calendar)以及[datetime](https://docs.python.org/3/library/datetime.html)等。本文主要介绍datetime模块中datetime类的使用。\n", 10 | "\n", 11 | "## datetime模块简介\n", 12 | "\n", 13 | "datetime模块提供了多个用于操作日期和时间的类,并且支持datetime的加减法运算。该某块中含有两个常数,即datetime.MINYEAR = 1 和 datetime.MAXYEAR = 9999。根据官方文档的说法,时间和日期对象分为两类:“naive” 和 “aware”,按我的理解,其实就是 *带时区信息* 和 *不带时区信息* 的两类。这两类对象的官方说明如下:\n", 14 | "\n", 15 | ">An aware object has sufficient knowledge of applicable algorithmic and political time adjustments, such as time zone and daylight saving time information, to locate itself relative to other aware objects. An aware object is used to represent a specific moment in time that is not open to interpretation.\n", 16 | "\n", 17 | ">A naive object does not contain enough information to unambiguously locate itself relative to other date/time objects. Whether a naive object represents Coordinated Universal Time (UTC), local time, or time in some other timezone is purely up to the program, just like it is up to the program whether a particular number represents metres, miles, or mass. 
Naive objects are easy to understand and to work with, at the cost of ignoring some aspects of reality.\n", 18 | "\n", 19 | "datetime模块共有6类对象,如下:\n", 20 | "* datetime.date\n", 21 | "* datetime.time\n", 22 | "* datetime.datetime\n", 23 | "* datetime.timedelta\n", 24 | "* datetime.tzinfo\n", 25 | "* datetime.timezone\n", 26 | "\n", 27 | "下面,进入正题,介绍datetime.datetime对象的操作。" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## datetime.datetime 对象的操作\n", 35 | "\n", 36 | "datetime对象是由date对象和time对象合并得到的。因此,datetime对象同时包含了date对象和time对象中的假设,即:\n", 37 | "* 现行的公历可以向两个方向延伸\n", 38 | "* 每一天都是精确的3600\\*24秒\n", 39 | "\n", 40 | "\n", 41 | "Attributes: year, month, day, hour, minute, second, microsecond, and tzinf\n", 42 | "\n", 43 | "时间戳是指格林威治时间1970年01月01日00时00分00秒(北京时间1970年01月01日08时00分00秒)起至现在的总秒数。" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "### 创建datetime对象" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 3, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "'2017-09-20 10:27:42.437635'" 62 | ] 63 | }, 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "from datetime import datetime\n", 71 | "\n", 72 | "dt1 = datetime.today().isoformat(sep=' ')\n", 73 | "dt1" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "### datetime对象与string表示的时间之间的相互转换\n", 81 | "\n", 82 | "在平常的工作中,我们接触到的时间数据大多数是用字符串表示的,绝大多数的表现形式为 '2017-03-05 13:01:43'。为了方便分析,通常需要先将这类字符串表示的数据转换成datetime对象或者timestamp对象,分析结束之后再转换成字符串保存分析结果。" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "#### string表示的时间转换成datetime对象" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 4, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "datetime.datetime(2017, 3, 5, 13, 1, 43)" 101 | ] 102 | }, 103 | "execution_count": 4, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "dt1 = '2017-03-05 13:01:43'\n", 110 | "\n", 111 | "# 使用 datetime.strptime()\n", 112 | "dt2 = datetime.strptime(dt1,'%Y-%m-%d %H:%M:%S')\n", 113 | "dt2 # 返回datetime.datetime(2017, 3, 5, 13, 1, 43)\n", 114 | "\n" 115 | ] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python 3", 121 | "language": "python", 122 | "name": "python3" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.6.1" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 1 139 | } 140 | --------------------------------------------------------------------------------