├── .DS_Store ├── .ipynb_checkpoints └── iris_demo-checkpoint.ipynb ├── README.md ├── __pycache__ └── input_data.cpython-36.pyc ├── a2c-tf2.py ├── a2c.py ├── ac.py ├── adaboost.py ├── alldata ├── iris.csv ├── iris_test.csv └── iris_training.csv ├── asr_lstm_ctc.py ├── cgan-tf.py ├── datasets ├── .DS_Store ├── mnist.npz ├── mnist_data.zip └── mnist_data │ ├── t10k-images-idx3-ubyte.gz │ ├── t10k-labels-idx1-ubyte.gz │ ├── train-images-idx3-ubyte.gz │ └── train-labels-idx1-ubyte.gz ├── detect.py ├── face.py ├── image2.jpg ├── image2new.jpg ├── img └── ddpg.jpeg ├── input_data.py ├── iris_demo.ipynb ├── mate-learn.py ├── mnist_demo.py ├── router.py ├── sample.py ├── text_classifer.ipynb ├── tf-conv-error.py ├── tf-conv.py ├── tf-fenbu.py ├── tf-sm.py └── xgboostdemo.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koryako/FundamentalsOfAI_book_code/ef5bd1ead892dc07315a0eec56f826263709e676/.DS_Store -------------------------------------------------------------------------------- /.ipynb_checkpoints/iris_demo-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 105, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "1.10.1\n", 13 | "(120, 5)\n", 14 | "(30, 5)\n" 15 | ] 16 | }, 17 | { 18 | "data": { 19 | "text/html": [ 20 | "
\n", 21 | "\n", 34 | "\n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | "
abcdt
06.42.85.62.22
15.02.33.31.01
24.92.54.51.72
34.93.11.50.10
45.73.81.70.30
\n", 88 | "
" 89 | ], 90 | "text/plain": [ 91 | " a b c d t\n", 92 | "0 6.4 2.8 5.6 2.2 2\n", 93 | "1 5.0 2.3 3.3 1.0 1\n", 94 | "2 4.9 2.5 4.5 1.7 2\n", 95 | "3 4.9 3.1 1.5 0.1 0\n", 96 | "4 5.7 3.8 1.7 0.3 0" 97 | ] 98 | }, 99 | "execution_count": 105, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "import pandas as pd\n", 106 | "import tensorflow as tf\n", 107 | "print (tf.__version__)\n", 108 | "column=[\"a\",\"b\",\"c\",\"d\",\"t\"]\n", 109 | "#column=[\"花萼长度\",\"花萼宽度\",\"花瓣长度\",\"花瓣宽度\",\"花的种类\"]\n", 110 | "data_train=pd.read_csv('alldata/iris_training.csv',names=column,header=0)\n", 111 | "data_test=pd.read_csv('alldata/iris_test.csv',names=column,header=0)\n", 112 | "print data_train.shape\n", 113 | "print data_test.shape\n", 114 | "data_train.head()" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 89, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "0 2\n", 126 | "1 1\n", 127 | "2 2\n", 128 | "3 0\n", 129 | "4 0\n", 130 | "Name: t, dtype: int64" 131 | ] 132 | }, 133 | "execution_count": 89, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "train_x, train_y = data_train, data_train.pop('t')\n", 140 | "test_x, test_y = data_test, data_test.pop('t')\n", 141 | "train_x.shape\n", 142 | "train_y.head()\n", 143 | "#train_x.head()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 90, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "[_NumericColumn(key='a', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='b', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='c', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='d', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "my_feature_columns=[]\n", 161 | "for key in train_x.keys():\n", 162 | " my_feature_columns.append(tf.feature_column.numeric_column(key=key))\n", 163 | "print(my_feature_columns)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 91, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "INFO:tensorflow:Using default config.\n", 176 | "WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmp3sxmlI\n", 177 | "INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_cluster_spec': , '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_device_fn': None, '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': '/tmp/tmp3sxmlI', '_train_distribute': None, '_save_summary_steps': 100}\n" 178 | ] 179 | } 180 | ], 181 | "source": [ 182 | "classifier = tf.estimator.DNNClassifier(\n", 183 | " # 这个模型接受哪些输入的特征\n", 184 | " feature_columns=my_feature_columns,\n", 185 | " # 包含两个隐藏层,每个隐藏层包含10个神经元.\n", 186 | " hidden_units=[10, 10],\n", 187 | " # 最终结果要分成的几类\n", 188 | " n_classes=3)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 141, 194 | "metadata": {}, 195 | 
"outputs": [], 196 | "source": [ 197 | "def train_func(train_x,train_y):\n", 198 | " dataset=tf.data.Dataset.from_tensor_slices((dict(train_x), train_y))\n", 199 | " dataset = dataset.shuffle(200).repeat().batch(1)\n", 200 | " return dataset" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 142, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "INFO:tensorflow:Calling model_fn.\n", 213 | "INFO:tensorflow:Done calling model_fn.\n", 214 | "INFO:tensorflow:Create CheckpointSaverHook.\n", 215 | "INFO:tensorflow:Graph was finalized.\n", 216 | "INFO:tensorflow:Restoring parameters from /tmp/tmp3sxmlI/model.ckpt-13000\n", 217 | "INFO:tensorflow:Running local_init_op.\n", 218 | "INFO:tensorflow:Done running local_init_op.\n", 219 | "INFO:tensorflow:Saving checkpoints for 13000 into /tmp/tmp3sxmlI/model.ckpt.\n", 220 | "INFO:tensorflow:loss = 0.0040664137, step = 13000\n", 221 | "INFO:tensorflow:global_step/sec: 662.445\n", 222 | "INFO:tensorflow:loss = 0.005831965, step = 13100 (0.153 sec)\n", 223 | "INFO:tensorflow:global_step/sec: 877.301\n", 224 | "INFO:tensorflow:loss = 0.00026008085, step = 13200 (0.114 sec)\n", 225 | "INFO:tensorflow:global_step/sec: 854.394\n", 226 | "INFO:tensorflow:loss = 0.0004749362, step = 13300 (0.117 sec)\n", 227 | "INFO:tensorflow:global_step/sec: 825.559\n", 228 | "INFO:tensorflow:loss = 0.00013386307, step = 13400 (0.121 sec)\n", 229 | "INFO:tensorflow:global_step/sec: 803.645\n", 230 | "INFO:tensorflow:loss = 0.17415029, step = 13500 (0.124 sec)\n", 231 | "INFO:tensorflow:global_step/sec: 788.668\n", 232 | "INFO:tensorflow:loss = 2.3722367e-05, step = 13600 (0.127 sec)\n", 233 | "INFO:tensorflow:global_step/sec: 870.444\n", 234 | "INFO:tensorflow:loss = 6.413254e-05, step = 13700 (0.115 sec)\n", 235 | "INFO:tensorflow:global_step/sec: 743.583\n", 236 | "INFO:tensorflow:loss = 0.0058170315, step = 13800 (0.134 sec)\n", 237 | "INFO:tensorflow:global_step/sec: 792.38\n", 238 | "INFO:tensorflow:loss = 3.373566e-05, step = 13900 (0.127 sec)\n", 239 | "INFO:tensorflow:Saving checkpoints for 14000 into /tmp/tmp3sxmlI/model.ckpt.\n", 240 | "INFO:tensorflow:Loss for final step: 0.00046993178.\n" 241 | ] 242 | }, 243 | { 244 | "data": { 245 | "text/plain": [ 246 | "" 247 | ] 248 | }, 249 | "execution_count": 142, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "classifier.train(input_fn=lambda:train_func(train_x,train_y),steps=1000)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 143, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "def eval_input_fn(features, labels, batch_size):\n", 265 | " features=dict(features)\n", 266 | " if labels is None:\n", 267 | " # No labels, use only features.\n", 268 | " inputs = features\n", 269 | " else:\n", 270 | " inputs = (features, labels)\n", 271 | " dataset = tf.data.Dataset.from_tensor_slices(inputs)\n", 272 | " \n", 273 | " assert batch_size is not None, \"batch_size must not be None\"\n", 274 | " dataset = dataset.batch(batch_size)\n", 275 | " return dataset" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 146, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "name": "stdout", 285 | "output_type": "stream", 286 | "text": [ 287 | "INFO:tensorflow:Calling model_fn.\n", 288 | "INFO:tensorflow:Done calling model_fn.\n", 289 | "INFO:tensorflow:Graph was finalized.\n", 290 | 
"INFO:tensorflow:Restoring parameters from /tmp/tmp3sxmlI/model.ckpt-14000\n", 291 | "INFO:tensorflow:Running local_init_op.\n", 292 | "INFO:tensorflow:Done running local_init_op.\n", 293 | "准确率为 96.6666666667\n" 294 | ] 295 | } 296 | ], 297 | "source": [ 298 | "from __future__ import division\n", 299 | "predict_arr = []\n", 300 | "predictions = classifier.predict(\n", 301 | " input_fn=lambda:eval_input_fn(test_x,labels=test_y,batch_size=100))\n", 302 | "for predict in predictions:\n", 303 | " predict_arr.append(predict['probabilities'].argmax())\n", 304 | "result = predict_arr == test_y\n", 305 | "result1 = [w for w in result if w == True]\n", 306 | "print(\"准确率为 %s\"%str((len(result1)/len(result)*100)))\n" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [] 315 | } 316 | ], 317 | "metadata": { 318 | "kernelspec": { 319 | "display_name": "Python 2", 320 | "language": "python", 321 | "name": "python2" 322 | }, 323 | "language_info": { 324 | "codemirror_mode": { 325 | "name": "ipython", 326 | "version": 2 327 | }, 328 | "file_extension": ".py", 329 | "mimetype": "text/x-python", 330 | "name": "python", 331 | "nbconvert_exporter": "python", 332 | "pygments_lexer": "ipython2", 333 | "version": "2.7.12" 334 | } 335 | }, 336 | "nbformat": 4, 337 | "nbformat_minor": 2 338 | } 339 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FundamentalsOfAI_book_code 2 | 本代码是 人工智能基础 (高中版)的 代码 ,本代码仓库已经废弃 3 | 4 | 请关注 https://gitee.com/koryako/FundamentalsOfAI_book_code 5 | 6 | ### 重大事件 7 | 8 | 2012年,提出了一个深度网络的分布并行训练的框架DistBelief,并在该框架中发现了一些有效的分布优化策略(参加论文:Large ScaleDistributed Deep Networks)。提出了一种非监督的猫脸识别模型(参见论文:BuildingHigh-level Features Using Large Scale Unsupervised Learning)。 9 | 10 | 2013年,开源了词向量(word2vec)模型。(参见论文:Distributed Representationsof Words and Phrases and their Compositionality) 11 | 12 | 2014年,提出了序列到序列的神经网络学习(sequence to sequence learning)(参见论文:Sequence toSequence Learning with Neural Networks),也可用于生成图片标题(参见论文:Show andTell: A Neural Image Caption Generator)。 13 | 14 | 2015年,开源了Inception,一种新的图像分类卷积神经网络模型(参见链接:https://github.com/tensorflow/models/blob/master/slim/README.md)。Deep Dream(参见链接:https://deepdreamgenerator.com/generator-style)。开源了TensorFlow(参加链接:https://www.tensorflow.org/)。 15 | 16 | 2016年,提出了神经网络机器翻译系统(neural translation)(参见论文:Google’s Neural Machine Translation System: Bridging the Gap betweenHuman and Machine Translation)。医学图像的研究(Development and Validation of a Deep Learning Algorithm forDetection of Diabetic Retinopathy in Retinal Fundus Photographs)。神经架构搜索(ArchitectureSearch) 17 | 18 | ### 基础知识 19 | 20 | 1. 2019哥大应用机器学习课程 [课程主页](https://www.cs.columbia.edu/~amueller/comsw4995s19/schedule/)|[GitHub地址](https://github.com/amueller/COMS4995-s19)|[PPT获取地址](https://amueller.github.io/COMS4995-s19/slides/)|[视频油管原版](https://www.youtube.com/playlist?list=PL_pVmAaAnxIQGzQS2oI3OWEPT-dpmwTfA)|[B站](https://www.bilibili.com/video/av41655158/) 21 | 22 | 2. [认识深度学习](http://www.cnblogs.com/Leo_wl/p/5852010.html) 23 | 24 | 25 | ### 目录 26 | 27 | - 鸢尾花分类 28 | - 中文文本分类 29 | - 手写mnist 分类 30 | - 强化学习ddpg 31 | - 语音助理 32 | - 条件对抗网络cgan 33 | 34 | ### 基础 35 | 1. 鸢尾花分类 (tensorflow 1.10.1) 36 | 2. 
46 | 
47 | 3. Handwritten MNIST classification (tensorflow 1.12.0)
48 | 
49 | 4. Reinforcement learning, DDPG (soft-update sketch below): [DDPG theory and algorithm](https://blog.csdn.net/kenneth_yu/article/details/78478356)|[code]( https://github.com/princewen/tensorflow_practice/blob/master/RL/Basic-DDPG/DDPG.py)|[the math behind PPO](https://mp.weixin.qq.com/s?__biz=MjM5ODU3OTIyOA==&mid=2650675168&idx=1&sn=739bf1e2f7d72494469620d9216b4dd7&chksm=bec22a9389b5a385dc20b695eb26cbad62239f0b14be0bcfcf8cb0c24a95913cb7cae1cf1460&mpshare=1&scene=1&srcid=#rd])|[PPO code](https://github.com/simoninithomas/Deep_reinforcement_learning_Course/blob/master/PPO%20with%20Sonic%20the%20Hedgehog/model.py)| [PPO playing Sonic the Hedgehog](https://github.com/simoninithomas/Deep_reinforcement_learning_Course/tree/master/PPO%20with%20Sonic%20the%20Hedgehog)|[PPO source-code walkthrough](https://blog.csdn.net/jinzhuojun/article/details/80417179)| Open Source Python | [OpenAI Five network architecture](https://d4mucfpksywv.cloudfront.net/research-covers/openai-five/network-architecture.pdf)
50 | 
51 | 
52 | - Game Agent Framework serpent.ai
53 | 
54 | ![DDPG architecture](https://github.com/koryako/FundamentalsOfAI_book_code/raw/master/img/ddpg.jpeg)
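The DDPG tutorial linked above hinges on target networks that slowly trail the online networks. A hedged sketch of that soft update only (TF 1.x graph style, matching the repo's other scripts; the variable lists are placeholders):

```python
import tensorflow as tf

TAU = 0.01  # soft-update rate

def soft_update_ops(online_vars, target_vars, tau=TAU):
    # theta_target <- tau * theta_online + (1 - tau) * theta_target
    return [t.assign(tau * o + (1 - tau) * t)
            for o, t in zip(online_vars, target_vars)]
```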
55 | 
56 | ----
57 | 5. Voice assistant
58 | 
59 | Speech recognition (a popular open-source [demo](https://github.com/nl8590687/ASRT_SpeechRecognition)) --- chatbot --- WaveNet synthesis
60 | 
61 | [Baidu's GAN-based framework for speech recognition](https://mp.weixin.qq.com/s?__biz=MzA3MzI4MjgzMw==&mid=2650733110&idx=4&sn=e8636eed546644cf7b14d64386d4a0bf&chksm=871b3e48b06cb75efe377b1e1254335dc0b5280e464cceaea93fa86652e9982d43350cbba414&mpshare=1&scene=1&srcid=11136d4rKoVwd8R09nc9wZJ7#rd]) | [reference code](https://github.com/wangkenpu/rsrgan) | [other open-source software](https://github.com/mozilla/DeepSpeech ) [installation guide](https://blog.csdn.net/weixin_36071429/article/details/86554065) |
62 | [Google releases its most accurate commercial end-to-end speech recognition system yet: word error rate down to 5.6%, a 16% improvement](https://mp.weixin.qq.com/s?__biz=MzI3MTA0MTk1MA==&mid=2652010075&idx=5&sn=41f41f26079063dd065e47d77ce38616&chksm=f12102aac6568bbc8c4ff43ee9f8df797606d21621efe211c5c14a5dbb9bb28ec87aea1fe3bb&mpshare=1&scene=1&srcid=0107DJlvMowSKl1zJ4bhiMC4#rd])
63 | | [speech recognition datasets](https://mp.weixin.qq.com/s?__biz=MzA3MzI4MjgzMw==&mid=2650740583&idx=2&sn=9687798dbfe1375a4342125cdee28224&chksm=871ad319b06d5a0fa58fa24859ab0ab8ba5b8d300c615119c4bab6a8d4ddc6a73d379da7a1c3&mpshare=1&scene=1&srcid=0411p52u2If9ANJhiwyzJpjq#rd]) |
64 | [Two new explorations of end-to-end speech recognition networks](https://mp.weixin.qq.com/s?__biz=MzA5ODEzMjIyMA==&mid=2247496219&idx=3&sn=db61635b606bfb4031c0f45d28a1a57d&source=41#wechat_redirect)
65 | 
66 | 
67 | [Mining Meteor Garden subtitles: collecting a speech recognition dataset the easy way](https://mp.weixin.qq.com/s?__biz=MzI1NjQ0Mzc1Mw==&mid=2247497191&idx=1&sn=43779a7ceb3ffb2d8c5e4d38152e5c79&source=41#wechat_redirect) |
68 | [CTC code](https://github.com/thewintersun/tensorflowbook/tree/master/Chapter6/asr_lstm_ctc) | [Speech recognition with RNNs](https://mp.weixin.qq.com/s?__biz=MzI1NjQ0Mzc1Mw==&mid=2247497581&idx=2&sn=4564117e3f7a7a911e22c209d66dd4c3&source=41#wechat_redirect) | [speech data augmentation](https://github.com/jiaaro/pydub) (see the sketch below) | [TensorFlow speech recognition and Python audio libraries](https://blog.csdn.net/u012436149/article/details/71179314)
69 | 
70 | https://github.com/KeithYin/simple_speech_recog
71 | 
72 | https://www.tensorflow.org/api_docs/python/tf/nn/ctc_loss
73 | https://www.tensorflow.org/api_docs/python/tf/nn/ctc_greedy_decoder
74 | https://www.tensorflow.org/api_docs/python/tf/nn/ctc_beam_search_decoder
75 | 
76 | http://stackoverflow.com/questions/38059247/using-tensorflows-connectionist-temporal-classification-ctc-implementation
77 | https://www.tensorflow.org/versions/r0.10/api_docs/python/nn/conectionist_temporal_classification__ctc_
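The "speech data augmentation" pointer above is pydub. A small sketch of the kind of augmentation it enables (file names are placeholders, and `speedup` is assumed available as a pydub effect in the installed version):

```python
from pydub import AudioSegment

seg = AudioSegment.from_wav('audio.wav')
louder = seg + 6                            # +6 dB gain
faster = seg.speedup(playback_speed=1.25)   # mild time compression
louder.export('audio_loud.wav', format='wav')
faster.export('audio_fast.wav', format='wav')
```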
78 | 
79 | 
80 | 
81 | ----
82 | 
83 | 6. CGAN
84 | 
85 | [Generate an image from a single sentence](https://mp.weixin.qq.com/s?__biz=MzIzNjc1NzUzMw==&mid=2247497045&idx=2&sn=0ee27b910c192f9fc91dc0c790c8f68d&chksm=e8d04227dfa7cb31fe81cdca28e8ed7679bfbdd1b228c88382735c3e40ebd5da0fed1b1bc0d6&mpshare=1&scene=1&srcid=04168ECGz3iJLeBsmc9jA2Ua#rd])
86 | 
87 | WGAN: Wasserstein GAN. Martin Arjovsky, Soumith Chintala, and Léon Bottou. 2017.03.09
88 | 
89 | [Anime Characters Creation](https://arxiv.org/abs/1708.05509)
90 | 
91 | https://makegirlsmoe.github.io/assets/pdf/technical_report.pdf
92 | 
93 | 
94 | 
95 | [CGAN](https://blog.csdn.net/stalbo/article/details/79359380)
96 | 
97 | [Generating anime characters with a variational autoencoder (VAE)](https://mp.weixin.qq.com/s?__biz=MzA3MzI4MjgzMw==&mid=2650740857&idx=3&sn=d77b4f1231c2a0308e61b88109530631&chksm=871adc07b06d5511d9231a50d0457053f3b17f0d1a4290796f3dc2cf47fd98a61fabcc90596d&mpshare=1&scene=1&srcid=0416V1uf2VzKNiDfYoyo8E7K#rd])
98 | 
99 | 
100 | Imagine This! Scripts to Compositions to Videos
101 | https://arxiv.org/abs/1804.03608
102 | 
103 | 
104 | InfoGAN: Interpretable Representation Learning by Information Maximizing Generative Adversarial Nets. Xi Chen, Yan Duan, Rein Houthooft, John Schulman. 2016.06.12
105 | 
106 | SeqGAN: Sequence Generative Adversarial Nets with Policy Gradient. Lantao Yu, Weinan Zhang, Jun Wang, Yong Yu. 2016.12.09
107 | 
108 | StackGAN: Text to Photo-realistic Image Synthesis with Stacked Generative Adversarial Networks. Han Zhang, Tao Xu, Hongsheng Li, Shaoting Zhang. 2016.12.10
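What the conditional models in this section share is the conditioning trick itself: the generator consumes the noise vector concatenated with a label representation. A minimal Keras sketch of that idea, illustrative only; all dimensions are placeholders, not the settings of cgan-tf.py:

```python
from tensorflow.keras import layers, Model

def build_generator(z_dim=100, n_classes=10, img_dim=28 * 28):
    z = layers.Input(shape=(z_dim,))
    label = layers.Input(shape=(1,), dtype='int32')
    # embed the class label and concatenate it with the noise vector
    y = layers.Flatten()(layers.Embedding(n_classes, z_dim)(label))
    h = layers.Concatenate()([z, y])
    h = layers.Dense(256, activation='relu')(h)
    img = layers.Dense(img_dim, activation='tanh')(h)
    return Model([z, label], img)
```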
109 | 
110 | ----
111 | 
112 | ### Advanced applications
113 | 
114 | 
115 | 1. Automatic motion generation: [getting to know GANs](https://www.jianshu.com/p/5dc2486c70cf)|[code](https://github.com/nyoki-mtl/pytorch-EverybodyDanceNow)
116 | 
117 | It combines the following techniques:
118 | 
119 | - Step 1: CGAN [notes](https://blog.csdn.net/zhl493722771/article/details/82781988)
120 | Related projects: [animeGAN](https://github.com/jayleicn/animeGAN)|[IllustrationGAN](https://github.com/tdrussell/IllustrationGAN)|[Variational-Auto-Encoder](https://github.com/wuga214/IMPLEMENTATION_Variational-Auto-Encoder)|[sngan](https://github.com/pfnet-research/sngan_projection)|[GirlsManifold](https://github.com/shaform/GirlsManifold)|[further reading](https://github.com/nashory/gans-awesome-applications)| lbpcascade_animeface | confusion matrix
121 | 
122 | 
123 | - Step 2: 3D face reconstruction and alignment
124 | VoxCeleb2 https://gvv.mpi-inf.mpg.de/projects/FML19/paper.pdf
125 | DeepSDF: Learning Continuous Signed Distance Functions for Shape Representation https://arxiv.org/abs/1901.05103
126 | ShapeNet dataset
127 | face3d, a toolkit for 3D faces
128 | [From 3D reconstruction to human pose estimation](https://mp.weixin.qq.com/s?__biz=MzA3MzI4MjgzMw==&mid=2650734471&idx=2&sn=5e1e47847ceb48cb7b5482bef9b96c3a&chksm=871b3bf9b06cb2ef1fa8e17885334d56648103615c39bae92c3bb711ebbaf9facc8c1f8a09a2&mpshare=1&scene=1&srcid=1219fpAfjA9nOFbOf8TIQZ7Q#rd])
129 | 
130 | - Step 3: face swapping
131 | 
132 | - Step 4: [super-resolution](https://mp.weixin.qq.com/s?__biz=MzA3MzI4MjgzMw==&mid=2650734197&idx=2&sn=4e205aac262da52978e2286f5bde649c&chksm=871b3a0bb06cb31d3ad73908c98d2b6ff68eef3d47e7bd96ab958b6b07159e7b1d10f8da7db5&scene=21#wechat_redirect) demo [EDSR-Pytorch](https://www.pytorchtutorial.com/cvpr-ntire-2018-image-super-resolution/)
133 | 
134 | 
135 | vid2vidHD + motion keypoint sequences
136 | pix2pixHD
137 | 
138 | PSGAN, a progressive GAN: Full-body High-resolution Anime Generation with Progressive Structure-conditional Generative Adversarial Networks
139 | 
140 | ----
141 | 2. Code generation
142 | https://www.jianshu.com/p/3ed01284473c
143 | The first is PixCo, the public dataset released with the pix2code paper;
144 | https://github.com/roertech/open_cv_ui2code
145 | the second is our own dataset, PixCo-e.
146 | https://github.com/roertech/UI2code
147 | https://arxiv.org/pdf/1810.11536.pdf
148 | https://github.com/floydhub/pix2code-template
149 | https://github.com/tonybeltramelli/pix2code
150 | https://github.com/fjbriones/pix2code2
151 | https://github.com/HiroakiMikami/deep-coder
152 | https://github.com/dkamm/deepcoder
153 | https://github.com/water-vapor/DeepCoder-tensorflow
154 | https://github.com/ashnkumar/sketch-code (important)
155 | https://github.com/pranayranjan/MobilenetV2_SSD_Keras
156 | https://github.com/xiaochus/MobileNetV2
157 | https://github.com/microsoft/ailab
158 | https://github.com/microsoft/ailab/tree/master/Sketch2Code
159 | https://github.com/bruceyang2012/Face-detection-with-mobilenet-ssd
160 | https://github.com/tensorflow/models/tree/master/research/object_detection (official TensorFlow)
161 | https://www.jianshu.com/p/1cf3b543afff?utm_source=oschina-app MobileNet v2 in Python 3 & Keras
162 | https://github.com/keras-team/keras-applications/blob/master/keras_applications/mobilenet.py (official Keras)
163 | 
164 | ----
165 | 
166 | 
167 | 5. Reading comprehension (QA)
168 | 
169 | - [dataset](https://ai.google.com/research/NaturalQuestions)
170 | 
171 | ReasoNet: Learning to Stop Reading in Machine Comprehension https://arxiv.org/pdf/1609.05284.pdf
172 | https://arxiv.org/pdf/1609.05284.pdf
173 | 
174 | 6. Crowd monitoring
175 | 
176 | - [paper: multi-scale CNN for crowd counting](https://github.com/xiaochus/mscnn )
177 | 
178 | ----
179 | 7. Attention and the transformer
180 | 
181 | Base papers: Recurrent Models of Visual Attention;
182 | Neural Machine Translation by Jointly Learning to Align and Translate
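The transformer generalizes the attention in these base papers into scaled dot-product attention, Attention(Q, K, V) = softmax(Q K^T / sqrt(d_k)) V. A minimal NumPy sketch:

```python
import numpy as np

def scaled_dot_product_attention(Q, K, V):
    d_k = Q.shape[-1]
    scores = Q @ K.T / np.sqrt(d_k)               # query-key similarity
    scores -= scores.max(axis=-1, keepdims=True)  # numerical stability
    weights = np.exp(scores)
    weights /= weights.sum(axis=-1, keepdims=True)  # row-wise softmax
    return weights @ V                            # weighted sum of values
```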
183 | 
184 | 8. Dynamic route planning
185 | [graph networks](https://www.jiqizhixin.com/articles/2018-11-27-12)
186 | 
187 | 
188 | ------
189 | 
190 | 
191 | 10. Autonomous driving
192 | [object detection](https://www.pytorchtutorial.com/pytorch-retinanet/)
193 | 
194 | CARLA data
195 | object distance estimation + SCNN lane detection + PPO + CMA-ES
196 | ------
197 | 
198 | 11. [Multi-objective reinforcement learning](https://mp.weixin.qq.com/s?__biz=MzA5MDMwMTIyNQ==&mid=2649292846&idx=7&sn=2095809575e572a2ceb7595cde9051ed&chksm=88101668bf679f7e3f77faedacb1c1769815e84a04012f001e8b9f654c4ba83cd23fa45ea322&mpshare=1&scene=1&srcid=021178yNuAeZsakgbulkimqP#rd])
199 | 
200 | - [50 papers](https://mp.weixin.qq.com/s?__biz=MjM5ODU3OTIyOA==&mid=2650670171&idx=1&sn=de8a8fb17d35d302365aff34283539ed&chksm=bec2392889b5b03e784c331150cfc95d88532f7c7f3c8699e003bca576ba25ad4d2a4014ca57&mpshare=1&scene=1&srcid=0313YpnaZbyS1ooggodFdM5E#rd])
201 | 
202 | ------
203 | 12. Meta-learning
204 | 
205 | https://github.com/openai/supervised-reptile
206 | https://github.com/floodsung/Meta-Learning-Papers
207 | 
208 | - Paper: https://arxiv.org/abs/1703.03400
209 | 
210 | Code: https://github.com/cbfinn/maml
211 | 
212 | - https://github.com/openai/mlsh
213 | 
214 | - https://d4mucfpksywv.cloudfront.net/research-covers/reptile/reptile_update.pdf (Reptile; see the sketch at the end of this section)
215 | https://github.com/openai/supervised-reptile
216 | 
217 | - https://github.com/brendenlake/omniglot
218 | 
219 | 
220 | 
221 | - OPTIMIZATION AS A MODEL FOR FEW-SHOT LEARNING, which uses an LSTM to perform the meta-parameter updates
222 | https://github.com/markdtw/meta-learning-lstm-pytorch
223 | https://github.com/gitabcworld/FewShotLearning
224 | 
225 | 
226 | - EPG, which relates to earlier work on designing suitable reward functions for RL agents (e.g., Genetic Programming for Reward Function Search)
227 | Paper: https://storage.googleapis.com/epg-blog-data/epg_2.pdf
228 | 
229 | Code: https://github.com/openai/EPG
230 | 
231 | 
232 | Paper: On the Convergence of Adam and Beyond
233 | 
234 | Paper link: https://openreview.net/pdf?id=ryQu7f-RZ
235 | 
236 | Synced (机器之心) article: Beyond Adam, reading a high-scoring ICLR 2018 paper through the family of adaptive learning rates
237 | 
238 | 
239 | 
240 | Paper: Spherical CNNs
241 | 
242 | Paper link: https://openreview.net/pdf?id=Hkbd5xZRb
243 | 
244 | Synced (机器之心) article: ICLR 2018 | the University of Amsterdam's Spherical CNNs, usable for 3D model recognition and atomization-energy regression
245 | 
246 | 
247 | 
248 | Paper: Continuous Adaptation via Meta-Learning in Nonstationary and Competitive Environments
249 | 
250 | Paper link: https://openreview.net/pdf?id=Sk2u1g-0-
251 | 
252 | Meta- and Few-shot Learning
253 | https://mp.weixin.qq.com/s?__biz=MzAwMjM3MTc5OA==&mid=2652692454&idx=1&sn=727f1dab3e0392ed480feba78a001f41&chksm=8123065fb6548f499bcaa4dc99915a9bc34ccfe284d8086108754007808091cafd83560c14fa&mpshare=1&scene=1&srcid=0727GmK03hE3dnyld4jYfgub#rd]
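The Reptile report linked in this section reduces meta-learning to a strikingly simple outer loop: run a few SGD steps on a sampled task to get adapted weights phi, then nudge the shared initialization theta toward phi. A NumPy sketch of that outer update only (not the repo's code; the step size epsilon is a placeholder):

```python
import numpy as np

def reptile_outer_step(theta, phi, epsilon=0.1):
    # theta <- theta + epsilon * (phi - theta)
    return theta + epsilon * (phi - theta)
```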
254 | 
255 | ------
256 | 
257 | 14. Spiking neural networks
258 | 
259 | http://papers.nips.cc/paper/7359-long-short-term-memory-and-learning-to-learn-in-networks-of-spiking-neurons
260 | 
261 | ------
262 | 
263 | 15. Person re-identification
264 | 
265 | 
266 | Commonly used large-scale datasets:
267 | 
268 | DukeMTMC-reID
269 | 
270 | Collected at Duke University from 8 different cameras. The dataset ships with a training set of 16,522 images and a test set of 17,661 images. The training data covers 702 identities, an average of 23.5 training images per identity. It is currently the largest person re-ID dataset, and it also provides pedestrian attribute annotations (gender, sleeve length, backpack, etc.).
271 | 
272 | Market-1501
273 | 
274 | Collected on the Tsinghua University campus from 6 different cameras, one of them low-resolution. It likewise provides a training set (12,936 images) and a test set (19,732 images). Images were detected and cropped automatically by a detector, so they contain some detection errors (close to real-world usage). The training data covers 751 identities and the test set 750, so the training set averages 17.2 images per identity.
275 | 
276 | CUHK03
277 | 
278 | Collected at the Chinese University of Hong Kong from 2 different cameras. It provides both machine-detected and hand-labeled bounding boxes; the detected version contains some detection errors and is closer to practical conditions. There are on average 9.6 training images per identity.
279 | 
280 | 
281 | 
282 | 
283 | https://blog.csdn.net/chanmufeng/article/details/82955730
284 | 
285 | 
286 | 
287 | 
288 | ------
289 | 17. Captcha generation
290 | 
291 | https://github.com/machine-lv
292 | 
293 | https://github.com/machine-lv/Production-verification-code
294 | 
295 | 
296 | GPT-2 transformer
297 | 
298 | openai/blog/musenet
299 | 
300 | microsoft/neuron block
301 | 
302 | ------
303 | 18. Video tracking
304 | 
305 | https://github.com/foolwood/SiamMask
306 | https://github.com/PengBoXiangShang/SiamRPN_plus_plus_PyTorch
307 | 
308 | 
309 | 
310 | 19. Video simulators
311 | 
312 | https://nv-tlabs.github.io/meta-sim/
313 | 
314 | 30. Adversarial attacks
315 | 
316 | https://gitlab.com/EAVISE/adversarial-yolo
317 | 
318 | 20. Pruning
319 | 
320 | https://github.com/google-research/lottery-ticket-hypothesis
321 | 
322 | 21. AutoML building blocks
323 | 
324 | RMSProp
325 | factorized 7x7 convolutions
326 | BatchNorm
327 | label smoothing regularization
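Of these building blocks, label smoothing is the one-liner worth spelling out: mix the one-hot target with a uniform distribution so the model is never pushed to drive a logit to infinity. A sketch (eps=0.1 is the value the Inception paper popularized):

```python
import numpy as np

def smooth_labels(one_hot, eps=0.1):
    num_classes = one_hot.shape[-1]
    # (1 - eps) weight on the true class, eps spread uniformly over all classes
    return one_hot * (1.0 - eps) + eps / num_classes
```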
328 | 
329 | 
330 | 
331 | 
332 | 23. Unity3D avatars and skeletal animation
333 | 
334 | 24. Open-source recommendation frameworks
335 | 
336 | https://github.com/alibaba/x-deeplearning.git
337 | 
338 | 
339 | ------
340 | 
341 | 25. 3D reconstruction
342 | 
343 | 1. Hierarchical Surface Prediction for 3D Object Reconstruction (Christian Häne et al.) https://arxiv.org/pdf/1704.00710.pdf
344 | 
345 | 2. 3D-R2N2: A Unified Approach for Single and Multi-view 3D Object Reconstruction (Choy et al.) https://arxiv.org/pdf/1604.00449.pdf
346 | 
347 | 3. Learning a Predictable and Generative Vector Representation for Objects (Girdhar et al.) https://arxiv.org/pdf/1603.08637.pdf
348 | 
349 | 4. ShapeNet: An Information-Rich 3D Model Repository (the ShapeNet dataset paper) https://arxiv.org/pdf/1512.03012.pdf
350 | 
351 | 
352 | Multi-view Supervision for Single-view Reconstruction via Differentiable Ray Consistency. S. Tulsiani, T. Zhou, A. A. Efros, J. Malik. In CVPR, 2017: https://shubhtuls.github.io/drc/
353 | 
354 | 
355 | 
356 | Unsupervised Learning of Depth and Ego-Motion from Video. T. Zhou, M. Brown, N. Snavely, D. Lowe. In CVPR, 2017: https://people.eecs.berkeley.edu/~tinghuiz/projects/SfMLearner/
357 | 
358 | 
359 | 
360 | Other recent multi-view 3D-supervised prediction methods:
361 | 
362 | 
363 | 
364 | Unsupervised CNN for Single View Depth Estimation: Geometry to the Rescue. R. Garg, B. G. Vijay Kumar, G. Carneiro, I. Reid. In ECCV, 2016: https://arxiv.org/abs/1603.04992
365 | 
366 | 
367 | 
368 | Perspective Transformer Nets: Learning Single-View 3D Object Reconstruction without 3D Supervision. X. Yan, J. Yang, E. Yumer, Y. Guo, H. Lee. In NIPS, 2016: https://sites.google.com/site/skywalkeryxc/perspective_transformer_nets
369 | 
370 | 
371 | 
372 | Unsupervised Learning of 3D Structure from Images. D. J. Rezende, S. M. Ali Eslami, S. Mohamed, P. Battaglia, M. Jaderberg, N. Heess. In NIPS, 2016: https://arxiv.org/abs/1607.00662
373 | 
374 | 
375 | 
376 | 3D Shape Induction from 2D Views of Multiple Objects. M. Gadelha, S. Maji, R. Wang. arXiv preprint, 2016: http://mgadelha.me/home/prgan/index.html
377 | 
378 | 
379 | 
380 | Unsupervised Monocular Depth Estimation with Left-Right Consistency. C. Godard, O. M. Aodha, G. J. Brostow. In CVPR, 2017: http://visual.cs.ucl.ac.uk/pubs/monoDepth/
381 | 
382 | Notes on 3D object tracking (1): edge-based 3D object tracking
383 | https://blog.csdn.net/kevin_cc98/article/details/79582906
384 | 
385 | 
386 | Computing the rotation matrix R and translation matrix T with OpenCV: https://blog.csdn.net/u014679795/article/details/53467264
387 | 
388 | 
389 | ### Additional capabilities
390 | 
391 | [Deploying a model on mobile (Android)](https://mp.weixin.qq.com/s?__biz=MzA3MzI4MjgzMw==&mid=2650745469&idx=3&sn=1e6ce5f47c3cc254412ad929015b0d91&chksm=871aee03b06d6715ef5b94c07fa22ca81ffd11b1bc99036e694531de11e6e26b1c8162344e80&mpshare=1&scene=1&srcid=0720XGO5XjTYP9ao1Hq3ZTG1#rd])
392 | 
393 | [Google's algorithm hub, with code and examples](https://aihub.cloud.google.com/)
394 | 
395 | [Tutorial: 3D reconstruction with an iPhone camera and OpenCV (part 1)](https://mp.weixin.qq.com/s?__biz=MjM5ODU3OTIyOA==&mid=2650674986&idx=1&sn=ac5dd2543df4acedf2bb0b54f5b0968f&chksm=bec22a5989b5a34f677f4e80853c9395af089c88e71f1332678f711a54d5b6feb8a8f9df98c0&mpshare=1&scene=1&srcid=0119ip9MkiWgfDMxMX99VDrX#rd])
396 | 
397 | 
398 | [A new deep learning application: creating a 3D model from a single 2D image in PyTorch](https://mp.weixin.qq.com/s?__biz=MzA3MzI4MjgzMw==&mid=2650755443&idx=5&sn=9383d92eb44d098f06e0a7e55d62df13&chksm=871a950db06d1c1be9c2d669029a37f62e57b318a6984e1bc9f41294a1ca8c068a55182b0d0b&mpshare=1&scene=1&srcid=0111BvClY9Cyiz0ekn34J2q5#rd])
399 | 
400 | 
401 | [Inferring scene structure from a single image: UC Berkeley's joint 3D depth learning method](https://mp.weixin.qq.com/s?__biz=MzA3MzI4MjgzMw==&mid=2650728901&idx=4&sn=ba4b89646fbbb143ae08744f084ed428&chksm=871b2dbbb06ca4ad54096f29636be4d2ebbc8e2004b69d0aa7c063380d524a057ec682d82527&mpshare=1&scene=1&srcid=0718WC0s1syBxYjWBUIuePp1#rd])
402 | 
403 | [Face recognition in one line of code, step by step](https://mp.weixin.qq.com/s?__biz=MzA3NDY0MjAyOA==&mid=2247491476&idx=4&sn=064a8959bb5284d828edcce26e5e0797&source=41#wechat_redirect)
404 | 
405 | 
406 | [Running a CNN on a phone: document detection with TensorFlow and OpenCV](https://mp.weixin.qq.com/s?__biz=MzA3MzI4MjgzMw==&mid=2650727250&idx=2&sn=2cedf785cd4c67837c599c5251c8e4ea&chksm=871b272cb06cae3a9d6718392aa6dbd7c6854f730d564b377e64c66fb208da3a681ec75339cd&mpshare=1&scene=1&srcid=06045lSWi3zw78o72JBjYBbE#rd])
407 | 
408 | 
409 | 
410 | [2017 CV technology report: from 3D object reconstruction to human pose estimation](https://mp.weixin.qq.com/s?__biz=MzA3MzI4MjgzMw==&mid=2650734471&idx=2&sn=5e1e47847ceb48cb7b5482bef9b96c3a&chksm=871b3bf9b06cb2ef1fa8e17885334d56648103615c39bae92c3bb711ebbaf9facc8c1f8a09a2&mpshare=1&scene=1&srcid=1219fpAfjA9nOFbOf8TIQZ7Q#rd])
411 | 
412 | 
413 | 27. Distributed training
414 | 
415 | 
416 | [A beginner's guide to distributed TensorFlow: from worked examples to code for multi-machine deep learning](https://mp.weixin.qq.com/s?__biz=MzA3MzI4MjgzMw==&mid=2650734471&idx=1&sn=be4cd4b85ed84f997baf4c88543dc3f4&chksm=871b3bf9b06cb2ef94ea9531ec74fef14b8db5d1996b0cf0c9bd31ca3594ef1f54feaea17109&mpshare=1&scene=1&srcid=1219LLaCfSlLMQtKpDfPivRB#rd])
417 | 
418 | https://github.com/huyingxi/wechaty_selfie (image scoring)
419 | 
420 | 
421 | 
422 | 
--------------------------------------------------------------------------------
/__pycache__/input_data.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koryako/FundamentalsOfAI_book_code/ef5bd1ead892dc07315a0eec56f826263709e676/__pycache__/input_data.cpython-36.pyc
--------------------------------------------------------------------------------
/a2c-tf2.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import logging
3 | import numpy as np
4 | import tensorflow as tf
5 | import matplotlib.pyplot as plt
6 | import tensorflow.keras.layers as kl
7 | import tensorflow.keras.losses as kls
8 | import tensorflow.keras.optimizers as ko
9 | 
10 | 
11 | class ProbabilityDistribution(tf.keras.Model):
12 |     def call(self, logits):
13 |         # sample a random categorical action from given logits
14 |         return tf.squeeze(tf.random.categorical(logits, 1), axis=-1)
15 | 
16 | 
17 | class Model(tf.keras.Model):
18 |     def __init__(self, num_actions):
19 |         super().__init__('mlp_policy')
20 |         # no tf.get_variable(), just simple Keras API
21 |         self.hidden1 = kl.Dense(128, activation='relu')
22 |         self.hidden2 = kl.Dense(128, activation='relu')
23 |         self.value = kl.Dense(1, name='value')
24 |         # logits are unnormalized log probabilities
25 |         self.logits = kl.Dense(num_actions, name='policy_logits')
26 |         self.dist = ProbabilityDistribution()
27 | 
28 |     def call(self, inputs):
29 |         # inputs is a numpy array, convert to Tensor
30 |         x = tf.convert_to_tensor(inputs)
31 |         # separate hidden layers from the same input tensor
32 |         hidden_logs = self.hidden1(x)
33 |         hidden_vals = self.hidden2(x)
34 |         return self.logits(hidden_logs), self.value(hidden_vals)
35 | 
36 |     def action_value(self, obs):
37 |         # executes call() under the hood
38 |         logits, value = self.predict(obs)
39 |         action = self.dist.predict(logits)
40 |         # a simpler option, will become clear later why we don't use it
41 |         # action = tf.random.categorical(logits, 1)
42 |         return np.squeeze(action, axis=-1), np.squeeze(value, axis=-1)
43 | 
44 | 
45 | class A2CAgent:
46 |     def __init__(self, model):
47 |         # hyperparameters for loss terms, gamma is the discount coefficient
48 |         self.params = {
49 |             'gamma': 0.99,
50 |             'value': 0.5,
51 |             'entropy': 0.0001
52 |         }
53 |         self.model = model
54 |         self.model.compile(
55 |             optimizer=ko.RMSprop(lr=0.0007),
56 |             # define separate losses for policy logits and value estimate
57 |             loss=[self._logits_loss, self._value_loss]
58 |         )
59 | 
60 |     def train(self, env, batch_sz=32, updates=1000):
61 |         # storage helpers for a single batch of data
62 |         actions = np.empty((batch_sz,), dtype=np.int32)
63 |         rewards, dones, values = np.empty((3, batch_sz))
64 |         observations = np.empty((batch_sz,) + env.observation_space.shape)
65 |         # training loop: collect samples, send to optimizer, repeat updates times
66 |         ep_rews = [0.0]
67 |         next_obs = env.reset()
68 |         for update in range(updates):
69 |             for step in range(batch_sz):
70 |                 observations[step] = next_obs.copy()
71 |                 actions[step], values[step] = self.model.action_value(next_obs[None, :])
72 | 
next_obs, rewards[step], dones[step], _ = env.step(actions[step]) 73 | 74 | ep_rews[-1] += rewards[step] 75 | if dones[step]: 76 | ep_rews.append(0.0) 77 | next_obs = env.reset() 78 | logging.info("Episode: %03d, Reward: %03d" % (len(ep_rews)-1, ep_rews[-2])) 79 | 80 | _, next_value = self.model.action_value(next_obs[None, :]) 81 | returns, advs = self._returns_advantages(rewards, dones, values, next_value) 82 | # a trick to input actions and advantages through same API 83 | acts_and_advs = np.concatenate([actions[:, None], advs[:, None]], axis=-1) 84 | # performs a full training step on the collected batch 85 | # note: no need to mess around with gradients, Keras API handles it 86 | losses = self.model.train_on_batch(observations, [acts_and_advs, returns]) 87 | logging.debug("[%d/%d] Losses: %s" % (update+1, updates, losses)) 88 | return ep_rews 89 | 90 | def test(self, env, render=False): 91 | obs, done, ep_reward = env.reset(), False, 0 92 | while not done: 93 | action, _ = self.model.action_value(obs[None, :]) 94 | obs, reward, done, _ = env.step(action) 95 | ep_reward += reward 96 | if render: 97 | env.render() 98 | return ep_reward 99 | 100 | def _returns_advantages(self, rewards, dones, values, next_value): 101 | # next_value is the bootstrap value estimate of a future state (the critic) 102 | returns = np.append(np.zeros_like(rewards), next_value, axis=-1) 103 | # returns are calculated as discounted sum of future rewards 104 | for t in reversed(range(rewards.shape[0])): 105 | returns[t] = rewards[t] + self.params['gamma'] * returns[t+1] * (1-dones[t]) 106 | returns = returns[:-1] 107 | # advantages are returns - baseline, value estimates in our case 108 | advantages = returns - values 109 | return returns, advantages 110 | 111 | def _value_loss(self, returns, value): 112 | # value loss is typically MSE between value estimates and returns 113 | return self.params['value']*kls.mean_squared_error(returns, value) 114 | 115 | def _logits_loss(self, acts_and_advs, logits): 116 | # a trick to input actions and advantages through same API 117 | actions, advantages = tf.split(acts_and_advs, 2, axis=-1) 118 | # sparse categorical CE loss obj that supports sample_weight arg on call() 119 | # from_logits argument ensures transformation into normalized probabilities 120 | weighted_sparse_ce = kls.SparseCategoricalCrossentropy(from_logits=True) 121 | # policy loss is defined by policy gradients, weighted by advantages 122 | # note: we only calculate the loss on the actions we've actually taken 123 | actions = tf.cast(actions, tf.int32) 124 | policy_loss = weighted_sparse_ce(actions, logits, sample_weight=advantages) 125 | # entropy loss can be calculated via CE over itself 126 | entropy_loss = kls.categorical_crossentropy(logits, logits, from_logits=True) 127 | # here signs are flipped because optimizer minimizes 128 | return policy_loss - self.params['entropy']*entropy_loss 129 | 130 | 131 | if __name__ == '__main__': 132 | logging.getLogger().setLevel(logging.INFO) 133 | 134 | env = gym.make('CartPole-v0') 135 | model = Model(num_actions=env.action_space.n) 136 | agent = A2CAgent(model) 137 | 138 | rewards_history = agent.train(env) 139 | print("Finished training.") 140 | print("Total Episode Reward: %d out of 200" % agent.test(env, True)) 141 | 142 | plt.style.use('seaborn') 143 | plt.plot(np.arange(0, len(rewards_history), 25), rewards_history[::25]) 144 | plt.xlabel('Episode') 145 | plt.ylabel('Total Reward') 146 | plt.show() 
-------------------------------------------------------------------------------- /a2c.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import gym 4 | import pandas as pd 5 | 6 | OUTPUT_GRAPH = False 7 | MAX_EPISODE = 500 8 | DISPLAY_REWARD_THRESHOLD = 200 # renders environment if total episode reward is greater then this threshold 9 | MAX_EP_STEPS = 2000 # maximum time step in one episode 10 | RENDER = False # rendering wastes time 11 | GAMMA = 0.9 # reward discount in TD error 12 | LR_A = 0.001 # learning rate for actor 13 | LR_C = 0.01 # learning rate for critic 14 | 15 | 16 | 17 | 18 | class Actor(object): 19 | def __init__(self, sess, n_features, n_actions, lr=0.001): 20 | self.sess = sess 21 | 22 | self.s = tf.placeholder(tf.float32, [1, n_features], "state") 23 | self.a = tf.placeholder(tf.int32, None, "action") 24 | self.td_error = tf.placeholder(tf.float32, None, "td_error") # TD_error 25 | 26 | with tf.variable_scope('Actor'): 27 | l1 = tf.layers.dense( 28 | inputs=self.s, 29 | units=20, # number of hidden units 30 | activation=tf.nn.relu, 31 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights 32 | bias_initializer=tf.constant_initializer(0.1), # biases 33 | name='l1' 34 | ) 35 | 36 | self.acts_prob = tf.layers.dense( 37 | inputs=l1, 38 | units=n_actions, # output units 39 | activation=tf.nn.softmax, # get action probabilities 40 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights 41 | bias_initializer=tf.constant_initializer(0.1), # biases 42 | name='acts_prob' 43 | ) 44 | 45 | with tf.variable_scope('exp_v'): 46 | log_prob = tf.log(self.acts_prob[0, self.a]) 47 | self.exp_v = tf.reduce_mean(log_prob * self.td_error) # advantage (TD_error) guided loss 48 | 49 | with tf.variable_scope('train'): 50 | self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v) # minimize(-exp_v) = maximize(exp_v) 51 | 52 | def learn(self, s, a, td): 53 | s = s[np.newaxis, :] 54 | feed_dict = {self.s: s, self.a: a, self.td_error: td} 55 | _, exp_v = self.sess.run([self.train_op, self.exp_v], feed_dict) 56 | return exp_v 57 | 58 | def choose_action(self, s): 59 | s = s[np.newaxis, :] 60 | probs = self.sess.run(self.acts_prob, {self.s: s}) # get probabilities for all actions 61 | return np.random.choice(np.arange(probs.shape[1]), p=probs.ravel()) # return a int 62 | 63 | 64 | class Critic(object): 65 | def __init__(self, sess, n_features, lr=0.01): 66 | self.sess = sess 67 | 68 | self.s = tf.placeholder(tf.float32, [1, n_features], "state") 69 | self.v_ = tf.placeholder(tf.float32, [1, 1], "v_next") 70 | self.r = tf.placeholder(tf.float32, None, 'r') 71 | 72 | with tf.variable_scope('Critic'): 73 | l1 = tf.layers.dense( 74 | inputs=self.s, 75 | units=20, # number of hidden units 76 | activation=tf.nn.relu, # None 77 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights 78 | bias_initializer=tf.constant_initializer(0.1), # biases 79 | name='l1' 80 | ) 81 | 82 | self.v = tf.layers.dense( 83 | inputs=l1, 84 | units=1, # output units 85 | activation=None, 86 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights 87 | bias_initializer=tf.constant_initializer(0.1), # biases 88 | name='V' 89 | ) 90 | 91 | with tf.variable_scope('squared_TD_error'): 92 | self.td_error = self.r + GAMMA * self.v_ - self.v 93 | self.loss = tf.square(self.td_error) # TD_error = (r+gamma*V_next) - V_eval 94 | with tf.variable_scope('train'): 95 | self.train_op = 
tf.train.AdamOptimizer(lr).minimize(self.loss) 96 | 97 | def learn(self, s, r, s_): 98 | s, s_ = s[np.newaxis, :], s_[np.newaxis, :] 99 | 100 | v_ = self.sess.run(self.v, {self.s: s_}) 101 | td_error, _ = self.sess.run([self.td_error, self.train_op], 102 | {self.s: s, self.v_: v_, self.r: r}) 103 | return td_error 104 | 105 | # action有两个,即向左或向右移动小车 106 | # state是四维 107 | 108 | env = gym.make('CartPole-v0') 109 | env.seed(1) # reproducible 110 | env = env.unwrapped 111 | 112 | N_F = env.observation_space.shape[0] 113 | N_A = env.action_space.n 114 | 115 | sess = tf.Session() 116 | 117 | actor = Actor(sess, n_features=N_F, n_actions=N_A, lr=LR_A) 118 | critic = Critic(sess, n_features=N_F, lr=LR_C) 119 | 120 | sess.run(tf.global_variables_initializer()) 121 | 122 | res = [] 123 | for i_episode in range(MAX_EPISODE): 124 | s = env.reset() 125 | t = 0 126 | track_r = [] 127 | while True: 128 | if RENDER: env.render() 129 | 130 | a = actor.choose_action(s) 131 | 132 | s_, r, done, info = env.step(a) 133 | 134 | if done: r = -20 135 | 136 | track_r.append(r) 137 | 138 | td_error = critic.learn(s, r, s_) # gradient = grad[r + gamma * V(s_) - V(s)] 139 | actor.learn(s, a, td_error) # true_gradient = grad[logPi(s,a) * td_error] 140 | 141 | s = s_ 142 | t += 1 143 | 144 | if done or t >= MAX_EP_STEPS: 145 | ep_rs_sum = sum(track_r) 146 | 147 | if 'running_reward' not in globals(): 148 | running_reward = ep_rs_sum 149 | else: 150 | running_reward = running_reward * 0.95 + ep_rs_sum * 0.05 151 | if running_reward > DISPLAY_REWARD_THRESHOLD: RENDER = True # rendering 152 | print("episode:", i_episode, " reward:", int(running_reward)) 153 | res.append([i_episode, running_reward]) 154 | break 155 | 156 | pd.DataFrame(res,columns=['episode','a2c_reward']).to_csv('../a2c_reward.csv') -------------------------------------------------------------------------------- /ac.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import gym 4 | import pandas as pd 5 | 6 | OUTPUT_GRAPH = False 7 | MAX_EPISODE = 500 8 | DISPLAY_REWARD_THRESHOLD = 200 # renders environment if total episode reward is greater then this threshold 9 | MAX_EP_STEPS = 2000 # maximum time step in one episode 10 | RENDER = False # rendering wastes time 11 | GAMMA = 0.9 # reward discount in TD error 12 | LR_A = 0.001 # learning rate for actor 13 | LR_C = 0.001 # learning rate for critic 14 | 15 | 16 | class Actor(object): 17 | def __init__(self, sess, n_features, n_actions, lr=0.001): 18 | self.sess = sess 19 | 20 | self.s = tf.placeholder(tf.float32, [1, n_features], "state") 21 | self.a = tf.placeholder(tf.int32, None, "action") 22 | self.q = tf.placeholder(tf.float32, None, "q") # TD_error 23 | 24 | with tf.variable_scope('Actor'): 25 | l1 = tf.layers.dense( 26 | inputs=self.s, 27 | units=20, # number of hidden units 28 | activation=tf.nn.relu, 29 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights 30 | bias_initializer=tf.constant_initializer(0.1), # biases 31 | name='l1' 32 | ) 33 | 34 | self.acts_prob = tf.layers.dense( 35 | inputs=l1, 36 | units=n_actions, # output units 37 | activation=tf.nn.softmax, # get action probabilities 38 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights 39 | bias_initializer=tf.constant_initializer(0.1), # biases 40 | name='acts_prob' 41 | ) 42 | 43 | with tf.variable_scope('exp_v'): 44 | log_prob = tf.log(self.acts_prob[0, self.a]) 45 | self.exp_v = tf.reduce_mean(log_prob * self.q) # 
advantage (TD_error) guided loss 46 | 47 | with tf.variable_scope('train'): 48 | self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v) # minimize(-exp_v) = maximize(exp_v)#PG的思想是采用上面的函数一步步做梯度上升(等价于负方向的梯度下降)使智能体在行动中获取更高的回报奖励。 49 | 50 | def learn(self, s, a, q): 51 | s = s[np.newaxis, :] 52 | feed_dict = {self.s: s, self.a: a, self.q: q} 53 | _, exp_v = self.sess.run([self.train_op, self.exp_v], feed_dict) 54 | return exp_v 55 | 56 | def choose_action(self, s): 57 | s = s[np.newaxis, :] 58 | probs = self.sess.run(self.acts_prob, {self.s: s}) # get probabilities for all actions 59 | return np.random.choice(np.arange(probs.shape[1]), p=probs.ravel()) # return a int 60 | 61 | 62 | class Critic(object): 63 | def __init__(self, sess, n_features,n_actions, lr=0.01): 64 | self.sess = sess 65 | 66 | self.s = tf.placeholder(tf.float32, [None, n_features], "state") 67 | self.a = tf.placeholder(tf.int32,[None, 1],"action") 68 | self.r = tf.placeholder(tf.float32, None, 'r') 69 | self.q_ = tf.placeholder(tf.float32,[None,1],'q_next') 70 | 71 | self.a_onehot = tf.one_hot(self.a, n_actions, dtype=tf.float32) 72 | self.a_onehot = tf.squeeze(self.a_onehot,axis=1) 73 | 74 | self.input = tf.concat([self.s,self.a_onehot],axis=1) 75 | 76 | with tf.variable_scope('Critic'): 77 | l1 = tf.layers.dense( 78 | inputs=self.input, 79 | units=20, # number of hidden units 80 | activation=tf.nn.relu, # None 81 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights 82 | bias_initializer=tf.constant_initializer(0.1), # biases 83 | name='l1' 84 | ) 85 | 86 | self.q = tf.layers.dense( 87 | inputs=l1, 88 | units=1, # output units 89 | activation=None, 90 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights 91 | bias_initializer=tf.constant_initializer(0.1), # biases 92 | name='Q' 93 | ) 94 | 95 | with tf.variable_scope('squared_TD_error'): 96 | self.td_error = self.r + GAMMA * self.q_ - self.q 97 | self.loss = tf.square(self.td_error) # TD_error = (r+gamma*V_next) - V_eval 98 | with tf.variable_scope('train'): 99 | self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss) 100 | 101 | def learn(self, s, a, r, s_): 102 | 103 | s, s_ = s[np.newaxis, :], s_[np.newaxis, :] 104 | next_a = [[i] for i in range(N_A)] 105 | s_ = np.tile(s_,[N_A,1]) 106 | q_ = self.sess.run(self.q, {self.s: s_,self.a:next_a}) 107 | q_ = np.max(q_,axis=0,keepdims=True) 108 | q, _ = self.sess.run([self.q, self.train_op], 109 | {self.s: s, self.q_: q_, self.r: r,self.a:[[a]]}) 110 | return q 111 | 112 | 113 | # action有两个,即向左或向右移动小车 114 | # state是四维 115 | 116 | env = gym.make('CartPole-v0') 117 | env.seed(1) # reproducible 118 | env = env.unwrapped 119 | 120 | N_F = env.observation_space.shape[0] 121 | N_A = env.action_space.n 122 | 123 | sess = tf.Session() 124 | 125 | actor = Actor(sess, n_features=N_F, n_actions=N_A, lr=LR_A) 126 | critic = Critic(sess, n_features=N_F,n_actions=N_A,lr=LR_C) 127 | 128 | sess.run(tf.global_variables_initializer()) 129 | 130 | res = [] 131 | for i_episode in range(MAX_EPISODE): 132 | s = env.reset() 133 | t = 0 134 | track_r = [] 135 | while True: 136 | if RENDER: env.render() 137 | 138 | a = actor.choose_action(s) 139 | 140 | s_, r, done, info = env.step(a) 141 | 142 | if done: r = -20 143 | 144 | track_r.append(r) 145 | 146 | q = critic.learn(s, a,r, s_) # gradient = grad[r + gamma * V(s_) - V(s)] 147 | actor.learn(s, a, q) # true_gradient = grad[logPi(s,a) * td_error] 148 | 149 | s = s_ 150 | t += 1 151 | 152 | if done or t >= MAX_EP_STEPS: 153 | ep_rs_sum = sum(track_r) 154 | 
155 | if 'running_reward' not in globals(): 156 | running_reward = ep_rs_sum 157 | else: 158 | running_reward = running_reward * 0.95 + ep_rs_sum * 0.05 159 | if running_reward > DISPLAY_REWARD_THRESHOLD: RENDER = True # rendering 160 | print("episode:", i_episode, " reward:", int(running_reward)) 161 | res.append([i_episode,running_reward]) 162 | 163 | break 164 | 165 | pd.DataFrame(res,columns=['episode','ac_reward']).to_csv('../ac_reward.csv') 166 | -------------------------------------------------------------------------------- /adaboost.py: -------------------------------------------------------------------------------- 1 | # coding: UTF-8 2 | #https://blog.csdn.net/Dark_Scope/article/details/14103983 3 | 4 | #1】 《Pattern Recognition And Machine Learning》 5 | #【2】 《统计学习方法》 6 | from __future__ import division 7 | import numpy as np 8 | import scipy as sp 9 | from weakclassify import WEAKC 10 | from dml.tool import sign 11 | class ADABC: 12 | def __init__(self,X,y,Weaker=WEAKC): 13 | ''' 14 | Weaker is a class of weak classifier 15 | It should have a train(self.W) method pass the weight parameter to train 16 | pred(test_set) method which return y formed by 1 or -1 17 | see detail in <统计学习方法> 18 | ''' 19 | self.X=np.array(X) 20 | self.y=np.array(y) 21 | self.Weaker=Weaker 22 | self.sums=np.zeros(self.y.shape) 23 | self.W=np.ones((self.X.shape[1],1)).flatten(1)/self.X.shape[1] 24 | self.Q=0 25 | #print self.W 26 | def train(self,M=4): 27 | ''' 28 | M is the maximal Weaker classification 29 | ''' 30 | self.G={} 31 | self.alpha={} 32 | for i in range(M): 33 | self.G.setdefault(i) 34 | self.alpha.setdefault(i) 35 | for i in range(M): 36 | self.G[i]=self.Weaker(self.X,self.y) 37 | e=self.G[i].train(self.W) 38 | #print self.G[i].t_val,self.G[i].t_b,e 39 | self.alpha[i]=1/2*np.log((1-e)/e) 40 | #print self.alpha[i] 41 | sg=self.G[i].pred(self.X) 42 | Z=self.W*np.exp(-self.alpha[i]*self.y*sg.transpose()) 43 | self.W=(Z/Z.sum()).flatten(1) 44 | self.Q=i 45 | #print self.finalclassifer(i),'===========' 46 | if self.finalclassifer(i)==0: 47 | 48 | print i+1," weak classifier is enough to make the error to 0" 49 | break 50 | def finalclassifer(self,t): 51 | ''' 52 | the 1 to t weak classifer come together 53 | ''' 54 | self.sums=self.sums+self.G[t].pred(self.X).flatten(1)*self.alpha[t] 55 | #print self.sums 56 | pre_y=sign(self.sums) 57 | #sums=np.zeros(self.y.shape) 58 | #for i in range(t+1): 59 | # sums=sums+self.G[i].pred(self.X).flatten(1)*self.alpha[i] 60 | # print sums 61 | #pre_y=sign(sums) 62 | t=(pre_y!=self.y).sum() 63 | return t 64 | def pred(self,test_set): 65 | sums=np.zeros(self.y.shape) 66 | for i in range(self.Q+1): 67 | sums=sums+self.G[i].pred(self.X).flatten(1)*self.alpha[i] 68 | #print sums 69 | pre_y=sign(sums) 70 | return pre_y 71 | 72 | -------------------------------------------------------------------------------- /alldata/iris.csv: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,setosa 2 | 4.9,3.0,1.4,0.2,setosa 3 | 4.7,3.2,1.3,0.2,setosa 4 | 4.6,3.1,1.5,0.2,setosa 5 | 5.0,3.6,1.4,0.2,setosa 6 | 5.4,3.9,1.7,0.4,setosa 7 | 4.6,3.4,1.4,0.3,setosa 8 | 5.0,3.4,1.5,0.2,setosa 9 | 4.4,2.9,1.4,0.2,setosa 10 | 4.9,3.1,1.5,0.1,setosa 11 | 5.4,3.7,1.5,0.2,setosa 12 | 4.8,3.4,1.6,0.2,setosa 13 | 4.8,3.0,1.4,0.1,setosa 14 | 4.3,3.0,1.1,0.1,setosa 15 | 5.8,4.0,1.2,0.2,setosa 16 | 5.7,4.4,1.5,0.4,setosa 17 | 5.4,3.9,1.3,0.4,setosa 18 | 5.1,3.5,1.4,0.3,setosa 19 | 5.7,3.8,1.7,0.3,setosa 20 | 5.1,3.8,1.5,0.3,setosa 21 | 
5.4,3.4,1.7,0.2,setosa 22 | 5.1,3.7,1.5,0.4,setosa 23 | 4.6,3.6,1.0,0.2,setosa 24 | 5.1,3.3,1.7,0.5,setosa 25 | 4.8,3.4,1.9,0.2,setosa 26 | 5.0,3.0,1.6,0.2,setosa 27 | 5.0,3.4,1.6,0.4,setosa 28 | 5.2,3.5,1.5,0.2,setosa 29 | 5.2,3.4,1.4,0.2,setosa 30 | 4.7,3.2,1.6,0.2,setosa 31 | 4.8,3.1,1.6,0.2,setosa 32 | 5.4,3.4,1.5,0.4,setosa 33 | 5.2,4.1,1.5,0.1,setosa 34 | 5.5,4.2,1.4,0.2,setosa 35 | 4.9,3.1,1.5,0.1,setosa 36 | 5.0,3.2,1.2,0.2,setosa 37 | 5.5,3.5,1.3,0.2,setosa 38 | 4.9,3.1,1.5,0.1,setosa 39 | 4.4,3.0,1.3,0.2,setosa 40 | 5.1,3.4,1.5,0.2,setosa 41 | 5.0,3.5,1.3,0.3,setosa 42 | 4.5,2.3,1.3,0.3,setosa 43 | 4.4,3.2,1.3,0.2,setosa 44 | 5.0,3.5,1.6,0.6,setosa 45 | 5.1,3.8,1.9,0.4,setosa 46 | 4.8,3.0,1.4,0.3,setosa 47 | 5.1,3.8,1.6,0.2,setosa 48 | 4.6,3.2,1.4,0.2,setosa 49 | 5.3,3.7,1.5,0.2,setosa 50 | 5.0,3.3,1.4,0.2,setosa 51 | 7.0,3.2,4.7,1.4,versicolor 52 | 6.4,3.2,4.5,1.5,versicolor 53 | 6.9,3.1,4.9,1.5,versicolor 54 | 5.5,2.3,4.0,1.3,versicolor 55 | 6.5,2.8,4.6,1.5,versicolor 56 | 5.7,2.8,4.5,1.3,versicolor 57 | 6.3,3.3,4.7,1.6,versicolor 58 | 4.9,2.4,3.3,1.0,versicolor 59 | 6.6,2.9,4.6,1.3,versicolor 60 | 5.2,2.7,3.9,1.4,versicolor 61 | 5.0,2.0,3.5,1.0,versicolor 62 | 5.9,3.0,4.2,1.5,versicolor 63 | 6.0,2.2,4.0,1.0,versicolor 64 | 6.1,2.9,4.7,1.4,versicolor 65 | 5.6,2.9,3.6,1.3,versicolor 66 | 6.7,3.1,4.4,1.4,versicolor 67 | 5.6,3.0,4.5,1.5,versicolor 68 | 5.8,2.7,4.1,1.0,versicolor 69 | 6.2,2.2,4.5,1.5,versicolor 70 | 5.6,2.5,3.9,1.1,versicolor 71 | 5.9,3.2,4.8,1.8,versicolor 72 | 6.1,2.8,4.0,1.3,versicolor 73 | 6.3,2.5,4.9,1.5,versicolor 74 | 6.1,2.8,4.7,1.2,versicolor 75 | 6.4,2.9,4.3,1.3,versicolor 76 | 6.6,3.0,4.4,1.4,versicolor 77 | 6.8,2.8,4.8,1.4,versicolor 78 | 6.7,3.0,5.0,1.7,versicolor 79 | 6.0,2.9,4.5,1.5,versicolor 80 | 5.7,2.6,3.5,1.0,versicolor 81 | 5.5,2.4,3.8,1.1,versicolor 82 | 5.5,2.4,3.7,1.0,versicolor 83 | 5.8,2.7,3.9,1.2,versicolor 84 | 6.0,2.7,5.1,1.6,versicolor 85 | 5.4,3.0,4.5,1.5,versicolor 86 | 6.0,3.4,4.5,1.6,versicolor 87 | 6.7,3.1,4.7,1.5,versicolor 88 | 6.3,2.3,4.4,1.3,versicolor 89 | 5.6,3.0,4.1,1.3,versicolor 90 | 5.5,2.5,4.0,1.3,versicolor 91 | 5.5,2.6,4.4,1.2,versicolor 92 | 6.1,3.0,4.6,1.4,versicolor 93 | 5.8,2.6,4.0,1.2,versicolor 94 | 5.0,2.3,3.3,1.0,versicolor 95 | 5.6,2.7,4.2,1.3,versicolor 96 | 5.7,3.0,4.2,1.2,versicolor 97 | 5.7,2.9,4.2,1.3,versicolor 98 | 6.2,2.9,4.3,1.3,versicolor 99 | 5.1,2.5,3.0,1.1,versicolor 100 | 5.7,2.8,4.1,1.3,versicolor 101 | 6.3,3.3,6.0,2.5,virginica 102 | 5.8,2.7,5.1,1.9,virginica 103 | 7.1,3.0,5.9,2.1,virginica 104 | 6.3,2.9,5.6,1.8,virginica 105 | 6.5,3.0,5.8,2.2,virginica 106 | 7.6,3.0,6.6,2.1,virginica 107 | 4.9,2.5,4.5,1.7,virginica 108 | 7.3,2.9,6.3,1.8,virginica 109 | 6.7,2.5,5.8,1.8,virginica 110 | 7.2,3.6,6.1,2.5,virginica 111 | 6.5,3.2,5.1,2.0,virginica 112 | 6.4,2.7,5.3,1.9,virginica 113 | 6.8,3.0,5.5,2.1,virginica 114 | 5.7,2.5,5.0,2.0,virginica 115 | 5.8,2.8,5.1,2.4,virginica 116 | 6.4,3.2,5.3,2.3,virginica 117 | 6.5,3.0,5.5,1.8,virginica 118 | 7.7,3.8,6.7,2.2,virginica 119 | 7.7,2.6,6.9,2.3,virginica 120 | 6.0,2.2,5.0,1.5,virginica 121 | 6.9,3.2,5.7,2.3,virginica 122 | 5.6,2.8,4.9,2.0,virginica 123 | 7.7,2.8,6.7,2.0,virginica 124 | 6.3,2.7,4.9,1.8,virginica 125 | 6.7,3.3,5.7,2.1,virginica 126 | 7.2,3.2,6.0,1.8,virginica 127 | 6.2,2.8,4.8,1.8,virginica 128 | 6.1,3.0,4.9,1.8,virginica 129 | 6.4,2.8,5.6,2.1,virginica 130 | 7.2,3.0,5.8,1.6,virginica 131 | 7.4,2.8,6.1,1.9,virginica 132 | 7.9,3.8,6.4,2.0,virginica 133 | 6.4,2.8,5.6,2.2,virginica 134 | 6.3,2.8,5.1,1.5,virginica 135 | 
6.1,2.6,5.6,1.4,virginica 136 | 7.7,3.0,6.1,2.3,virginica 137 | 6.3,3.4,5.6,2.4,virginica 138 | 6.4,3.1,5.5,1.8,virginica 139 | 6.0,3.0,4.8,1.8,virginica 140 | 6.9,3.1,5.4,2.1,virginica 141 | 6.7,3.1,5.6,2.4,virginica 142 | 6.9,3.1,5.1,2.3,virginica 143 | 5.8,2.7,5.1,1.9,virginica 144 | 6.8,3.2,5.9,2.3,virginica 145 | 6.7,3.3,5.7,2.5,virginica 146 | 6.7,3.0,5.2,2.3,virginica 147 | 6.3,2.5,5.0,1.9,virginica 148 | 6.5,3.0,5.2,2.0,virginica 149 | 6.2,3.4,5.4,2.3,virginica 150 | 5.9,3.0,5.1,1.8,virginica 151 | -------------------------------------------------------------------------------- /alldata/iris_test.csv: -------------------------------------------------------------------------------- 1 | 30,4,setosa,versicolor,virginica 2 | 5.9,3.0,4.2,1.5,1 3 | 6.9,3.1,5.4,2.1,2 4 | 5.1,3.3,1.7,0.5,0 5 | 6.0,3.4,4.5,1.6,1 6 | 5.5,2.5,4.0,1.3,1 7 | 6.2,2.9,4.3,1.3,1 8 | 5.5,4.2,1.4,0.2,0 9 | 6.3,2.8,5.1,1.5,2 10 | 5.6,3.0,4.1,1.3,1 11 | 6.7,2.5,5.8,1.8,2 12 | 7.1,3.0,5.9,2.1,2 13 | 4.3,3.0,1.1,0.1,0 14 | 5.6,2.8,4.9,2.0,2 15 | 5.5,2.3,4.0,1.3,1 16 | 6.0,2.2,4.0,1.0,1 17 | 5.1,3.5,1.4,0.2,0 18 | 5.7,2.6,3.5,1.0,1 19 | 4.8,3.4,1.9,0.2,0 20 | 5.1,3.4,1.5,0.2,0 21 | 5.7,2.5,5.0,2.0,2 22 | 5.4,3.4,1.7,0.2,0 23 | 5.6,3.0,4.5,1.5,1 24 | 6.3,2.9,5.6,1.8,2 25 | 6.3,2.5,4.9,1.5,1 26 | 5.8,2.7,3.9,1.2,1 27 | 6.1,3.0,4.6,1.4,1 28 | 5.2,4.1,1.5,0.1,0 29 | 6.7,3.1,4.7,1.5,1 30 | 6.7,3.3,5.7,2.5,2 31 | 6.4,2.9,4.3,1.3,1 32 | -------------------------------------------------------------------------------- /alldata/iris_training.csv: -------------------------------------------------------------------------------- 1 | 120,4,setosa,versicolor,virginica 2 | 6.4,2.8,5.6,2.2,2 3 | 5.0,2.3,3.3,1.0,1 4 | 4.9,2.5,4.5,1.7,2 5 | 4.9,3.1,1.5,0.1,0 6 | 5.7,3.8,1.7,0.3,0 7 | 4.4,3.2,1.3,0.2,0 8 | 5.4,3.4,1.5,0.4,0 9 | 6.9,3.1,5.1,2.3,2 10 | 6.7,3.1,4.4,1.4,1 11 | 5.1,3.7,1.5,0.4,0 12 | 5.2,2.7,3.9,1.4,1 13 | 6.9,3.1,4.9,1.5,1 14 | 5.8,4.0,1.2,0.2,0 15 | 5.4,3.9,1.7,0.4,0 16 | 7.7,3.8,6.7,2.2,2 17 | 6.3,3.3,4.7,1.6,1 18 | 6.8,3.2,5.9,2.3,2 19 | 7.6,3.0,6.6,2.1,2 20 | 6.4,3.2,5.3,2.3,2 21 | 5.7,4.4,1.5,0.4,0 22 | 6.7,3.3,5.7,2.1,2 23 | 6.4,2.8,5.6,2.1,2 24 | 5.4,3.9,1.3,0.4,0 25 | 6.1,2.6,5.6,1.4,2 26 | 7.2,3.0,5.8,1.6,2 27 | 5.2,3.5,1.5,0.2,0 28 | 5.8,2.6,4.0,1.2,1 29 | 5.9,3.0,5.1,1.8,2 30 | 5.4,3.0,4.5,1.5,1 31 | 6.7,3.0,5.0,1.7,1 32 | 6.3,2.3,4.4,1.3,1 33 | 5.1,2.5,3.0,1.1,1 34 | 6.4,3.2,4.5,1.5,1 35 | 6.8,3.0,5.5,2.1,2 36 | 6.2,2.8,4.8,1.8,2 37 | 6.9,3.2,5.7,2.3,2 38 | 6.5,3.2,5.1,2.0,2 39 | 5.8,2.8,5.1,2.4,2 40 | 5.1,3.8,1.5,0.3,0 41 | 4.8,3.0,1.4,0.3,0 42 | 7.9,3.8,6.4,2.0,2 43 | 5.8,2.7,5.1,1.9,2 44 | 6.7,3.0,5.2,2.3,2 45 | 5.1,3.8,1.9,0.4,0 46 | 4.7,3.2,1.6,0.2,0 47 | 6.0,2.2,5.0,1.5,2 48 | 4.8,3.4,1.6,0.2,0 49 | 7.7,2.6,6.9,2.3,2 50 | 4.6,3.6,1.0,0.2,0 51 | 7.2,3.2,6.0,1.8,2 52 | 5.0,3.3,1.4,0.2,0 53 | 6.6,3.0,4.4,1.4,1 54 | 6.1,2.8,4.0,1.3,1 55 | 5.0,3.2,1.2,0.2,0 56 | 7.0,3.2,4.7,1.4,1 57 | 6.0,3.0,4.8,1.8,2 58 | 7.4,2.8,6.1,1.9,2 59 | 5.8,2.7,5.1,1.9,2 60 | 6.2,3.4,5.4,2.3,2 61 | 5.0,2.0,3.5,1.0,1 62 | 5.6,2.5,3.9,1.1,1 63 | 6.7,3.1,5.6,2.4,2 64 | 6.3,2.5,5.0,1.9,2 65 | 6.4,3.1,5.5,1.8,2 66 | 6.2,2.2,4.5,1.5,1 67 | 7.3,2.9,6.3,1.8,2 68 | 4.4,3.0,1.3,0.2,0 69 | 7.2,3.6,6.1,2.5,2 70 | 6.5,3.0,5.5,1.8,2 71 | 5.0,3.4,1.5,0.2,0 72 | 4.7,3.2,1.3,0.2,0 73 | 6.6,2.9,4.6,1.3,1 74 | 5.5,3.5,1.3,0.2,0 75 | 7.7,3.0,6.1,2.3,2 76 | 6.1,3.0,4.9,1.8,2 77 | 4.9,3.1,1.5,0.1,0 78 | 5.5,2.4,3.8,1.1,1 79 | 5.7,2.9,4.2,1.3,1 80 | 6.0,2.9,4.5,1.5,1 81 | 6.4,2.7,5.3,1.9,2 82 | 5.4,3.7,1.5,0.2,0 83 | 6.1,2.9,4.7,1.4,1 84 | 6.5,2.8,4.6,1.5,1 85 
| 5.6,2.7,4.2,1.3,1 86 | 6.3,3.4,5.6,2.4,2 87 | 4.9,3.1,1.5,0.1,0 88 | 6.8,2.8,4.8,1.4,1 89 | 5.7,2.8,4.5,1.3,1 90 | 6.0,2.7,5.1,1.6,1 91 | 5.0,3.5,1.3,0.3,0 92 | 6.5,3.0,5.2,2.0,2 93 | 6.1,2.8,4.7,1.2,1 94 | 5.1,3.5,1.4,0.3,0 95 | 4.6,3.1,1.5,0.2,0 96 | 6.5,3.0,5.8,2.2,2 97 | 4.6,3.4,1.4,0.3,0 98 | 4.6,3.2,1.4,0.2,0 99 | 7.7,2.8,6.7,2.0,2 100 | 5.9,3.2,4.8,1.8,1 101 | 5.1,3.8,1.6,0.2,0 102 | 4.9,3.0,1.4,0.2,0 103 | 4.9,2.4,3.3,1.0,1 104 | 4.5,2.3,1.3,0.3,0 105 | 5.8,2.7,4.1,1.0,1 106 | 5.0,3.4,1.6,0.4,0 107 | 5.2,3.4,1.4,0.2,0 108 | 5.3,3.7,1.5,0.2,0 109 | 5.0,3.6,1.4,0.2,0 110 | 5.6,2.9,3.6,1.3,1 111 | 4.8,3.1,1.6,0.2,0 112 | 6.3,2.7,4.9,1.8,2 113 | 5.7,2.8,4.1,1.3,1 114 | 5.0,3.0,1.6,0.2,0 115 | 6.3,3.3,6.0,2.5,2 116 | 5.0,3.5,1.6,0.6,0 117 | 5.5,2.6,4.4,1.2,1 118 | 5.7,3.0,4.2,1.2,1 119 | 4.4,2.9,1.4,0.2,0 120 | 4.8,3.0,1.4,0.1,0 121 | 5.5,2.4,3.7,1.0,1 122 | -------------------------------------------------------------------------------- /asr_lstm_ctc.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | import time 3 | 4 | import tensorflow as tf 5 | import scipy.io.wavfile as wav 6 | import numpy as np 7 | 8 | from six.moves import xrange as range 9 | 10 | try: 11 | from python_speech_features import mfcc 12 | except ImportError: 13 | print("Failed to import python_speech_features.\n Try pip install python_speech_features.") 14 | raise ImportError 15 | 16 | 17 | 18 | # constants 19 | SPACE_TOKEN = '<space>' 20 | SPACE_INDEX = 0 21 | FIRST_INDEX = ord('a') - 1 # 0 is reserved to space 22 | 23 | # mfcc extracts 13 features per frame by default 24 | num_features = 13 25 | # 26 English letters + 1 blank + 1 no-label = 28 classes 26 | num_classes = ord('z') - ord('a') + 1 + 1 + 1 27 | 28 | # number of training epochs 29 | num_epochs = 200 30 | # number of LSTM hidden units 31 | num_hidden = 40 32 | # number of stacked LSTM layers 33 | num_layers = 1 34 | # batch_size is set to 1 35 | batch_size = 1 36 | # initial learning rate 37 | initial_learning_rate = 0.01 38 | 39 | # number of samples 40 | num_examples = 1 41 | # how many batches per epoch 42 | num_batches_per_epoch = int(num_examples/batch_size) 43 | 44 | 45 | def sparse_tuple_from(sequences, dtype=np.int32): 46 | """Build the sparse representation of a list of sequences, so the data can be fed directly to a tensorflow tf.sparse_placeholder sparse matrix 47 | Args: 48 | sequences: a list of sequences 49 | Returns: 50 | a triple with the same structure as tensorflow's tf.sparse_placeholder 51 | """ 52 | indices = [] 53 | values = [] 54 | 55 | for n, seq in enumerate(sequences): 56 | indices.extend(zip([n]*len(seq), range(len(seq)))) 57 | values.extend(seq) 58 | 59 | indices = np.asarray(indices, dtype=np.int64) 60 | values = np.asarray(values, dtype=dtype) 61 | shape = np.asarray([len(sequences), np.asarray(indices).max(0)[1]+1], dtype=np.int64) 62 | 63 | return indices, values, shape 64 | 65 | 66 | def get_audio_feature(): 67 | ''' 68 | Return the data of the wav file after mfcc feature extraction 69 | ''' 70 | 71 | audio_filename = "audio.wav" 72 | 73 | # read the wav file; fs is the sample rate, audio is the data 74 | fs, audio = wav.read(audio_filename) 75 | 76 | # extract mfcc features 77 | inputs = mfcc(audio, samplerate=fs) 78 | # normalize the features: subtract the mean and divide by the standard deviation 79 | feature_inputs = np.asarray(inputs[np.newaxis, :]) 80 | feature_inputs = (feature_inputs - np.mean(feature_inputs))/np.std(feature_inputs) 81 | 82 | # sequence length of the feature data 83 | feature_seq_len = [feature_inputs.shape[1]] 84 | 85 | return feature_inputs, feature_seq_len 86 | 87 | def get_audio_label(): 88 | ''' 89 | Convert the label text into an integer sequence, then into a sparse triple 90 | ''' 91 | target_filename = 'label.txt' 92 | 93 | with open(target_filename, 'r') as f: 94 | # the original text is "she had your dark suit in greasy wash water all year" 95 | line = f.readlines()[0].strip() 96 | targets = line.replace(' ', '  ') 97 | # put into a list, with spaces replaced by '<space>' 98 |
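# (the double space introduced by replace(' ', '  ') above is what makes split(' ') below yield an empty-string token between words, which is then mapped to SPACE_TOKEN)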
#['she', '<space>', 'had', '<space>', 'your', '<space>', 'dark', '<space>', 'suit', '<space>', 'in', '<space>', 'greasy', '<space>', 'wash', '<space>', 'water', '<space>', 'all', '<space>', 'year'] 99 | targets = targets.split(' ') 100 | 101 | # each letter becomes one label; the list above is converted into: 102 | #['s' 'h' 'e' '<space>' 'h' 'a' 'd' '<space>' 'y' 'o' 'u' 'r' '<space>' 'd' 103 | # 'a' 'r' 'k' '<space>' 's' 'u' 'i' 't' '<space>' 'i' 'n' '<space>' 'g' 'r' 104 | # 'e' 'a' 's' 'y' '<space>' 'w' 'a' 's' 'h' '<space>' 'w' 'a' 't' 'e' 'r' 105 | #'<space>' 'a' 'l' 'l' '<space>' 'y' 'e' 'a' 'r'] 106 | targets = np.hstack([SPACE_TOKEN if x == '' else list(x) for x in targets]) 107 | 108 | # convert the labels into an integer sequence representation: 109 | # [19 8 5 0 8 1 4 0 25 15 21 18 0 4 1 18 11 0 19 21 9 20 0 9 14 110 | # 0 7 18 5 1 19 25 0 23 1 19 8 0 23 1 20 5 18 0 1 12 12 0 25 5 111 | # 1 18] 112 | targets = np.asarray([SPACE_INDEX if x == SPACE_TOKEN else ord(x) - FIRST_INDEX 113 | for x in targets]) 114 | 115 | # convert the list into a sparse triple 116 | train_targets = sparse_tuple_from([targets]) 117 | return train_targets 118 | 119 | 120 | 121 | def inference(inputs, seq_len): 122 | ''' 123 | Definition of the stacked bidirectional LSTM network 124 | 125 | Args: 126 | inputs: input data of shape [batch_size, max sequence length, 13 features per frame] 127 | "max sequence length" because each sample is stored as one feature matrix, 128 | and since the samples in a batch of n samples have different sequence lengths, 129 | the 2nd dimension of the combined 3-D tensor must be long enough to hold the feature sequence of every sample. 130 | seq_len: the valid sequence length of each sample in the batch 131 | ''' 132 | 133 | # define a forward LSTM cell with 40 hidden units 134 | cell_fw = tf.contrib.rnn.LSTMCell(num_hidden, 135 | initializer=tf.random_normal_initializer( 136 | mean=0.0, stddev=0.1), 137 | state_is_tuple=True) 138 | 139 | # build a list of num_layers forward cells 140 | cells_fw = [cell_fw] * num_layers 141 | # define a backward LSTM cell with 40 hidden units 142 | cell_bw = tf.contrib.rnn.LSTMCell(num_hidden, 143 | initializer=tf.random_normal_initializer( 144 | mean=0.0, stddev=0.1), 145 | state_is_tuple=True) 146 | # build a list of num_layers backward cells 147 | cells_bw = [cell_bw] * num_layers 148 | 149 | # combine the forward and backward cell lists defined above into a bidirectional lstm network 150 | # sequence_length holds the actually valid lengths and has size batch_size, 151 | # i.e. it says how long the useful part of each sample in the batch is. 152 | # the width of the outputs equals the number of hidden units, i.e. num_hidden 153 | outputs, _, _ = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(cells_fw, 154 | cells_bw, 155 | inputs, 156 | dtype=tf.float32, 157 | sequence_length=seq_len) 158 | 159 | # get the shape of the input data 160 | shape = tf.shape(inputs) 161 | batch_s, max_timesteps = shape[0], shape[1] 162 | 163 | # reshape the LSTM output into a matrix of width 40 (num_hidden) 164 | # for the fully connected computation below 165 | outputs = tf.reshape(outputs, [-1, num_hidden]) 166 | 167 | W = tf.Variable(tf.truncated_normal([num_hidden, 168 | num_classes], 169 | stddev=0.1)) 170 | 171 | b = tf.Variable(tf.constant(0., shape=[num_classes])) 172 | 173 | # fully connected linear computation 174 | logits = tf.matmul(outputs, W) + b 175 | 176 | # reshape the fully connected result from width 40 to width num_classes, 177 | # i.e. the data finally fed to CTC must have width 28 (26 letters + blank + no-label) 178 | logits = tf.reshape(logits, [batch_s, -1, num_classes]) 179 | 180 | # transpose: swap the first and second dimensions, 181 | # so that sequence length comes first and batch_size second. 182 | # this matches the input format expected by Tensorflow's CTC 183 | logits = tf.transpose(logits, (1, 0, 2)) 184 | 185 | return logits 186 | 187 | def main(): 188 | # input feature data, of shape [batch_size, sequence length, features per frame] 189 | inputs = tf.placeholder(tf.float32, [None, None, num_features]) 190 | 191 | # labels of the input data; defining it as a sparse_placeholder creates a sparse tensor: SparseTensor 192 | # this structure can be fed directly to ctc to compute the loss 193 | targets = tf.sparse_placeholder(tf.int32) 194 | 195 | # the sequence lengths, of size [batch_size], 196 | # giving the valid sequence length of each sample in the batch 197 | seq_len = tf.placeholder(tf.int32, [None]) 198 | 199 | # forward computation: define the network structure; input is the feature data, output is fed to ctc for the loss 200 | logits = inference(inputs, seq_len) 201 | 202 | # ctc loss computation 203 | # the targets argument must be a sparse int32 tensor structure: tf.SparseTensor 204 | # the logits argument is the output of the lstm network above 205 | # the seq_len argument gives the sequence length of each sample in this batch. 206 | loss = tf.nn.ctc_loss(targets, logits, seq_len) 207 | 208 |
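# (note: tf.nn.ctc_loss in TF 1.x defaults to time_major=True, which is why inference() above transposes logits to [max_time, batch_size, num_classes])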
# compute the mean of the loss 209 | cost = tf.reduce_mean(loss) 210 | 211 | # use momentum optimization 212 | optimizer = tf.train.MomentumOptimizer(initial_learning_rate, 0.9).minimize(cost) 213 | 214 | # there is another ctc decoding function: tf.contrib.ctc.ctc_beam_search_decoder 215 | # beam search decoding gives better results, but the greedy decoder used here is faster 216 | # in the returned value, decoded is the ctc decoding result, i.e. the sequence decoded from the input data 217 | decoded, _ = tf.nn.ctc_greedy_decoder(logits, seq_len) 218 | 219 | # compute the error rate of the decoded result by means of the edit distance 220 | ler = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), 221 | targets)) 222 | config = tf.ConfigProto() 223 | config.gpu_options.allow_growth = True 224 | 225 | with tf.Session(config=config) as session: 226 | # initialize the variables 227 | tf.global_variables_initializer().run() 228 | 229 | for curr_epoch in range(num_epochs): 230 | train_cost = train_ler = 0 231 | start = time.time() 232 | 233 | for batch in range(num_batches_per_epoch): 234 | # fetch the training data; this example only takes a single training sample 235 | train_inputs, train_seq_len = get_audio_feature() 236 | # fetch the label of this sample 237 | train_targets = get_audio_label() 238 | feed = {inputs: train_inputs, 239 | targets: train_targets, 240 | seq_len: train_seq_len} 241 | 242 | # one training step, updating the parameters 243 | batch_cost, _ = session.run([cost, optimizer], feed) 244 | # accumulate the training loss 245 | train_cost += batch_cost * batch_size 246 | # accumulate the training-set error rate 247 | train_ler += session.run(ler, feed_dict=feed)*batch_size 248 | 249 | train_cost /= num_examples 250 | train_ler /= num_examples 251 | 252 | # print the loss and error rate of every epoch 253 | log = "Epoch {}/{}, train_cost = {:.3f}, train_ler = {:.3f}, time = {:.3f}" 254 | print(log.format(curr_epoch+1, num_epochs, train_cost, train_ler, 255 | time.time() - start)) 256 | # once training has finished, run one actual test and print the output 257 | # read the test data; here it is the same sample that was used for training 258 | test_inputs, test_seq_len = get_audio_feature() 259 | test_targets = get_audio_label() 260 | test_feed = {inputs: test_inputs, 261 | targets: test_targets, 262 | seq_len: test_seq_len} 263 | d = session.run(decoded[0], feed_dict=test_feed) 264 | # convert the integer sequence obtained by ctc-decoding the test audio back into letters 265 | str_decoded = ''.join([chr(x) for x in np.asarray(d[1]) + FIRST_INDEX]) 266 | # turn the no-label symbol into the empty string 267 | str_decoded = str_decoded.replace(chr(ord('z') + 1), '') 268 | # turn the blank token into a space 269 | str_decoded = str_decoded.replace(chr(ord('a') - 1), ' ') 270 | # print the final result 271 | print('Decoded:\n%s' % str_decoded) 272 | 273 | if __name__ == "__main__": 274 | main() -------------------------------------------------------------------------------- /cgan-tf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #https://blog.csdn.net/zhl493722771/article/details/82781988 3 | import tensorflow as tf 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import os, imageio 7 | from tqdm import tqdm 8 | 9 | import input_data 10 | mnist = input_data.read_data_sets("datasets/MNIST_data/", one_hot=True) 11 | 12 | 13 | batch_size = 100 14 | z_dim = 100 15 | WIDTH = 28 16 | HEIGHT = 28 17 | LABEL = 10 18 | 19 | OUTPUT_DIR = 'samples' 20 | if not os.path.exists(OUTPUT_DIR): 21 | os.mkdir(OUTPUT_DIR) 22 | 23 | X = tf.placeholder(dtype=tf.float32, shape=[None, HEIGHT, WIDTH, 1], name='X') 24 | y_label = tf.placeholder(dtype=tf.float32, shape=[None, HEIGHT, WIDTH, LABEL], name='y_label') 25 | noise = tf.placeholder(dtype=tf.float32, shape=[None, z_dim], name='noise') 26 | y_noise = tf.placeholder(dtype=tf.float32, shape=[None, LABEL], name='y_noise') 27 | is_training = tf.placeholder(dtype=tf.bool, name='is_training') 28 | 29 | def lrelu(x, leak=0.2): 30 | return tf.maximum(x, leak * x) 31 | 32 | def
sigmoid_cross_entropy_with_logits(x, y): 33 | return tf.nn.sigmoid_cross_entropy_with_logits(logits=x, labels=y) 34 | 35 | 36 | def discriminator(image, label, reuse=None, is_training=is_training): 37 | momentum = 0.9 38 | with tf.variable_scope('discriminator', reuse=reuse): 39 | h0 = tf.concat([image, label], axis=3) 40 | h0 = lrelu(tf.layers.conv2d(h0, kernel_size=5, filters=64, strides=2, padding='same')) 41 | 42 | h1 = tf.layers.conv2d(h0, kernel_size=5, filters=128, strides=2, padding='same') 43 | h1 = lrelu(tf.contrib.layers.batch_norm(h1, is_training=is_training, decay=momentum)) 44 | 45 | h2 = tf.layers.conv2d(h1, kernel_size=5, filters=256, strides=2, padding='same') 46 | h2 = lrelu(tf.contrib.layers.batch_norm(h2, is_training=is_training, decay=momentum)) 47 | 48 | h3 = tf.layers.conv2d(h2, kernel_size=5, filters=512, strides=2, padding='same') 49 | h3 = lrelu(tf.contrib.layers.batch_norm(h3, is_training=is_training, decay=momentum)) 50 | 51 | h4 = tf.contrib.layers.flatten(h3) 52 | h4 = tf.layers.dense(h4, units=1) 53 | return tf.nn.sigmoid(h4), h4 54 | 55 | 56 | def generator(z, label, is_training=is_training): 57 | momentum = 0.9 58 | with tf.variable_scope('generator', reuse=None): 59 | d = 3 60 | z = tf.concat([z, label], axis=1) 61 | h0 = tf.layers.dense(z, units=d * d * 512) 62 | h0 = tf.reshape(h0, shape=[-1, d, d, 512]) 63 | h0 = tf.nn.relu(tf.contrib.layers.batch_norm(h0, is_training=is_training, decay=momentum)) 64 | 65 | h1 = tf.layers.conv2d_transpose(h0, kernel_size=5, filters=256, strides=2, padding='same') 66 | h1 = tf.nn.relu(tf.contrib.layers.batch_norm(h1, is_training=is_training, decay=momentum)) 67 | 68 | h2 = tf.layers.conv2d_transpose(h1, kernel_size=5, filters=128, strides=2, padding='same') 69 | h2 = tf.nn.relu(tf.contrib.layers.batch_norm(h2, is_training=is_training, decay=momentum)) 70 | 71 | h3 = tf.layers.conv2d_transpose(h2, kernel_size=5, filters=64, strides=2, padding='same') 72 | h3 = tf.nn.relu(tf.contrib.layers.batch_norm(h3, is_training=is_training, decay=momentum)) 73 | 74 | h4 = tf.layers.conv2d_transpose(h3, kernel_size=5, filters=1, strides=1, padding='valid', activation=tf.nn.tanh, name='g') 75 | return h4 76 | 77 | 78 | g = generator(noise, y_noise) 79 | d_real, d_real_logits = discriminator(X, y_label) 80 | d_fake, d_fake_logits = discriminator(g, y_label, reuse=True) 81 | 82 | vars_g = [var for var in tf.trainable_variables() if var.name.startswith('generator')] 83 | vars_d = [var for var in tf.trainable_variables() if var.name.startswith('discriminator')] 84 | 85 | loss_d_real = tf.reduce_mean(sigmoid_cross_entropy_with_logits(d_real_logits, tf.ones_like(d_real))) 86 | loss_d_fake = tf.reduce_mean(sigmoid_cross_entropy_with_logits(d_fake_logits, tf.zeros_like(d_fake))) 87 | loss_g = tf.reduce_mean(sigmoid_cross_entropy_with_logits(d_fake_logits, tf.ones_like(d_fake))) 88 | loss_d = loss_d_real + loss_d_fake 89 | 90 | 91 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 92 | with tf.control_dependencies(update_ops): 93 | optimizer_d = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5).minimize(loss_d, var_list=vars_d) 94 | optimizer_g = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5).minimize(loss_g, var_list=vars_g) 95 | 96 | 97 | def montage(images): 98 | if isinstance(images, list): 99 | images = np.array(images) 100 | img_h = images.shape[1] 101 | img_w = images.shape[2] 102 | n_plots = int(np.ceil(np.sqrt(images.shape[0]))) 103 | m = np.ones((images.shape[1] * n_plots + n_plots + 1, images.shape[2] * n_plots + 
n_plots + 1)) * 0.5 104 | for i in range(n_plots): 105 | for j in range(n_plots): 106 | this_filter = i * n_plots + j 107 | if this_filter < images.shape[0]: 108 | this_img = images[this_filter] 109 | m[1 + i + i * img_h:1 + i + (i + 1) * img_h, 110 | 1 + j + j * img_w:1 + j + (j + 1) * img_w] = this_img 111 | return m 112 | 113 | sess = tf.Session() 114 | sess.run(tf.global_variables_initializer()) 115 | z_samples = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32) 116 | y_samples = np.zeros([batch_size, LABEL]) 117 | for i in range(LABEL): 118 | for j in range(LABEL): 119 | y_samples[i * LABEL + j, i] = 1 120 | samples = [] 121 | loss = {'d': [], 'g': []} 122 | 123 | for i in tqdm(range(60000)): 124 | n = np.random.uniform(-1.0, 1.0, [batch_size, z_dim]).astype(np.float32) 125 | batch, label = mnist.train.next_batch(batch_size=batch_size) 126 | batch = np.reshape(batch, [batch_size, HEIGHT, WIDTH, 1]) 127 | batch = (batch - 0.5) * 2 128 | yn = np.copy(label) 129 | yl = np.reshape(label, [batch_size, 1, 1, LABEL]) 130 | yl = yl * np.ones([batch_size, HEIGHT, WIDTH, LABEL]) 131 | 132 | d_ls, g_ls = sess.run([loss_d, loss_g], feed_dict={X: batch, noise: n, y_label: yl, y_noise: yn, is_training: True}) 133 | loss['d'].append(d_ls) 134 | loss['g'].append(g_ls) 135 | 136 | sess.run(optimizer_d, feed_dict={X: batch, noise: n, y_label: yl, y_noise: yn, is_training: True}) 137 | sess.run(optimizer_g, feed_dict={X: batch, noise: n, y_label: yl, y_noise: yn, is_training: True}) 138 | sess.run(optimizer_g, feed_dict={X: batch, noise: n, y_label: yl, y_noise: yn, is_training: True}) 139 | 140 | if i % 1000 == 0: 141 | print(i, d_ls, g_ls) 142 | gen_imgs = sess.run(g, feed_dict={noise: z_samples, y_noise: y_samples, is_training: False}) 143 | gen_imgs = (gen_imgs + 1) / 2 144 | imgs = [img[:, :, 0] for img in gen_imgs] 145 | gen_imgs = montage(imgs) 146 | plt.axis('off') 147 | plt.imshow(gen_imgs, cmap='gray') 148 | imageio.imsave(os.path.join(OUTPUT_DIR, 'sample_%d.jpg' % i), gen_imgs) 149 | plt.show() 150 | samples.append(gen_imgs) 151 | 152 | plt.plot(loss['d'], label='Discriminator') 153 | plt.plot(loss['g'], label='Generator') 154 | plt.legend(loc='upper right') 155 | plt.savefig('Loss.png') 156 | plt.show() 157 | imageio.mimsave(os.path.join(OUTPUT_DIR, 'samples.gif'), samples, fps=5) 158 | 159 | saver = tf.train.Saver() 160 | saver.save(sess, './mnist_cgan', global_step=60000) 161 | 162 | -------------------------------------------------------------------------------- /datasets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koryako/FundamentalsOfAI_book_code/ef5bd1ead892dc07315a0eec56f826263709e676/datasets/.DS_Store -------------------------------------------------------------------------------- /datasets/mnist.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koryako/FundamentalsOfAI_book_code/ef5bd1ead892dc07315a0eec56f826263709e676/datasets/mnist.npz -------------------------------------------------------------------------------- /datasets/mnist_data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koryako/FundamentalsOfAI_book_code/ef5bd1ead892dc07315a0eec56f826263709e676/datasets/mnist_data.zip -------------------------------------------------------------------------------- /datasets/mnist_data/t10k-images-idx3-ubyte.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/koryako/FundamentalsOfAI_book_code/ef5bd1ead892dc07315a0eec56f826263709e676/datasets/mnist_data/t10k-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /datasets/mnist_data/t10k-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koryako/FundamentalsOfAI_book_code/ef5bd1ead892dc07315a0eec56f826263709e676/datasets/mnist_data/t10k-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /datasets/mnist_data/train-images-idx3-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koryako/FundamentalsOfAI_book_code/ef5bd1ead892dc07315a0eec56f826263709e676/datasets/mnist_data/train-images-idx3-ubyte.gz -------------------------------------------------------------------------------- /datasets/mnist_data/train-labels-idx1-ubyte.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koryako/FundamentalsOfAI_book_code/ef5bd1ead892dc07315a0eec56f826263709e676/datasets/mnist_data/train-labels-idx1-ubyte.gz -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | #https://github.com/OlafenwaMoses/ImageAI 2 | 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | from imageai.Detection import ObjectDetection 7 | import os 8 | 9 | execution_path = os.getcwd() 10 | 11 | detector = ObjectDetection() 12 | detector.setModelTypeAsRetinaNet() 13 | detector.setModelPath( os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5")) 14 | detector.loadModel() 15 | detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image2.jpg"), output_image_path=os.path.join(execution_path , "image2new.jpg")) 16 | for eachObject in detections: 17 | print(eachObject["name"] , " : ", eachObject["percentage_probability"], " : ", eachObject["box_points"] ) 18 | print("--------------------------------") 19 | 20 | 21 | -------------------------------------------------------------------------------- /face.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import sys 4 | 5 | reload(sys) 6 | 7 | sys.setdefaultencoding('utf8') 8 | 9 | 10 | 11 | import cv2 12 | 13 | # path of the image to be detected 14 | 15 | imagepath = r'./heat.jpg' 16 | 17 | # load the parameters of a trained face model; here the default one from GitHub is used directly 18 | 19 | face_cascade = cv2.CascadeClassifier(r'./haarcascade_frontalface_default.xml') 20 | #https://github.com/opencv/opencv/tree/master/data/haarcascades feature models 21 | # read the image 22 | 23 | image = cv2.imread(imagepath) 24 | 25 | gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY) 26 | 27 | # detect the faces in the image 28 | 29 | faces = face_cascade.detectMultiScale( 30 | 31 | gray, 32 | 33 | scaleFactor = 1.15, 34 | 35 | minNeighbors = 5, 36 | 37 | minSize = (5,5), 38 | 39 | flags = cv2.cv.CV_HAAR_SCALE_IMAGE 40 | 41 | ) 42 | 43 | print "Found {0} faces!".format(len(faces)) 44 | 45 | for(x,y,w,h) in faces: 46 | 47 | # cv2.rectangle(image,(x,y),(x+w,y+w),(0,255,0),2) 48 | 49 | cv2.circle(image,((x+x+w)/2,(y+y+h)/2),w/2,(0,255,0),2) 50 | 51 | 52 | cv2.imshow("Find Faces!",image) 53 | 54 | cv2.waitKey(0) 55 | 56 | 57 | model = Sequential() 58 | 59 |
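# NOTE: the Keras snippet below was pasted into this file without its imports or
# data pipeline; with the Keras 1.x API it uses (Convolution2D, border_mode=...,
# init=...), it assumes imports along these lines were done further up, and that
# X_train/y_train (96x96 grayscale faces with 30 target coordinates, i.e.
# presumably 15 (x, y) keypoint pairs) and `region` are provided elsewhere:
# from keras.models import Sequential
# from keras.layers import BatchNormalization, Convolution2D, Activation, MaxPooling2D, GlobalAveragePooling2D, Dense
# from keras.callbacks import ModelCheckpoint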
model.add(BatchNormalization(input_shape=(96, 96, 1))) 60 | 61 | model.add(Convolution2D(24, 5, 5, border_mode="same", 62 | 63 | init='he_normal', input_shape=(96, 96, 1), 64 | dim_ordering="tf")) 65 | model.add(Activation("relu")) 66 | model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), 67 | border_mode="valid")) 68 | 69 | model.add(Convolution2D(36, 5, 5)) 70 | 71 | model.add(Activation("relu")) 72 | model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), 73 | border_mode="valid")) 74 | 75 | model.add(Convolution2D(48, 5, 5)) 76 | model.add(Activation("relu")) 77 | model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), 78 | border_mode="valid")) 79 | 80 | model.add(Convolution2D(64, 3, 3)) 81 | model.add(Activation("relu")) 82 | model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), 83 | border_mode="valid")) 84 | 85 | model.add(Convolution2D(64, 3, 3)) 86 | model.add(Activation("relu")) 87 | 88 | model.add(GlobalAveragePooling2D()) 89 | 90 | model.add(Dense(500, activation="relu")) 91 | model.add(Dense(90, activation="relu")) 92 | model.add(Dense(30)) 93 | 94 | model.compile(optimizer='rmsprop', loss='mse', metrics= 95 | 96 | ['accuracy']) 97 | 98 | checkpointer = ModelCheckpoint(filepath='face_model.h5', 99 | verbose=1, save_best_only=True) 100 | 101 | epochs = 30 102 | 103 | hist = model.fit(X_train, y_train, validation_split=0.2, 104 | shuffle=True, epochs=epochs, batch_size=20, callbacks= 105 | [checkpointer], verbose=1) 106 | 107 | features = model.predict(region, batch_size=1) 108 | """ 109 | If the steps above still do not meet your needs, you can go further: 110 | 111 | experiment with reducing the number of convolution layers and filters while keeping accuracy and improving inference speed; 112 | 113 | use transfer learning to replace the convolutional part (Xception is my favourite); 114 | 115 | use a more detailed database; 116 | 117 | do some advanced image augmentation to improve robustness. 118 | 119 | If you still find this too simple, it is recommended that you learn some 3D processing; you can look at how Facebook and NVIDIA do face recognition and tracking. 120 | 121 | In addition, you can use what you have learned here for some novel things (which you may always have wanted to do without knowing how): 122 | 123 | during a video chat, place fun images on faces, such as sunglasses, funny hats, moustaches and so on; 124 | 125 | swap faces, including yours and your friends', animals and objects; 126 | 127 | do product tests with new hairstyles, jewellery and makeup in a live selfie video; 128 | 129 | detect whether your employees have drunk too much to be fit for certain tasks; 130 | 131 | extract the currently popular expressions from people's feedback; 132 | 133 | use generative adversarial networks (GANs) for real-time face-to-cartoon conversion, and use the network to synchronize real faces with animated cartoon expressions in real time. 134 | 135 | OK - you have now learned how to make your own video-chat filter, go and build a fun one 136 | """ 137 | 138 | 139 | #include "opencv2/objdetect.hpp" 140 | #include "opencv2/core.hpp" 141 | #include "opencv2/imgproc.hpp" 142 | #include "opencv2/highgui.hpp" 143 | #include <stdio.h> 144 | #include <iostream> 145 | 146 | using namespace cv; 147 | using namespace std; 148 | 149 | int resize_save(Mat& faceIn, char *path, int FaceSeq); 150 | int get_face(char *path); 151 | 152 | int main(int argc, char *argv[]) 153 | { 154 | if(argc != 2) 155 | { 156 | printf("usage: %s \n", argv[0]); 157 | return -1; 158 | } 159 | 160 | get_face(argv[1]); 161 | 162 | return 0; 163 | } 164 | 165 | int get_face(char *path) 166 | { 167 | CascadeClassifier face_cascade; 168 | VideoCapture camera; 169 | char key = 0; 170 | Mat frame; 171 | int ret = 0; 172 | int faceNum = 1; 173 | vector<Rect> faces; 174 | Mat img_gray; 175 | Mat faceImg; 176 | 177 | camera.open(0); // open the camera 178 | if(!camera.isOpened()) 179 | { 180 | cout << "open camera failed. " << endl; 181 | return -1; 182 | } 183 | cout << "open camera succeed. " << endl; 184 | 185 | // load the face classifier 186 | ret = face_cascade.load("/root/library/opencv/opencv-3.2.0/data/haarcascades/haarcascade_frontalface_alt2.xml"); 187 | if( !ret ) 188 | { 189 | printf("load xml failed.\n"); 190 | return -1; 191 | } 192 | cout << "load xml succeed.
" << endl; 193 | 194 | while (1) 195 | { 196 | camera >> frame; 197 | if(frame.empty()) 198 | { 199 | continue; 200 | } 201 | 202 | cvtColor(frame, img_gray, COLOR_BGR2GRAY); 203 | equalizeHist(img_gray, img_gray); 204 | 205 | // 检测目标 206 | face_cascade.detectMultiScale(img_gray, faces, 1.1, 3, 0, Size(50, 50)); 207 | 208 | for(size_t i =0; i 100) 257 | { 258 | resize(faceIn, faceOut, Size(92, 112)); // 调整大小,这里选择与官方人脸库图片大小兼容 259 | strName = format("%s/%d.jpg", path, FaceSeq); // 先要创建文件夹 260 | ret = imwrite(strName, faceOut); // 文件名后缀要正确 .jpg .bmp ... 261 | if(ret == false) // 出现错误,请检测文件名后缀、文件路径是否存在 262 | { 263 | printf("imwrite failed!\n"); 264 | printf("please check filename[%s] is legal ?!\n", strName.c_str()); 265 | return -1; 266 | } 267 | imshow(strName, faceOut); 268 | } 269 | waitKey(20); 270 | 271 | return 0; 272 | } 273 | 274 | --------------------- 275 | 作者:曾哥哥_zeng 276 | 来源:CSDN 277 | 原文:https://blog.csdn.net/qq_30155503/article/details/79776485 278 | 版权声明:本文为博主原创文章,转载请附上博文链接! -------------------------------------------------------------------------------- /image2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koryako/FundamentalsOfAI_book_code/ef5bd1ead892dc07315a0eec56f826263709e676/image2.jpg -------------------------------------------------------------------------------- /image2new.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koryako/FundamentalsOfAI_book_code/ef5bd1ead892dc07315a0eec56f826263709e676/image2new.jpg -------------------------------------------------------------------------------- /img/ddpg.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koryako/FundamentalsOfAI_book_code/ef5bd1ead892dc07315a0eec56f826263709e676/img/ddpg.jpeg -------------------------------------------------------------------------------- /input_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | """Functions for downloading and reading MNIST data.""" 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | import gzip 20 | import os 21 | import tensorflow.python.platform 22 | import numpy 23 | from six.moves import urllib 24 | from six.moves import xrange # pylint: disable=redefined-builtin 25 | import tensorflow as tf 26 | SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/' 27 | def maybe_download(filename, work_directory): 28 | """Download the data from Yann's website, unless it's already here.""" 29 | if not os.path.exists(work_directory): 30 | os.mkdir(work_directory) 31 | filepath = os.path.join(work_directory, filename) 32 | if not os.path.exists(filepath): 33 | filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath) 34 | statinfo = os.stat(filepath) 35 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 36 | return filepath 37 | def _read32(bytestream): 38 | dt = numpy.dtype(numpy.uint32).newbyteorder('>') 39 | return numpy.frombuffer(bytestream.read(4), dtype=dt)[0] 40 | def extract_images(filename): 41 | """Extract the images into a 4D uint8 numpy array [index, y, x, depth].""" 42 | print('Extracting', filename) 43 | with gzip.open(filename) as bytestream: 44 | magic = _read32(bytestream) 45 | if magic != 2051: 46 | raise ValueError( 47 | 'Invalid magic number %d in MNIST image file: %s' % 48 | (magic, filename)) 49 | num_images = _read32(bytestream) 50 | rows = _read32(bytestream) 51 | cols = _read32(bytestream) 52 | buf = bytestream.read(rows * cols * num_images) 53 | data = numpy.frombuffer(buf, dtype=numpy.uint8) 54 | data = data.reshape(num_images, rows, cols, 1) 55 | return data 56 | def dense_to_one_hot(labels_dense, num_classes=10): 57 | """Convert class labels from scalars to one-hot vectors.""" 58 | num_labels = labels_dense.shape[0] 59 | index_offset = numpy.arange(num_labels) * num_classes 60 | labels_one_hot = numpy.zeros((num_labels, num_classes)) 61 | labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 62 | return labels_one_hot 63 | def extract_labels(filename, one_hot=False): 64 | """Extract the labels into a 1D uint8 numpy array [index].""" 65 | print('Extracting', filename) 66 | with gzip.open(filename) as bytestream: 67 | magic = _read32(bytestream) 68 | if magic != 2049: 69 | raise ValueError( 70 | 'Invalid magic number %d in MNIST label file: %s' % 71 | (magic, filename)) 72 | num_items = _read32(bytestream) 73 | buf = bytestream.read(num_items) 74 | labels = numpy.frombuffer(buf, dtype=numpy.uint8) 75 | if one_hot: 76 | return dense_to_one_hot(labels) 77 | return labels 78 | class DataSet(object): 79 | def __init__(self, images, labels, fake_data=False, one_hot=False, 80 | dtype=tf.float32): 81 | """Construct a DataSet. 82 | one_hot arg is used only if fake_data is true. `dtype` can be either 83 | `uint8` to leave the input as `[0, 255]`, or `float32` to rescale into 84 | `[0, 1]`. 
85 | """ 86 | dtype = tf.as_dtype(dtype).base_dtype 87 | if dtype not in (tf.uint8, tf.float32): 88 | raise TypeError('Invalid image dtype %r, expected uint8 or float32' % 89 | dtype) 90 | if fake_data: 91 | self._num_examples = 10000 92 | self.one_hot = one_hot 93 | else: 94 | assert images.shape[0] == labels.shape[0], ( 95 | 'images.shape: %s labels.shape: %s' % (images.shape, 96 | labels.shape)) 97 | self._num_examples = images.shape[0] 98 | # Convert shape from [num examples, rows, columns, depth] 99 | # to [num examples, rows*columns] (assuming depth == 1) 100 | assert images.shape[3] == 1 101 | images = images.reshape(images.shape[0], 102 | images.shape[1] * images.shape[2]) 103 | if dtype == tf.float32: 104 | # Convert from [0, 255] -> [0.0, 1.0]. 105 | images = images.astype(numpy.float32) 106 | images = numpy.multiply(images, 1.0 / 255.0) 107 | self._images = images 108 | self._labels = labels 109 | self._epochs_completed = 0 110 | self._index_in_epoch = 0 111 | @property 112 | def images(self): 113 | return self._images 114 | @property 115 | def labels(self): 116 | return self._labels 117 | @property 118 | def num_examples(self): 119 | return self._num_examples 120 | @property 121 | def epochs_completed(self): 122 | return self._epochs_completed 123 | def next_batch(self, batch_size, fake_data=False): 124 | """Return the next `batch_size` examples from this data set.""" 125 | if fake_data: 126 | fake_image = [1] * 784 127 | if self.one_hot: 128 | fake_label = [1] + [0] * 9 129 | else: 130 | fake_label = 0 131 | return [fake_image for _ in xrange(batch_size)], [ 132 | fake_label for _ in xrange(batch_size)] 133 | start = self._index_in_epoch 134 | self._index_in_epoch += batch_size 135 | if self._index_in_epoch > self._num_examples: 136 | # Finished epoch 137 | self._epochs_completed += 1 138 | # Shuffle the data 139 | perm = numpy.arange(self._num_examples) 140 | numpy.random.shuffle(perm) 141 | self._images = self._images[perm] 142 | self._labels = self._labels[perm] 143 | # Start next epoch 144 | start = 0 145 | self._index_in_epoch = batch_size 146 | assert batch_size <= self._num_examples 147 | end = self._index_in_epoch 148 | return self._images[start:end], self._labels[start:end] 149 | def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=tf.float32): 150 | class DataSets(object): 151 | pass 152 | data_sets = DataSets() 153 | if fake_data: 154 | def fake(): 155 | return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) 156 | data_sets.train = fake() 157 | data_sets.validation = fake() 158 | data_sets.test = fake() 159 | return data_sets 160 | TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' 161 | TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' 162 | TEST_IMAGES = 't10k-images-idx3-ubyte.gz' 163 | TEST_LABELS = 't10k-labels-idx1-ubyte.gz' 164 | VALIDATION_SIZE = 5000 165 | local_file = maybe_download(TRAIN_IMAGES, train_dir) 166 | train_images = extract_images(local_file) 167 | local_file = maybe_download(TRAIN_LABELS, train_dir) 168 | train_labels = extract_labels(local_file, one_hot=one_hot) 169 | local_file = maybe_download(TEST_IMAGES, train_dir) 170 | test_images = extract_images(local_file) 171 | local_file = maybe_download(TEST_LABELS, train_dir) 172 | test_labels = extract_labels(local_file, one_hot=one_hot) 173 | validation_images = train_images[:VALIDATION_SIZE] 174 | validation_labels = train_labels[:VALIDATION_SIZE] 175 | train_images = train_images[VALIDATION_SIZE:] 176 | train_labels = train_labels[VALIDATION_SIZE:] 177 | data_sets.train = 
DataSet(train_images, train_labels, dtype=dtype) 178 | data_sets.validation = DataSet(validation_images, validation_labels, 179 | dtype=dtype) 180 | data_sets.test = DataSet(test_images, test_labels, dtype=dtype) 181 | return data_sets -------------------------------------------------------------------------------- /iris_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 105, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "1.10.1\n", 13 | "(120, 5)\n", 14 | "(30, 5)\n" 15 | ] 16 | }, 17 | { 18 | "data": { 19 | "text/html": [ 20 | "
\n", 21 | "\n", 34 | "\n", 35 | " \n", 36 | " \n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | "
abcdt
06.42.85.62.22
15.02.33.31.01
24.92.54.51.72
34.93.11.50.10
45.73.81.70.30
\n", 88 | "
" 89 | ], 90 | "text/plain": [ 91 | " a b c d t\n", 92 | "0 6.4 2.8 5.6 2.2 2\n", 93 | "1 5.0 2.3 3.3 1.0 1\n", 94 | "2 4.9 2.5 4.5 1.7 2\n", 95 | "3 4.9 3.1 1.5 0.1 0\n", 96 | "4 5.7 3.8 1.7 0.3 0" 97 | ] 98 | }, 99 | "execution_count": 105, 100 | "metadata": {}, 101 | "output_type": "execute_result" 102 | } 103 | ], 104 | "source": [ 105 | "import pandas as pd\n", 106 | "import tensorflow as tf\n", 107 | "print (tf.__version__)\n", 108 | "column=[\"a\",\"b\",\"c\",\"d\",\"t\"]\n", 109 | "#column=[\"花萼长度\",\"花萼宽度\",\"花瓣长度\",\"花瓣宽度\",\"花的种类\"]\n", 110 | "data_train=pd.read_csv('alldata/iris_training.csv',names=column,header=0)\n", 111 | "data_test=pd.read_csv('alldata/iris_test.csv',names=column,header=0)\n", 112 | "print data_train.shape\n", 113 | "print data_test.shape\n", 114 | "data_train.head()" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 89, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "0 2\n", 126 | "1 1\n", 127 | "2 2\n", 128 | "3 0\n", 129 | "4 0\n", 130 | "Name: t, dtype: int64" 131 | ] 132 | }, 133 | "execution_count": 89, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "train_x, train_y = data_train, data_train.pop('t')\n", 140 | "test_x, test_y = data_test, data_test.pop('t')\n", 141 | "train_x.shape\n", 142 | "train_y.head()\n", 143 | "#train_x.head()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 90, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "[_NumericColumn(key='a', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='b', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='c', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='d', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "my_feature_columns=[]\n", 161 | "for key in train_x.keys():\n", 162 | " my_feature_columns.append(tf.feature_column.numeric_column(key=key))\n", 163 | "print(my_feature_columns)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 91, 169 | "metadata": {}, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "INFO:tensorflow:Using default config.\n", 176 | "WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmp3sxmlI\n", 177 | "INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_cluster_spec': , '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_device_fn': None, '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': '/tmp/tmp3sxmlI', '_train_distribute': None, '_save_summary_steps': 100}\n" 178 | ] 179 | } 180 | ], 181 | "source": [ 182 | "classifier = tf.estimator.DNNClassifier(\n", 183 | " # 这个模型接受哪些输入的特征\n", 184 | " feature_columns=my_feature_columns,\n", 185 | " # 包含两个隐藏层,每个隐藏层包含10个神经元.\n", 186 | " hidden_units=[10, 10],\n", 187 | " # 最终结果要分成的几类\n", 188 | " n_classes=3)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 141, 194 | "metadata": {}, 195 | 
"outputs": [], 196 | "source": [ 197 | "def train_func(train_x,train_y):\n", 198 | " dataset=tf.data.Dataset.from_tensor_slices((dict(train_x), train_y))\n", 199 | " dataset = dataset.shuffle(200).repeat().batch(1)\n", 200 | " return dataset" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 142, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "INFO:tensorflow:Calling model_fn.\n", 213 | "INFO:tensorflow:Done calling model_fn.\n", 214 | "INFO:tensorflow:Create CheckpointSaverHook.\n", 215 | "INFO:tensorflow:Graph was finalized.\n", 216 | "INFO:tensorflow:Restoring parameters from /tmp/tmp3sxmlI/model.ckpt-13000\n", 217 | "INFO:tensorflow:Running local_init_op.\n", 218 | "INFO:tensorflow:Done running local_init_op.\n", 219 | "INFO:tensorflow:Saving checkpoints for 13000 into /tmp/tmp3sxmlI/model.ckpt.\n", 220 | "INFO:tensorflow:loss = 0.0040664137, step = 13000\n", 221 | "INFO:tensorflow:global_step/sec: 662.445\n", 222 | "INFO:tensorflow:loss = 0.005831965, step = 13100 (0.153 sec)\n", 223 | "INFO:tensorflow:global_step/sec: 877.301\n", 224 | "INFO:tensorflow:loss = 0.00026008085, step = 13200 (0.114 sec)\n", 225 | "INFO:tensorflow:global_step/sec: 854.394\n", 226 | "INFO:tensorflow:loss = 0.0004749362, step = 13300 (0.117 sec)\n", 227 | "INFO:tensorflow:global_step/sec: 825.559\n", 228 | "INFO:tensorflow:loss = 0.00013386307, step = 13400 (0.121 sec)\n", 229 | "INFO:tensorflow:global_step/sec: 803.645\n", 230 | "INFO:tensorflow:loss = 0.17415029, step = 13500 (0.124 sec)\n", 231 | "INFO:tensorflow:global_step/sec: 788.668\n", 232 | "INFO:tensorflow:loss = 2.3722367e-05, step = 13600 (0.127 sec)\n", 233 | "INFO:tensorflow:global_step/sec: 870.444\n", 234 | "INFO:tensorflow:loss = 6.413254e-05, step = 13700 (0.115 sec)\n", 235 | "INFO:tensorflow:global_step/sec: 743.583\n", 236 | "INFO:tensorflow:loss = 0.0058170315, step = 13800 (0.134 sec)\n", 237 | "INFO:tensorflow:global_step/sec: 792.38\n", 238 | "INFO:tensorflow:loss = 3.373566e-05, step = 13900 (0.127 sec)\n", 239 | "INFO:tensorflow:Saving checkpoints for 14000 into /tmp/tmp3sxmlI/model.ckpt.\n", 240 | "INFO:tensorflow:Loss for final step: 0.00046993178.\n" 241 | ] 242 | }, 243 | { 244 | "data": { 245 | "text/plain": [ 246 | "" 247 | ] 248 | }, 249 | "execution_count": 142, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "classifier.train(input_fn=lambda:train_func(train_x,train_y),steps=1000)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 143, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "def eval_input_fn(features, labels, batch_size):\n", 265 | " features=dict(features)\n", 266 | " if labels is None:\n", 267 | " # No labels, use only features.\n", 268 | " inputs = features\n", 269 | " else:\n", 270 | " inputs = (features, labels)\n", 271 | " dataset = tf.data.Dataset.from_tensor_slices(inputs)\n", 272 | " \n", 273 | " assert batch_size is not None, \"batch_size must not be None\"\n", 274 | " dataset = dataset.batch(batch_size)\n", 275 | " return dataset" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 146, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "name": "stdout", 285 | "output_type": "stream", 286 | "text": [ 287 | "INFO:tensorflow:Calling model_fn.\n", 288 | "INFO:tensorflow:Done calling model_fn.\n", 289 | "INFO:tensorflow:Graph was finalized.\n", 290 | 
"INFO:tensorflow:Restoring parameters from /tmp/tmp3sxmlI/model.ckpt-14000\n", 291 | "INFO:tensorflow:Running local_init_op.\n", 292 | "INFO:tensorflow:Done running local_init_op.\n", 293 | "准确率为 96.6666666667\n" 294 | ] 295 | } 296 | ], 297 | "source": [ 298 | "from __future__ import division\n", 299 | "predict_arr = []\n", 300 | "predictions = classifier.predict(\n", 301 | " input_fn=lambda:eval_input_fn(test_x,labels=test_y,batch_size=100))\n", 302 | "for predict in predictions:\n", 303 | " predict_arr.append(predict['probabilities'].argmax())\n", 304 | "result = predict_arr == test_y\n", 305 | "result1 = [w for w in result if w == True]\n", 306 | "print(\"准确率为 %s\"%str((len(result1)/len(result)*100)))\n" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [] 315 | } 316 | ], 317 | "metadata": { 318 | "kernelspec": { 319 | "display_name": "Python 2", 320 | "language": "python", 321 | "name": "python2" 322 | }, 323 | "language_info": { 324 | "codemirror_mode": { 325 | "name": "ipython", 326 | "version": 2 327 | }, 328 | "file_extension": ".py", 329 | "mimetype": "text/x-python", 330 | "name": "python", 331 | "nbconvert_exporter": "python", 332 | "pygments_lexer": "ipython2", 333 | "version": "2.7.12" 334 | } 335 | }, 336 | "nbformat": 4, 337 | "nbformat_minor": 2 338 | } 339 | -------------------------------------------------------------------------------- /mate-learn.py: -------------------------------------------------------------------------------- 1 | def get_params(module, memo=None, pointers=None): 2 | """ Returns an iterator over PyTorch module parameters that allows to update parameters 3 | (and not only the data). 4 | ! Side effect: update shared parameters to point to the first yield instance 5 | (i.e. 
you can update shared parameters and keep them shared) 6 | Yields: 7 | (Module, string, Parameter): Tuple containing the parameter's module, name and pointer 8 | """ 9 | if memo is None: 10 | memo = set() 11 | pointers = {} 12 | for name, p in module._parameters.items(): 13 | if p not in memo: 14 | memo.add(p) 15 | pointers[p] = (module, name) 16 | yield module, name, p 17 | elif p is not None: 18 | prev_module, prev_name = pointers[p] 19 | module._parameters[name] = prev_module._parameters[prev_name] # update shared parameter pointer 20 | for child_module in module.children(): 21 | for m, n, p in get_params(child_module, memo, pointers): 22 | yield m, n, p 23 | 24 | 25 | class MetaLearner(nn.Module): 26 | """ Bare Meta-learner class 27 | Should be added: intialization, hidden states, more control over everything 28 | """ 29 | def __init__(self, model): 30 | super(MetaLearner, self).__init__() 31 | self.weights = Parameter(torch.Tensor(1, 2)) 32 | 33 | def forward(self, forward_model, backward_model): 34 | """ Forward optimizer with a simple linear neural net 35 | Inputs: 36 | forward_model: PyTorch module with parameters gradient populated 37 | backward_model: PyTorch module identical to forward_model (but without gradients) 38 | updated at the Parameter level to keep track of the computation graph for meta-backward pass 39 | """ 40 | f_model_iter = get_params(forward_model) 41 | b_model_iter = get_params(backward_model) 42 | for f_param_tuple, b_param_tuple in zip(f_model_iter, b_model_iter): # loop over parameters 43 | # Prepare the inputs, we detach the inputs to avoid computing 2nd derivatives (re-pack in new Variable) 44 | (module_f, name_f, param_f) = f_param_tuple 45 | (module_b, name_b, param_b) = b_param_tuple 46 | inputs = Variable(torch.stack([param_f.grad.data, param_f.data], dim=-1)) 47 | # Optimization step: compute new model parameters, here we apply a simple linear function 48 | dW = F.linear(inputs, self.weights).squeeze() 49 | param_b = param_b + dW 50 | # Update backward_model (meta-gradients can flow) and forward_model (no need for meta-gradients). 51 | module_b._parameters[name_b] = param_b 52 | param_f.data = param_b.data 53 | 54 | 55 | def train(forward_model, backward_model, optimizer, meta_optimizer, train_data, meta_epochs): 56 | """ Train a meta-learner 57 | Inputs: 58 | forward_model, backward_model: Two identical PyTorch modules (can have shared Tensors) 59 | optimizer: a neural net to be used as optimizer (an instance of the MetaLearner class) 60 | meta_optimizer: an optimizer for the optimizer neural net, e.g. 
ADAM 61 | train_data: an iterator over an epoch of training data 62 | meta_epochs: meta-training steps 63 | To be added: initialization, early stopping, checkpointing, more control over everything 64 | """ 65 | for meta_epoch in range(meta_epochs): # Meta-training loop (train the optimizer) 66 | optimizer.zero_grad() 67 | losses = [] 68 | for inputs, labels in train_data: # Meta-forward pass (train the model) 69 | forward_model.zero_grad() # Forward pass 70 | inputs = Variable(inputs) 71 | labels = Variable(labels) 72 | output = forward_model(inputs) 73 | loss = loss_func(output, labels) # Compute loss 74 | losses.append(loss) 75 | loss.backward() # Backward pass to add gradients to the forward_model 76 | optimizer(forward_model, # Optimizer step (update the models) 77 | backward_model) 78 | meta_loss = sum(losses) # Compute a simple meta-loss 79 | meta_loss.backward() # Meta-backward pass 80 | meta_optimizer.step() -------------------------------------------------------------------------------- /mnist_demo.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import input_data 3 | mnist = input_data.read_data_sets("datasets/MNIST_data/", one_hot=True) 4 | 5 | xs=tf.placeholder(tf.float32,[None,784]) 6 | ys=tf.placeholder(tf.float32,[None,10]) 7 | 8 | def add_layer(inputs,in_size,out_size,activation_function=None,): 9 | Weights=tf.Variable(tf.random_normal([in_size,out_size])) 10 | biases=tf.Variable(tf.zeros([1,out_size])+0.1,) 11 | Wx_plus_b=tf.matmul(inputs,Weights)+biases 12 | if activation_function is None: 13 | outputs=Wx_plus_b 14 | else: 15 | outputs=activation_function(Wx_plus_b,) 16 | return outputs 17 | # compute the accuracy 18 | def compute_accuracy(v_xs,v_ys): 19 | global prediction 20 | y_pre=sess.run(prediction,feed_dict={xs:v_xs}) 21 | correct_prediction=tf.equal(tf.argmax(y_pre,1),tf.argmax(v_ys,1)) 22 | accuracy=tf.reduce_mean(tf.cast(correct_prediction,tf.float32)) 23 | result=sess.run(accuracy,feed_dict={xs:v_xs,ys:v_ys}) 24 | return result 25 | 26 | prediction=add_layer(xs,784,10,activation_function=tf.nn.softmax) 27 | cross_entropy=tf.reduce_mean(-tf.reduce_sum(ys*tf.log(prediction),reduction_indices=[1])) 28 | train_step=tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 29 | sess=tf.Session() 30 | sess.run(tf.global_variables_initializer()) 31 | 32 | for i in range(1000): 33 | batch_xs,batch_ys=mnist.train.next_batch(100) 34 | sess.run(train_step,feed_dict={xs:batch_xs,ys:batch_ys}) 35 | if i % 50==0: 36 | print(compute_accuracy(mnist.test.images,mnist.test.labels)) 37 | 38 | -------------------------------------------------------------------------------- /router.py: -------------------------------------------------------------------------------- 1 | """ 2 | https://blog.csdn.net/XpxiaoKr/article/details/51153259 3 | https://blog.csdn.net/XpxiaoKr/article/details/51153259 4 | solve the logistics problem with genetic algorithms 5 | """ 6 | # -*- coding: utf-8 -*- 7 | import math 8 | import numpy as np 9 | import pandas as pd 10 | import random as rnd 11 | import matplotlib.pyplot as plt 12 | import copy 13 | 14 | """ 15 | instead of hard-coding itemList below, it could be read 16 | from the file named 'logistic.csv' using "with open()" 17 | """ 18 | itemList = \ 19 | [[0, 0.0, 'Vijayawada', 19.4, 15.1, 0, 0, 0.0], 20 | [1, 1.9323814116266074, 'Tanguturu', 14.5, 5.3, 6600, 2, 1.0], 21 | [2, 20.857190790708088, 'Podili', 10.7, 7.0, 24000, 2, 0.5], 22 | [3, 33.077041744388232, 'Ongole', 14.5, 6.2, 305000, 4, 2.5], 23 | [4, 33.077041744388232, 'Markapur', 7.7, 8.2,
60000, 2, 0.5], 24 | [5, 34.486376839557948, 'KaniGiri', 9.6, 5.1, 24000, 2, 2.5], 25 | [6, 34.486376839557948, 'Kondukur', 13.2, 3.5, 90000, 2, 1.0], 26 | [7, 35.762110362784796, 'Giddalur', 3.8, 5.0, 25000, 2, 1.0], 27 | [8, 35.762110362784796, 'Chirala', 17.2, 9.0, 98000, 4, 2.0], 28 | [9, 41.826126709510191, 'Bestavapetta', 6.3, 6.3, 25000, 2, 0.5], 29 | [10, 46.397278485704312, 'Addanki', 13.9, 8.8, 60000, 2, 0.5], 30 | [11, 47.372912618077464, 'Chilakalurupet', 15.4, 11.4, 92000, 2, 1.0], 31 | [12, 59.32442405620133, 'Narasaraopet', 14.5, 12.5, 100000, 4, 1.0], 32 | [13, 62.987927732225053, 'Vinukonda', 11.8, 11.0, 65000, 2, 1.0], 33 | [14, 62.987927732225053, 'Tadikonda', 18.1, 14.3, 60000, 2, 1.0], 34 | [15, 66.744129468890392, 'Sattenapalle', 15.2, 14.0, 45000, 2, 1.0], 35 | [16, 68.538275316497419, 'Repalie', 21.3, 10.6, 50000, 2, 1.0], 36 | [17, 73.544821959944969, 'Guntur', 18.0, 13.0, 450000, 4, 3.0], 37 | [18, 73.544821959944969, 'Vuyyuru', 21.3, 13.6, 39000, 4, 1.0], 38 | [19, 74.453128626270612, 'Tenali', 19.7, 12.5, 140000, 4, 1.0], 39 | [20, 75.585513278670021, 'Pamarru', 22.3, 13.2, 62000, 2, 1.0], 40 | [21, 75.795182234229088, 'Nuzvid', 21.3, 17.5, 37000, 2, 0.5], 41 | [22, 75.795182234229088, 'Machilipatnam', 23.8, 12.0, 108000, 4, 1.0], 42 | [23, 77.065444537483828, 'Kaikalur', 24.4, 15.5, 48000, 2, 1.0], 43 | [24, 88.605535249215663, 'Jaggayyapeta', 14.9, 18.5, 37000, 2, 0.5], 44 | [25, 88.605535249215663, 'HanumenJunction', 19.5, 15.2, 50000, 2, 1.0], 45 | [26, 89.19358507516111, 'Gudivada', 22.7, 14.3, 180000, 2, 1.0], 46 | [27, 89.19358507516111, 'Bapatia', 18.2, 9.7, 82000, 2, 1.0], 47 | [28, 107.98018686407246, 'Rajahmundry', 29.5, 19.6, 470000, 4, 3.5], 48 | [29, 107.98018686407246, 'Mandapeta', 30.8, 18.3, 170000, 2, 2.0], 49 | [30, 114.27222071107218, 'Narasapur', 28.7, 14.5, 160000, 2, 1.0], 50 | [31, 117.99400024882618, 'Amaiapuram', 31.5, 15.6, 90000, 2, 1.0], 51 | [32, 119.69071254729833, 'Kakinada', 33.5, 19.1, 228000, 4, 2.0], 52 | [33, 119.69071254729833, 'Kovvur', 29.0, 19.7, 45000, 2, 1.0], 53 | [34, 127.33938989016715, 'Tanuku', 28.8, 17.4, 134000, 2, 1.0], 54 | [35, 132.23053168765526, 'Nidavole', 28.5, 18.7, 50000, 2, 1.0], 55 | [36, 133.71883894919219, 'Tadepallegudem', 27.7, 17.9, 130000, 4, 1.5], 56 | [37, 138.82247427963529, 'Eluru', 23.6, 17.0, 198000, 4, 2.0], 57 | [38, 138.82247427963529, 'Palakolu', 25.9, 15.7, 180000, 4, 1.0], 58 | [39, 145.45583114719054, 'Bhimavaram', 27.3, 15.3, 148000, 4, 1.5]] 59 | 60 | for i in range(len(itemList)): 61 | if itemList[i][-2] == 4: 62 | temp = copy.deepcopy(itemList[i]) 63 | itemList.append(temp) 64 | for i in range(len(itemList)): 65 | itemList[i].append(i) 66 | 67 | # sort the itemList with the distance 68 | # save as item2 69 | item2 = copy.deepcopy(itemList) 70 | item2.sort(key = lambda x:x[1]) 71 | 72 | def getDistance(a, b, item=itemList): 73 | """ 74 | get the distance between two points 75 | :param a: the first point 76 | :param b: the second point 77 | :param item: the itemList 78 | :return: the distance(float) between point a and point b 79 | """ 80 | x1 = item[a][3] 81 | y1 = item[a][4] 82 | x2 = item[b][3] 83 | y2 = item[b][4] 84 | return math.sqrt((x1-x2)**2+(y1-y2)**2) 85 | 86 | dis = [] 87 | for i in range(len(itemList)): 88 | for j in range(len(itemList)): 89 | dis.append(getDistance(i,j)) 90 | 91 | # the matrix of distance 92 | distance = np.array(dis).reshape((53, 53)) * 12.2 * 1.12 93 | 94 | def drawMap(item=itemList): 95 | for i in range(len(item)): 96 | if item[i][-3] == 4: 97 | 
plt.scatter(item[i][3],item[i][4],color='green',s=item[i][5]/3000.0) 98 | elif item[i][-3] == 2: 99 | plt.scatter(item[i][3],item[i][4],color='blue',s=item[i][5]/3000.0) 100 | else: 101 | plt.scatter(item[i][3],item[i][4],color='red',marker='*',s=500) 102 | plt.show() 103 | 104 | def cost(routine, dis=distance): 105 | """ 106 | get the cost of the routine 107 | the labour cost and the delivery cost 108 | :param routine : the routine 109 | :return: the cost(float) of the routine 110 | """ 111 | # the labour cost 112 | labour_cost = 0.0 113 | for i in range(len(routine)): 114 | if i < 4: 115 | if len(routine[i]) != 0: 116 | labour_cost += 13500 117 | else: 118 | if len(routine[i]) != 0: 119 | labour_cost += 7000 120 | 121 | # the delivery cost 122 | delivery_cost = 0.0 123 | for i in range(len(routine)): 124 | if len(routine[i]) == 0: 125 | continue 126 | if i < 4: 127 | for j in range(1,len(routine[i])): 128 | delivery_cost += dis[routine[i][j - 1],routine[i][j]] * 5 129 | else: 130 | for j in range(1,len(routine[i])): 131 | delivery_cost += dis[routine[i][j - 1],routine[i][j]] * 3 132 | return labour_cost+delivery_cost 133 | 134 | def generateRoutine(item=itemList,dis=distance): 135 | # 1-4 for T407 136 | # 5-8 FOR T310 137 | x1list = [] 138 | x2list = [] 139 | x3list = [] 140 | x4list = [] 141 | x5list = [] 142 | x6list = [] 143 | x7list = [] 144 | x8list = [] 145 | routine = [x1list,x2list,x3list,x4list,x5list,x6list,x7list,x8list] 146 | 147 | for i in range(1,len(item)): # except 0 148 | 149 | while True: 150 | # put 52 destinations into random 8 routines 151 | k = rnd.randint(0,7) 152 | # k = rnd.randint(0,5) # 6 trucks 153 | 154 | if item[i][5] > 350000: 155 | k = rnd.randint(0,3) 156 | 157 | # do not use T470 or T307 158 | # you can choose which car will be used 159 | if k == 3 or k == 2 or k == 1: 160 | continue 161 | if k == 6: 162 | continue 163 | 164 | 165 | # init the origin 166 | if len(routine[k]) == 0: 167 | routine[k].append(0) 168 | break 169 | # time constrain 170 | time = 0.0 171 | for j in range(1,len(routine[k])): 172 | time += getDistance(routine[k][j-1],routine[k][j]) / 40.0\ 173 | + item[routine[k][j]][-2] 174 | 175 | if time < 75 : 176 | if item[i][5] > 350000: 177 | if k < 4: 178 | break 179 | else: 180 | break 181 | break 182 | 183 | # T407 184 | if k < 4: 185 | # judge whether the truck can go next destination 186 | if routine[k][-1] != 0: 187 | # loading constrain 188 | last = item[routine[k][-2]][5] 189 | now = 500000 - last 190 | if now < item[routine[k][-1]][5]: 191 | routine[k].append(0) 192 | # time constrain 193 | past = dis[routine[k][-2]][routine[k][-1]] / 40.0 + item[routine[k][-2]][-2] 194 | future = item[routine[k][-1]][-2] + dis[routine[k][-1]][0] / 40.0 195 | if past + future > 8.5: 196 | routine[k].append(0) 197 | 198 | # T310 199 | else: 200 | # judge whether the truck can go next destination 201 | if routine[k][-1] != 0: 202 | # loading constrain 203 | last = item[routine[k][-2]][4] 204 | now = 350000 - last 205 | if now < item[routine[k][-1]][4]: 206 | routine[k].append(0) 207 | # time constrain 208 | past = dis[routine[k][-2]][routine[k][-1]] / 40.0 + item[routine[k][-2]][-2] 209 | future = item[routine[k][-1]][-2] + dis[routine[k][-1]][0] / 40.0 210 | if past + future > 8.5: 211 | routine[k].append(0) 212 | 213 | 214 | routine[k].append(item[i][0]) 215 | 216 | routine[k].append(0) 217 | 218 | return routine 219 | 220 | def randomoptimize(): 221 | """ 222 | random searching 223 | :return: the best routine and the best cost 224 | """ 225 | best 
def randomoptimize():
    """
    Random search.
    :return: the best routine and the best cost
    """
    best = 999999999
    bestr = None
    for i in range(0, 10000):
        # create a random solution
        r = generateRoutine()
        # get its cost
        simplify(r)
        c = cost(r)
        # compare it to the best one so far
        if c < best:
            best = c
            bestr = r
            print(best)
            print(bestr)
    for t in range(len(bestr)):
        print(t, ' >>> ', bestr[t])
    return bestr, best

def getXY(num, item=itemList):
    """
    Get the X and Y coordinates of a point.
    :param num: the index of the point
    :param item: itemList
    :return: the X and Y of the point
    """
    return item[num][3], item[num][4]

def draw(routine, item=itemList):
    """
    Draw the routine.
    :param routine: the input routine
    :param item: itemList
    :return: None. Plots the routine.
    """
    style = ['c-', 'r-', 'g-', 'b-', 'c--', 'r--', 'g--', 'b--']
    for i in range(len(routine)):
        pointX = []
        pointY = []
        for j in range(len(routine[i])):
            pointX.append(getXY(routine[i][j])[0])
            pointY.append(getXY(routine[i][j])[1])
        plt.plot(pointX, pointY, style[i])

def p(routine):
    """
    Pretty-print the routine.
    :param routine: the input routine
    :return: None. Prints one line per truck to the console.
    """
    for t in range(len(routine)):
        print(t, ' >>> ', routine[t])
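# Routine encoding: routine[t] is the stop sequence of truck t, where 0 is the
# depot; a truck may return to the depot mid-route to reload. A hypothetical
# line of p() output therefore looks like:  0  >>>  [0, 12, 7, 0, 3, 0]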
def isValid(routine, dis=distance, item=itemList):
    """
    Judge whether the routine is valid.
    :param routine: the input routine
    :param dis: the distance matrix
    :return: True if the routine is valid, otherwise False
    """
    for k in range(len(routine)):
        for num in range(1, len(routine[k])):
            if routine[k][num] == 0:
                continue
            last = 0.0
            past = 0.0
            for i in range(len(routine[k])):
                if routine[k][num - 1 - i] == 0:
                    break
                last += item[routine[k][num - 1 - i]][5]
                past += dis[routine[k][num - 2 - i], routine[k][num - 1 - i]] / 40.0 + item[routine[k][num - 1 - i]][-2]
            if k < 4:
                now = 500000 - last
            else:
                now = 350000 - last
            future = item[routine[k][num]][-2] + float(dis[routine[k][num], 0] / 40.0 + dis[routine[k][num - 1], routine[k][num]] / 40.0)
            if now < item[routine[k][num]][5]:
                # loading constraint violated
                return False
            if past + future > 14:
                # single-trip time constraint violated
                return False
            total = 0.0
            if num in range(1, len(routine[k])):
                total += item[routine[k][num]][-2] + dis[routine[k][num - 1], routine[k][num]] / 40.0
                if total > 140:
                    # total time out of limit
                    return False

    return True


def simplify(routine, dis=distance, item=itemList):
    """
    Simplify the routine:
    remove the 0 between points that can be reached in one trip,
    add a 0 between points that cannot be reached in one trip.
    :param routine: the input routine
    :param dis: the distance matrix
    :return: the simplified routine
    """
    # remove redundant 0s
    for k in range(len(routine)):
        if len(routine[k]) > 0:
            record = []
            for num in range(2, len(routine[k])):
                if routine[k][num - 1] == 0:
                    if routine[k][num - 2] != 0:
                        last = 0.0
                        past = 0.0
                        for i in range(len(routine[k])):
                            if routine[k][num - 2 - i] == 0:
                                break
                            last += item[routine[k][num - 2 - i]][5]
                            past += float(dis[routine[k][num - 3 - i], routine[k][num - 2 - i]] / 40.0) + item[routine[k][num - 2 - i]][-2]
                        if k < 4:
                            now = 500000 - last
                        else:
                            now = 350000 - last
                        future = item[routine[k][num]][-2] + float(dis[routine[k][num], routine[k][num - 2]] / 40.0 + dis[routine[k][num], 0] / 40.0)
                        if now >= item[routine[k][num]][5]:
                            if past + future < 8.5:
                                record.append(num - 1)
                    else:
                        # consecutive 0s are always redundant
                        record.append(num - 1)
            if len(record) != 0:
                record.reverse()
                for num in record:
                    del routine[k][num]

    # add missing 0s
    for k in range(len(routine)):
        if len(routine[k]) > 0:
            num = 2
            while True:
                if num >= len(routine[k]):
                    break
                if routine[k][num - 1] != 0:
                    last = 0.0
                    past = 0.0
                    for i in range(len(routine[k])):
                        last += item[routine[k][num - 1 - i]][5]
                        past += float(dis[routine[k][num - 2 - i], routine[k][num - 1 - i]] / 40.0) + \
                                item[routine[k][num - 1 - i]][-2]
                        if routine[k][num - 2 - i] == 0:
                            break
                    if k < 4:
                        now = 500000 - last
                    else:
                        now = 350000 - last
                    future = item[routine[k][num]][-2] + float(dis[routine[k][num], routine[k][num - 1]] / 40.0 +
                                                               dis[routine[k][num], 0] / 40.0)
                    if now < item[routine[k][num]][5]:
                        routine[k].insert(num, 0)
                    elif past + future > 8.5:
                        routine[k].insert(num, 0)
                num += 1
    return routine

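# For example (hypothetical loads and distances): simplify() may turn
# [0, 5, 0, 7, 0] into [0, 5, 7, 0] when the truck can serve stop 7 directly
# after stop 5 within the load and time limits, and it will turn [0, 5, 7, 0]
# back into [0, 5, 0, 7, 0] when it cannot.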
def crossover(r):
    """
    Crossover operation on a routine:
    choose two routes, cross over part of them, and check the validity
    of the result.
    :param r: the input routine
    :return: the crossed-over routine
    """
    num = 0
    while True:
        # pick two different non-empty routes
        while True:
            k1 = rnd.randint(0, len(r) - 1)
            k2 = rnd.randint(0, len(r) - 1)
            if len(r[k1]) != 0 and len(r[k2]) != 0 and k1 != k2:
                break

        # swap the tails of the two routes after a random cut point;
        # a simple single-point tail swap (assumed: the exact original
        # operator was lost to garbling in the source)
        length = len(r[k1]) if len(r[k1]) < len(r[k2]) else len(r[k2])
        cut = rnd.randint(1, max(1, length - 1))
        new = [route[:] for route in r]
        new[k1] = r[k1][:cut] + r[k2][cut:]
        new[k2] = r[k2][:cut] + r[k1][cut:]
        if isValid(new):
            return new

        # give up after 100 failed attempts and generate a fresh routine
        if num > 100:
            r = generateRoutine()
            if isValid(r):
                # r = simplify(r)
                return r
        num += 1

def geneticOptimize(item=itemList, popsize=100,
                    elite=0.7, maxiter=100):
    """
    Genetic-algorithm search over routines.
    :param popsize: population size
    :param elite: fraction of each generation kept as survivors
    :param maxiter: number of generations
    :return: the best routine, its cost, and the final scored population
    """
    # build the initial population
    pop = []
    for i in range(popsize):
        routine = generateRoutine()
        pop.append(routine)
    saves = 99999999
    saver = []
    for i in range(maxiter):

        print('===============', i, '===============')

        scores = [(cost(v), v) for v in pop]
        scores.sort(key=lambda x: x[0])  # sort by cost, cheapest first
        ranked = [v for (s, v) in scores]

        if saves > cost(scores[0][1]):
            saves = cost(scores[0][1])
            saver = scores[0][1]
            print(saver)
            print(saves)

        # keep the elite and refill the population with crossovers
        topelite = int(popsize * elite)
        pop = ranked[0:topelite]

        while True:
            c = rnd.randint(0, topelite - 1)
            new = crossover(pop[c])
            print('new--->', new)
            pop.append(new)
            if len(pop) >= popsize:
                break

    return saver, saves, scores


a, b, c = geneticOptimize()
print(a)
print(b)
print('=============scores=============')
print(c[0])
print(c[1])
print(c[2])
---------------------
Author: XpxiaoKr
Source: CSDN
Original: https://blog.csdn.net/XpxiaoKr/article/details/51153259
Copyright notice: this is the blogger's original article; please attach a link to the post when re-publishing.
--------------------------------------------------------------------------------
/sample.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf

x = tf.sparse_placeholder(tf.float32)
y = tf.sparse_reduce_sum(x)

with tf.Session() as sess:
    # print(sess.run(y))  # ERROR: would fail because x was not fed

    indices = np.array([[3, 2, 0], [4, 5, 1]], dtype=np.int64)
    values = np.array([1.0, 2.0], dtype=np.float32)
    shape = np.array([7, 9, 2], dtype=np.int64)
    print(sess.run(y, feed_dict={
        x: tf.SparseTensorValue(indices, values, shape)}))  # will succeed
    print(sess.run(y, feed_dict={
        x: (indices, values, shape)}))  # will succeed

    sp = tf.SparseTensor(indices=indices, values=values, dense_shape=shape)
    sp_value = sp.eval(session=sess)
    print(sess.run(y, feed_dict={x: sp_value}))  # will succeed
--------------------------------------------------------------------------------
/tf-conv-error.py:
--------------------------------------------------------------------------------
from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf

mnist = input_data.read_data_sets("datasets/mnist_data/", one_hot=True)  # load the MNIST images

sess = tf.InteractiveSession()  # create the session
print(mnist.train.images.shape)


# 1. Helper functions

def weight_variable(shape):
    # truncated normal distribution with stddev 0.1: mean 0, values redrawn
    # if more than two stddevs from the mean
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    # a tensor of the given shape with every entry initialised to 0.1
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    # convolution with stride 1 in every direction; SAME pads the edges with
    # zeros so the output has the same size as the input
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # max-pool the convolution output with a 2x2 kernel and stride 2 (SAME
    # padding, take the maximum); this shrinks the data by a factor of 4
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


# 2. Inputs and outputs

# placeholder for the images; None means the number of input images is not
# fixed, and 784 = 28*28 pixels per image
xs = tf.placeholder(tf.float32, [None, 28 * 28])

# 10 classes (digits 0-9) for the classification output
ys = tf.placeholder(tf.float32, [None, 10])

keep_prob = tf.placeholder(tf.float32)

# reshape xs to 28x28x1 as the training input; the images are greyscale so
# there is 1 channel, and -1 leaves the batch dimension flexible
x_image = tf.reshape(xs, [-1, 28, 28, 1])


# 3. Build the network: the forward computation

## First convolutional layer ##

# the first two arguments are the kernel (patch) size, the third is the number
# of input channels, the fourth is the number of kernels, i.e. how many
# feature maps the layer produces
W_conv1 = weight_variable([5, 5, 1, 32])

# one bias per kernel
b_conv1 = bias_variable([32])

# convolve the image with the kernels and add the biases; output is 28x28x32
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

# pooled output is 14x14x32
h_pool1 = max_pool_2x2(h_conv1)


## Second convolutional layer ##

# 32 input channels, 64 output features
w_conv2 = weight_variable([5, 5, 32, 64])

# 64 biases
b_conv2 = bias_variable([64])

# note h_pool1 is the pooled output of the previous layer; output is 14x14x64
h_conv2 = tf.nn.relu(conv2d(h_pool1, w_conv2) + b_conv2)

# pooled output is 7x7x64
h_pool2 = max_pool_2x2(h_conv2)

# the original image is 28x28; after the first round it shrinks to 14x14
# (32 maps), after the second round to 7x7 (64 maps)


## Third layer: fully connected ##

# a 2-d weight tensor: 7*7*64 inputs, 1024 outputs
W_fc1 = weight_variable([7 * 7 * 64, 1024])

# 1024 biases
b_fc1 = bias_variable([1024])

# flatten the second pooled output: [n_samples, 7, 7, 64] ->> [n_samples, 7*7*64]
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])

# plain matrix multiplication (tf.matmul, row vector times weight matrix)
# rather than the sliding-window multiply of tf.nn.conv2d; output is 1x1024
# per sample
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)


# dropout, to reduce overfitting: during training each unit of the previous
# layer is kept with probability keep_prob (fed through the placeholder
# defined above); kept units are scaled up by 1/keep_prob (x2 for
# keep_prob=0.5, per the TensorFlow docs) and dropped units are set to 0,
# which keeps the expected activation unchanged and helps damp oscillation in
# the evaluation curve when training data is scarce
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)  # apply dropout to the fc output
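# A concrete dropout illustration (hypothetical values; the kept positions are
# random on every run): with keep_prob = 0.5, an input [1., 1., 1., 1.] may
# become [2., 0., 2., 0.] -- roughly half the units are zeroed and the
# survivors are scaled by 1/0.5 = 2, so the expected sum stays 4.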
## Fourth layer: output ##

# a 2-d weight tensor: 1024 inputs, 10 outputs, matching the length-10 ys
# defined at the start
W_fc2 = weight_variable([1024, 10])

b_fc2 = bias_variable([10])

# final classification, a 1x10 result per sample; softmax and sigmoid are both
# logistic classifiers, softmax for multi-class and sigmoid for binary
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)


# 4. Define the loss and pick an optimizer

# cross-entropy loss; the clip keeps log() away from zero
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)), reduction_indices=[1]))

# gradient descent: repeatedly feeding data drives cross_entropy down
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
# train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)


# 5. Train and evaluate

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(ys, 1))

accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

tf.global_variables_initializer().run()

for i in range(200):

    batch = mnist.train.next_batch(50)

    if i % 100 == 0:
        train_accuracy = accuracy.eval(session=sess, feed_dict={xs: batch[0], ys: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))

    train_step.run(session=sess, feed_dict={xs: batch[0], ys: batch[1], keep_prob: 0.5})

print("test accuracy %g" % accuracy.eval(session=sess, feed_dict={xs: mnist.test.images, ys: mnist.test.labels, keep_prob: 1.0}))

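# hed_net below comes from a separate HED (holistically-nested edge detection)
# codebase and references three names this file never defines: slim, const and
# deconv_mobile_version. A minimal sketch of what they are assumed to be, so
# the function can at least be read in context (the const values and the
# deconv helper are placeholders, not the original implementation):
import tensorflow.contrib.slim as slim

class const(object):
    image_height = 256  # assumed input size
    image_width = 256

def deconv_mobile_version(x, upsample_factor, output_shape):
    # assumed stand-in: upsample x to the target spatial size
    return tf.image.resize_images(x, output_shape[1:3])
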
def hed_net(inputs, batch_size):
    # ref https://github.com/s9xie/hed/blob/master/examples/hed/train_val.prototxt
    with tf.variable_scope('hed', 'hed', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            activation_fn=tf.nn.relu,
                            weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                            weights_regularizer=slim.l2_regularizer(0.0005)):
            # vgg16 conv && max_pool layers
            net = slim.repeat(inputs, 2, slim.conv2d, 12, [3, 3], scope='conv1')
            dsn1 = net
            net = slim.max_pool2d(net, [2, 2], scope='pool1')

            net = slim.repeat(net, 2, slim.conv2d, 24, [3, 3], scope='conv2')
            dsn2 = net
            net = slim.max_pool2d(net, [2, 2], scope='pool2')

            net = slim.repeat(net, 3, slim.conv2d, 48, [3, 3], scope='conv3')
            dsn3 = net
            net = slim.max_pool2d(net, [2, 2], scope='pool3')

            net = slim.repeat(net, 3, slim.conv2d, 96, [3, 3], scope='conv4')
            dsn4 = net
            net = slim.max_pool2d(net, [2, 2], scope='pool4')

            net = slim.repeat(net, 3, slim.conv2d, 192, [3, 3], scope='conv5')
            dsn5 = net
            # net = slim.max_pool2d(net, [2, 2], scope='pool5')  # no need for this pool layer

            # dsn layers
            dsn1 = slim.conv2d(dsn1, 1, [1, 1], scope='dsn1')
            # no deconv needed for dsn1

            dsn2 = slim.conv2d(dsn2, 1, [1, 1], scope='dsn2')
            deconv_shape = tf.stack([batch_size, const.image_height, const.image_width, 1])
            dsn2 = deconv_mobile_version(dsn2, 2, deconv_shape)  # deconv_mobile_version can work on mobile

            dsn3 = slim.conv2d(dsn3, 1, [1, 1], scope='dsn3')
            deconv_shape = tf.stack([batch_size, const.image_height, const.image_width, 1])
            dsn3 = deconv_mobile_version(dsn3, 4, deconv_shape)

            dsn4 = slim.conv2d(dsn4, 1, [1, 1], scope='dsn4')
            deconv_shape = tf.stack([batch_size, const.image_height, const.image_width, 1])
            dsn4 = deconv_mobile_version(dsn4, 8, deconv_shape)

            dsn5 = slim.conv2d(dsn5, 1, [1, 1], scope='dsn5')
            deconv_shape = tf.stack([batch_size, const.image_height, const.image_width, 1])
            dsn5 = deconv_mobile_version(dsn5, 16, deconv_shape)

            # dsn fuse
            dsn_fuse = tf.concat([dsn1, dsn2, dsn3, dsn4, dsn5], 3)
            dsn_fuse = tf.reshape(dsn_fuse, [batch_size, const.image_height, const.image_width, 5])  # without this reshape: ValueError: Number of in_channels must be known.
            dsn_fuse = slim.conv2d(dsn_fuse, 1, [1, 1], scope='dsn_fuse')

    return dsn_fuse, dsn1, dsn2, dsn3, dsn4, dsn5
--------------------------------------------------------------------------------
/tf-conv.py:
--------------------------------------------------------------------------------
# MNIST multi-layer convolutional network demo

import time
import input_data
import tensorflow as tf

# weight initialisation
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

# bias initialisation
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

# convolution with stride 1 and zero padding (SAME), so the output has the
# same size as the input
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# pooling: plain, traditional max pooling over 2x2 blocks
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')


# The MNIST data set splits into a 60000-row training set (mnist.train) and a
# 10000-row test set (mnist.test). This split matters: a machine-learning
# model needs a separate test set, never used for training, to evaluate its
# performance, which makes it easier to generalise the design to other data
# sets.
mnist = input_data.read_data_sets("datasets/mnist_data/", one_hot=True)

x = tf.placeholder(tf.float32, [None, 784])  # input image vectors
y_ = tf.placeholder("float", [None, 10])     # true distribution


# The first convolutional layer is a convolution followed by max pooling.
# The convolution computes 32 features for each 5x5 patch; its weight tensor
# is [5, 5, 1, 32]: the first two dimensions are the patch size (5x5), then
# the number of input channels (1), then the number of output channels (32).
W_conv1 = weight_variable([5, 5, 1, 32])

# one bias per output channel, hence 32
b_conv1 = bias_variable([32])

# To apply the layer, reshape x into a 4-d tensor: the 2nd and 3rd dimensions
# are the image width and height, and the last is the number of colour
# channels (1 for greyscale; 3 for an RGB image).
x_image = tf.reshape(x, [-1, 28, 28, 1])
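# Quick check of the static shapes claimed in the comments (printed at
# graph-construction time; the flexible batch dimension shows as ?):
#   print(x_image.get_shape())   # (?, 28, 28, 1)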
# convolve x_image with the weights, add the bias, apply ReLU, then max pool
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)


# Second convolutional layer: stack a similar layer on top to build a deeper
# network. Here each 5x5 patch yields 64 features.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Densely connected layer.
# The image is now down to 7x7; add a fully connected layer of 1024 neurons
# that processes the whole image: reshape the pooled tensor into vectors,
# multiply by the weight matrix, add the bias, and apply ReLU.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout, added to reduce overfitting.
# A placeholder holds the probability that a neuron's output is kept.
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Output layer.
# Add a softmax layer, just like in the earlier single-layer softmax regression.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)


###################################### train and evaluate the model
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))

train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))

# the mean of the boolean vector gives the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# launch the model in a Session
sess = tf.Session()
# initialise the variables we created
sess.run(tf.global_variables_initializer())

# Train with the ADAM optimizer, adding the extra keep_prob parameter to
# feed_dict to control the dropout rate, and log every 100 iterations.
for i in range(20000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        train_accuracy = accuracy.eval(session=sess,
                                       feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    train_step.run(session=sess, feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# report the final accuracy
print("test accuracy %g" % accuracy.eval(session=sess, feed_dict={
    x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

--------------------------------------------------------------------------------
/tf-fenbu.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

import tensorflow as tf
import numpy as np

# Flags for defining the tf.train.ClusterSpec
tf.app.flags.DEFINE_string("ps_hosts", "",
                           "Comma-separated list of hostname:port pairs")
tf.app.flags.DEFINE_string("worker_hosts", "",
                           "Comma-separated list of hostname:port pairs")

# Flags for defining the tf.train.Server
tf.app.flags.DEFINE_string("job_name", "", "One of 'ps', 'worker'")
tf.app.flags.DEFINE_integer("task_index", 0, "Index of task within the job")

FLAGS = tf.app.flags.FLAGS
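# Example launch, one process per cluster member (the host:port values are
# placeholders, assumed for illustration):
#   python tf-fenbu.py --ps_hosts=localhost:2221 --worker_hosts=localhost:2222,localhost:2223 --job_name=ps --task_index=0
#   python tf-fenbu.py --ps_hosts=localhost:2221 --worker_hosts=localhost:2222,localhost:2223 --job_name=worker --task_index=0
#   python tf-fenbu.py --ps_hosts=localhost:2221 --worker_hosts=localhost:2222,localhost:2223 --job_name=worker --task_index=1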

def main(_):
    ps_hosts = FLAGS.ps_hosts.split(",")
    worker_hosts = FLAGS.worker_hosts.split(",")

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task.
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)

    if FLAGS.job_name == "ps":
        server.join()
    elif FLAGS.job_name == "worker":

        train_X = np.linspace(-1.0, 1.0, 100)
        train_Y = 2.0 * train_X + np.random.randn(*train_X.shape) * 0.33 + 10.0

        X = tf.placeholder("float")
        Y = tf.placeholder("float")

        # Assigns ops to the local worker by default.
        with tf.device(tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % FLAGS.task_index,
                cluster=cluster)):

            w = tf.Variable(0.0, name="weight")
            b = tf.Variable(0.0, name="bias")
            loss = tf.square(Y - tf.multiply(X, w) - b)

            global_step = tf.Variable(0)

            train_op = tf.train.AdagradOptimizer(0.01).minimize(
                loss, global_step=global_step)

            saver = tf.train.Saver()
            summary_op = tf.summary.merge_all()
            init_op = tf.global_variables_initializer()

        # Create a "supervisor", which oversees the training process.
        sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                                 logdir="/tmp/train_logs",
                                 init_op=init_op,
                                 summary_op=summary_op,
                                 saver=saver,
                                 global_step=global_step,
                                 save_model_secs=600)

        # The supervisor takes care of session initialization, restoring from
        # a checkpoint, and closing when done or when an error occurs.
        with sv.managed_session(server.target) as sess:
            # Loop until the supervisor shuts down or 1000000 steps have completed.
            step = 0
            while not sv.should_stop() and step < 1000000:
                # Run a training step asynchronously.
                # See `tf.train.SyncReplicasOptimizer` for additional details on how to
                # perform *synchronous* training.
                for (x, y) in zip(train_X, train_Y):
                    _, step = sess.run([train_op, global_step],
                                       feed_dict={X: x,
                                                  Y: y})

                loss_value = sess.run(loss, feed_dict={X: x, Y: y})
                print("Step: {}, loss: {}".format(step, loss_value))

        # Ask for all the services to stop.
        sv.stop()


if __name__ == "__main__":
    tf.app.run()
--------------------------------------------------------------------------------
/tf-sm.py:
--------------------------------------------------------------------------------
from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf

# The MNIST data set splits into a 60000-row training set (mnist.train) and a
# 10000-row test set (mnist.test). This split matters: a machine-learning
# model needs a separate test set, never used for training, to evaluate its
# performance, which makes it easier to generalise the design to other data
# sets.
mnist = input_data.read_data_sets("datasets/mnist_data/", one_hot=True)
sess = tf.InteractiveSession()

# The input images x form a 2-d float tensor of shape [None, 784], where 784
# is the dimensionality of a flattened 28x28 MNIST image. None means that
# dimension can be of any length; as the first dimension it stands for the
# batch size, i.e. the number of x inputs is not fixed.
x = tf.placeholder("float", shape=[None, 784])

# The target classes y_ are also a 2-d tensor, each row a 10-d one-hot vector
# marking the digit class of the corresponding MNIST image. The shape argument
# of a placeholder is optional, but with it TensorFlow can automatically catch
# errors caused by mismatched tensor dimensions.
y_ = tf.placeholder("float", shape=[None, 10])

# weights, initialised as a zero tensor
W = tf.Variable(tf.zeros([784, 10]))
# biases, initialised as a zero tensor
b = tf.Variable(tf.zeros([10]))

# Initialisation.
# Variables must be initialised through the session before they can be used in
# it: the initialisation step assigns the starting value (here, all zeros) to
# each variable, and can be done for all variables in one go.
sess.run(tf.global_variables_initializer())

# y is the model's prediction
y = tf.nn.softmax(tf.matmul(x, W) + b)

# the loss: cross-entropy
# tf.reduce_sum adds up the cross-entropy of every image in the minibatch, so
# the value we compute is the cross-entropy of the whole minibatch.
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))

# Minimise the cross-entropy by steepest descent with step size 0.01.
# The returned train_step op applies a gradient-descent update to the
# parameters each time it runs, so training the whole model is just running
# train_step repeatedly.
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

# Each iteration loads 50 training samples and runs train_step once,
# substituting the batch for the x and y_ placeholders via feed_dict.
for i in range(1000):
    batch = mnist.train.next_batch(50)
    train_step.run(session=sess, feed_dict={x: batch[0], y_: batch[1]})

# tf.argmax gives the index of the largest entry of a tensor along an axis.
# Since the label vectors consist of 0s and a single 1, the index of the 1 is
# the class label: tf.argmax(y, 1) is the model's predicted label for each
# input x, tf.argmax(y_, 1) is the true label, and tf.equal checks whether the
# prediction matches the truth (same index = match).
# This yields a boolean vector; to compute the accuracy, convert the booleans
# to floats (1 for right, 0 for wrong) and take the mean. For example
# [True, False, True, True] becomes [1, 0, 1, 1], whose mean is 0.75.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))

# the mean is the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# print the accuracy on the test set
print(accuracy.eval(session=sess, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))

# References:
# http://fengjian0106.github.io/
# https://blog.csdn.net/kevin_cc98/article/details/79582906
# https://blog.csdn.net/u014679795/article/details/53467264
# https://github.com/fengjian0106/hed-tutorial-for-document-scanning
# http://fengjian0106.github.io/2018/06/02/Document-Scanning-With-TensorFlow-And-OpenCV-Part-Two/
--------------------------------------------------------------------------------
/xgboostdemo.py:
--------------------------------------------------------------------------------
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
import operator
import matplotlib.pyplot as plt
import seaborn as sns
import lime.lime_tabular
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import numpy as np

data = pd.read_csv("./data/titantic/train.csv")
y = data.Survived
X = data.drop(["Survived", "Name", "PassengerId"], axis=1)
X = pd.get_dummies(X)
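
# The file stops here in the repository. A minimal sketch of how the pieces
# imported above would typically be wired together (an assumed continuation,
# not the author's original code):
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Pipeline([
    ("impute", SimpleImputer(strategy="median")),  # fill missing values such as Age
    ("xgb", XGBClassifier()),
])
model.fit(X_train, y_train)
pred = model.predict(X_test)
print(accuracy_score(y_test, pred))
--------------------------------------------------------------------------------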