├── Anaconda2_infos_analysis.ipynb ├── Anaconda3_infos_analysis.ipynb ├── Anaconda环境安装以及搭建Python多内核环境.docx ├── Python_Common_Magic_Samples.ipynb ├── Python_PyODPS_HTML_to_PDF.ipynb ├── Python操作Mysql实例教程手册_代码.ipynb ├── README.md ├── lng_lat_2_geohash_two_way.ipynb ├── post_metas.csv ├── spider_huatu_civil_servant_post_metas.ipynb ├── spider_qiushibaike_content_datas.ipynb ├── spider_qiushibaike_content_datas.py ├── users_rise_up_period.ipynb └── users_rise_up_period_data /Anaconda2_infos_analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### python在数据科学方面需要用到的库:\n", 8 | "\n", 9 | "- numpy:科学计算库。提供矩阵运算的库。\n", 10 | "\n", 11 | "- pandas:数据分析处理库\n", 12 | "\n", 13 | "- scipy:数值计算库。提供数值积分和常微分方程组求解算法。提供了一个非常广泛的特定函数集合。\n", 14 | "\n", 15 | "- Matplotlib:数据可视化库\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### 1 当前路径信息(内核Python 2.7)" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/plain": [ 33 | "['',\n", 34 | " '/root/anaconda2/lib/python27.zip',\n", 35 | " '/root/anaconda2/lib/python2.7',\n", 36 | " '/root/anaconda2/lib/python2.7/plat-linux2',\n", 37 | " '/root/anaconda2/lib/python2.7/lib-tk',\n", 38 | " '/root/anaconda2/lib/python2.7/lib-old',\n", 39 | " '/root/anaconda2/lib/python2.7/lib-dynload',\n", 40 | " '/root/anaconda2/lib/python2.7/site-packages',\n", 41 | " '/root/anaconda2/lib/python2.7/site-packages/Sphinx-1.5.6-py2.7.egg',\n", 42 | " '/root/anaconda2/lib/python2.7/site-packages/setuptools-27.2.0-py2.7.egg',\n", 43 | " '/root/anaconda2/lib/python2.7/site-packages/IPython/extensions',\n", 44 | " '/root/.ipython']" 45 | ] 46 | }, 47 | "execution_count": 3, 48 | "metadata": {}, 49 | "output_type": "execute_result" 50 | } 51 | ], 52 | "source": [ 53 | "import sys\n", 54 | 
"\n", 55 | "sys.path" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### 2 科学计算常用包版本及路径信息" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/html": [ 73 | "
\n", 74 | "\n", 87 | "\n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | "
pack_nameversionpath
0numpy1.12.1/root/anaconda2/lib/python2.7/site-packages/numpy/__init__.pyc
1matplotlib2.0.2/root/anaconda2/lib/python2.7/site-packages/matplotlib/__init__.pyc
2pandas0.20.1/root/anaconda2/lib/python2.7/site-packages/pandas/__init__.pyc
3scipy0.19.0/root/anaconda2/lib/python2.7/site-packages/scipy/__init__.pyc
\n", 123 | "
" 124 | ], 125 | "text/plain": [ 126 | " pack_name version \\\n", 127 | "0 numpy 1.12.1 \n", 128 | "1 matplotlib 2.0.2 \n", 129 | "2 pandas 0.20.1 \n", 130 | "3 scipy 0.19.0 \n", 131 | "\n", 132 | " path \n", 133 | "0 /root/anaconda2/lib/python2.7/site-packages/numpy/__init__.pyc \n", 134 | "1 /root/anaconda2/lib/python2.7/site-packages/matplotlib/__init__.pyc \n", 135 | "2 /root/anaconda2/lib/python2.7/site-packages/pandas/__init__.pyc \n", 136 | "3 /root/anaconda2/lib/python2.7/site-packages/scipy/__init__.pyc " 137 | ] 138 | }, 139 | "execution_count": 4, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "import numpy\n", 146 | "import matplotlib\n", 147 | "import pandas as pd\n", 148 | "import scipy\n", 149 | "\n", 150 | "packs = [\n", 151 | " (\"numpy\", numpy.__version__, numpy.__file__),\n", 152 | " (\"matplotlib\", matplotlib.__version__, matplotlib.__file__),\n", 153 | " (\"pandas\", pd.__version__, pd.__file__),\n", 154 | " (\"scipy\", scipy.__version__, scipy.__file__)\n", 155 | "]\n", 156 | "\n", 157 | "pd_packages = pd.DataFrame(packs, columns=[\"pack_name\", \"version\", \"path\"])\n", 158 | "pd.set_option(\"max_colwidth\", 120)\n", 159 | "pd_packages" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "### 3 Anaconda2 集成包简单分析" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 10, 172 | "metadata": { 173 | "collapsed": true 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "import sys\n", 178 | "%matplotlib inline\n", 179 | "\n", 180 | "packages = [pack.split(\".\")[0] for pack in sys.modules.keys()]\n", 181 | "pd_packages = pd.DataFrame(packages, columns=[\"package\"])\n", 182 | "pack_series = pd_packages.groupby(by=\"package\").size()" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 11, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "count 
214.000000\n", 194 | "mean 10.163551\n", 195 | "std 37.323436\n", 196 | "min 1.000000\n", 197 | "25% 1.000000\n", 198 | "50% 1.000000\n", 199 | "75% 1.000000\n", 200 | "max 411.000000\n", 201 | "dtype: float64" 202 | ] 203 | }, 204 | "execution_count": 11, 205 | "metadata": {}, 206 | "output_type": "execute_result" 207 | } 208 | ], 209 | "source": [ 210 | "pack_series.describe()" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 12, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "data": { 220 | "text/plain": [ 221 | "214" 222 | ] 223 | }, 224 | "execution_count": 12, 225 | "metadata": {}, 226 | "output_type": "execute_result" 227 | } 228 | ], 229 | "source": [ 230 | "pack_series.index.duplicated()\n", 231 | "pack_temp = pack_series.index.drop_duplicates()\n", 232 | "len(pack_temp)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "### 3.1 根据 Anaconda 2 包中的模块数量倒序绘图" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": 14, 245 | "metadata": {}, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXoAAAEKCAYAAAAcgp5RAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XucXWVh7vHfM3vul8xMksk9EC5B5SJgI6JctHiB1lao\n9VB6LITCObSnaG3P0R5pz6nYlkqrx6MgaFHUVFFKT/WQUitikIuKhHAnCSEBEpOQyyRkMpPMJXN5\n+8d6J9mZzGRmz+yZvfbO8/185rPXXnuttd+9Zu9nv/td73qXQgiYmVnpKit0AczMbHI56M3MSpyD\n3sysxDnozcxKnIPezKzEOejNzEqcg97MrMQ56M3MSpyD3sysxJUXugAAM2fODIsWLSp0MczMisqT\nTz65K4TQMtpyqQj6RYsWsWrVqkIXw8ysqEjaNJbl3HRjZlbiHPRmZiXOQW9mVuIc9GZmJc5Bb2ZW\n4hz0ZmYlzkFvZlbiUhH029u76e0fKHQxzMxKUiqCvrWjh9f3Hyh0MczMSlIqgh5gZ3tPoYtgZlaS\nxhz0kjKSnpZ0X7w/XdIDktbH2+asZW+QtEHSOkkXj2X7rfu6cy+9mZmNKpca/ceAtVn3PwmsCCEs\nBlbE+0g6FbgCOA24BLhdUma0jbtGb2Y2OcYU9JIWAO8HvpY1+1JgWZxeBlyWNf/uEEJPCOFVYANw\nzmjP0drhoDczmwxjrdF/AfgzILtrzOwQwrY4vR2YHafnA5uzltsS540oI7HTQW9mNilGDXpJvwHs\nDCE8OdIyIYQAhFyeWNJ1klZJWiUGXKM3M5skY6nRnwd8QNJG4G7gIknfBnZImgsQb3fG5bcCC7PW\nXxDnHSaEcEcIYUkIYUl1ZQU7O3ww1sxsMowa9CGEG0IIC0IIi0gOsj4YQvg9YDmwNC62FLg3Ti8H\nrpBUJekEYDGw8mjPUV4mWve5Rm9mNhkmcoWpm4F7JF0LbAIuBwghrJZ0D7AG6AOuDyH0H7UQmTJ2\ntvcQQkDSBIpkZmZD5RT0IYSHgIfi9G7g3SMsdxNw01i3W5ERPX0DdPT0Ma26IpcimZnZKFJxZmx5\nWVIM96U3M8u/dAR9Jmmucc8bM7P8S0XQVwzW6N3zxsws71IR9K7Rm5lNnlQEfaZMVJaXOejNzCZB\nKoIeoKW+ykFvZjYJ0hP0DVUe78bMbBKkJuhnNbhGb2Y2GVIT9EmN3r1uzMzyLTVBP6uhmj2dvRzo\n80XCzczyKT1BP60KgF0e3MzMLK9SE/Qt9UnQu53ezCy/UhP0gzV697wxM8uv1AR9S4Nr9GZmkyE1\nQT+zfrBG7543Zmb5lJqgr8iUMb2u0jV6M7M8S03QQ3LSlNvozczyK1VB3+KzY83M8s5Bb2ZW4lIZ\n9CGEQhfFzKxkpCroZzVUc6B/gL1dvYUuiplZyUhV0LsvvZlZ/qUq6Gc1+OxYM7N8S1XQu0ZvZpZ/\nqQr6QzV6nx1rZpYvqQr6+qpyqit8kXAzs3xKVdBLYlZDtdvozczyKFVBDz5pysws31IX9B7vxsws\nv1IX9K7Rm5nlV+qCflZDFXu7eunu7S90UczMSkLqgn6wL70vEm5mlh+pC/pZDdWAz441M8uX1AX9\nYI1+Z7uD3swsH1IX9INnx7a66cbMLC9SF/TT6yqRoLXdwyCYmeVD6oK+PFPGjLoq1+jNzPIkdUEP\nSTu92+jNzPIjlUE/q8E1ejOzfBk16CVVS1op6VlJqyV9Os6fLukBSevjbXPWOjdI2iBpnaSLcy2U\na/RmZvkzlhp9D3BRCOFM4CzgEknnAp8EVoQQFgMr4n0knQpcAZwGXALcLimTS6FmNVSxa18PAwO+\nSLiZ2USNGvQhsS/erYh/AbgUWBbnLwMui9OXAneHEHpCCK8CG4BzcilUS0MVfQOBPZ0HclnNzMyG\nMaY2ekkZSc8AO4EHQgiPA7NDCNviItuB2XF6PrA5a/Utcd6YD
Z4d63Z6M7OJG1PQhxD6QwhnAQuA\ncySdPuTxQFLLHzNJ10laJWlVa2vrYY/57Fgzs/zJqddNCKEN+AlJ2/sOSXMB4u3OuNhWYGHWagvi\nvKHbuiOEsCSEsKSlpeWwx2b5IuFmZnkzll43LZKa4nQN8F7gRWA5sDQuthS4N04vB66QVCXpBGAx\nsDKXQh2s0TvozcwmrHwMy8wFlsWeM2XAPSGE+yQ9Btwj6VpgE3A5QAhhtaR7gDVAH3B9CCGnweXr\nqsqpq8y4Rm9mlgejBn0I4Tng7GHm7wbePcI6NwE3TaRgLQ1V7OzweDdmZhOVyjNjIel54xq9mdnE\npTbofe1YM7P8cNCbmZW4VAd9R08fXQd8kXAzs4lIbdC7L72ZWX6kNugP9aV3zxszs4lIbdAfHO/G\nNXozswlJbdD77Fgzs/xIbdBPr6skUybX6M3MJii1QZ8pEzPqKt1Gb2Y2QakNeoBZ09yX3sxsolId\n9C31VW6jNzOboFQH/ayGage9mdkEpTro5zYlA5v19PnsWDOz8Up10M9vqgFgW5sPyJqZjVe6g745\nCfqtbV0FLomZWfFKddAvbK4FYOseB72Z2XilOujnNFZTJtjiGr2Z2bilOugrMmXMnlbtGr2Z2QSk\nOughOSC7ta2z0MUwMyta6Q/65hofjDUzm4D0B31TDdvauukfCIUuiplZUUp/0DfX0DcQPLiZmdk4\npT/o40lTPiBrZjY+qQ/6BT5pysxsQlIf9PNijX6La/RmZuOS+qCvrSxnel2la/RmZuOU+qCH2Jfe\nNXozs3EpnqB3jd7MbFyKI+ibkxp9CO5Lb2aWq+II+qYaunr7eX3/gUIXxcys6BRH0LuLpZnZuBVH\n0PukKTOzcSuKoPdJU2Zm41cUQd9YU0FdZcYnTZmZjUNRBL0kD1dsZjZORRH04JOmzMzGq3iC3jV6\nM7NxKZ6gb6plb1cv+3r6Cl0UM7OiMmrQS1oo6SeS1khaLeljcf50SQ9IWh9vm7PWuUHSBknrJF2c\nj4Ie7Evv5hszs5yMpUbfB/yPEMKpwLnA9ZJOBT4JrAghLAZWxPvEx64ATgMuAW6XlJloQQ/2pfeF\nws3McjJq0IcQtoUQnorTHcBaYD5wKbAsLrYMuCxOXwrcHULoCSG8CmwAzploQRe4Rm9mNi45tdFL\nWgScDTwOzA4hbIsPbQdmx+n5wOas1bbEeRPSUl9FZaaMLT4ga2aWkzEHvaR64F+APwkhtGc/FpJh\nJXMaWlLSdZJWSVrV2to6ekHLxNymatfozcxyNKagl1RBEvJ3hRC+F2fvkDQ3Pj4X2BnnbwUWZq2+\nIM47TAjhjhDCkhDCkpaWljEV1uPSm5nlbiy9bgTcCawNIXw+66HlwNI4vRS4N2v+FZKqJJ0ALAZW\n5qOwPmnKzCx35WNY5jzgSuB5Sc/EeX8O3AzcI+laYBNwOUAIYbWke4A1JD12rg8h9OejsPOba9jZ\n0UNPXz9V5RPuyGNmdkwYNehDCD8FNMLD7x5hnZuAmyZQrmENdrHc1tbNopl1+d68mVlJKpozY8EX\nIDEzG4+iCvoFTbWA+9KbmeWiqIJ+TmM1Eu5Lb2aWg6IK+sryMmY3uC+9mVkuiiroYXC4Yo93Y2Y2\nVsUX9D5pyswsJ8UX9M01bGvrpn8gpxEXzMyOWcUX9E019A0EdnZ0F7ooZmZFofiC3sMVm5nlpOiC\nfkGTT5oyM8tF0QX9YI1+i2v0ZmZjUnRBX1tZTnNthWv0ZmZjVHRBD7EvvWv0ZmZjUpxB7770ZmZj\nVqRBX8vWPV0kVzA0M7OjKc6gb66hq7efPZ29hS6KmVnqFWfQN7kvvZnZWBVl0C84eAESD25mZjaa\nogz6wRq9+9KbmY2uKIO+qbaC2sqMe96YmY1BUQa9pKSLpWv0ZmajKsqgB3jDnAae2dzmLpZmZqMo\n2qC/cHELOzt6eGnHvkIXx
cws1Yo26M9fPBOAR9e3FrgkZmbpVrRBP6+phpNa6nh0/a5CF8XMLNWK\nNugBLljcwuOv7qa7t7/QRTEzS62iDvoLT5lJd+8AT27aU+iimJmlVlEH/dtOmEFFRjzidnozsxEV\nddDXVZXzluOa+anb6c3MRlTUQQ9w4SktrH6tnV37egpdFDOzVCr6oL8gdrP82QbX6s3MhlP0QX/a\nvEaaayt45CUHvZnZcIo+6DNl4h0nz+TR9a0eDsHMbBhFH/QAFy6eyc6OHtbv9HAIZmZDlUTQn7+4\nBYBHXnI3SzOzoUoi6Od7OAQzsxGVRNCDh0MwMxtJCQV9MhzCUx4OwczsMCUT9OeeODgcgptvzMyy\nlUzQDw6H4PHpzcwON2rQS/q6pJ2SXsiaN13SA5LWx9vmrMdukLRB0jpJF09WwYdzweKZHg7BzGyI\nsdTovwlcMmTeJ4EVIYTFwIp4H0mnAlcAp8V1bpeUyVtpR3FB7Gbp4RDMzA4ZNehDCI8Arw+ZfSmw\nLE4vAy7Lmn93CKEnhPAqsAE4J09lHdXp8xtpqq1wN0szsyzjbaOfHULYFqe3A7Pj9Hxgc9ZyW+K8\nI0i6TtIqSataW/PTrp4pE+d5OAQzs8NM+GBsSBI151QNIdwRQlgSQljS0tIy0WIcdMHJM9nR7uEQ\nzMwGjTfod0iaCxBvd8b5W4GFWcstiPOmzPlx2OLP3r+Of3tuG1v2dLp2b2bHtPJxrrccWArcHG/v\nzZr/HUmfB+YBi4GVEy1kLhY01/KBM+fxw9XbeWDNDgBm1ldy5oImzlyY/C05vpm6qvG+dDOz4jJq\n2kn6LvAuYKakLcCnSAL+HknXApuAywFCCKsl3QOsAfqA60MIUz4mwS2/ezYH+gZYt72DZ7a08ezm\nNp7Z3MaD63YSAlRmynjrCc2885QW3nnKLE6ZXY+kqS6mmdmUUBqaNZYsWRJWrVo16c/T3t3Ls5vb\neHT9Lh5e18q6HR0AzJlWzTtPaeHdb5rFe0+d7dA3s6Ig6ckQwpJRlzuWgn6obXu7eHhdKw+/1MpP\n1++io6ePP3znSXzy19445WUxM8vVWIP+mG6onttYwxXnHMcV5xxHb/8ANy5fzVcefpl5TdVc9fZF\nhS6emVleHNNBn60iU8ZfXXo6O9p7+NTy1cxqqOaS0+cUulhmZhNWMoOa5UOmTNz6u2dz5oImPnb3\n0zy5aegJwWZmxcdBP0RNZYY7ly5hbmM11y5bxcutPvHKzIqbg34YM+qrWHbNOZSXiaVfX8nOju5C\nF8nMbNwc9CM4fkYddy59K7v3HeCabz7Bvp6+QhfJzGxcHPRHcebCJm778Nms3dbBf122iqd/ucfD\nKZhZ0XHQj+KiN87mMx88gyd/uYffuv3nvOtzD/H5H61jgwdNM7MicUyfMJWLvV293L96O8ufeY2f\nv7yLgQCnzZvGZWfN59Kz5jFrWnWhi2hmxxifGTuJdrZ386/PbWP5M1t5dsteGqrL+da1b+OshU2F\nLpqZHUPGGvRuuhmHWdOqufb8E7j3I+fzoz+9kKbaCq782uM89cs9hS6amdkRHPQTdMrsBv7purcz\nvb6Sq+5c6ZOszCx1HPR5MK+phruvO5eWhiquunMlT2x02JtZejjo82RuYxL2s6dVs/TrK3n8ld2F\nLpKZGeCgz6vZ06q5+7pzmdtYzdXfeILHXnbYm1nhOejzbNa0au6+7u0saK7h97+5kjt/+io/fGEb\nj728m7Xb2tm2t4vu3im/6JaZHcM8TPEkaGmo4rvXncuVd67kr+9bM+wyVeVlNNVW0FRTSWNtBU01\nFcn92koaB6drKmmqrTh0v7aSusqMr4BlZjlx0E+SmfVV3PfR89nR3k1bZy9tXQfY29lLW1cvezoP\n0NbZG+8n0798vZPntiT3u3sHRtxumZLhlIfTWFPJ1e84nqXvWERDdcVkvTQzKzIO+kmUKRP
zmmqY\n11ST03rdvf3s7epNviA6D9DWdehLob2rj4ERTnJbu62dz/3oJe545BWuPf9Erj5vEY01DnyzY52D\nPoWqKzJUV2SYPY5hFZ7fspdbHlzP//3xS3ztp6/w++edwDXnLaKptnISSmpmxcBDIJSo1a/t5dYV\nG/jh6u3UV5XzvtNmU1We67F3ce6J0/mNN88bsbnIzArHY90YAC9ub+fWBzfwxKu5n8R1oH+Ats5e\nTpxZx0cuOpkPnDmP8ow7apmlhYPeJmxgIHD/6u18ccV6XtzewaIZtVz/qydz2dnzqXDgmxWcg97y\nZmAg8MDaHdyyYj2rX2tn4fQarn/Xyfz2ryxw4JsVkEevtLwpKxMXnzaH+z56Pl+7agnNtZV88nvP\nc/EXHmHF2h2+6pZZyjnobcwk8Z5TZ3Pv9efx1auWQIBrl63iyjtXsnZbe6GLZ2YjcNBbziTx3lNn\n88M/uZBP/eapPL91L++/5VFu+N5z7OzoLnTxzGwIt9HbhLV1HuCWFRv4x8c2UlVexh+88yTOPXEG\ni2bW0lJf5SEbzCaJD8balHuldR+f+fcXeWDNjoPzaiszHD+jjkUzajl+Rh0Lp9cwvXZwfJ/KOIZP\nBTUVHsPHLFdjDXqfGWt5c2JLPV+9agmbX+/k5dZ9bNrdycbd+9m0u5N1Ozr48dod9PYPX7GozJTR\n0lDF8fELYfCLYdHMWo6fXkdNZWaKX41Z6XDQW94tnF7Lwum1R8zvHwi0dvQcGtQtDujWFsf12dHe\nzabd+7l/9XZe33/gsHWb4gifjbWVh0b6jPdHOuNXgpNa6jlrYdO4hpMwKxUOepsymTIxp7GaOY2j\nh+7erl5+GX8RbNy1n9Z9PVlfCgfYuHs/bZ29tHf3MpbWxznTqjlzYSNnLWzmzIWNnDG/0SN82jHD\nQW+p1FhTwRkLGjljQeNRl+sfCPQPDJ/0fQMDvLi9g2c3t/HM5jae3dzG/asPHT+orig7csz/mkrq\nq8sZz9A+jTUVsdmpjuNn1jLNXySWEg56K2qZMo044FolZbzluGbeclzzwXl79h/gua17WfNae2xC\nOtR8tHFXJ21dbXR09+VcjhCga8iVw6bXVXL8jFpOmFF36HhDPP7g0URtKrnXjVmedB3oZ9Pr+9m4\nq5NNu/ezcXe83bWfbe3dhzUxNdZUsGhGciyjumL4A81lgmnVyS+Nw49NHP1XR01lxt1ajxHudWM2\nxWoqM7xxzjTeOGfaEY919/az+fXOQ+EfeyM9v3UvfSP0ROofCLR399J5IPdrDA/t1rpoRi2LZtbR\nVFuByO0LQIL6qnJ3gy1iDnqzKVBdkWHx7AYWz27Ied2evuSKY3uzeih1dPeOuHx7Vy+bXu8cU7fW\nXFVmyg67xnFDdQVlIwR/eZkOHvsYet5ES33VUX/NWH456M1Srqo8w6yGDLMaxtdFtH8g8FpbFxt3\n7x/X8YeBENjX3XfwS+Zgt9jYJXak1t++gQH2dvWyp7OXA31HXgdZgrnTqo84ftFYU0muPxoE1FeX\n0xSbuGor/csj26QFvaRLgC8CGeBrIYSbJ+u5zGxkmTKNeG7DVOnu7Y8HvQ+wZ//gOROHmrF+tHoH\nu4ecOzERFRnRWFNJc20FDdXlIx+wLy/juOmHN2+V4gl6kxL0kjLAbcB7gS3AE5KWhxDWTMbzmVm6\nVVdkmNOYOeo5FO3dvWza1UlHz8jNUiMJATq6+444Ca+t88BRz7XY39M/7JfM7GlVzGuqoTn+Qhja\n9FRVnvsXgQQNVeXJtqb4l8dk1ejPATaEEF4BkHQ3cCngoDezYU2rrhj1vInJ0t59+Al6G3d3sm1v\nFzs7unlpRwd7O3vp6Mm92Ws0g8c8GqrLyUxi4E9W0M8HNmfd3wK8LXsBSdcB1wEcd9xxk1QMM7PR\nTauu4PT5jZw+f+Qvmt7+AdqPcsxhNAMhsK+nb5jhPw7
Q3tVHIPcD5j8e43IFOxgbQrgDuAOSfvSF\nKoeZ2VhUZMqYUV/FjPqqQhfloC//3tiWm6wLj2wFFmbdXxDnmZnZFJusoH8CWCzpBEmVwBXA8kl6\nLjMzO4pJaboJIfRJ+ghwP0n3yq+HEFZPxnOZmdnRTVobfQjhB8APJmv7ZmY2Nr44uJlZiXPQm5mV\nOAe9mVmJc9CbmZW4VFx4RFIHsK7Q5UiZmcCuQhciRbw/Duf9caRjcZ8cH0JoGW2htAxTvG4sV0k5\nlkha5X1yiPfH4bw/juR9MjI33ZiZlTgHvZlZiUtL0N9R6AKkkPfJ4bw/Duf9cSTvkxGk4mCsmZlN\nnrTU6M3MbJKkNuglPSSpaI6gS1ok6YUpeq4fSGqKf380Fc9ZCiT9eaHLcDSSbpT08UKXI+0kXS3p\nS3H6DyVdlTV/3hjW/2tJz0l6RtKPxrJOsUtt0NuRlCgLIfx6CKENaAJyCvrBbUxOCVMv1UGfK0lp\n6R5dMCGEr4QQ/jHevRoYS2h/NoTw5hDCWcB9wF9OVvnSYsIf+FiTfVHSXZLWSvp/kmol/aWkJyS9\nIOkOxSvgxpr630laKeklSRfE+TWS7o7b+D5Qk/UcX5a0StJqSZ/Omn+zpDXx2/lzE30tObzmt8bn\nrJZUJ2k1UJ/1+Gnx9T0Tl1s83DqSTpd0m6QPxPW+L+nrcfoaSTfF/btO0j8CLwALJW2UNBO4GTgp\nPs9n43qfiPv9ucF9Ndw2pmg/jfTe+BVJD0t6UtL9kuZKKo/lfldc9zOSborTGyX9vaTn4349Oc5v\nkfQvcb0nJJ0X59dL+kZc/jlJvy3pZqAm7qu7puL1Z+2Hq2I5npX0rbhfHozzVkg64lqaks6S9Iu4\nzPclNcf5D0n6gqRVwMck/aakxyU9LenHkmbH5W6UtEzSo5I2Sfpg1j78oaSKuNywn9OpFj8T/xb3\n0QuSfkfSJfH985SkWyTdN8x6N0r6uKQPAUuAu+L/uGa49xlACKE9axN1MI5r+BWbEMKE/oBFJDvq\nvHj/68DHgelZy3wL+M04/RDwf+L0rwM/jtP/nWTceoA3A33Aknh/erzNxPXfDMwgOZt28IBy00Rf\nS46v+2+AzwG3ATfE/fBCfOxW4MNxuhKoGW6dOO8KkhoGwErgF3H6G8DFcbsDwLlZz72R5CzAg88Z\n57+PpOeBSL7E7wMuHG4bU7SPhntvfAL4OdAS5/1O1v/9NGAt8B7gaaAy6/X+RZy+CrgvTn8HOD9O\nHwesjdN/B3whqxzN8XbfVL7+rNf0EjBz8L0M/CuwNN6/Bvj/cfpG4ONx+jngnXH6rwZfT3z/3579\n2rI+A/+FQ5+tG4GfAhXAmUAn8Gvxse8Dl2V/toZ+Tguwn34b+GrW/UaS604vju/ne7L+71cDXxpm\nnz3EocyoGOl9Fu/fFLf/wuAypfyXr5/wm0MIP4vT3wbOB3411jSeBy4iecMP+l68fZIkDCAJpG8D\nhBCeI3mjD7pc0lMkH/7TgFOBvUA3cKekD5K8kafSXwHvJalF/P2Qxx4D/lzS/yQ5RbnrKOs8Clwg\n6VRgDbAj1jzeTvJGBdgUQvjFGMr0vvj3NPAU8EaSD0ou28i3oe+Ni4HTgQckPQP8L5JLTRKSi9N8\ni+QL6poQwoGs7Xw36/btcfo9wJfidpYD0yTVx/m3Da4YQtgzGS9sjC4C/jmEsCuW5XWS8n8nPv4t\nks/LQZIaSSouD8dZy0g+H4P+KWt6AXB//Jx9gsM/Z/8eQugFniepJP0wzn+eQ5+7o31Op9LzwHuV\n/Nq/ADgBeDWEsD4kyfztHLf3BkZ4nwGEEP4ihLAQuAv4SF5eQYrlq41v6E+fANxO8u26WdKNQHXW\n4z3xtn+0Mkg6geQXwltDCHskfROoDslVrM4B3g18iOSfddFEX0gOZpA011Rw+GsjhPAdSY8D7wd+\nIOkPQggPDrPO/hD
CVklNwCXAIyQ1vstJap8dkmYA+8dYJgGfCSH8w2EzpUU5bCPfhr43OoDVIYS3\nD7cwcAbQBsw6ynYGp8tIfqV0Zy9YoNaHqZT9v7wV+HwIYXls9rox67EegBDCgKTeGJiQ/Lorl1TN\n0T+nUyaE8JKkt5D8yv8bYMUENymO/j4bdBfJBZI+NcHnS7V81eiPkzS4Q/8zyU9GgF2xhvWhMWzj\nkbgukk4naZ4BmEbyxt4b2x9/LS5TDzSG5EpWf0ry83Qq/QPwv0neKH+X/YCkE4FXQgi3APdy6LWM\ntM4vgD8h2QePknyxPTqGMnQADVn37weuifsGSfMlDQ3MqTb0vfELoGVwnqQKSafF6Q+SfNFdCNwa\nvwAH/U7W7WNx+kfARwcXkHRWnHwAuD5rfnOc7B1sm55CDwL/KX5hI2k6yS+1K+LjH2bI/zqEsBfY\nE2u2AFcCDzO8RmBrnF6aY9kGQz2Xz+mkUNLzpTOE8G3gs8A7gEWSToqL/O4YNpP9eVjHyO+zxVnr\nXAq8mIeXkGr5qtGvA65XciBxDfBlkrbDF4DtJBcLH82XgW9IWkvSTvskQAjhWUlPk/wzNgODzQAN\nwL2xViKSNv4poaQ7V2+suWdIPrjZvyYuB66U1Evy+v92uHUkXRRr+o8C7wshbJC0iSTsRg36EMJu\nST9T0q3z30MIn5D0JuCxWKvdB/weyS+nQhn63riV5AvplthEUQ58QdIOkoPL7461yy8BX+RQeDVL\neo6kljr4of9j4LY4v5zki/IPSWqEt8X90g98mqS58A7gOUlPhRA+PNkvHJLmKCUHlR+W1E/SrPZR\nkvf6J4BW4PeHWXUp8BVJtcArIywDSQ3+nyXtIflSOSGHsrVJ+iq5fU4nyxnAZyUNAL3AfyM5DvVv\nkjpJPg8NR1kf4Jsk+6yLpHnsQwx5nwGrgZslvYHkl80mkvdMSZvwmbGxWeC+EMLp+SiQlY58vTck\nbSRpXjjWhqC1KDZLfTyE8BuFLksxOlb7U5uZHTM81o2ZWYlzjd7MrMQ56M3MSpyD3sysxDnozYaQ\n9K7hxlUxK1YOejOzEuegt5Ki3EdTPVnJqI/PKhkl8aQh23urkpEhT5J0jqTH4v2fx5NuiNu/R8lI\nqt+PY8csiY+9L67zlKR/Hjxr2WwqOeitFL2BZITHNwHtJGP2fymE8NZ48lYNMHjizV3AbSGEM0lO\nu982uBFJ7wC+AlwaQniZ5OzsC0IIZ5OMYf63cdE/AvaEEE4lGeLiV+L6M0kG03pPCOEtwCqm8Axu\ns0HH/IWIP/vZAAABaUlEQVQLrCQNHTHzj4FXJf0ZUEsyxMRqSQ8B80MI3wcYHBwtVvbfRDJkwvtC\nCK/FbTUCy+JYKYFkcDpIRp/8YtzGC3FIBoBzSUZa/VncZiWHxukxmzIOeitFuY6mOpxtcZmzgcGg\n/2vgJyGE34rDOzw0yjYEPBBCGMuAXGaTxk03VorGNJpqCKED2CLpMgBJVXEQMUiGSn4/8Jk4zgoc\nPlLk1VnP9zOSgexQcl2BM+L8XwDn6dAVseoknZKvF2k2Vg56K0WDI2auJRlF9cvA4CiN93P4KI1X\nAn8cm1t+DswZfCCEsIOkLf82SW8juVjMZ+Joqtm/hm8nGRJ3DcnImauBvSGEVpIvhO/G7T9GcjEY\nsynlsW6spBRiNNU47HRFCKE79tr5MfCGIVfIMisYt9GbTVwt8JN4URMBf+SQtzRxjd7MrMS5jd7M\nrMQ56M3MSpyD3sysxDnozcxKnIPezKzEOejNzErcfwBMMvAaOv+OJwAAAABJRU5ErkJggg==\n", 250 | "text/plain": [ 251 | "" 252 | ] 253 | }, 254 | "metadata": {}, 255 | "output_type": "display_data" 256 | } 257 | ], 258 | "source": [ 
259 | "pack_series.sort_values(ascending=False)[:50].plot();" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 9, 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "data": { 269 | "text/plain": [ 270 | "package\n", 271 | "pandas 406\n", 272 | "IPython 263\n", 273 | "prompt_toolkit 146\n", 274 | "matplotlib 92\n", 275 | "_pytest 91\n", 276 | "numpy 87\n", 277 | "ipykernel 71\n", 278 | "zmq 66\n", 279 | "jupyter_client 52\n", 280 | "py 51\n", 281 | "xlsxwriter 50\n", 282 | "email 49\n", 283 | "pygments 49\n", 284 | "ipywidgets 42\n", 285 | "pkg_resources 42\n", 286 | "traitlets 42\n", 287 | "dateutil 35\n", 288 | "bottleneck 27\n", 289 | "numexpr 25\n", 290 | "unittest 24\n", 291 | "dtype: int64" 292 | ] 293 | }, 294 | "execution_count": 9, 295 | "metadata": {}, 296 | "output_type": "execute_result" 297 | } 298 | ], 299 | "source": [ 300 | "pack_series.sort_values(ascending=False)[:20]" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "### 3.2 Anaconda 2 中模块信息" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": { 314 | "scrolled": false 315 | }, 316 | "outputs": [ 317 | { 318 | "name": "stdout", 319 | "output_type": "stream", 320 | "text": [ 321 | "\n", 322 | "Please wait a moment while I gather a list of all available modules...\n", 323 | "\n" 324 | ] 325 | }, 326 | { 327 | "name": "stderr", 328 | "output_type": "stream", 329 | "text": [ 330 | "/root/anaconda2/lib/python2.7/site-packages/IPython/kernel/__init__.py:13: ShimWarning: The `IPython.kernel` package has been deprecated since IPython 4.0.You should import from ipykernel or jupyter_client instead.\n", 331 | " \"You should import from ipykernel or jupyter_client instead.\", ShimWarning)\n", 332 | "/root/anaconda2/lib/python2.7/site-packages/odo/backends/pandas.py:94: FutureWarning: pandas.tslib is deprecated and will be removed in a future version.\n", 333 | "You can access 
NaTType as type(pandas.NaT)\n", 334 | " @convert.register((pd.Timestamp, pd.Timedelta), (pd.tslib.NaTType, type(None)))\n", 335 | "/root/anaconda2/lib/python2.7/site-packages/blaze/server/server.py:17: ExtDeprecationWarning: Importing flask.ext.cors is deprecated, use flask_cors instead.\n", 336 | " from flask.ext.cors import cross_origin\n", 337 | "/root/anaconda2/lib/python2.7/site-packages/bokeh/util/deprecation.py:34: BokehDeprecationWarning: MPL compatibility can no longer be successfully maintained, and is now deprecated. All MPL compat functions will be removed completely on the release of Bokeh 1.0. See http://bokeh.pydata.org/en/latest/docs/releases/0.12.5.html for more information\n", 338 | " warn(message)\n", 339 | "/root/anaconda2/lib/python2.7/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The matplotlib.delaunay module was deprecated in version 1.4. Use matplotlib.tri.Triangulation instead.\n", 340 | " warnings.warn(message, mplDeprecation, stacklevel=1)\n", 341 | "/root/anaconda2/lib/python2.7/site-packages/nltk/twitter/__init__.py:20: UserWarning: The twython library has not been installed. Some functionality from the twitter package will not be available.\n", 342 | " warnings.warn(\"The twython library has not been installed. \"\n", 343 | "/root/anaconda2/lib/python2.7/site-packages/skimage/viewer/utils/core.py:10: UserWarning: Recommended matplotlib backend is `Agg` for full skimage.viewer functionality.\n", 344 | " warn(\"Recommended matplotlib backend is `Agg` for full \"\n", 345 | "/root/anaconda2/lib/python2.7/site-packages/qtawesome/iconic_font.py:268: UserWarning: You need to have a running QApplication to use QtAwesome!\n", 346 | " warnings.warn(\"You need to have a running \"\n", 347 | "/root/anaconda2/lib/python2.7/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. 
Please use the pandas.tseries module instead.\n", 348 | " from pandas.core import datetools\n" 349 | ] 350 | }, 351 | { 352 | "name": "stdout", 353 | "output_type": "stream", 354 | "text": [ 355 | "BaseHTTPServer bs4 jupyter_core scripts\n", 356 | "Bastion bsddb keyword seaborn\n", 357 | "CDROM bz2 lazy_object_proxy select\n", 358 | "CGIHTTPServer cPickle lib2to3 sets\n", 359 | "Canvas cProfile linecache setuptools\n", 360 | "ConfigParser cStringIO linuxaudiodev sgmllib\n", 361 | "Cookie cairo llvmlite sha\n", 362 | "Crypto calendar locale shelve\n", 363 | "Cython cdecimal locket shlex\n", 364 | "DLFCN cffi logging shutil\n", 365 | "Dialog cgi lxml signal\n", 366 | "DocXMLRPCServer cgitb macpath simplegeneric\n", 367 | "FileDialog chardet macurl2path singledispatch\n", 368 | "FixTk chunk mailbox singledispatch_helpers\n", 369 | "HTMLParser click mailcap sip\n", 370 | "IN cloudpickle markupbase sipconfig\n", 371 | "IPython clyent markupsafe sipdistutils\n", 372 | "MimeWriter cmath marshal site\n", 373 | "OleFileIO_PL cmd math six\n", 374 | "OpenSSL code matplotlib skimage\n", 375 | "PIL codecs md5 sklearn\n", 376 | "PyQt5 codeop mhlib smtpd\n", 377 | "Queue collections mimetools smtplib\n", 378 | "ScrolledText colorama mimetypes sndhdr\n", 379 | "SimpleDialog colorsys mimify snowballstemmer\n", 380 | "SimpleHTTPServer command mistune socket\n", 381 | "SimpleXMLRPCServer commands mkl sortedcollections\n", 382 | "SocketServer compileall mmap sortedcontainers\n", 383 | "StringIO compiler modulefinder sphinx\n", 384 | "TYPES concurrent mpl_toolkits spwd\n", 385 | "Tix conda mpmath spyder\n", 386 | "Tkconstants conda_env msgpack spyder_breakpoints\n", 387 | "Tkdnd configparser multifile spyder_io_dcm\n", 388 | "Tkinter contextlib multipledispatch spyder_io_hdf5\n", 389 | "UserDict contextlib2 multiprocessing spyder_profiler\n", 390 | "UserList cookielib mutex spyder_pylint\n", 391 | "UserString copy navigator_updater sqlalchemy\n", 392 | "_LWPCookieJar copy_reg nbconvert 
sqlite3\n", 393 | "_MozillaCookieJar crypt nbformat sre\n", 394 | "__builtin__ cryptography netrc sre_compile\n", 395 | "__future__ csv networkx sre_constants\n", 396 | "_abcoll ctypes new sre_parse\n", 397 | "_ast curl nis ssl\n", 398 | "_bisect curses nltk stat\n", 399 | "_cffi_backend cycler nntplib statsmodels\n", 400 | "_codecs cython nose statvfs\n", 401 | "_codecs_cn cythonmagic notebook storemagic\n", 402 | "_codecs_hk cytoolz ntpath string\n", 403 | "_codecs_iso2022 dask nturl2path stringold\n", 404 | "_codecs_jp datashape numba stringprep\n", 405 | "_codecs_kr datetime numbers strop\n", 406 | "_codecs_tw dateutil numexpr struct\n", 407 | "_collections dbhash numpy subprocess\n", 408 | "_csv decimal numpydoc subprocess32\n", 409 | "_ctypes decorator odo sunau\n", 410 | "_ctypes_test difflib olefile sunaudio\n", 411 | "_curses dircache opcode symbol\n", 412 | "_curses_panel dis openpyxl sympy\n", 413 | "_elementtree distributed operator sympyprinting\n", 414 | "_functools distutils optparse symtable\n", 415 | "_hashlib doctest os sys\n", 416 | "_heapq docutils os2emxpath sysconfig\n", 417 | "_hotshot dumbdbm ossaudiodev syslog\n", 418 | "_io dummy_thread packaging tables\n", 419 | "_json dummy_threading pandas tabnanny\n", 420 | "_license easy_install pandocfilters tarfile\n", 421 | "_locale email parser tblib\n", 422 | "_lsprof encodings partd telnetlib\n", 423 | "_multibytecodec entrypoints path tempfile\n", 424 | "_multiprocessing enum pathlib2 terminado\n", 425 | "_osx_support errno patsy termios\n", 426 | "_posixsubprocess et_xmlfile pdb test_path\n", 427 | "_pyio exceptions pep8 test_pycosat\n", 428 | "_pytest extern pexpect testpath\n", 429 | "_random fastcache pickle tests\n", 430 | "_scandir fcntl pickleshare textwrap\n", 431 | "_socket filecmp pickletools this\n", 432 | "_sqlite3 fileinput pip thread\n", 433 | "_sre flask pipes threading\n", 434 | "_ssl flask_cors pkg_resources time\n", 435 | "_strptime fnmatch pkgutil timeit\n", 436 | "_struct 
formatter platform tkColorChooser\n", 437 | "_symtable fpformat plistlib tkCommonDialog\n", 438 | "_sysconfigdata fractions ply tkFileDialog\n", 439 | "_testcapi ftplib popen2 tkFont\n", 440 | "_threading_local funcsigs poplib tkMessageBox\n", 441 | "_tkinter functools posix tkSimpleDialog\n", 442 | "_vendor functools32 posixfile tlz\n", 443 | "_warnings future_builtins posixpath toaiff\n", 444 | "_weakref gc pprint token\n", 445 | "_weakrefset genericpath profile tokenize\n", 446 | "_yaml getopt prompt_toolkit toolz\n", 447 | "abc getpass pstats tornado\n", 448 | "aifc gettext psutil trace\n", 449 | "alabaster gevent pty traceback\n", 450 | "anaconda_navigator glob ptyprocess traitlets\n", 451 | "anaconda_project greenlet pwd ttk\n", 452 | "antigravity grin py tty\n", 453 | "anydbm grp py_compile turtle\n", 454 | "argparse gzip pyclbr types\n", 455 | "array h5py pycosat unicodecsv\n", 456 | "asn1crypto hashlib pycparser unicodedata\n", 457 | "ast heapdict pycurl unittest\n", 458 | "astroid heapq pydoc urllib\n", 459 | "astropy hmac pydoc_data urllib2\n", 460 | "asynchat hotshot pyexpat urlparse\n", 461 | "asyncore html5lib pyflakes user\n", 462 | "atexit htmlentitydefs pygments uu\n", 463 | "audiodev htmllib pylab uuid\n", 464 | "audioop httplib pylint warnings\n", 465 | "autoreload idlelib pyodbc wave\n", 466 | "babel idna pyparsing wcwidth\n", 467 | "backports ihooks pytest weakref\n", 468 | "backports_abc imagesize pytz webbrowser\n", 469 | "base64 imaplib pywt werkzeug\n", 470 | "bdb imghdr pyximport wheel\n", 471 | "binascii imp qtawesome whichdb\n", 472 | "binhex importlib qtconsole widgetsnbextension\n", 473 | "binstar_client imputil qtpy wrapt\n", 474 | "bisect inspect quopri wsgiref\n", 475 | "bitarray io random xdrlib\n", 476 | "blaze ipaddress re xlrd\n", 477 | "bleach ipykernel readline xlsxwriter\n", 478 | "bokeh ipykernel_launcher repr xlwt\n", 479 | "boto ipython_genutils requests xml\n", 480 | "bottleneck ipywidgets resource xmllib\n", 481 | 
"brain_builtin_inference isort rexec xmlrpclib\n", 482 | "brain_dateutil itertools rfc822 xxsubtype\n", 483 | "brain_gi itsdangerous rlcompleter yaml\n", 484 | "brain_mechanize jdcal rmagic zict\n", 485 | "brain_nose jedi robotparser zipfile\n", 486 | "brain_numpy jinja2 rope zipimport\n", 487 | "brain_pytest json ruamel_yaml zlib\n", 488 | "brain_qt jsonschema runpy zmq\n", 489 | "brain_six jupyter scandir \n", 490 | "brain_ssl jupyter_client sched \n", 491 | "brain_stdlib jupyter_console scipy \n", 492 | "\n", 493 | "Enter any module name to get more help. Or, type \"modules spam\" to search\n", 494 | "for modules whose descriptions contain the word \"spam\".\n", 495 | "\n" 496 | ] 497 | } 498 | ], 499 | "source": [ 500 | "help(\"modules\")" 501 | ] 502 | } 503 | ], 504 | "metadata": { 505 | "kernelspec": { 506 | "display_name": "Python 2", 507 | "language": "python", 508 | "name": "python2.7" 509 | }, 510 | "language_info": { 511 | "codemirror_mode": { 512 | "name": "ipython", 513 | "version": 2 514 | }, 515 | "file_extension": ".py", 516 | "mimetype": "text/x-python", 517 | "name": "python", 518 | "nbconvert_exporter": "python", 519 | "pygments_lexer": "ipython2", 520 | "version": "2.7.13" 521 | } 522 | }, 523 | "nbformat": 4, 524 | "nbformat_minor": 2 525 | } 526 | -------------------------------------------------------------------------------- /Anaconda3_infos_analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### python在数据科学方面需要用到的库:\n", 8 | "\n", 9 | "- numpy:科学计算库。提供矩阵运算的库。\n", 10 | "\n", 11 | "- pandas:数据分析处理库\n", 12 | "\n", 13 | "- scipy:数值计算库。提供数值积分和常微分方程组求解算法。提供了一个非常广泛的特定函数集合。\n", 14 | "\n", 15 | "- Matplotlib:数据可视化库\n" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "### 1 当前路径信息(内核Python 3.6)" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 
1, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/plain": [ 33 | "['',\n", 34 | " '/root/anaconda3/lib/python36.zip',\n", 35 | " '/root/anaconda3/lib/python3.6',\n", 36 | " '/root/anaconda3/lib/python3.6/lib-dynload',\n", 37 | " '/root/anaconda3/lib/python3.6/site-packages',\n", 38 | " '/root/anaconda3/lib/python3.6/site-packages/Sphinx-1.5.6-py3.6.egg',\n", 39 | " '/root/anaconda3/lib/python3.6/site-packages/setuptools-27.2.0-py3.6.egg',\n", 40 | " '/root/anaconda3/lib/python3.6/site-packages/IPython/extensions',\n", 41 | " '/root/.ipython']" 42 | ] 43 | }, 44 | "execution_count": 1, 45 | "metadata": {}, 46 | "output_type": "execute_result" 47 | } 48 | ], 49 | "source": [ 50 | "import sys\n", 51 | "\n", 52 | "sys.path" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "### 2 科学计算常用包版本及路径信息" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/html": [ 70 | "
\n", 71 | "\n", 84 | "\n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | "
pack_nameversionpath
0numpy1.12.1/root/anaconda3/lib/python3.6/site-packages/numpy/__init__.py
1matplotlib2.0.2/root/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py
2pandas0.20.1/root/anaconda3/lib/python3.6/site-packages/pandas/__init__.py
3scipy0.19.0/root/anaconda3/lib/python3.6/site-packages/scipy/__init__.py
\n", 120 | "
" 121 | ], 122 | "text/plain": [ 123 | " pack_name version \\\n", 124 | "0 numpy 1.12.1 \n", 125 | "1 matplotlib 2.0.2 \n", 126 | "2 pandas 0.20.1 \n", 127 | "3 scipy 0.19.0 \n", 128 | "\n", 129 | " path \n", 130 | "0 /root/anaconda3/lib/python3.6/site-packages/numpy/__init__.py \n", 131 | "1 /root/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py \n", 132 | "2 /root/anaconda3/lib/python3.6/site-packages/pandas/__init__.py \n", 133 | "3 /root/anaconda3/lib/python3.6/site-packages/scipy/__init__.py " 134 | ] 135 | }, 136 | "execution_count": 3, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "import numpy\n", 143 | "import matplotlib\n", 144 | "import pandas as pd\n", 145 | "import scipy\n", 146 | "\n", 147 | "packs = [\n", 148 | " (\"numpy\", numpy.__version__, numpy.__file__),\n", 149 | " (\"matplotlib\", matplotlib.__version__, matplotlib.__file__),\n", 150 | " (\"pandas\", pd.__version__, pd.__file__),\n", 151 | " (\"scipy\", scipy.__version__, scipy.__file__)\n", 152 | "]\n", 153 | "\n", 154 | "pd_packages = pd.DataFrame(packs, columns=[\"pack_name\", \"version\", \"path\"])\n", 155 | "pd.set_option(\"max_colwidth\", 120)\n", 156 | "pd_packages" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "### 3 Anaconda3 集成包简单分析" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 4, 169 | "metadata": { 170 | "collapsed": true 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "import sys\n", 175 | "%matplotlib inline\n", 176 | "\n", 177 | "packages = [pack.split(\".\")[0] for pack in sys.modules.keys()]\n", 178 | "pd_packages = pd.DataFrame(packages, columns=[\"package\"])\n", 179 | "pack_series = pd_packages.groupby(by=\"package\").size()" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 5, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/plain": [ 190 | "count 
229.000000\n", 191 | "mean 4.978166\n", 192 | "std 16.714697\n", 193 | "min 1.000000\n", 194 | "25% 1.000000\n", 195 | "50% 1.000000\n", 196 | "75% 1.000000\n", 197 | "max 169.000000\n", 198 | "dtype: float64" 199 | ] 200 | }, 201 | "execution_count": 5, 202 | "metadata": {}, 203 | "output_type": "execute_result" 204 | } 205 | ], 206 | "source": [ 207 | "pack_series.describe()" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 6, 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "data": { 217 | "text/plain": [ 218 | "229" 219 | ] 220 | }, 221 | "execution_count": 6, 222 | "metadata": {}, 223 | "output_type": "execute_result" 224 | } 225 | ], 226 | "source": [ 227 | "pack_series.index.duplicated()\n", 228 | "pack_temp = pack_series.index.drop_duplicates()\n", 229 | "len(pack_temp)" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "### 3.1 根据 Anaconda 3包中的模块数量倒序 绘图" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 10, 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "data": { 246 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXoAAAEKCAYAAAAcgp5RAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8XNV99/HPV5slS/IuS7bB2AYvEIIdMCalQKAJNElD\ngDShEJKGJg1JkzZb8/RputK0adOmCc3zEEiAUHhSAll5QillLUsDhGAIttlkbDBgW5JlbGzJi6zl\n1z/ulT02siVrZjSj0ff9eumlO2fuvXPmSvPV0bnnnquIwMzMSldZoStgZmb55aA3MytxDnozsxLn\noDczK3EOejOzEuegNzMrcQ56M7MS56A3Mytxgwa9pOslbZL0dEbZDyQ9lX6tk/RUWj5H0q6M576d\nz8qbmdngKoawzg3AlcD/6y+IiN/pX5b0dWBbxvprI2LJ4VRi2rRpMWfOnMPZxMxszHviiSc2R0TD\nYOsNGvQR8ZCkOQM9J0nAhcBvHG4FM82ZM4fly5dnswszszFH0stDWS/bPvrTgbaIeCGjbG7abfOg\npNOz3L+ZmWVpKF03h3IxcHPG4xZgdkS8Jukk4P9LelNEbD9wQ0mXAZcBzJ49O8tqmJnZwQy7RS+p\nAngf8IP+sojoiojX0uUngLXAgoG2j4hrImJpRCxtaBi0i8nMzIYpm66bdwDPR8T6/gJJDZLK0+V5\nwHzgxeyqaGZm2RjK8MqbgUeBhZLWS/pY+tRF7N9tA3AGsDIdbvlj4JMRsSWXFTYzs8MzlFE3Fx+k\n/NIByn4C/CT7apmZWa74ylgzsxJXFEHfum03vX2+paGZWT4URdC3d3bR3tFV6GqYmZWkogh6gI3b\ndhW6CmZmJalogr7l9d2FroKZWUkqnqB3i97MLC+KIujLJDa6RW9mlhdFEfSV5XKL3swsT4ok6MvY\nuM0tejOzfCiaoG953S16M7N8KJKgF+2dXezp6St0VczMSk5RBH1VeRkR0Lbd3TdmZrlWFEFfWZ5U\no8X99GZmOVccQV/RH/Tupzczy7XiCPpyAXgsvZlZHhRF0JdJTKiucIvezCwPiiLoAWZOqnGL3sws\nD4om6GdMrHaL3swsD4on6CfVeNSNmVkeFE3Qz5xYzZYde9jd3VvoqpiZlZSiCfoZE2sAj6U3M8u1\nQYNe0vWSNkl6OqPsckkbJD2Vfr0747kvSVojqVnSbw61IjMmVQN4zhszsxwbSov+BuCdA5RfERFL\n0q87ACQdB1wEvCnd5ipJ5UOpyMy0Re9ZLM3McmvQoI+Ih4AtQ9zfecAtEdEVES8Ba4BlQ9mwaaJb\n9GZm+ZBNH/0fSVqZdu1MTstmAa9mrLM+LRtUdWU5U2ur3KI3M8ux4Qb91cA8YAnQAnz9cHcg6TJJ\nyyUtb29vB5J+eo+lNzPLrWEFfUS0RURvRPQB17Kve2YDcGTGqkekZQPt45qIWBoRSxsaGoBk5E2L\nr441M8upYQW9pBkZDy8A+kfk3AZcJGmcpLnAfOCXQ93vzInVbHSL3swspyoGW0HSzcCZwDRJ64G/\nBs6UtAQIYB3wCYCIeEbSD4FngR7g0xEx5CugZkyqoWN3D51dPdSNG7RqZmY2BIOmaURcPEDxdw+x\n/leArwynMjMyRt7Mb6wfzi7MzOwARXNlLCQzWILH0puZ5VJRBf0Mj6U3M8u5ogr6xgnVSG7Rm5nl\nUlEFfWV5GdPrx7lFb2aWQ0UV9JCOpXeL3swsZ4ow6D2W3swsl4ow6JOrYyOi0FUxMysJRRf0MydV\ns6u7l227ugtdFTOzklB0Qd9/p6mNnvPGzCwnii/o0ztNtW53P72ZWS4UXdDPdIvezCynii7oG+rH\nUVEmz0tvZpYjRRf05WWicUK156U3M8uRogt68Fh6M7NcKs6gn+SrY83McqUog37mxGpatvmiKTOz\nXCjKoJ8xsZo9PX28tmNPoatiZjbqFWfQpzcg8QlZM7PsFWXQ7
x1L7xOyZmZZK8qg77861vPSm5ll\nryiDfmptFVUVZR55Y2aWA4MGvaTrJW2S9HRG2dckPS9ppaRbJU1Ky+dI2iXpqfTr28OplKR0LL2D\n3swsW0Np0d8AvPOAsnuA4yPiBGA18KWM59ZGxJL065PDrdiMidXuujEzy4FBgz4iHgK2HFB2d0T0\npA9/ARyR64rN9C0FzcxyIhd99B8F/jPj8dy02+ZBSacfbCNJl0laLml5e3v7G56fMama1u276e3z\nRVNmZtnIKugl/TnQA9yUFrUAsyNiCfAF4PuSJgy0bURcExFLI2JpQ0PDG56fMbGG3r6gvaMrmyqa\nmY15ww56SZcC7wEuiXSugojoiojX0uUngLXAguHsf2Y6xNJj6c3MsjOsoJf0TuBPgPdGxM6M8gZJ\n5enyPGA+8OJwXqP/loK+OtbMLDsVg60g6WbgTGCapPXAX5OMshkH3CMJ4BfpCJszgC9L6gb6gE9G\nxJYBdzyI/qtjfQMSM7PsDBr0EXHxAMXfPci6PwF+km2lACbUVDC+qty3FDQzy1JRXhkL+y6acove\nzCw7RRv0ADMn1fjqWDOzLBV30E+sYcNWt+jNzLJR1EF/9PRaNnd2sdU3IDEzG7aiDvqFTcm1Vs1t\nHQWuiZnZ6FXUQb+oqR6A5lYHvZnZcBV10E+vH8fEmkqed9CbmQ1bUQe9JBY21dPcur3QVTEzG7WK\nOugh6b5Z3dZJOp2OmZkdpqIP+oVN9XR29bDBNyExMxuWog96n5A1M8tO0Qf9/MYk6H1C1sxseIo+\n6CdUVzJrUo1b9GZmw1T0QQ9JP/1qXzRlZjYsoybo17Z30t3bV+iqmJmNOqMj6Bvr6e4NXmzfUeiq\nmJmNOqMj6Jv6T8j6wikzs8M1KoL+6IY6KsrkE7JmZsMwKoK+qqKMeQ21PiFrZjYMoyLoIZmy2GPp\nzcwO36BBL+l6SZskPZ1RNkXSPZJeSL9PznjuS5LWSGqW9Ju5qujCxjrWb91FZ1dPrnZpZjYmDKVF\nfwPwzgPK/hS4LyLmA/elj5F0HHAR8KZ0m6skleeiontvQuJWvZnZYRk06CPiIWDLAcXnATemyzcC\n52eU3xIRXRHxErAGWJaLivbPeeN+ejOzwzPcPvrGiGhJl1uBxnR5FvBqxnrr07KszZpUQ21VuVv0\nZmaHKeuTsZFMFH/Yk8VLukzScknL29vbB12/rEzMb6z3WHozs8M03KBvkzQDIP2+KS3fAByZsd4R\nadkbRMQ1EbE0IpY2NDQM6UUXNdXT3Nrhm5CYmR2G4Qb9bcBH0uWPAD/LKL9I0jhJc4H5wC+zq+I+\nC5vq2bqzm/aOrlzt0sys5A1leOXNwKPAQknrJX0M+CpwtqQXgHekj4mIZ4AfAs8CdwKfjojeXFW2\nfyqEZp+QNTMbsorBVoiIiw/y1NsPsv5XgK9kU6mDWdi4725Tp88fWnePmdlYN2qujAWYWjeOaXXj\nfIWsmdlhGFVBD/tOyJqZ2dCMuqDvv9tUb59H3piZDcWoDPqunj5e2bKz0FUxMxsVRl/Q7z0h6wun\nzMyGYtQF/YLGeiR8QtbMbIhGXdDXVJVz1JTxPiFrZjZEoy7oIemn90VTZmZDM0qDfgLrNu9gd3fO\nLro1MytZozPoG+vpC1izqbPQVTEzK3qjM+jTOW98QtbMbHCjMujnTB1PVUWZh1iamQ3BqAz6ivIy\nFjXVc+uvNnDLL1+hu7ev0FUyMytaozLoAf7+gjdzxOTx/OlPV3H2Nx7kZ09toM/TIpiZvcGoDfrj\nZ03k1k+dynW/u5TqynI+e8tTvOub/81dz7T6DlRmZhlGbdADSOIdxzVyx2dO5/9e/Ba6e/v4xPee\n4PxvPex7y5qZpUZ10PcrKxPnLp7J3Z8/g3/67RNYv3UX//vHK92yNzOjRIK+X0V5GReefCR/fM5C\nVqzfxs/XbC50lczMCq6kg
r7fb580i6YJ1Vz5X2sKXRUzs4IryaAfV1HOx8+Yx2MvbeHxdVsKXR0z\ns4IqyaAHuHjZkUyprXKr3szGvGEHvaSFkp7K+Nou6XOSLpe0IaP83bms8FCNr6rgY6fN5cHV7axa\nv60QVTAzKwrDDvqIaI6IJRGxBDgJ2Ancmj59Rf9zEXFHLio6HB/+taOor67gW/e7VW9mY1euum7e\nDqyNiJdztL+cmFBdyaWnzuHOZ1p5wfPXm9kYlaugvwi4OePxH0laKel6SZNz9BrD8nu/PpeaynKu\nemBtIathZlYwWQe9pCrgvcCP0qKrgXnAEqAF+PpBtrtM0nJJy9vb27OtxkFNqa3iklNmc9uKjbzy\n2s68vY6ZWbHKRYv+XcCTEdEGEBFtEdEbEX3AtcCygTaKiGsiYmlELG1oaMhBNQ7u42fMo1zi6gfd\nqjezsScXQX8xGd02kmZkPHcB8HQOXiMrjROq+cDSI/jxE6/Ssm1XoatjZjaisgp6SbXA2cBPM4r/\nSdIqSSuBs4DPZ/MaufLJtx1NX8A1D71Y6KqYmY2oimw2jogdwNQDyj6cVY3y5Mgp4zl/ySxu/uUr\nfPqsY5hWN67QVTIzGxEle2XsQD75tnns7u7jP1a2FLoqZmYjZkwF/THT65g0vtI3FTezMWVMBb0k\nFkyv98VTZjamjKmgB1jQVEdzW4dvSmJmY8bYC/rGejp299C2vavQVTEzGxFjMugBmt19Y2ZjxJgN\nevfTm9lYMeaCfkptFdPqxtHskTdmNkaMuaAHWNBYx+pNnYWuhpnZiBijQZ8Msezr88gbMyt9Yzbo\nd+7pZcPrnuDMzErfmAz6hU11AKz2CVkzGwPGZNAfMz0ZebO6zf30Zlb6xmTQT6ypZMbEarfozWxM\nGJNBDzC/sd5Bb2ZjwpgN+oWNdazZ1EmvR96YWYkbs0E/v7Gerp4+XtniG4abWWkbs0HfPxWCu2/M\nrNSN2aCfPz0dYumpEMysxI3ZoK8dV8ERk2s8FYKZlbysbg4uaR3QAfQCPRGxVNIU4AfAHGAdcGFE\nbM2umvmxsLHeLXozK3m5aNGfFRFLImJp+vhPgfsiYj5wX/q4KM1vrOfFzZ109/YVuipmZnmTj66b\n84Ab0+UbgfPz8Bo5sbCpju7eYN3mHYWuiplZ3mQb9AHcK+kJSZelZY0R0ZIutwKNWb5G3sz3VAhm\nNgZk1UcPnBYRGyRNB+6R9HzmkxERkga8Iin9w3AZwOzZs7OsxvAcM72OMiW3FfwtZhSkDmZm+ZZV\niz4iNqTfNwG3AsuANkkzANLvmw6y7TURsTQiljY0NGRTjWGrriznqKm1vq2gmZW0YQe9pFpJ9f3L\nwDnA08BtwEfS1T4C/CzbSubTgsY63yjczEpaNl03jcCtkvr38/2IuFPS48APJX0MeBm4MPtq5s+C\nxnrufW4Tu7t7qa4sL3R1zMxybthBHxEvAosHKH8NeHs2lRpJCxrr6e0LXmzfwXEzJxS6OmZmOTdm\nr4zt1z/nzQub3H1jZqVpzAf93Gm1VJSJZl8ha2YlaswHfVVFGXOn1XosvZmVrDEf9AALmny3KTMr\nXQ56YMH0el7dupNde3oLXRUzs5xz0JOMpY+ANZ6y2MxKkIOepOsG8IVTZlaSHPTAUVPGU1Ve5qkQ\nzKwkOeiBivIyjp7uqRDMrDQ56FMLGut4wUMszawEOehTCxrr2fD6Ljp2dxe6KmZmOeWgTx07Izkh\n+/W7V9PV42GWZlY6HPSpty2YziWnzOaGR9Zx3pUP8+zG7YWukplZTjjoU+Vl4isXvJl/vfRkXtux\nh/O+9XOufmAtvX0D3iDLzGzUcNAf4KxF07nrc2fwjmMb+cc7n+eiax7l1S07C10tM7Nhc9APYEpt\nFVddciLfuHAxz7d08M5/eYgbHn6Jte2d9PT2Fbp6ZmaHJdubg5csSbzvxCNYNncKX/zRCi7
/92eB\nZLbL+dPrWNhYz4KmehY21fOWIycxaXxVgWtsZjYwRRS+D3rp0qWxfPnyQlfjoPr6gmdbtvN8awer\n2zpoTr+3bNsNwKTxldz88bdy7AzfocrMRo6kJyJi6WDruUU/BGVl4vhZEzl+1sT9yrft7OaZjdv4\n4x+t4EPXPcYPPvFWjpleX6BampkNzH30WZg4vpJTj5nGTb9/CmVl4oPXPsa6zTsKXS0zs/046HNg\nXkMdN/3+KXT39nHJdY+x4fVdha6Smdleww56SUdKul/Ss5KekfTZtPxySRskPZV+vTt31S1eCxrr\n+d7HTmH77m4+eO0vaNu+u9BVMjMDsmvR9wB/HBHHAW8FPi3puPS5KyJiSfp1R9a1HCWOnzWRGz+6\njM0dXVxy3WNs7uwqdJXMzIYf9BHREhFPpssdwHPArFxVbLQ6cfZkvnvpyazfupMPXfcYLdt2sX13\n94BfxTDiycxKX06GV0qaAzwEHA98Afg9YBuwnKTVv/VQ2xf78MrheGh1O79/43L2HOICq9lTxnPu\n4hm8d/EsFjZ5tI6ZHZ6hDq/MOugl1QEPAl+JiJ9KagQ2AwH8LTAjIj46wHaXAZcBzJ49+6SXX345\nq3oUoxWvvs7j67YM+FxPX/Dwms08svY1evuCBY11vHfxTM5dPJOjptaOcE3NbDQakaCXVAncDtwV\nEd8Y4Pk5wO0Rcfyh9lOKLfqh2tzZxX+uauG2FRt5fF3yj8/iIyby0dPmcu4JMykrU4FraGbFKu9B\nL0nAjcCWiPhcRvmMiGhJlz8PnBIRFx1qX2M56DNtfH0Xt6/cyI+Wr+eFTZ0saqrnC2cv4OzjGkkO\nt5nZPiMR9KcB/w2sAvo7ov8MuBhYQtJ1sw74RH/wH4yDfn99fcHtq1q44p7VvLR5B4uPnMQXz1nA\nacdMc+Cb2V4j1kefCw76gfX09vGTJ9fzzXtfYOO23ZwydwpfOHsBS+dModxdOmZjnoO+hHT19HLz\nY69w5f1r2dzZxbiKMo5JZ9Bc2JTOotlYz4yJ1W7xm40hDvoStHNPD3c+3cpzLdtpbutkdWsHrRlX\n4NZXV+ydPnlRUz0LGpPvnkLZrDR59soSNL6qgvedeMR+Za/v3MPqtk6a2zpobt3O6tZObl+xke8/\n1rN3nen141iYtvr7W//zG+sYX+Ufv9lY4E/6KDdpfBXL5k5h2dwpe8sigrbtXTS3dbC6tYPnWzto\nbtvO937xMl09yXlzKblga0FjPSfPmcx7TpjJzEk1hXobZpZH7roZQ3r7gle27KS5dd/NU55r3c6L\n7cnUyifPmcx7F8/kXW+ewbS6cQWurZkNxn30NmTrNu/g9pUbuW3FRla3dVJeJk49eirnLp7Jb76p\niYk1lYWuopkNwEFvw9Lc2sFtKzbw7ytaeGXLTqrKyzhzYQPnLp7JO45tpKaqvNBVNLOUg96yEhGs\nWL+N257ayO0rN7Kpo4vxVeWcfVwj554wkzMWNFBV4fvWmBWSg95yprcveOyl1/j3FRu5Y1Ur23Z1\nU19dwdTagYdtVleWc/Gy2Vy07EjGVfg/ALN8cdBbXuzp6ePna9q559lN7NzTM+A6r27ZyZOvvM6s\nSTV89u3zed+Js6god+vfLNcc9FYwEcHP12zmn+9qZsX6bcybVsvnzl7Ae948w7NxmuWQg94KLiK4\n+9k2vnH3aprbOljUVM+nzzqGk46a7OkazHLAQW9Fo7cvuH3lRq64ZzXrXtsJDDxdw6xJNRws+yeP\nr6J2nK/vM8vkoLei093bx5Mvb2V1W0c6ZUNy1W7H7oH7+g90xOSavX8UFjYlX/Om1Xn0j41ZnuvG\nik5leRmnzJvKKfOm7i2LCFq376a5tYNNHV0DbxiwqWM3zW2dNLdu54Hmdnr6kgZKRZmYO612v7l8\nFjXVc+Tk8T4fYJZy0FtBSWLGxBpmTBz6PDt7evp4cXP
nflM5rFj/Orev3Hd/m5rKco6eXnvQidv6\n/0Bk/ofgWT6tVDnobdSpqihjUdMEFjVN2K+8s6uHF9qS4H++tYO17TvY09M74D52dfdy24qN3JQx\ny2fjhHEsaKxnztTanN/YpXFCdfJHpamemT4RbSPMQW8lo25cBW+ZPZm3zJ48pPUzu42aW/edN1i5\nfhu5PHcVAR1d+/6g1I+rSKaLbqpn/vQ6aot9umjBkZPHs6ipnskHuUjOiluR/4aZ5U9mt9GZC6fn\n9bW27exO/pCkU0c3t3bwHytb2LarO6+vm2sN9eP2dXc11nP09Fqqyg//6ufaceXMnjLeF9KNEAe9\n2QiYOL5ywPsGbO7cw57evgLWbHA9vX2se23n3nsbrG7r4KbHXmZ3d3b1rqooY/4Bt8Rc1FTPtLpx\njFTHVpk0Jk7aO+jNCkQSDfWjY97/o6bW8rYFDXsf9/YFr27ZyUubd9Dbd/jdXFt27uGFtg6a2zp5\neO1mfvqrDbms7pBVlot50+r2Dtft/6Mza1JNSf0ByFvQS3on8E2gHLguIr6ar9cys5FVXibmTKtl\nzrTanOxv7y0xW7fz+s6R687q7OrhhU2dPPHyVm5bsXFveW1VOTMm1YzYfxb5lpegl1QOfAs4G1gP\nPC7ptoh4Nh+vZ2aj20C3xBxpHbu7Wd3WmVzQ19rBpo7dBavLUN07xPXy1aJfBqyJiBcBJN0CnAc4\n6M2sKNVXV3LSUZM56aihjdoqBld/aGjr5euU9yzg1YzH69OyvSRdJmm5pOXt7e15qoaZmRVsbFNE\nXBMRSyNiaUNDw+AbmJnZsOQr6DcAR2Y8PiItMzOzEZavoH8cmC9prqQq4CLgtjy9lpmZHUJeTsZG\nRI+kPwTuIhleeX1EPJOP1zIzs0PL2zj6iLgDuCNf+zczs6HxRBNmZiXOQW9mVuKK4laCkjqA5kLX\no8hMAzYXuhJFxMdjfz4ebzQWj8lRETHo+PRimdSseSj3PRxLJC33MdnHx2N/Ph5v5GNycO66MTMr\ncQ56M7MSVyxBf02hK1CEfEz25+OxPx+PN/IxOYiiOBlrZmb5UywtejMzy5OiDXpJD0gq6jPokh4Z\n5nZzJD2d6/oM8pqXS/riSL7m4dZB0vmSjsti/3MkfTDj8VJJ/yddvlTSlcPd92hRDD/noZD0OUnj\nC12PsaJog340iIhTR/o1JRXLkNh8OB8YdtADc4C9QR8RyyPiM9lWyvLic0BOgj69o50dQtZBn7ai\nnpd0k6TnJP1Y0nhJfyXpcUlPS7pGktL1H5D0j5J+KWm1pNPT8hpJt6T7uBWoyXiNq9OblDwj6W8y\nyr8q6VlJKyX9c7bvZRjvvTP9fqakhyT9h6RmSd+WVCbpo5L+JWP9j0u64oB9zJP0K0knSyqX9LX0\nuK2U9ImM/f+3pNuAZ9Nj/pyka9NjcrekmnTdoyXdKemJdJtFI3hI3kDSn6c/558DC9Oyj6fvcYWk\nn6S/L6cC7wW+Jump9H0M+F4k3SDp/Rmv0ZkufhU4Pd3+8+lxu32E33LW0t+FlZKqJdWmP+M/lPSg\npJ9JejH93b8k/RytknR0oes9kIPkw2eAmcD9ku4/2OfkYNmSrrMuzZEngQ9IWiLpF+lxu1XS5HS9\nYyTdm/6uPdl/nCT9r4zP2d+kZbXpZ3iFktz6nbS8oDmTExGR1RdJKyqAX08fXw98EZiSsc73gHPT\n5QeAr6fL7wbuTZe/QDLLJcAJQA+wNH08Jf1enm5/AjCV5Gra/hPKk7J9L8N4753p9zOB3cC8tI73\nAO8H6oC1QGW63iPAm9Nj9jRJ8P0KWJw+fxnwF+nyOGA5MDfd/w5gbsYx7wGWpI9/CHwoXb4PmJ8u\nnwL8V7p8OfDFET4+JwGrSFpuE4A16e/G1Ix1/g74o3T5BuD9Gc8d7L0cuF7mz+H2jPK9j4FLgStH\n+ncki2P3d8A/k9x
7+Uvpe3kdmJH+bmwA/iZd97PAvxTq5zzI+5jDwPmwDpiWlh3qc/KGbdPldcCf\nZLzOSuBt6fKXM47HY8AF6XJ1+rt4DskIHZE0dm8HzgB+G7g2Y58TKYKcycVXrrpuXo2Ih9PlfwNO\nA86S9JikVcBvAG/KWP+n6fcnSH6YpAf63wAiYmX6g+t3YfqX+1fpfo4DtpGE63clvQ/YmaP3Mly/\njIgXI6IXuBk4LSI6gf8C3pO2RisjYlW6fgPwM+CSiFiRlp0D/K6kp0h+QacC8zP2/1LG670UEU+l\ny08AcyTVAacCP0r38R2SYCiU04FbI2JnRGxn3z0Jjk9b6KuAS9j/dwOAInwvI+3LwNnAUuCf0rLH\nI6IlIrpIgvHutHwV+z5HxWigfNhrkM/Jobb9AYCkiSQB/GBafiNwhqR6YFZE3Jq+zu6I2EnyOTuH\nJE+eBBaRfM5WAWen/ymcHhHbKL6cGZZc9fceOEYzgKtIWuSvSrqc5K9pv670e+9gdZA0l6QFcHJE\nbJV0A1AdyZz3y4C3k7Se/5DkD0qhDHQMAK4D/gx4HvjXjOe3Aa+Q/OL23zRdJK3buzJ3JOlMkhZ9\npq6M5V6Srq4y4PWIWDK8tzBibgDOj4gVki4laa0e6FDvpSd9HkllQFV+qllQU0laupXs++xk/sz7\nMh73UTzTmQzkYJ+NTAf7nBxq2wM/E0Ml4B8i4jtveEI6kaSn4e8k3RcRXy6ynBmWXLXoZ0v6tXT5\ng8DP0+XNacvs/QNvtp+H0m2RdDxJ9wwk//LvALZJagTela5TB0yMZN77zwOLc/FGsrBMyR21yoDf\nIT0GEfEYyW0VP0jS0u+3B7iApAXffwLxLuAPJFUCSFogqXaoFUhbzS9J+kC6vSQV8rg8BJyv5PxL\nPXBuWl4PtKTv85KM9TvS5wZ7L+tIuoUg6devPHD7EvAd4C+Bm4B/LHBdsjVQPuz3szrE5+Rg2ULG\nttuArUrP9wEfBh6MiA5gvaTzASSNS/v47wI+mmYIkmZJmi5pJrAzIv4N+BpwYhHmzLDkqhXQDHxa\n0vUkrdOrgckk/dCtJLcWHMzVwL9Keg54jqQ7grTV9yuSv/SvAv3/xtUDP5NUTfIX+gs5ei/D9Thw\nJXAMcD9wa8ZzPyTpT9+auUFE7JD0HuAeJScUryP5F/xJSQLaSUaiHI5LgKsl/QVJAN4CrDj0JvkR\nEU9K+kH6+pvY93vwlyRdU+3p9/4P/C3AtenJuvdz8PdyLcnPfgVwJ/tadiuB3rT8BpJ/zUcdSb8L\ndEfE95XMb72VAAACs0lEQVSMKHmEfd2do9FA+bAHuFPSxog4K11voM/JQNsO5CPAt9MgfxH4vbT8\nw8B3JH0Z6AY+EBF3SzoWeDT5mNEJfIjks/s1SX3pun9A8eXMsGR9ZaykOSQnvI7PRYVGo7Rr5YsR\n8Z6DPH87cEVE3DeiFTMrsMPJhwM/J86W3PE4+jySNEnSamCXQ95sYP6c5J/nujEzK3Fu0ZuZlTgH\nvZlZiXPQm5mVOAe92QE0SufIMTsYB72ZWYlz0FtJ0eHPpjrg7IYZ+ztZyeyiR0taJunR9PEjkvpn\n4xwv6YdKZji8VckcT0vT585Jt3lS0o/6r8Y0G0kOeitFC4GrIuJYYDvwKZKZK09OL76pAfovbrsJ\n+FZELCaZRK2lfydKpk7+NnBeRKwluTr79Ih4C/BXwN+nq34K2BoRx5Fc9XtSuv004C+Ad0TEiSSz\nkY7KKyttdCvmiZDMhuvAGQ8/QzJvzp+QTFM7BXhG0gMcMLshQNrYP5ZkKttzImJjuq+JwI2S5pNM\nrtU/x85pwDfTfTwtqX/m1beSzLT6cLrPKuDRfLxhs0Nx0FspOtzZVAfSkq7zFqA/6P8WuD8iLkgv\nz39gkH0IuCciLh5yzc3ywF03VoqGNJvqIWY3hOQmH78F/EM6lxEkLfoN6fKlGa/3M
HBhuo/jSG6a\nAfAL4NclHZM+VytpQa7epNlQOeitFPXPePgcySyqV5PMePk0yRS1mbOpfhj4TNrd8gjQ1P9ERLSR\n9OV/S9IpJDcA+Yd0NtXM/4avAhokPUtyZ6hngG0R0U7yB+HmdP+PktzkwmxEea4bKymFmPEwnUq4\nMiJ2p6N27gUWRsSekaqD2aG4j94se+NJbnRdSdIv/ymHvBUTt+jNzEqc++jNzEqcg97MrMQ56M3M\nSpyD3sysxDnozcxKnIPezKzE/Q/9/LoiMkkCBQAAAABJRU5ErkJggg==\n", 247 | "text/plain": [ 248 | "" 249 | ] 250 | }, 251 | "metadata": {}, 252 | "output_type": "display_data" 253 | } 254 | ], 255 | "source": [ 256 | "pack_series.sort_values(ascending=False)[:50].plot();" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 11, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "data": { 266 | "text/plain": [ 267 | "package\n", 268 | "pandas 169\n", 269 | "IPython 118\n", 270 | "matplotlib 85\n", 271 | "numpy 82\n", 272 | "prompt_toolkit 70\n", 273 | "_pytest 36\n", 274 | "xlsxwriter 32\n", 275 | "zmq 31\n", 276 | "ipywidgets 25\n", 277 | "py 25\n", 278 | "ipykernel 23\n", 279 | "pygments 20\n", 280 | "jupyter_client 19\n", 281 | "pkg_resources 18\n", 282 | "bottleneck 17\n", 283 | "dtype: int64" 284 | ] 285 | }, 286 | "execution_count": 11, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 291 | "source": [ 292 | "pack_series.sort_values(ascending=False)[:15]" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "### 3.2 Aanaconda 3 中模块信息" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 14, 305 | "metadata": { 306 | "scrolled": false 307 | }, 308 | "outputs": [ 309 | { 310 | "name": "stdout", 311 | "output_type": "stream", 312 | "text": [ 313 | "\n", 314 | "Please wait a moment while I gather a list of all available modules...\n", 315 | "\n" 316 | ] 317 | }, 318 | { 319 | "name": "stderr", 320 | "output_type": "stream", 321 | "text": [ 322 | "/root/anaconda3/lib/python3.6/site-packages/IPython/kernel/__init__.py:13: ShimWarning: The `IPython.kernel` package has been deprecated since IPython 4.0.You 
should import from ipykernel or jupyter_client instead.\n", 323 | " \"You should import from ipykernel or jupyter_client instead.\", ShimWarning)\n", 324 | "/root/anaconda3/lib/python3.6/site-packages/odo/backends/pandas.py:94: FutureWarning: pandas.tslib is deprecated and will be removed in a future version.\n", 325 | "You can access NaTType as type(pandas.NaT)\n", 326 | " @convert.register((pd.Timestamp, pd.Timedelta), (pd.tslib.NaTType, type(None)))\n", 327 | "/root/anaconda3/lib/python3.6/site-packages/blaze/server/server.py:17: ExtDeprecationWarning: Importing flask.ext.cors is deprecated, use flask_cors instead.\n", 328 | " from flask.ext.cors import cross_origin\n", 329 | "/root/anaconda3/lib/python3.6/site-packages/bokeh/util/deprecation.py:34: BokehDeprecationWarning: MPL compatibility can no longer be successfully maintained, and is now deprecated. All MPL compat functions will be removed completely on the release of Bokeh 1.0. See http://bokeh.pydata.org/en/latest/docs/releases/0.12.5.html for more information\n", 330 | " warn(message)\n", 331 | "/root/anaconda3/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The matplotlib.delaunay module was deprecated in version 1.4. Use matplotlib.tri.Triangulation instead.\n", 332 | " warnings.warn(message, mplDeprecation, stacklevel=1)\n", 333 | "/root/anaconda3/lib/python3.6/site-packages/nltk/twitter/__init__.py:20: UserWarning: The twython library has not been installed. Some functionality from the twitter package will not be available.\n", 334 | " warnings.warn(\"The twython library has not been installed. 
\"\n", 335 | "/root/anaconda3/lib/python3.6/site-packages/skimage/viewer/utils/core.py:10: UserWarning: Recommended matplotlib backend is `Agg` for full skimage.viewer functionality.\n", 336 | " warn(\"Recommended matplotlib backend is `Agg` for full \"\n", 337 | "/root/anaconda3/lib/python3.6/site-packages/qtawesome/iconic_font.py:268: UserWarning: You need to have a running QApplication to use QtAwesome!\n", 338 | " warnings.warn(\"You need to have a running \"\n", 339 | "/root/anaconda3/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.\n", 340 | " from pandas.core import datetools\n" 341 | ] 342 | }, 343 | { 344 | "name": "stdout", 345 | "output_type": "stream", 346 | "text": [ 347 | "Crypto brain_nose jupyter_core scipy\n", 348 | "Cython brain_numpy keyword scripts\n", 349 | "IPython brain_pytest lazy_object_proxy seaborn\n", 350 | "OleFileIO_PL brain_qt lib2to3 secrets\n", 351 | "OpenSSL brain_six linecache select\n", 352 | "PIL brain_ssl llvmlite selectors\n", 353 | "PyQt5 brain_stdlib locale setuptools\n", 354 | "__future__ bs4 locket shelve\n", 355 | "_ast builtins logging shlex\n", 356 | "_asyncio bz2 lxml shutil\n", 357 | "_bisect cProfile lzma signal\n", 358 | "_blake2 calendar macpath simplegeneric\n", 359 | "_bootlocale cffi macurl2path singledispatch\n", 360 | "_bz2 cgi mailbox singledispatch_helpers\n", 361 | "_cffi_backend cgitb mailcap sip\n", 362 | "_codecs chardet markupsafe sipconfig\n", 363 | "_codecs_cn chunk marshal sipdistutils\n", 364 | "_codecs_hk click math site\n", 365 | "_codecs_iso2022 cloudpickle matplotlib six\n", 366 | "_codecs_jp clyent mimetypes skimage\n", 367 | "_codecs_kr cmath mistune sklearn\n", 368 | "_codecs_tw cmd mkl smtpd\n", 369 | "_collections code mmap smtplib\n", 370 | "_collections_abc codecs modulefinder sndhdr\n", 371 | "_compat_pickle codeop mpl_toolkits 
snowballstemmer\n", 372 | "_compression collections mpmath socket\n", 373 | "_crypt colorama msgpack socketserver\n", 374 | "_csv colorsys multipledispatch sortedcollections\n", 375 | "_ctypes compileall multiprocessing sortedcontainers\n", 376 | "_ctypes_test concurrent navigator_updater sphinx\n", 377 | "_curses conda nbconvert spwd\n", 378 | "_curses_panel conda_env nbformat spyder\n", 379 | "_datetime configparser netrc spyder_breakpoints\n", 380 | "_decimal contextlib networkx spyder_io_dcm\n", 381 | "_dummy_thread contextlib2 nis spyder_io_hdf5\n", 382 | "_elementtree copy nltk spyder_profiler\n", 383 | "_functools copyreg nntplib spyder_pylint\n", 384 | "_hashlib crypt nose sqlalchemy\n", 385 | "_heapq cryptography notebook sqlite3\n", 386 | "_imp csv ntpath sre_compile\n", 387 | "_io ctypes nturl2path sre_constants\n", 388 | "_json curl numba sre_parse\n", 389 | "_license curses numbers ssl\n", 390 | "_locale cycler numexpr stat\n", 391 | "_lsprof cython numpy statistics\n", 392 | "_lzma cythonmagic numpydoc statsmodels\n", 393 | "_markupbase cytoolz odo storemagic\n", 394 | "_md5 dask olefile string\n", 395 | "_multibytecodec datashape opcode stringprep\n", 396 | "_multiprocessing datetime openpyxl struct\n", 397 | "_opcode dateutil operator subprocess\n", 398 | "_operator dbm optparse sunau\n", 399 | "_osx_support decimal os symbol\n", 400 | "_pickle decorator ossaudiodev sympy\n", 401 | "_posixsubprocess difflib packaging sympyprinting\n", 402 | "_pydecimal dis pandas symtable\n", 403 | "_pyio distributed pandocfilters sys\n", 404 | "_pytest distutils parser sysconfig\n", 405 | "_random doctest partd syslog\n", 406 | "_sha1 docutils path tables\n", 407 | "_sha256 dummy_threading pathlib tabnanny\n", 408 | "_sha3 easy_install pathlib2 tarfile\n", 409 | "_sha512 email patsy tblib\n", 410 | "_signal encodings pdb telnetlib\n", 411 | "_sitebuiltins entrypoints pep8 tempfile\n", 412 | "_socket enum pexpect terminado\n", 413 | "_sqlite3 errno pickle 
termios\n", 414 | "_sre et_xmlfile pickleshare test_path\n", 415 | "_ssl fastcache pickletools test_pycosat\n", 416 | "_stat faulthandler pip testpath\n", 417 | "_string fcntl pipes tests\n", 418 | "_strptime filecmp pkg_resources textwrap\n", 419 | "_struct fileinput pkgutil this\n", 420 | "_symtable flask platform threading\n", 421 | "_sysconfigdata_m_linux_x86_64-linux-gnu flask_cors plistlib time\n", 422 | "_testbuffer fnmatch ply timeit\n", 423 | "_testcapi formatter poplib tkinter\n", 424 | "_testimportmultiple fractions posix tlz\n", 425 | "_testmultiphase ftplib posixpath token\n", 426 | "_thread functools pprint tokenize\n", 427 | "_threading_local gc profile toolz\n", 428 | "_tkinter genericpath prompt_toolkit tornado\n", 429 | "_tracemalloc getopt pstats trace\n", 430 | "_warnings getpass psutil traceback\n", 431 | "_weakref gettext pty tracemalloc\n", 432 | "_weakrefset gevent ptyprocess traitlets\n", 433 | "_yaml glob pwd tty\n", 434 | "abc greenlet py turtle\n", 435 | "aifc grp py_compile turtledemo\n", 436 | "alabaster gzip pyclbr types\n", 437 | "anaconda_navigator h5py pycosat typing\n", 438 | "anaconda_project hashlib pycparser unicodecsv\n", 439 | "antigravity heapdict pycurl unicodedata\n", 440 | "argparse heapq pydoc unittest\n", 441 | "array hmac pydoc_data urllib\n", 442 | "asn1crypto html pyexpat uu\n", 443 | "ast html5lib pyflakes uuid\n", 444 | "astroid http pygments venv\n", 445 | "astropy idlelib pylab warnings\n", 446 | "asynchat idna pylint wave\n", 447 | "asyncio imagesize pyodbc wcwidth\n", 448 | "asyncore imaplib pyparsing weakref\n", 449 | "atexit imghdr pytest webbrowser\n", 450 | "audioop imp pytz werkzeug\n", 451 | "autoreload importlib pywt wheel\n", 452 | "babel inspect pyximport widgetsnbextension\n", 453 | "backports io qtawesome wrapt\n", 454 | "base64 ipaddress qtconsole wsgiref\n", 455 | "bdb ipykernel qtpy xdrlib\n", 456 | "binascii ipykernel_launcher queue xlrd\n", 457 | "binhex ipython_genutils quopri xlsxwriter\n", 
458 | "binstar_client ipywidgets random xlwt\n", 459 | "bisect isort re xml\n", 460 | "bitarray itertools readline xmlrpc\n", 461 | "blaze itsdangerous reprlib xxlimited\n", 462 | "bleach jdcal requests xxsubtype\n", 463 | "bokeh jedi resource yaml\n", 464 | "boto jinja2 rlcompleter zict\n", 465 | "bottleneck json rmagic zipapp\n", 466 | "brain_builtin_inference jsonschema rope zipfile\n", 467 | "brain_dateutil jupyter ruamel_yaml zipimport\n", 468 | "brain_gi jupyter_client runpy zlib\n", 469 | "brain_mechanize jupyter_console sched zmq\n", 470 | "\n", 471 | "Enter any module name to get more help. Or, type \"modules spam\" to search\n", 472 | "for modules whose name or summary contain the string \"spam\".\n", 473 | "\n" 474 | ] 475 | } 476 | ], 477 | "source": [ 478 | "help(\"modules\")" 479 | ] 480 | } 481 | ], 482 | "metadata": { 483 | "kernelspec": { 484 | "display_name": "Python 3", 485 | "language": "python", 486 | "name": "python3" 487 | }, 488 | "language_info": { 489 | "codemirror_mode": { 490 | "name": "ipython", 491 | "version": 3 492 | }, 493 | "file_extension": ".py", 494 | "mimetype": "text/x-python", 495 | "name": "python", 496 | "nbconvert_exporter": "python", 497 | "pygments_lexer": "ipython3", 498 | "version": "3.6.1" 499 | } 500 | }, 501 | "nbformat": 4, 502 | "nbformat_minor": 2 503 | } 504 | -------------------------------------------------------------------------------- /Anaconda环境安装以及搭建Python多内核环境.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kdotm/Python_Series/c686d48797b7266934958b09247aa3ffe783c026/Anaconda环境安装以及搭建Python多内核环境.docx -------------------------------------------------------------------------------- /Python_PyODPS_HTML_to_PDF.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## 爬取 PyODPS[latest] 并转换为 PDF\n", 8 | "- 
爬取主链接\n", 9 | "- 根据主链接爬取子链接\n", 10 | "- 参考子链接爬取HTML并转换为PDF\n", 11 | "- 将所有 PDF 整合为一个PDF\n", 12 | "---\n", 13 | "- 注 :\n", 14 | " - PyOdps PDF在线最新版本\n", 15 | " - 0.3.12" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 15, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "import re\n", 27 | "import pdfkit\n", 28 | "import pandas as pd\n", 29 | "from urllib import urlopen\n", 30 | "from bs4 import BeautifulSoup\n", 31 | "\n", 32 | "# 设置 pandas 显示参数\n", 33 | "pd.set_option('display.width',200)\n", 34 | "pd.set_option('display.max_rows',1000)\n", 35 | "pd.set_option('display.max_columns',50)\n", 36 | "pd.set_option('display.max_colwidth',500)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "### 爬取主链接\n", 44 | "#### 爬取PyODPS Docs主页面" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 9, 50 | "metadata": { 51 | "collapsed": true 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "url='http://pyodps.readthedocs.io/zh_CN/latest/index.html'\n", 56 | "html=urlopen(url).read().decode('utf8')\n", 57 | "soup=BeautifulSoup(html,'lxml')" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "#### 取值最新文档首页 API及标题" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 10, 70 | "metadata": { 71 | "collapsed": true 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "# 主链接 (API)\n", 76 | "api=soup.find(name='link', attrs={'rel':'canonical'}).get('href')\n", 77 | "# 获取文档标题\n", 78 | "title=soup.find('link',attrs={\"href\":\"#\",\"rel\":\"top\"}).get('title').replace(' ','_')\n", 79 | "\n", 80 | "# 获取首页超链接 (href)\n", 81 | "hrefs=[]\n", 82 | "div_s=soup.find_all(name='div',attrs={'aria-label':'main navigation','role':'navigation'})[0]\n", 83 | "for tag_a in div_s.find_all(name='a',attrs={'class':'reference internal'}):\n", 84 | " content_name=tag_a.get_text()\n", 85 | " url=api+tag_a.get('href')\n", 
86 | " hrefs.append([content_name,url])" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "#### 美化 DataFrame 显示效果函数" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 20, 99 | "metadata": { 100 | "collapsed": true 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "'''\n", 105 | "设置悬停效果\n", 106 | "'''\n", 107 | "def hover(hover_color=\"#ffff99\"):\n", 108 | " return dict(selector=\"tr:hover\",\n", 109 | " props=[(\"background-color\", \"%s\" % hover_color)])\n", 110 | "'''\n", 111 | "美化DataFrame显示效果\n", 112 | "'''\n", 113 | "def display_prettify(df):\n", 114 | " from IPython.display import HTML\n", 115 | "\n", 116 | " styles = [\n", 117 | " hover(),\n", 118 | " dict(selector=\"th\", props=[(\"font-size\", \"100%\"),\n", 119 | " (\"text-align\", \"center\")]),\n", 120 | " dict(selector=\"td\", props=[(\"text-align\", \"left\")]),\n", 121 | " dict(selector=\"caption\", props=[(\"caption-side\", \"left\")])\n", 122 | " ]\n", 123 | " return df.style.set_table_styles(styles).set_caption(\"Hover to highlight.\")" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "#### 首页超连接(href)打印显示" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 13, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/html": [ 141 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | "
Hover to highlight.
content_namehref
0安装指南http://pyodps.readthedocs.io/zh_CN/latest/installation-pub-zh.html
1基本操作http://pyodps.readthedocs.io/zh_CN/latest/base-zh.html
2DataFramehttp://pyodps.readthedocs.io/zh_CN/latest/df-zh.html
3机器学习http://pyodps.readthedocs.io/zh_CN/latest/ml-zh.html
4交互体验增强http://pyodps.readthedocs.io/zh_CN/latest/interactive-zh.html
5配置选项http://pyodps.readthedocs.io/zh_CN/latest/options-zh.html
6API Referencehttp://pyodps.readthedocs.io/zh_CN/latest/api.html
" 188 | ], 189 | "text/plain": [ 190 | "" 191 | ] 192 | }, 193 | "execution_count": 13, 194 | "metadata": {}, 195 | "output_type": "execute_result" 196 | } 197 | ], 198 | "source": [ 199 | "df=pd.DataFrame(hrefs, columns=['content_name','href'])\n", 200 | "\n", 201 | "display_prettify(df)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "### 根据主链接爬取子连接" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": true 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "hrefs_2=[] # 有序列表,存储主、子链接并与文档目录层次结构保持一致性\n", 220 | "\n", 221 | "for name,url in hrefs:\n", 222 | " if url not in [hf[1] for hf in hrefs_2]: # href 不在 hrefs_2中,则追加\n", 223 | " hrefs_2.append([name,url])\n", 224 | " t_html=urlopen(url).read().decode('utf8')\n", 225 | " \n", 226 | " # 根据正则表达式 查找当前目录主题\n", 227 | " f_re='
    (.*?)
'\n", 228 | " if len(re.findall(f_re, t_html, re.I|re.S|re.M)) !=0 :\n", 229 | " target_s = re.findall(f_re, t_html, re.I|re.S|re.M)[0]\n", 230 | "\n", 231 | " # 根据正则表达式 获取当前子主题链接\n", 232 | " t_re='
(.*?)'\n", 233 | " for href,name in re.findall(t_re, target_s, re.I|re.S|re.M):\n", 234 | " if href.strip().endswith('.html'):\n", 235 | " hrefs_2.append([name,api+href])" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 22, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/html": [ 246 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | "
Hover to highlight.
01
0安装指南http://pyodps.readthedocs.io/zh_CN/latest/installation-pub-zh.html
1基本操作http://pyodps.readthedocs.io/zh_CN/latest/base-zh.html
" 273 | ], 274 | "text/plain": [ 275 | "" 276 | ] 277 | }, 278 | "execution_count": 22, 279 | "metadata": {}, 280 | "output_type": "execute_result" 281 | } 282 | ], 283 | "source": [ 284 | "display_prettify(pd.DataFrame(hrefs_2))" 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "#### 显示PyODPS 所有链接" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 105, 297 | "metadata": {}, 298 | "outputs": [ 299 | { 300 | "data": { 301 | "text/html": [ 302 | "
\n", 303 | "\n", 316 | "\n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " 
\n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | "
01
0安装指南http://pyodps.readthedocs.io/zh_CN/latest/installation-pub-zh.html
1基本操作http://pyodps.readthedocs.io/zh_CN/latest/base-zh.html
2项目空间http://pyodps.readthedocs.io/zh_CN/latest/base-projects-zh.html
3http://pyodps.readthedocs.io/zh_CN/latest/base-tables-zh.html
4SQLhttp://pyodps.readthedocs.io/zh_CN/latest/base-sql-zh.html
5任务实例http://pyodps.readthedocs.io/zh_CN/latest/base-instances-zh.html
6资源http://pyodps.readthedocs.io/zh_CN/latest/base-resources-zh.html
7函数http://pyodps.readthedocs.io/zh_CN/latest/base-functions-zh.html
8模型http://pyodps.readthedocs.io/zh_CN/latest/base-models-zh.html
9DataFramehttp://pyodps.readthedocs.io/zh_CN/latest/df-zh.html
10快速开始http://pyodps.readthedocs.io/zh_CN/latest/df-quickstart-zh.html
11基本概念http://pyodps.readthedocs.io/zh_CN/latest/df-basic-zh.html
12列运算http://pyodps.readthedocs.io/zh_CN/latest/df-element-zh.html
13聚合操作http://pyodps.readthedocs.io/zh_CN/latest/df-agg-zh.html
14排序、去重、采样、数据变换http://pyodps.readthedocs.io/zh_CN/latest/df-sort-distinct-apply-zh.html
15数据合并http://pyodps.readthedocs.io/zh_CN/latest/df-merge-zh.html
16窗口函数http://pyodps.readthedocs.io/zh_CN/latest/df-window-zh.html
17绘图http://pyodps.readthedocs.io/zh_CN/latest/df-plot-zh.html
18调试指南http://pyodps.readthedocs.io/zh_CN/latest/df-debug-instruction-zh.html
19机器学习http://pyodps.readthedocs.io/zh_CN/latest/ml-zh.html
20快速开始http://pyodps.readthedocs.io/zh_CN/latest/ml-quickstart-zh.html
21基本概念http://pyodps.readthedocs.io/zh_CN/latest/ml-basic-zh.html
22调用算法http://pyodps.readthedocs.io/zh_CN/latest/ml-algo-pub-zh.html
23结果评估http://pyodps.readthedocs.io/zh_CN/latest/ml-assess-zh.html
24交互体验增强http://pyodps.readthedocs.io/zh_CN/latest/interactive-zh.html
25配置选项http://pyodps.readthedocs.io/zh_CN/latest/options-zh.html
26API Referencehttp://pyodps.readthedocs.io/zh_CN/latest/api.html
27Definitionshttp://pyodps.readthedocs.io/zh_CN/latest/api-def.html
28DataFrame Referencehttp://pyodps.readthedocs.io/zh_CN/latest/api-df.html
29ML Referenceshttp://pyodps.readthedocs.io/zh_CN/latest/api-ml.html
\n", 477 | "
" 478 | ], 479 | "text/plain": [ 480 | " 0 1\n", 481 | "0 安装指南 http://pyodps.readthedocs.io/zh_CN/latest/installation-pub-zh.html\n", 482 | "1 基本操作 http://pyodps.readthedocs.io/zh_CN/latest/base-zh.html\n", 483 | "2 项目空间 http://pyodps.readthedocs.io/zh_CN/latest/base-projects-zh.html\n", 484 | "3 表 http://pyodps.readthedocs.io/zh_CN/latest/base-tables-zh.html\n", 485 | "4 SQL http://pyodps.readthedocs.io/zh_CN/latest/base-sql-zh.html\n", 486 | "5 任务实例 http://pyodps.readthedocs.io/zh_CN/latest/base-instances-zh.html\n", 487 | "6 资源 http://pyodps.readthedocs.io/zh_CN/latest/base-resources-zh.html\n", 488 | "7 函数 http://pyodps.readthedocs.io/zh_CN/latest/base-functions-zh.html\n", 489 | "8 模型 http://pyodps.readthedocs.io/zh_CN/latest/base-models-zh.html\n", 490 | "9 DataFrame http://pyodps.readthedocs.io/zh_CN/latest/df-zh.html\n", 491 | "10 快速开始 http://pyodps.readthedocs.io/zh_CN/latest/df-quickstart-zh.html\n", 492 | "11 基本概念 http://pyodps.readthedocs.io/zh_CN/latest/df-basic-zh.html\n", 493 | "12 列运算 http://pyodps.readthedocs.io/zh_CN/latest/df-element-zh.html\n", 494 | "13 聚合操作 http://pyodps.readthedocs.io/zh_CN/latest/df-agg-zh.html\n", 495 | "14 排序、去重、采样、数据变换 http://pyodps.readthedocs.io/zh_CN/latest/df-sort-distinct-apply-zh.html\n", 496 | "15 数据合并 http://pyodps.readthedocs.io/zh_CN/latest/df-merge-zh.html\n", 497 | "16 窗口函数 http://pyodps.readthedocs.io/zh_CN/latest/df-window-zh.html\n", 498 | "17 绘图 http://pyodps.readthedocs.io/zh_CN/latest/df-plot-zh.html\n", 499 | "18 调试指南 http://pyodps.readthedocs.io/zh_CN/latest/df-debug-instruction-zh.html\n", 500 | "19 机器学习 http://pyodps.readthedocs.io/zh_CN/latest/ml-zh.html\n", 501 | "20 快速开始 http://pyodps.readthedocs.io/zh_CN/latest/ml-quickstart-zh.html\n", 502 | "21 基本概念 http://pyodps.readthedocs.io/zh_CN/latest/ml-basic-zh.html\n", 503 | "22 调用算法 http://pyodps.readthedocs.io/zh_CN/latest/ml-algo-pub-zh.html\n", 504 | "23 结果评估 http://pyodps.readthedocs.io/zh_CN/latest/ml-assess-zh.html\n", 505 | "24 交互体验增强 
http://pyodps.readthedocs.io/zh_CN/latest/interactive-zh.html\n", 506 | "25 配置选项 http://pyodps.readthedocs.io/zh_CN/latest/options-zh.html\n", 507 | "26 API Reference http://pyodps.readthedocs.io/zh_CN/latest/api.html\n", 508 | "27 Definitions http://pyodps.readthedocs.io/zh_CN/latest/api-def.html\n", 509 | "28 DataFrame Reference http://pyodps.readthedocs.io/zh_CN/latest/api-df.html\n", 510 | "29 ML References http://pyodps.readthedocs.io/zh_CN/latest/api-ml.html" 511 | ] 512 | }, 513 | "execution_count": 105, 514 | "metadata": {}, 515 | "output_type": "execute_result" 516 | } 517 | ], 518 | "source": [ 519 | "pd.DataFrame(hrefs_2)" 520 | ] 521 | }, 522 | { 523 | "cell_type": "markdown", 524 | "metadata": { 525 | "code_folding": [] 526 | }, 527 | "source": [ 528 | "### 参考子链接爬取HTML并转换为PDF" 529 | ] 530 | }, 531 | { 532 | "cell_type": "code", 533 | "execution_count": 24, 534 | "metadata": {}, 535 | "outputs": [ 536 | { 537 | "name": "stdout", 538 | "output_type": "stream", 539 | "text": [ 540 | "Loading pages (1/6)\n", 541 | "QFont::setPixelSize: Pixel size <= 0 (0) ] 35%\n", 542 | "Counting pages (2/6) \n", 543 | "Resolving links (4/6) \n", 544 | "Loading headers and footers (5/6) \n", 545 | "Printing pages (6/6)\n", 546 | "Done \n", 547 | "Loading pages (1/6)\n", 548 | "QFont::setPixelSize: Pixel size <= 0 (0) ] 26%\n", 549 | "Counting pages (2/6) \n", 550 | "Resolving links (4/6) \n", 551 | "Loading headers and footers (5/6) \n", 552 | "Printing pages (6/6)\n", 553 | "Done \n" 554 | ] 555 | } 556 | ], 557 | "source": [ 558 | "for name,href in hrefs_2:\n", 559 | " pdfkit.from_url(href,'./tmp/'+name+'.pdf')" 560 | ] 561 | }, 562 | { 563 | "cell_type": "markdown", 564 | "metadata": {}, 565 | "source": [ 566 | "### 整合所有 PDF" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": 114, 572 | "metadata": { 573 | "collapsed": true 574 | }, 575 | "outputs": [], 576 | "source": [ 577 | "from PyPDF2 import PdfFileMerger\n", 578 | "\n", 579 | "# 创建 
PdfFileMerger 对象,合并PDFs\n", 580 | "merger = PdfFileMerger()\n", 581 | "for name, url in hrefs_2:\n", 582 | " t_input = open('./tmp/'+name+'.pdf', 'rb')\n", 583 | " merger.append(t_input)\n", 584 | "\n", 585 | "# 流输出\n", 586 | "output = open(title+\".pdf\", \"wb\")\n", 587 | "merger.write(output)\n", 588 | "\n", 589 | "# 关闭文件流\n", 590 | "output.close()\n", 591 | "merger.close()" 592 | ] 593 | } 594 | ], 595 | "metadata": { 596 | "kernelspec": { 597 | "display_name": "Python 2", 598 | "language": "python", 599 | "name": "python2" 600 | }, 601 | "language_info": { 602 | "codemirror_mode": { 603 | "name": "ipython", 604 | "version": 2 605 | }, 606 | "file_extension": ".py", 607 | "mimetype": "text/x-python", 608 | "name": "python", 609 | "nbconvert_exporter": "python", 610 | "pygments_lexer": "ipython2", 611 | "version": "2.7.13" 612 | }, 613 | "toc": { 614 | "nav_menu": {}, 615 | "number_sections": true, 616 | "sideBar": true, 617 | "skip_h1_title": false, 618 | "toc_cell": false, 619 | "toc_position": { 620 | "height": "454px", 621 | "left": "0px", 622 | "right": "745.925px", 623 | "top": "107px", 624 | "width": "290px" 625 | }, 626 | "toc_section_display": "block", 627 | "toc_window_display": true 628 | } 629 | }, 630 | "nbformat": 4, 631 | "nbformat_minor": 2 632 | } 633 | -------------------------------------------------------------------------------- /Python操作Mysql实例教程手册_代码.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### 设置样式显示效果" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 15, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "\n", 19 | "'''\n", 20 | "设置悬停效果\n", 21 | "'''\n", 22 | "def hover(hover_color=\"#ffff99\"):\n", 23 | " return dict(selector=\"tr:hover\",\n", 24 | " props=[(\"background-color\", \"%s\" % hover_color)])\n", 25 | "'''\n", 26 | 
"美化DataFrame显示效果\n", 27 | "'''\n", 28 | "def display_prettify(df):\n", 29 | " from IPython.display import HTML\n", 30 | "\n", 31 | " styles = [\n", 32 | " hover(),\n", 33 | " dict(selector=\"th\", props=[(\"font-size\", \"100%\"),\n", 34 | " (\"text-align\", \"center\")]),\n", 35 | " dict(selector=\"td\", props=[(\"text-align\", \"left\")]),\n", 36 | " dict(selector=\"caption\", props=[(\"caption-side\", \"left\")])\n", 37 | " ]\n", 38 | " return df.style.set_table_styles(styles).set_caption(\"Hover to highlight.\")" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "### 取得 MariaDB 版本" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 18, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "ip,user,pwd,dnname,port='192.168.182.131','root','root','mysql',3306\n", 55 | "\n", 56 | "import MySQLdb as mdb\n", 57 | "\n", 58 | "conn = None\n", 59 | "try:\n", 60 | " '''\n", 61 | " 1 connect mysql\n", 62 | " type : MySQLdb.connections.Connection\n", 63 | " func : connect(ip,user,password,dbname)\n", 64 | " '''\n", 65 | " conn = mdb.connect(ip,user,pwd,dnname,port)\n", 66 | " cursor = conn.cursor()\n", 67 | " \n", 68 | " '''\n", 69 | " 2 fetchone\n", 70 | " 单条结果,返回单个元组 \n", 71 | " '''\n", 72 | " cursor.execute(\"SELECT VERSION();\") # 执行查询\n", 73 | " row = cursor.fetchone()\n", 74 | " \n", 75 | " '''\n", 76 | " 3 fetchall\n", 77 | " 所有结果,返回二维元组 \n", 78 | " '''\n", 79 | " cursor.execute(\"select * from user;\") # 执行查询\n", 80 | " rows = cursor.fetchall()\n", 81 | "finally:\n", 82 | " if conn:\n", 83 | " conn.close() # 关闭连接\n", 84 | " cursor.close()" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 20, 90 | "metadata": { 91 | "scrolled": false 92 | }, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/html": [ 97 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 
| " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 
274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " 
\n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | "
Hover to highlight.
0123456789101112131415161718192021222324252627282930313233343536373839404142434445
0localhostroot*81F5E21E35407D884A6CD4A731AEBFB6AF209E1BYYYYYYYYYYYYYYYYYYYYYYYYYYYYY0000*81F5E21E35407D884A6CD4A731AEBFB6AF209E1BNN0.000000
1127.0.0.1root*81F5E21E35407D884A6CD4A731AEBFB6AF209E1BYYYYYYYYYYYYYYYYYYYYYYYYYYYYY0000*81F5E21E35407D884A6CD4A731AEBFB6AF209E1BNN0.000000
2::1root*81F5E21E35407D884A6CD4A731AEBFB6AF209E1BYYYYYYYYYYYYYYYYYYYYYYYYYYYYY0000NN0.000000
3%root*81F5E21E35407D884A6CD4A731AEBFB6AF209E1BNNNNNNNNNNNNNNNNNNNNNNNNNNNNN0000NN0.000000
4localhostroot1*81F5E21E35407D884A6CD4A731AEBFB6AF209E1BYYYYYYYYYYNYYYYYYYYYYYYYYYYYY0000NN0.000000
5hostnameusername*81F5E21E35407D884A6CD4A731AEBFB6AF209E1BYYYYYYYYYYYYYYYYYYYYYYYYYYYYY0000NN0.000000
6192.168.%root*81F5E21E35407D884A6CD4A731AEBFB6AF209E1BYYYYYYYYYYNYYYYYYYYYYYYYYYYYY0000NN0.000000
" 496 | ], 497 | "text/plain": [ 498 | "" 499 | ] 500 | }, 501 | "execution_count": 20, 502 | "metadata": {}, 503 | "output_type": "execute_result" 504 | } 505 | ], 506 | "source": [ 507 | "import pandas as pd\n", 508 | "\n", 509 | "rw_df=pd.DataFrame([t for t in rows])\n", 510 | "display_prettify(rw_df)" 511 | ] 512 | }, 513 | { 514 | "cell_type": "markdown", 515 | "metadata": {}, 516 | "source": [ 517 | "### 创建一个表并且插入数据" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": 27, 523 | "metadata": {}, 524 | "outputs": [], 525 | "source": [ 526 | "import MySQLdb as mdb\n", 527 | "\n", 528 | "# conn 创建全局连接\n", 529 | "conn = mdb.connect(ip,user,pwd,dnname,port);\n", 530 | "with conn:\n", 531 | " curs = conn.cursor() # 获取 cursor\n", 532 | " # 创建数据表 writers(id,name)\n", 533 | " curs.execute(\"CREATE TABLE IF NOT EXISTS \\\n", 534 | " Writers(Id INT PRIMARY KEY AUTO_INCREMENT, Name VARCHAR(25))\")\n", 535 | " # 插入 5 条数据\n", 536 | " curs.execute(\"INSERT INTO Writers(Name) VALUES('Jack London')\")\n", 537 | " curs.execute(\"INSERT INTO Writers(Name) VALUES('Honore de Balzac')\")\n", 538 | " curs.execute(\"INSERT INTO Writers(Name) VALUES('Lion Feuchtwanger')\")\n", 539 | " curs.execute(\"INSERT INTO Writers(Name) VALUES('Emile Zola')\")\n", 540 | " curs.execute(\"INSERT INTO Writers(Name) VALUES('Truman Capote')\")\n", 541 | " \n", 542 | " # conn.commit()\n", 543 | " \n", 544 | "curs.execute('SELECT * FROM Writers;')\n", 545 | "rows=curs.fetchall()\n", 546 | "conn.close()\n", 547 | "curs.close()" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": 28, 553 | "metadata": {}, 554 | "outputs": [ 555 | { 556 | "data": { 557 | "text/html": [ 558 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " 
\n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | "
Hover to highlight.
01
01Jack London
12Honore de Balzac
23Lion Feuchtwanger
34Emile Zola
45Truman Capote
" 597 | ], 598 | "text/plain": [ 599 | "" 600 | ] 601 | }, 602 | "execution_count": 28, 603 | "metadata": {}, 604 | "output_type": "execute_result" 605 | } 606 | ], 607 | "source": [ 608 | "display_prettify(pd.DataFrame([rw for rw in rows]))" 609 | ] 610 | }, 611 | { 612 | "cell_type": "markdown", 613 | "metadata": {}, 614 | "source": [ 615 | "### python 获取表数据并取行数据中单个数据" 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": 53, 621 | "metadata": {}, 622 | "outputs": [ 623 | { 624 | "name": "stdout", 625 | "output_type": "stream", 626 | "text": [ 627 | "1 Jack London\n", 628 | "2 Honore de Balzac\n", 629 | "3 Guy de Maupasant\n", 630 | "4 Guy de Maupasant\n", 631 | "5 Truman Capote\n", 632 | "-----使用字典游标获取结果集,并通过字段名访问值-----\n", 633 | "1 Jack London\n", 634 | "2 Honore de Balzac\n", 635 | "3 Guy de Maupasant\n", 636 | "4 Guy de Maupasant\n", 637 | "5 Truman Capote\n" 638 | ] 639 | } 640 | ], 641 | "source": [ 642 | "import MySQLdb as mdb\n", 643 | "\n", 644 | "# conn 创建全局连接\n", 645 | "conn = mdb.connect(ip,user,pwd,dnname,port);\n", 646 | "\n", 647 | "with conn:\n", 648 | " curs = conn.cursor() # 获取 cursor\n", 649 | " curs.execute(\"SELECT * FROM Writers\")\n", 650 | " \n", 651 | " numrows = int(curs.rowcount) # 获取行数\n", 652 | " for i in range(numrows): # 循环取行数据\n", 653 | " row = curs.fetchone() # 每次取出一行数据,同时指针下移\n", 654 | " print row[0], row[1]\n", 655 | " \n", 656 | " print ('-----使用字典游标获取结果集,并通过字段名访问值-----')\n", 657 | " curs = conn.cursor(mdb.cursors.DictCursor)\n", 658 | " curs.execute(\"SELECT * FROM Writers\")\n", 659 | " rows3=curs.fetchall()\n", 660 | " for row in rows3:\n", 661 | " print \"%s %s\" % (row[\"Id\"], row[\"Name\"])\n", 662 | "\n", 663 | "conn.close()\n", 664 | "curs.close()" 665 | ] 666 | }, 667 | { 668 | "cell_type": "markdown", 669 | "metadata": {}, 670 | "source": [ 671 | "### Prepared Statements 查询(更安全方便)" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": 56, 677 | "metadata": {}, 678 | 
"outputs": [ 679 | { 680 | "name": "stdout", 681 | "output_type": "stream", 682 | "text": [ 683 | "Number of rows updated: 1\n" 684 | ] 685 | } 686 | ], 687 | "source": [ 688 | "import MySQLdb as mdb\n", 689 | "\n", 690 | "conn = mdb.connect(ip,user,pwd,dnname,port);\n", 691 | "with conn:\n", 692 | " curs = conn.cursor()\n", 693 | " # 通过组装sql 进行查询操作\n", 694 | " curs.execute(\"UPDATE Writers SET Name = %s WHERE Id = %s\",(\"Lion Feuchtwanger\", \"3\"))\n", 695 | " # 获取影响多少行\n", 696 | " print \"Number of rows updated: %d\" % curs.rowcount\n", 697 | "\n", 698 | "conn.commit() \n", 699 | "conn.close()\n", 700 | "curs.close()" 701 | ] 702 | }, 703 | { 704 | "cell_type": "markdown", 705 | "metadata": {}, 706 | "source": [ 707 | "### 把图片用二进制存入 MySQL\n", 708 | "- BLOB (Binary Large Object),即较大的二进制对象字段\n", 709 | "- escape_string \n", 710 | " - 字符转义函数\n", 711 | "- CREATE TABLE Images(Id INT PRIMARY KEY AUTO_INCREMENT, Data MEDIUMBLOB);" 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "execution_count": 64, 717 | "metadata": {}, 718 | "outputs": [ 719 | { 720 | "name": "stderr", 721 | "output_type": "stream", 722 | "text": [ 723 | "/root/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:9: Warning: Table 'Images' already exists\n" 724 | ] 725 | } 726 | ], 727 | "source": [ 728 | "import MySQLdb as mdb\n", 729 | "import sys\n", 730 | "\n", 731 | "# conn 创建全局连接\n", 732 | "conn = mdb.connect(ip,user,pwd,dnname,port);\n", 733 | "with conn:\n", 734 | " curs = conn.cursor() # 获取 cursor\n", 735 | " # 创建数据表 writers(id,name)\n", 736 | " curs.execute(\"CREATE TABLE if not exists \\\n", 737 | " Images(Id INT PRIMARY KEY AUTO_INCREMENT, Data MEDIUMBLOB);\")\n", 738 | "curs.close()\n", 739 | "\n", 740 | "try:\n", 741 | " fin = open(\"/home/synway/kngines1.jpg\")\n", 742 | " img = fin.read() # 读入文件流\n", 743 | " fin.close()\n", 744 | "except IOError, e:\n", 745 | " print \"Error %d: %s\" % (e.args[0],e.args[1])\n", 746 | " sys.exit(1)\n", 747 | "try:\n", 748 | " cursor = 
conn.cursor()\n", 749 | " cursor.execute(\"INSERT INTO Images SET Data='%s'\" % mdb.escape_string(img))\n", 750 | " conn.commit() # 提交数据\n", 751 | " cursor.close()\n", 752 | " conn.close()\n", 753 | "except mdb.Error, e:\n", 754 | " print \"Error %d: %s\" % (e.args[0],e.args[1])\n", 755 | " sys.exit(1)" 756 | ] 757 | }, 758 | { 759 | "cell_type": "markdown", 760 | "metadata": {}, 761 | "source": [ 762 | "### 从数据库中把图片读出来" 763 | ] 764 | }, 765 | { 766 | "cell_type": "code", 767 | "execution_count": 65, 768 | "metadata": { 769 | "collapsed": true 770 | }, 771 | "outputs": [], 772 | "source": [ 773 | "import MySQLdb as mdb\n", 774 | "import sys\n", 775 | "try:\n", 776 | " conn = mdb.connect(ip,user,pwd,dnname,port);\n", 777 | " cursor = conn.cursor() \n", 778 | " cursor.execute(\"SELECT Data FROM Images LIMIT 1\") # 执行查询\n", 779 | " \n", 780 | " fout = open('image.png','wb') # 使用二进制进行写文件\n", 781 | " fout.write(cursor.fetchone()[0])\n", 782 | " \n", 783 | " fout.close() # 关闭输出流\n", 784 | " cursor.close() # 关闭游标\n", 785 | " conn.close() # 关闭连接\n", 786 | "except IOError, e:\n", 787 | " print \"Error %d: %s\" % (e.args[0],e.args[1])\n", 788 | " sys.exit(1)" 789 | ] 790 | }, 791 | { 792 | "cell_type": "markdown", 793 | "metadata": {}, 794 | "source": [ 795 | "### Transaction,即事务(手动提交,自动回滚)" 796 | ] 797 | }, 798 | { 799 | "cell_type": "markdown", 800 | "metadata": {}, 801 | "source": [ 802 | "#### 查询默认引擎信息\n", 803 | "- MyISAM\n", 804 | " - 不支持事务;\n", 805 | " - 内部的复杂机制很少,特别适应于读多写少的应用;\n", 806 | "- InnoDB\n", 807 | " - 事务型存储引擎,适合处理大量的短期事务;" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": 74, 813 | "metadata": {}, 814 | "outputs": [ 815 | { 816 | "data": { 817 | "text/html": [ 818 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 
| " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | "
Hover to highlight.
Engine_nameSupportCommentTransactionsXASavepoints
0MRG_MyISAMYESCollection of identical MyISAM tablesNONONO
1CSVYESCSV storage engineNONONO
2AriaYESCrash-safe tables with MyISAM heritageNONONO
3MyISAMYESMyISAM storage engineNONONO
4MEMORYYESHash based, stored in memory, useful for temporary tablesNONONO
5InnoDBDEFAULTSupports transactions, row-level locking, foreign keys and encryption for tablesYESYESYES
6SEQUENCEYESGenerated tables filled with sequential valuesYESNOYES
7PERFORMANCE_SCHEMAYESPerformance SchemaNONONO
" 905 | ], 906 | "text/plain": [ 907 | "" 908 | ] 909 | }, 910 | "execution_count": 74, 911 | "metadata": {}, 912 | "output_type": "execute_result" 913 | } 914 | ], 915 | "source": [ 916 | "import MySQLdb as mdb\n", 917 | "import sys\n", 918 | "\n", 919 | "conn = mdb.connect(ip,user,pwd,dnname,port);\n", 920 | "cursor = conn.cursor()\n", 921 | "cursor.execute('show engines;')\n", 922 | "rows=cursor.fetchall()\n", 923 | "\n", 924 | "conn.close()\n", 925 | "cursor.close()\n", 926 | "\n", 927 | "engines_df=pd.DataFrame([rw for rw in rows], columns=['Engine_name','Support','Comment','Transactions','XA','Savepoints'])\n", 928 | "display_prettify(engines_df)" 929 | ] 930 | }, 931 | { 932 | "cell_type": "code", 933 | "execution_count": 79, 934 | "metadata": {}, 935 | "outputs": [], 936 | "source": [ 937 | "import MySQLdb as mdb\n", 938 | "import sys\n", 939 | "try:\n", 940 | " conn = mdb.connect(ip,user,pwd,dnname,port);\n", 941 | " cursor = conn.cursor()\n", 942 | " \n", 943 | " # 如果某个数据库支持事务,会自动开启\n", 944 | " # MariDB,自动开启事务(若是 MyISAM 引擎则不会)\n", 945 | " cursor.execute(\"UPDATE Writers SET Name = %s WHERE Id = %s\",(\"Leo Tolstoy\", \"1\"))\n", 946 | " cursor.execute(\"UPDATE Writers SET Name = %s WHERE Id = %s\",(\"Boris Pasternak\", \"2\"))\n", 947 | " cursor.execute(\"UPDATE Writer SET Name = %s WHERE Id = %s\",(\"Leonid Leonov\", \"3\"))\n", 948 | " \n", 949 | " # 事务的特性 1、原子性的手动提交\n", 950 | " conn.commit()\n", 951 | " cursor.close()\n", 952 | " conn.close()\n", 953 | "except mdb.Error, e:\n", 954 | " # 若出现错误,则回滚,即上面三条语句要么执行,要么不执行\n", 955 | " conn.rollback()\n", 956 | " print \"Error %d: %s\" % (e.args[0],e.args[1])" 957 | ] 958 | }, 959 | { 960 | "cell_type": "markdown", 961 | "metadata": {}, 962 | "source": [ 963 | "注:
\n", 964 | "(1)因为不存在 writer 表(SQL 第三条语句),所以出现错误:Error 1146: Table ‘test.writer’ doesn’t exist
\n", 965 | "(2)出现错误后触发异常处理并执行回滚,3 条语句中已执行的前两条会被撤销,表中数据保持不变。
\n", 966 | "(3)如果本代码放到一个 MyISAM 引擎表,前两句会执行,第三句不会;如果是 InnoDB 引\n", 967 | "擎,则都不会执行。(事务特性)
" 968 | ] 969 | } 970 | ], 971 | "metadata": { 972 | "kernelspec": { 973 | "display_name": "Python 2", 974 | "language": "python", 975 | "name": "python2" 976 | }, 977 | "language_info": { 978 | "codemirror_mode": { 979 | "name": "ipython", 980 | "version": 2 981 | }, 982 | "file_extension": ".py", 983 | "mimetype": "text/x-python", 984 | "name": "python", 985 | "nbconvert_exporter": "python", 986 | "pygments_lexer": "ipython2", 987 | "version": "2.7.13" 988 | }, 989 | "toc": { 990 | "nav_menu": {}, 991 | "number_sections": true, 992 | "sideBar": true, 993 | "skip_h1_title": false, 994 | "toc_cell": false, 995 | "toc_position": { 996 | "height": "454px", 997 | "left": "0px", 998 | "right": "831.2px", 999 | "top": "107px", 1000 | "width": "212px" 1001 | }, 1002 | "toc_section_display": "block", 1003 | "toc_window_display": true 1004 | } 1005 | }, 1006 | "nbformat": 4, 1007 | "nbformat_minor": 2 1008 | } 1009 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Climb_Python 2 | 目前任职大数据开发工作,日常开发使用Python作为数据分析工具,在此比较常用的方面知识或难点总结、整理出来,以此分享,谢谢! 
3 | * Anaconda docs 4 | * Anaconda环境安装以及搭建Python多内核环境.docx 5 | * 简单分析脚本 6 | * Anaconda2_infos_analysis.ipynb 7 | * Anaconda3_infos_analysis.ipynb 8 | 9 | * 绘制某段时间内用户增加趋势图 10 | * users_rise_up_period.ipynb 11 | * users_rise_up_period_data 12 | 13 | * 爬取糗事百科段子信息 14 | * [博文: http://blog.csdn.net/qq_24452475/article/details/79122259](http://blog.csdn.net/qq_24452475/article/details/79122259) 15 | * spider_qiushibaike_content_datas.ipynb 16 | * spider_qiushibaike_content_datas.py 17 | 18 | * 爬取公务员招考信息(2018) 19 | * [博文: http://blog.csdn.net/qq_24452475/article/details/79156758](http://blog.csdn.net/qq_24452475/article/details/79156758) 20 | * spider_huatu_civil_servant_post_metas.ipynb 21 | * post_metas.csv 22 | 23 | * 地址转换:根据地名爬取经纬度并计算geohash值 24 | * [博文: http://blog.csdn.net/qq_24452475/article/details/79183861](http://blog.csdn.net/qq_24452475/article/details/79183861) 25 | * lng_lat_2_geohash_two_way.ipynb 26 | 27 | * 爬取 PyODPS[latest] 并转换为 PDF 28 | * [博文: http://blog.csdn.net/qq_24452475/article/details/79248953](http://blog.csdn.net/qq_24452475/article/details/79248953) 29 | * Python_PyODPS_HTML_to_PDF.ipynb 30 | 31 | * Python操作 MariaDB 实例教程手册 32 | * 包含Python操作 MariaDB、执行SQL语句、获取、遍历结果集、图片入库、事务等代码示例 33 | * Python操作Mysql实例教程手册_代码.ipynb 34 | 35 | * Jupyter 常用 魔术命令(magics)总结 36 | * Built-in magic commands 37 | * Python_Common_Magic_Samples.ipynb 38 | -------------------------------------------------------------------------------- /lng_lat_2_geohash_two_way.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## 主要实现\n", 8 | "- 批量查询位置点 经纬度等信息\n", 9 | "- 经纬度转换为 geohash块\n", 10 | "- 目标geohash 临近geohash块\n", 11 | " - 九宫格" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Note\n", 19 | "- 查询位置列表, 需提前提供" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 5, 25 | 
"metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "city_lis=['北京', '上海', '郑州']" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "### 获取位置地点经纬度等信息\n", 38 | "- 获取经纬度等信息\n", 39 | "- 计算 geohash" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 6, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "import pandas as pd\n", 51 | "import mzgeohash\n", 52 | "import urllib2\n", 53 | "import urllib\n", 54 | "import json" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 23, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "'''\n", 64 | "根据位置名称,获取经纬度等信息\n", 65 | "'''\n", 66 | "def get_metas_from_baidu(city):\n", 67 | " metas={}\n", 68 | " metas['key']='f247cdb592eb43ebac6ccd27f796e2d2'\n", 69 | " metas['address']=city # 城市名称\n", 70 | " \n", 71 | " data=urllib.urlencode(metas)\n", 72 | " url='http://api.map.baidu.com/geocoder?output=json&'+urllib.urlencode(metas)\n", 73 | " # 注释 url='http://api.map.baidu.com/geocoder?'+urllib.urlencode(metas)+'&output=json'\n", 74 | " unicode_s=urllib2.urlopen(url)\n", 75 | " \n", 76 | " return json.loads(unicode_s.read())\n", 77 | "\n", 78 | "city_meta_lis=[] # 计算 geohash 编码并存储\n", 79 | "for city in city_lis:\n", 80 | " tmp=get_metas_from_baidu(city)\n", 81 | " latitude=tmp['result']['location']['lat']\n", 82 | " longitude=tmp['result']['location']['lng']\n", 83 | " t_geohash=mzgeohash.encode((longitude,latitude)) # 使用mzgeohash 计算geohash\n", 84 | " city_meta_lis.append([city,latitude,longitude,t_geohash]) # 存储" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 24, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/html": [ 95 | "
\n", 96 | "\n", 109 | "\n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | "
citylatitudelongitudegeohash
0北京39.929986116.395645wx4g0th9p0gk
1上海31.249162121.487899wtw3u88z94p0
2郑州34.756610113.649644ww0vdpjputsw
\n", 143 | "
" 144 | ], 145 | "text/plain": [ 146 | " city latitude longitude geohash\n", 147 | "0 北京 39.929986 116.395645 wx4g0th9p0gk\n", 148 | "1 上海 31.249162 121.487899 wtw3u88z94p0\n", 149 | "2 郑州 34.756610 113.649644 ww0vdpjputsw" 150 | ] 151 | }, 152 | "execution_count": 24, 153 | "metadata": {}, 154 | "output_type": "execute_result" 155 | } 156 | ], 157 | "source": [ 158 | "# 格式化输出\n", 159 | "df=pd.DataFrame(city_meta_lis, columns=['city','latitude','longitude','geohash'])\n", 160 | "df" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "### mzgeohash\n", 168 | "- 调用方式\n", 169 | "- 实现原理" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "### list all member methods" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 23, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/plain": [ 187 | "['__builtins__',\n", 188 | " '__doc__',\n", 189 | " '__file__',\n", 190 | " '__name__',\n", 191 | " '__package__',\n", 192 | " '__path__',\n", 193 | " '__version__',\n", 194 | " 'adjacent',\n", 195 | " 'decode',\n", 196 | " 'encode',\n", 197 | " 'geohash',\n", 198 | " 'neighbors',\n", 199 | " 'neighborsfit']" 200 | ] 201 | }, 202 | "execution_count": 23, 203 | "metadata": {}, 204 | "output_type": "execute_result" 205 | } 206 | ], 207 | "source": [ 208 | "dir(mzgeohash)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "#### 经纬度 转换为 geohash" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 31, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "data": { 225 | "text/plain": [ 226 | "'xn76urwe1g9y'" 227 | ] 228 | }, 229 | "execution_count": 31, 230 | "metadata": {}, 231 | "output_type": "execute_result" 232 | } 233 | ], 234 | "source": [ 235 | "mzgeohash.encode([139.76608408614993, 35.681382017210126])" 236 | ] 237 | }, 238 | { 239 | "cell_type": 
"markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "#### geohash 转换为 经纬度" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 33, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "data": { 252 | "text/plain": [ 253 | "(139.76608408614993, 35.681382017210126)" 254 | ] 255 | }, 256 | "execution_count": 33, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "mzgeohash.decode('xn76urwe1g9y')" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "#### 已知 geohash ,计算某一方向的临近geohash\n", 270 | "- 参数 1\n", 271 | " - 已知 geohash 串\n", 272 | "- 参数 2\n", 273 | " - 方向" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 35, 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "data": { 283 | "text/plain": [ 284 | "'xn76urwe1g9z'" 285 | ] 286 | }, 287 | "execution_count": 35, 288 | "metadata": {}, 289 | "output_type": "execute_result" 290 | } 291 | ], 292 | "source": [ 293 | "mzgeohash.adjacent('xn76urwe1g9y','n') # " 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "#### 已知 geohash ,临近geohash\n", 301 | "- 九宫格,即周围 8 个geohash" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 34, 307 | "metadata": {}, 308 | "outputs": [ 309 | { 310 | "data": { 311 | "text/plain": [ 312 | "{'c': 'xn76urwe1g9y',\n", 313 | " 'e': 'xn76urwe1gdn',\n", 314 | " 'n': 'xn76urwe1g9z',\n", 315 | " 'ne': 'xn76urwe1gdp',\n", 316 | " 'nw': 'xn76urwe1g9x',\n", 317 | " 's': 'xn76urwe1g9v',\n", 318 | " 'se': 'xn76urwe1gdj',\n", 319 | " 'sw': 'xn76urwe1g9t',\n", 320 | " 'w': 'xn76urwe1g9w'}" 321 | ] 322 | }, 323 | "execution_count": 34, 324 | "metadata": {}, 325 | "output_type": "execute_result" 326 | } 327 | ], 328 | "source": [ 329 | "mzgeohash.neighbors('xn76urwe1g9y')" 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "metadata": {}, 335 | "source": [ 336 | 
"#### 已知 某位置点经纬度和周围经纬度点集,计算同属 geohash" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 37, 342 | "metadata": {}, 343 | "outputs": [ 344 | { 345 | "data": { 346 | "text/plain": [ 347 | "'9q9'" 348 | ] 349 | }, 350 | "execution_count": 37, 351 | "metadata": {}, 352 | "output_type": "execute_result" 353 | } 354 | ], 355 | "source": [ 356 | "test_centroid = [-122.18472385000001, 37.7881345]\n", 357 | "test_points = [\n", 358 | "(-122.2992715, 37.9030588),\n", 359 | "(-122.396742, 37.792976),\n", 360 | "(-122.4474142, 37.72198087),\n", 361 | "(-121.9764, 37.557355),\n", 362 | "(-122.029095, 37.973737),\n", 363 | "(-122.224274, 37.774963),\n", 364 | "(-122.271604, 37.803664),\n", 365 | "(-122.126871, 37.697185),\n", 366 | "(-122.087967, 37.670399),\n", 367 | "(-122.123801, 37.893394),\n", 368 | "(-122.269029, 37.80787),\n", 369 | "(-122.265609, 37.797484),\n", 370 | "(-122.267227, 37.828415),\n", 371 | "(-122.067423, 37.905628),\n", 372 | "(-122.267227, 37.828415),\n", 373 | "(-122.38666, 37.599787),\n", 374 | "(-122.075567, 37.690754),\n", 375 | "(-122.401407, 37.789256),\n", 376 | "(-122.283451, 37.87404),\n", 377 | "(-122.269029, 37.80787),\n", 378 | "(-122.1837911, 37.87836087),\n", 379 | "(-122.419694, 37.765062),\n", 380 | "(-122.2945822, 37.80467476),\n", 381 | "(-122.21244024, 37.71297174),\n", 382 | "(-121.945154, 38.018914),\n", 383 | "(-122.466233, 37.684638),\n", 384 | "(-122.056013, 37.928403),\n", 385 | "(-122.406857, 37.784991),\n", 386 | "(-122.418466, 37.752254),\n", 387 | "(-122.26978, 37.853024),\n", 388 | "(-122.251793, 37.844601),\n", 389 | "(-121.928099, 37.699759),\n", 390 | "(-122.416038, 37.637753),\n", 391 | "(-122.1613112, 37.72261921),\n", 392 | "(-122.0575506, 37.63479954),\n", 393 | "(-122.392612, 37.616035),\n", 394 | "(-122.413756, 37.779528),\n", 395 | "(-122.353165, 37.936887),\n", 396 | "(-122.197273, 37.754006),\n", 397 | "(-122.017867, 37.591208),\n", 398 | "(-122.024597, 38.003275),\n", 399 | 
"(-122.4690807, 37.70612055),\n", 400 | "(-122.268045, 37.869867),\n", 401 | "(-122.444116, 37.664174),\n", 402 | "(-121.900367, 37.701695),\n", 403 | "(-122.317269, 37.925655),\n", 404 | "(-122.434092, 37.732921)\n", 405 | "]\n", 406 | "\n", 407 | "# expect = '9q9'\n", 408 | "mzgeohash.neighborsfit(test_centroid, test_points)" 409 | ] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "metadata": {}, 414 | "source": [ 415 | "### 使用unittest进行 mzgeohash 模块测试\n", 416 | "- 通过 TestSuite 执行测试用例\n", 417 | "- TestCase:所有测试用例的基类,给定测试方法的名称,返回测试用例实例;\n", 418 | "- TestSuite:组织测试用例的实例,支持测试用例的添加和删除,最终将传递给 TextTestRunner 进行测试执行;\n", 419 | "- TextTestRunner:进行测试用例执行的实例,其中Text的意思是以文本形式显示测试结果。\n", 420 | " - 测试结果保存在 TextTestResult 实例中,包括运行多少测试用例,成功多少,失败多少等信息;" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 6, 426 | "metadata": { 427 | "collapsed": true 428 | }, 429 | "outputs": [], 430 | "source": [ 431 | "import unittest\n", 432 | "from mzgeohash.test_geohash import Test_encode_decode\n", 433 | "from mzgeohash.test_geohash import Test_adjacent\n", 434 | "from mzgeohash.test_geohash import Test_neighbors\n", 435 | "from mzgeohash.test_geohash import Test_neighborsfit" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 35, 441 | "metadata": {}, 442 | "outputs": [ 443 | { 444 | "name": "stderr", 445 | "output_type": "stream", 446 | "text": [ 447 | "............\n", 448 | "----------------------------------------------------------------------\n", 449 | "Ran 12 tests in 0.053s\n", 450 | "\n", 451 | "OK\n" 452 | ] 453 | } 454 | ], 455 | "source": [ 456 | "suite=unittest.TestSuite()\n", 457 | "suite.addTest(Test_encode_decode('test_decode'))\n", 458 | "suite.addTest(Test_encode_decode('test_encode'))\n", 459 | "suite.addTest(Test_encode_decode('test_roundtrip'))\n", 460 | "suite.addTest(Test_neighbors('test_neighbors'))\n", 461 | "suite.addTest(Test_adjacent('test_adjacent'))\n", 462 | 
"suite.addTest(Test_neighborsfit('test_neighborsfit'))\n", 463 | "\n", 464 | "runner=unittest.TextTestRunner()\n", 465 | "runner.run(suite);" 466 | ] 467 | } 468 | ], 469 | "metadata": { 470 | "kernelspec": { 471 | "display_name": "Python 2", 472 | "language": "python", 473 | "name": "python2" 474 | }, 475 | "language_info": { 476 | "codemirror_mode": { 477 | "name": "ipython", 478 | "version": 2 479 | }, 480 | "file_extension": ".py", 481 | "mimetype": "text/x-python", 482 | "name": "python", 483 | "nbconvert_exporter": "python", 484 | "pygments_lexer": "ipython2", 485 | "version": "2.7.13" 486 | }, 487 | "toc": { 488 | "nav_menu": {}, 489 | "number_sections": true, 490 | "sideBar": true, 491 | "skip_h1_title": false, 492 | "toc_cell": false, 493 | "toc_position": { 494 | "height": "595px", 495 | "left": "0px", 496 | "right": "1092px", 497 | "top": "107px", 498 | "width": "212px" 499 | }, 500 | "toc_section_display": "block", 501 | "toc_window_display": true 502 | } 503 | }, 504 | "nbformat": 4, 505 | "nbformat_minor": 2 506 | } 507 | -------------------------------------------------------------------------------- /post_metas.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kdotm/Python_Series/c686d48797b7266934958b09247aa3ffe783c026/post_metas.csv -------------------------------------------------------------------------------- /spider_huatu_civil_servant_post_metas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "deletable": true, 7 | "editable": true 8 | }, 9 | "source": [ 10 | "### 爬取浙江公务员职位信息" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "metadata": { 17 | "collapsed": true, 18 | "deletable": true, 19 | "editable": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import urllib\n", 24 | "import pandas as pd\n", 25 | "\n", 26 | "from 
bs4 import BeautifulSoup" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "### 1 爬取页面" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": { 40 | "collapsed": false, 41 | "deletable": true, 42 | "editable": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "api = 'http://zw.huatu.com'\n", 47 | "base='/2018/'\n", 48 | "url=api+base\n", 49 | "\n", 50 | "header={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3236.0 Safari/537.36'}\n", 51 | "request = urllib.request.Request(url,headers=header)\n", 52 | "response = urllib.request.urlopen(request).read()\n", 53 | "\n", 54 | "content=BeautifulSoup(response, 'lxml')" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "### 2 内容解析\n", 62 | "- 省/市 : 对应 URL [32个]" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": { 69 | "collapsed": false, 70 | "deletable": true, 71 | "editable": true 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "areas_txt=content.find_all('p', attrs={'id':'ydiqu'})[0]\n", 76 | "area_metas_lis=[]\n", 77 | "counter=0\n", 78 | "\n", 79 | "for i in areas_txt.find_all('a', attrs={'target':'_blank'}):\n", 80 | " url=api+i.get_attribute_list('href')[0][2:]\n", 81 | " area=i.get_text()\n", 82 | " counter+=1\n", 83 | " area_metas_lis.append([counter,area,url])" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 12, 89 | "metadata": { 90 | "collapsed": false, 91 | "deletable": true, 92 | "editable": true 93 | }, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/html": [ 98 | "
\n", 99 | "\n", 112 | "\n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | "
noareaurl
01安徽http://zw.huatu.com/2018/buweisearch/1.html
12北京http://zw.huatu.com/2018/buweisearch/2.html
23福建http://zw.huatu.com/2018/buweisearch/3.html
34甘肃http://zw.huatu.com/2018/buweisearch/4.html
45广东http://zw.huatu.com/2018/buweisearch/5.html
\n", 154 | "
" 155 | ], 156 | "text/plain": [ 157 | " no area url\n", 158 | "0 1 安徽 http://zw.huatu.com/2018/buweisearch/1.html\n", 159 | "1 2 北京 http://zw.huatu.com/2018/buweisearch/2.html\n", 160 | "2 3 福建 http://zw.huatu.com/2018/buweisearch/3.html\n", 161 | "3 4 甘肃 http://zw.huatu.com/2018/buweisearch/4.html\n", 162 | "4 5 广东 http://zw.huatu.com/2018/buweisearch/5.html" 163 | ] 164 | }, 165 | "execution_count": 12, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "df=pd.DataFrame(area_metas_lis, columns=['no','area', 'url'])\n", 172 | "df.head()" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": { 178 | "deletable": true, 179 | "editable": true 180 | }, 181 | "source": [ 182 | "### 3 以浙江为例,通过省份查询招聘单位及具体信息访问途径(URL)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 8, 188 | "metadata": { 189 | "collapsed": false, 190 | "deletable": true, 191 | "editable": true 192 | }, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/html": [ 197 | "
\n", 198 | "\n", 211 | "\n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | "
noareaurl
3031浙江http://zw.huatu.com/2018/buweisearch/31.html
\n", 229 | "
" 230 | ], 231 | "text/plain": [ 232 | " no area url\n", 233 | "30 31 浙江 http://zw.huatu.com/2018/buweisearch/31.html" 234 | ] 235 | }, 236 | "execution_count": 8, 237 | "metadata": {}, 238 | "output_type": "execute_result" 239 | } 240 | ], 241 | "source": [ 242 | "query_area='浙江'\n", 243 | "df[df.area==query_area]" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 9, 249 | "metadata": { 250 | "collapsed": false, 251 | "deletable": true, 252 | "editable": true 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "url2=df[df.area==query_area].url.values[0]\n", 257 | "\n", 258 | "request2 = urllib.request.Request(url2,headers=header)\n", 259 | "response2 = urllib.request.urlopen(request2).read()\n", 260 | "content2=BeautifulSoup(response2, 'lxml')\n", 261 | "\n", 262 | "tmp2=content2.find_all('table', attrs={'cellspacing':'0','width':'100%'})[0]\n", 263 | "\n", 264 | "unit_lis=[] # 存储职位列表\n", 265 | "for i in tmp2.find_all('a'):\n", 266 | " t_url=api+'/2018'+i.get_attribute_list('href')[0][2:]\n", 267 | " unit_lis.append([i.get_text().strip(),t_url]) # strip 去除空格\n", 268 | "\n", 269 | "unit_df=pd.DataFrame(unit_lis,columns=['unit_name','url'])" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 13, 275 | "metadata": { 276 | "collapsed": false, 277 | "deletable": true, 278 | "editable": true, 279 | "scrolled": false 280 | }, 281 | "outputs": [ 282 | { 283 | "data": { 284 | "text/html": [ 285 | "
\n", 286 | "\n", 299 | "\n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | "
unit_nameurl
0国家物资储备局浙江办事处http://zw.huatu.com/2018/buwei2018/1763.html
1浙江海事局http://zw.huatu.com/2018/buwei2018/2009.html
2杭州海关http://zw.huatu.com/2018/buwei2018/2105.html
3宁波海关http://zw.huatu.com/2018/buwei2018/2107.html
4浙江省国家税务局http://zw.huatu.com/2018/buwei2018/2175.html
\n", 335 | "
" 336 | ], 337 | "text/plain": [ 338 | " unit_name url\n", 339 | "0 国家物资储备局浙江办事处 http://zw.huatu.com/2018/buwei2018/1763.html\n", 340 | "1 浙江海事局 http://zw.huatu.com/2018/buwei2018/2009.html\n", 341 | "2 杭州海关 http://zw.huatu.com/2018/buwei2018/2105.html\n", 342 | "3 宁波海关 http://zw.huatu.com/2018/buwei2018/2107.html\n", 343 | "4 浙江省国家税务局 http://zw.huatu.com/2018/buwei2018/2175.html" 344 | ] 345 | }, 346 | "execution_count": 13, 347 | "metadata": {}, 348 | "output_type": "execute_result" 349 | } 350 | ], 351 | "source": [ 352 | "unit_df.head()" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": { 358 | "deletable": true, 359 | "editable": true 360 | }, 361 | "source": [ 362 | "### 4 查询所有部门的招聘岗位等详细信息" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 10, 368 | "metadata": { 369 | "collapsed": true, 370 | "deletable": true, 371 | "editable": true 372 | }, 373 | "outputs": [], 374 | "source": [ 375 | "'''\n", 376 | "获取岗位详细信息\n", 377 | "url :\n", 378 | " 访问路径\n", 379 | "'''\n", 380 | "def get_post_metas(url):\n", 381 | " tmp_request = urllib.request.Request(url,headers=header)\n", 382 | " tmp_response = urllib.request.urlopen(tmp_request).read()\n", 383 | " tmp_content=BeautifulSoup(tmp_response, 'lxml')\n", 384 | " \n", 385 | " td_lis=[i.get_text() for i in tmp_content.find_all('td')]\n", 386 | " internal=[]\n", 387 | " for i in range(len(td_lis)):\n", 388 | " if i%10==0: # 生成间隔区间\n", 389 | " internal.append([i,i+10])\n", 390 | " \n", 391 | " row_lis=[]\n", 392 | " for lt,rt in internal: # 根据间隔区间,将数据分行\n", 393 | " row_lis.append(td_lis[lt:rt])\n", 394 | " \n", 395 | " return row_lis\n", 396 | "\n", 397 | "post_metas_lis=[]\n", 398 | "\n", 399 | "for unit_name,url in unit_lis: # 循环,加工招聘单位所招岗位详细信息\n", 400 | " post_metas_lis+=get_post_metas(url)\n", 401 | "\n", 402 | "# 将 list 转换为 DataFrame 格式\n", 403 | "\n", 404 | "# '部门名称','用人用司','职位名称','要求专业','招考人数','报考人数','历年分数线','历年竞争比','录取概率','对比'\n", 405 | 
"post_metas_df=pd.DataFrame(post_metas_lis, columns=['unit_name','employee_unit','post_name','professional','person_num','person_num2','view','view2','detail','compare'])" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 22, 411 | "metadata": { 412 | "collapsed": false 413 | }, 414 | "outputs": [ 415 | { 416 | "data": { 417 | "text/html": [ 418 | "
\n", 419 | "\n", 432 | "\n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | "
unit_nameemployee_unitpost_nameprofessionalperson_numperson_num2viewview2detailcompare
100浙江省国家税务局杭州市上城区国家税务局纳税服务科科员(一)公共管理类10查看查看详情对比
101浙江省国家税务局杭州市上城区国家税务局纳税服务科科员(二)工商管理类10查看查看详情对比
102浙江省国家税务局杭州市上城区国家税务局纳税服务科科员(三)财政学类10查看查看详情对比
\n", 490 | "
" 491 | ], 492 | "text/plain": [ 493 | " unit_name employee_unit post_name professional person_num person_num2 \\\n", 494 | "100 浙江省国家税务局 杭州市上城区国家税务局 纳税服务科科员(一) 公共管理类 1 0 \n", 495 | "101 浙江省国家税务局 杭州市上城区国家税务局 纳税服务科科员(二) 工商管理类 1 0 \n", 496 | "102 浙江省国家税务局 杭州市上城区国家税务局 纳税服务科科员(三) 财政学类 1 0 \n", 497 | "\n", 498 | " view view2 detail compare \n", 499 | "100 查看 查看 详情 对比 \n", 500 | "101 查看 查看 详情 对比 \n", 501 | "102 查看 查看 详情 对比 " 502 | ] 503 | }, 504 | "execution_count": 22, 505 | "metadata": {}, 506 | "output_type": "execute_result" 507 | } 508 | ], 509 | "source": [ 510 | "post_metas_df[post_metas_df.unit_name=='浙江省国家税务局'].head(3)" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 249, 516 | "metadata": { 517 | "collapsed": true, 518 | "deletable": true, 519 | "editable": true 520 | }, 521 | "outputs": [], 522 | "source": [ 523 | "# 保存数据信息\n", 524 | "\n", 525 | "post_metas_df.to_csv('./post_metas.csv')" 526 | ] 527 | } 528 | ], 529 | "metadata": { 530 | "kernelspec": { 531 | "display_name": "Python 3", 532 | "language": "python", 533 | "name": "python3" 534 | }, 535 | "language_info": { 536 | "codemirror_mode": { 537 | "name": "ipython", 538 | "version": 3 539 | }, 540 | "file_extension": ".py", 541 | "mimetype": "text/x-python", 542 | "name": "python", 543 | "nbconvert_exporter": "python", 544 | "pygments_lexer": "ipython3", 545 | "version": "3.6.3" 546 | } 547 | }, 548 | "nbformat": 4, 549 | "nbformat_minor": 2 550 | } 551 | -------------------------------------------------------------------------------- /spider_qiushibaike_content_datas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### 爬取 糗事百科段子 内容数据\n", 8 | "---\n", 9 | "- author:\n", 10 | " - kngines\n", 11 | "- date:\n", 12 | " - 20180118" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": { 19 | "collapsed": false 
20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import re\n", 24 | "import urllib\n", 25 | "import bs4\n", 26 | "import pandas as pd\n", 27 | "from bs4 import BeautifulSoup\n", 28 | "\n", 29 | "page = 1 # 第 1 页\n", 30 | "\n", 31 | "url = 'http://www.qiushibaike.com/hot/page/' + str(page)\n", 32 | "header={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3236.0 Safari/537.36'}\n", 33 | "request = urllib.request.Request(url,headers=header)\n", 34 | "response = urllib.request.urlopen(request).read()\n", 35 | "\n", 36 | "content=BeautifulSoup(response, 'lxml')\n", 37 | "\n", 38 | "'''\n", 39 | "1 获取段子内容\n", 40 | "'''\n", 41 | "divs = content.find_all('div', class_='content')\n", 42 | "content_lis=[]\n", 43 | "for div in divs:\n", 44 | " content_lis.append(div.span.get_text())\n", 45 | "\n", 46 | "'''\n", 47 | "2 获取用户昵称\n", 48 | "''' \n", 49 | "tmp=content.find_all('div', class_='col1')[0]\n", 50 | "nick_name_lis=[]\n", 51 | "for nick_name in tmp.find_all('h2'):\n", 52 | " nick_name_lis.append(nick_name.get_text())\n", 53 | "\n", 54 | "'''\n", 55 | "3 获取好笑、评论数量\n", 56 | "'''\n", 57 | "counter=0\n", 58 | "funny_lis=[]\n", 59 | "comment_lis=[]\n", 60 | "for cofu in content.find_all('i', class_='number'):\n", 61 | " counter+=1\n", 62 | " if counter%2!=0:\n", 63 | " funny_lis.append(cofu.get_text())\n", 64 | " else :\n", 65 | " comment_lis.append(cofu.get_text()) " 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "### 1 当前页码信息\n", 73 | "- 第 1 页\n", 74 | " - 通过简单调整,可以实现批量爬取数据" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 2, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "\u001b[1;31m[24H] current page:\u001b[0m \u001b[0;30;43m1\u001b[0m\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | 
"current_page=content.find_all('ul',class_='pagination')[0].find_all('span',class_='current')[0].get_text()\n", 94 | "print ('\\033[1;31m[24H] current page:\\033[0m \\033[0;30;43m%s\\033[0m' % (current_page.replace('\\n','')))" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "### 2 转换[DataFrame]并显示" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 3, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "tdf=pd.DataFrame([nick_name_lis,content_lis,funny_lis,comment_lis])\n", 113 | "df=tdf.T\n", 114 | "df.columns=['nick_name','content','funny_cnt','comment_cnt']" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 4, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/html": [ 127 | "
\n", 128 | "\n", 141 | "\n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | "
nick_namecontentfunny_cntcomment_cnt
0\\n我家熊孩子不熊\\n\\n\\n\\n九月份闺蜜来我家吃晚饭。她在用减肥瘦瘦包,这个3600块钱的瘦瘦包要求三个月内晚...75856
1\\n道士下山会女神\\n\\n\\n\\n银行办点事,小侄跟着去玩,见大堂圣诞树上挂满红包,小孩好奇心重,便挨个去拆开看,...80711
2\\n阿丹阿乐\\n\\n\\n\\n今天我去医院,遇到一位女士,机缘巧合比较聊得来,她说她是来做人工授精的。刚好我是...145728
\n", 175 | "
" 176 | ], 177 | "text/plain": [ 178 | " nick_name content funny_cnt \\\n", 179 | "0 \\n我家熊孩子不熊\\n \\n\\n\\n九月份闺蜜来我家吃晚饭。她在用减肥瘦瘦包,这个3600块钱的瘦瘦包要求三个月内晚... 758 \n", 180 | "1 \\n道士下山会女神\\n \\n\\n\\n银行办点事,小侄跟着去玩,见大堂圣诞树上挂满红包,小孩好奇心重,便挨个去拆开看,... 807 \n", 181 | "2 \\n阿丹阿乐\\n \\n\\n\\n今天我去医院,遇到一位女士,机缘巧合比较聊得来,她说她是来做人工授精的。刚好我是... 1457 \n", 182 | "\n", 183 | " comment_cnt \n", 184 | "0 56 \n", 185 | "1 11 \n", 186 | "2 28 " 187 | ] 188 | }, 189 | "execution_count": 4, 190 | "metadata": {}, 191 | "output_type": "execute_result" 192 | } 193 | ], 194 | "source": [ 195 | "df.head(3)" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "### 3 格式化显示数据" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 5, 208 | "metadata": { 209 | "collapsed": false 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "pd.set_option('display.width',200)\n", 214 | "pd.set_option('display.max_columns',20)\n", 215 | "pd.set_option('display.max_rows',50)\n", 216 | "pd.set_option('display.max_colwidth',200)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 6, 222 | "metadata": { 223 | "collapsed": false, 224 | "scrolled": true 225 | }, 226 | "outputs": [ 227 | { 228 | "data": { 229 | "text/html": [ 230 | "
\n", 231 | "\n", 244 | "\n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | "
nick_namecontentfunny_cntcomment_cnt
0\\n我家熊孩子不熊\\n\\n\\n\\n九月份闺蜜来我家吃晚饭。她在用减肥瘦瘦包,这个3600块钱的瘦瘦包要求三个月内晚餐不能吃米饭,不能喝水,不能吃任何荤菜和水果……我说,如果是这样子的话,我不用任何药可以瘦成闪电。闺蜜:不不不,我不行,必须靠药物。三个月过去了,现在的她比原来还胖了10斤,她说是自己没忍住吃了几次晚餐……这货的智商是不是都被脂肪吞噬了![捂脸][捂脸][捂脸]\\n\\n75856
1\\n道士下山会女神\\n\\n\\n\\n银行办点事,小侄跟着去玩,见大堂圣诞树上挂满红包,小孩好奇心重,便挨个去拆开看,结果一无所获,小孩失望的念叨:还大银行,都特么骗人。于是又去摘了个小布娃娃,大堂美女上去阻止,小屁孩居然说:亲我一下我就放回去,来都来了总不能让我空手回去吧!!\\n\\n80711
2\\n阿丹阿乐\\n\\n\\n\\n今天我去医院,遇到一位女士,机缘巧合比较聊得来,她说她是来做人工授精的。刚好我是去捐精的,于是我们决定省略中间环节,直接离开医院到她住的地方来了。情况就是这样,我真的没有骗人啊警官!\\n\\n145728
\n", 278 | "
" 279 | ], 280 | "text/plain": [ 281 | " nick_name content \\\n", 282 | "0 \\n我家熊孩子不熊\\n \\n\\n\\n九月份闺蜜来我家吃晚饭。她在用减肥瘦瘦包,这个3600块钱的瘦瘦包要求三个月内晚餐不能吃米饭,不能喝水,不能吃任何荤菜和水果……我说,如果是这样子的话,我不用任何药可以瘦成闪电。闺蜜:不不不,我不行,必须靠药物。三个月过去了,现在的她比原来还胖了10斤,她说是自己没忍住吃了几次晚餐……这货的智商是不是都被脂肪吞噬了![捂脸][捂脸][捂脸]\\n\\n \n", 283 | "1 \\n道士下山会女神\\n \\n\\n\\n银行办点事,小侄跟着去玩,见大堂圣诞树上挂满红包,小孩好奇心重,便挨个去拆开看,结果一无所获,小孩失望的念叨:还大银行,都特么骗人。于是又去摘了个小布娃娃,大堂美女上去阻止,小屁孩居然说:亲我一下我就放回去,来都来了总不能让我空手回去吧!!\\n\\n \n", 284 | "2 \\n阿丹阿乐\\n \\n\\n\\n今天我去医院,遇到一位女士,机缘巧合比较聊得来,她说她是来做人工授精的。刚好我是去捐精的,于是我们决定省略中间环节,直接离开医院到她住的地方来了。情况就是这样,我真的没有骗人啊警官!\\n\\n \n", 285 | "\n", 286 | " funny_cnt comment_cnt \n", 287 | "0 758 56 \n", 288 | "1 807 11 \n", 289 | "2 1457 28 " 290 | ] 291 | }, 292 | "execution_count": 6, 293 | "metadata": {}, 294 | "output_type": "execute_result" 295 | } 296 | ], 297 | "source": [ 298 | "df.head(3)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": { 305 | "collapsed": true 306 | }, 307 | "outputs": [], 308 | "source": [ 309 | "import re\n", 310 | "import urllib\n", 311 | "import bs4\n", 312 | "import pandas as pd\n", 313 | "from bs4 import BeautifulSoup\n", 314 | "\n", 315 | "page = 1 # 第 1 页\n", 316 | "\n", 317 | "url = 'http://www.qiushibaike.com/hot/page/' + str(page)\n", 318 | "header={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3236.0 Safari/537.36'}\n", 319 | "request = urllib.request.Request(url,headers=header)\n", 320 | "response = urllib.request.urlopen(request).read()\n", 321 | "\n", 322 | "content=BeautifulSoup(response, 'lxml')\n", 323 | "\n", 324 | "'''\n", 325 | "1 获取段子内容\n", 326 | "'''\n", 327 | "divs = content.find_all('div', class_='content')\n", 328 | "content_lis=[]\n", 329 | "for div in divs:\n", 330 | " content_lis.append(div.span.get_text())\n", 331 | "\n", 332 | "'''\n", 333 | "2 获取用户昵称\n", 334 | "''' \n", 335 | "tmp=content.find_all('div', class_='col1')[0]\n", 336 | "nick_name_lis=[]\n", 337 | "for 
# coding: utf-8

# ### Scrape joke-post data from Qiushibaike (糗事百科)
# ---
# - author:
#     - kngines
# - date:
#     - 20180118

# In[1]:


import re
# `import urllib` alone does not guarantee that the `request` submodule is
# loaded; import the submodule explicitly so `urllib.request.*` always resolves.
import urllib.request
import bs4
import pandas as pd
from bs4 import BeautifulSoup

page = 1  # number of the "hot" listing page to fetch

url = 'http://www.qiushibaike.com/hot/page/' + str(page)
header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3236.0 Safari/537.36'}
request = urllib.request.Request(url, headers=header)
# Read the body inside a `with` block so the HTTP response is always closed.
with urllib.request.urlopen(request) as http_response:
    response = http_response.read()

content = BeautifulSoup(response, 'lxml')

# 1. Joke text: the text of the <span> inside every <div class="content">.
# NOTE(review): `div.span` is None when a content div has no <span>, which
# would raise AttributeError here -- confirm against the live page markup.
divs = content.find_all('div', class_='content')
content_lis = [div.span.get_text() for div in divs]

# 2. User nicknames: every <h2> inside the first <div class="col1">.
tmp = content.find_all('div', class_='col1')[0]
nick_name_lis = [nick_name.get_text() for nick_name in tmp.find_all('h2')]

# 3. "Funny" and comment counts.
# The <i class="number"> tags are split by position: 1st, 3rd, 5th, ... go to
# the funny counts and 2nd, 4th, 6th, ... to the comment counts.
# NOTE(review): this assumes the page always emits the two counters as
# alternating pairs -- confirm against the live page markup.
number_texts = [cofu.get_text() for cofu in content.find_all('i', class_='number')]
funny_lis = number_texts[0::2]
comment_lis = number_texts[1::2]


# ### 1 Current page number
# - page 1
# - with a small adjustment this can be turned into a multi-page batch crawl

# In[2]:


current_page = content.find_all('ul', class_='pagination')[0].find_all('span', class_='current')[0].get_text()
print('\033[1;31m[24H] current page:\033[0m \033[0;30;43m%s\033[0m' % (current_page.replace('\n','')))


# ### 2 Convert to a DataFrame and display it

# In[3]:


# Build one row per field list, then transpose so each post becomes a row.
tdf = pd.DataFrame([nick_name_lis, content_lis, funny_lis, comment_lis])
df = tdf.T
df.columns = ['nick_name','content','funny_cnt','comment_cnt']


# In[4]:


df.head(3)


# ### 3 Display formatting

# In[5]:


# Widen pandas' display limits so the long joke text is not truncated.
pd.set_option('display.width',200)
pd.set_option('display.max_columns',20)
pd.set_option('display.max_rows',50)
pd.set_option('display.max_colwidth',200)


# In[6]:


df.head(3)


# In[ ]:


import re
# `import urllib` alone does not guarantee that the `request` submodule is
# loaded; import the submodule explicitly so `urllib.request.*` always resolves.
import urllib.request
import bs4
import pandas as pd
from bs4 import BeautifulSoup


def scrape_hot_page(page=1):
    """Fetch one page of the Qiushibaike "hot" listing and parse it.

    Args:
        page: Page number appended to the listing URL (defaults to 1).

    Returns:
        A ``(df, current_page)`` tuple. ``df`` is a DataFrame with the columns
        ``nick_name``, ``content``, ``funny_cnt`` and ``comment_cnt``, holding
        the scraped text values. ``current_page`` is the raw text of the
        pagination widget's "current" marker.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        IndexError: If the ``col1`` container or the pagination marker is
            missing from the returned HTML.
    """
    url = 'http://www.qiushibaike.com/hot/page/' + str(page)
    header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3236.0 Safari/537.36'}
    request = urllib.request.Request(url, headers=header)
    # Read the body inside a `with` block so the HTTP response is always closed.
    with urllib.request.urlopen(request) as http_response:
        response = http_response.read()

    content = BeautifulSoup(response, 'lxml')

    # 1. Joke text: the text of the <span> inside every <div class="content">.
    # NOTE(review): `div.span` is None when a content div has no <span>, which
    # would raise AttributeError -- confirm against the live page markup.
    content_lis = [div.span.get_text()
                   for div in content.find_all('div', class_='content')]

    # 2. User nicknames: every <h2> inside the first <div class="col1">.
    col1 = content.find_all('div', class_='col1')[0]
    nick_name_lis = [h2.get_text() for h2 in col1.find_all('h2')]

    # 3. "Funny" and comment counts, split by position: 1st, 3rd, ... are
    # funny counts and 2nd, 4th, ... are comment counts.
    # NOTE(review): assumes the counters always come in alternating pairs.
    number_texts = [tag.get_text()
                    for tag in content.find_all('i', class_='number')]
    funny_lis = number_texts[0::2]
    comment_lis = number_texts[1::2]

    # 4. Text of the "current" marker inside the first pagination list.
    current_page = content.find_all('ul', class_='pagination')[0].find_all('span', class_='current')[0].get_text()

    # 5. One row per field list, transposed so each post becomes a row.
    df = pd.DataFrame([nick_name_lis, content_lis, funny_lis, comment_lis]).T
    df.columns = ['nick_name','content','funny_cnt','comment_cnt']
    return df, current_page


page = 1  # page 1; loop over several page numbers to crawl in bulk

df, current_page = scrape_hot_page(page)
print('\033[1;31m[24H] current page:\033[0m \033[0;30;43m%s\033[0m' % (current_page.replace('\n','')))
df.head(3)
| 20170801,1790 9 | 20170802,1975 10 | 20170803,2135 11 | 20170804,2330 12 | 20170805,2560 13 | 20170806,2742 14 | 20170807,2852 15 | 20170808,3093 16 | 20170809,3331 17 | 20170810,3541 18 | 20170811,3721 19 | 20170812,3901 20 | 20170813,4088 21 | 20170814,4266 22 | 20170815,4477 23 | 20170816,4662 24 | 20170817,4839 25 | 20170818,5042 26 | 20170819,5237 27 | 20170820,5550 28 | 20170821,5736 29 | 20170822,5870 30 | 20170823,5984 31 | --------------------------------------------------------------------------------