├── Anaconda2_infos_analysis.ipynb
├── Anaconda3_infos_analysis.ipynb
├── Anaconda环境安装以及搭建Python多内核环境.docx
├── Python_Common_Magic_Samples.ipynb
├── Python_PyODPS_HTML_to_PDF.ipynb
├── Python操作Mysql实例教程手册_代码.ipynb
├── README.md
├── lng_lat_2_geohash_two_way.ipynb
├── post_metas.csv
├── spider_huatu_civil_servant_post_metas.ipynb
├── spider_qiushibaike_content_datas.ipynb
├── spider_qiushibaike_content_datas.py
├── users_rise_up_period.ipynb
└── users_rise_up_period_data
/Anaconda2_infos_analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "### Python 在数据科学方面需要用到的库:\n",
8 | "\n",
9 | "- numpy:科学计算库。提供矩阵运算的库。\n",
10 | "\n",
11 | "- pandas:数据分析处理库\n",
12 | "\n",
13 | "- scipy:数值计算库。提供数值积分和常微分方程组求解等算法,并提供了一个非常丰富的特殊函数集合。\n",
14 | "\n",
15 | "- Matplotlib:数据可视化库\n"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "### 1 当前路径信息(内核Python 2.7)"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 3,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "data": {
32 | "text/plain": [
33 | "['',\n",
34 | " '/root/anaconda2/lib/python27.zip',\n",
35 | " '/root/anaconda2/lib/python2.7',\n",
36 | " '/root/anaconda2/lib/python2.7/plat-linux2',\n",
37 | " '/root/anaconda2/lib/python2.7/lib-tk',\n",
38 | " '/root/anaconda2/lib/python2.7/lib-old',\n",
39 | " '/root/anaconda2/lib/python2.7/lib-dynload',\n",
40 | " '/root/anaconda2/lib/python2.7/site-packages',\n",
41 | " '/root/anaconda2/lib/python2.7/site-packages/Sphinx-1.5.6-py2.7.egg',\n",
42 | " '/root/anaconda2/lib/python2.7/site-packages/setuptools-27.2.0-py2.7.egg',\n",
43 | " '/root/anaconda2/lib/python2.7/site-packages/IPython/extensions',\n",
44 | " '/root/.ipython']"
45 | ]
46 | },
47 | "execution_count": 3,
48 | "metadata": {},
49 | "output_type": "execute_result"
50 | }
51 | ],
52 | "source": [
53 | "import sys\n",
54 | "\n",
55 | "sys.path"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "### 2 科学计算常用包版本及路径信息"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 4,
68 | "metadata": {},
69 | "outputs": [
70 | {
71 | "data": {
72 | "text/html": [
73 | "
\n",
74 | "\n",
87 | "
\n",
88 | " \n",
89 | " \n",
90 | " | \n",
91 | " pack_name | \n",
92 | " version | \n",
93 | " path | \n",
94 | "
\n",
95 | " \n",
96 | " \n",
97 | " \n",
98 | " 0 | \n",
99 | " numpy | \n",
100 | " 1.12.1 | \n",
101 | " /root/anaconda2/lib/python2.7/site-packages/numpy/__init__.pyc | \n",
102 | "
\n",
103 | " \n",
104 | " 1 | \n",
105 | " matplotlib | \n",
106 | " 2.0.2 | \n",
107 | " /root/anaconda2/lib/python2.7/site-packages/matplotlib/__init__.pyc | \n",
108 | "
\n",
109 | " \n",
110 | " 2 | \n",
111 | " pandas | \n",
112 | " 0.20.1 | \n",
113 | " /root/anaconda2/lib/python2.7/site-packages/pandas/__init__.pyc | \n",
114 | "
\n",
115 | " \n",
116 | " 3 | \n",
117 | " scipy | \n",
118 | " 0.19.0 | \n",
119 | " /root/anaconda2/lib/python2.7/site-packages/scipy/__init__.pyc | \n",
120 | "
\n",
121 | " \n",
122 | "
\n",
123 | "
"
124 | ],
125 | "text/plain": [
126 | " pack_name version \\\n",
127 | "0 numpy 1.12.1 \n",
128 | "1 matplotlib 2.0.2 \n",
129 | "2 pandas 0.20.1 \n",
130 | "3 scipy 0.19.0 \n",
131 | "\n",
132 | " path \n",
133 | "0 /root/anaconda2/lib/python2.7/site-packages/numpy/__init__.pyc \n",
134 | "1 /root/anaconda2/lib/python2.7/site-packages/matplotlib/__init__.pyc \n",
135 | "2 /root/anaconda2/lib/python2.7/site-packages/pandas/__init__.pyc \n",
136 | "3 /root/anaconda2/lib/python2.7/site-packages/scipy/__init__.pyc "
137 | ]
138 | },
139 | "execution_count": 4,
140 | "metadata": {},
141 | "output_type": "execute_result"
142 | }
143 | ],
144 | "source": [
145 | "import numpy\n",
146 | "import matplotlib\n",
147 | "import pandas as pd\n",
148 | "import scipy\n",
149 | "\n",
150 | "packs = [\n",
151 | " (\"numpy\", numpy.__version__, numpy.__file__),\n",
152 | " (\"matplotlib\", matplotlib.__version__, matplotlib.__file__),\n",
153 | " (\"pandas\", pd.__version__, pd.__file__),\n",
154 | " (\"scipy\", scipy.__version__, scipy.__file__)\n",
155 | "]\n",
156 | "\n",
157 | "pd_packages = pd.DataFrame(packs, columns=[\"pack_name\", \"version\", \"path\"])\n",
158 | "pd.set_option(\"max_colwidth\", 120)\n",
159 | "pd_packages"
160 | ]
161 | },
162 | {
163 | "cell_type": "markdown",
164 | "metadata": {},
165 | "source": [
166 | "### 3 Anaconda2 集成包简单分析"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": 10,
172 | "metadata": {
173 | "collapsed": true
174 | },
175 | "outputs": [],
176 | "source": [
177 | "import sys\n",
178 | "%matplotlib inline\n",
179 | "\n",
180 | "packages = [pack.split(\".\")[0] for pack in sys.modules.keys()]\n",
181 | "pd_packages = pd.DataFrame(packages, columns=[\"package\"])\n",
182 | "pack_series = pd_packages.groupby(by=\"package\").size()"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 11,
188 | "metadata": {},
189 | "outputs": [
190 | {
191 | "data": {
192 | "text/plain": [
193 | "count 214.000000\n",
194 | "mean 10.163551\n",
195 | "std 37.323436\n",
196 | "min 1.000000\n",
197 | "25% 1.000000\n",
198 | "50% 1.000000\n",
199 | "75% 1.000000\n",
200 | "max 411.000000\n",
201 | "dtype: float64"
202 | ]
203 | },
204 | "execution_count": 11,
205 | "metadata": {},
206 | "output_type": "execute_result"
207 | }
208 | ],
209 | "source": [
210 | "pack_series.describe()"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": 12,
216 | "metadata": {},
217 | "outputs": [
218 | {
219 | "data": {
220 | "text/plain": [
221 | "214"
222 | ]
223 | },
224 | "execution_count": 12,
225 | "metadata": {},
226 | "output_type": "execute_result"
227 | }
228 | ],
229 | "source": [
230 | "pack_series.index.duplicated()\n",
231 | "pack_temp = pack_series.index.drop_duplicates()\n",
232 | "len(pack_temp)"
233 | ]
234 | },
235 | {
236 | "cell_type": "markdown",
237 | "metadata": {},
238 | "source": [
239 | "### 3.1 根据 Anaconda 2 包中的模块数量倒序绘图"
240 | ]
241 | },
242 | {
243 | "cell_type": "code",
244 | "execution_count": 14,
245 | "metadata": {},
246 | "outputs": [
247 | {
248 | "data": {
249 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEKCAYAAAAcgp5RAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XucXWVh7vHfM3vul8xMksk9EC5B5SJgI6JctHiB1lao\n9VB6LITCObSnaG3P0R5pz6nYlkqrx6MgaFHUVFFKT/WQUitikIuKhHAnCSEBEpOQyyRkMpPMJXN5\n+8d6J9mZzGRmz+yZvfbO8/185rPXXnuttd+9Zu9nv/td73qXQgiYmVnpKit0AczMbHI56M3MSpyD\n3sysxDnozcxKnIPezKzEOejNzEqcg97MrMQ56M3MSpyD3sysxJUXugAAM2fODIsWLSp0MczMisqT\nTz65K4TQMtpyqQj6RYsWsWrVqkIXw8ysqEjaNJbl3HRjZlbiHPRmZiXOQW9mVuIc9GZmJc5Bb2ZW\n4hz0ZmYlzkFvZlbiUhH029u76e0fKHQxzMxKUiqCvrWjh9f3Hyh0MczMSlIqgh5gZ3tPoYtgZlaS\nxhz0kjKSnpZ0X7w/XdIDktbH2+asZW+QtEHSOkkXj2X7rfu6cy+9mZmNKpca/ceAtVn3PwmsCCEs\nBlbE+0g6FbgCOA24BLhdUma0jbtGb2Y2OcYU9JIWAO8HvpY1+1JgWZxeBlyWNf/uEEJPCOFVYANw\nzmjP0drhoDczmwxjrdF/AfgzILtrzOwQwrY4vR2YHafnA5uzltsS540oI7HTQW9mNilGDXpJvwHs\nDCE8OdIyIYQAhFyeWNJ1klZJWiUGXKM3M5skY6nRnwd8QNJG4G7gIknfBnZImgsQb3fG5bcCC7PW\nXxDnHSaEcEcIYUkIYUl1ZQU7O3ww1sxsMowa9CGEG0IIC0IIi0gOsj4YQvg9YDmwNC62FLg3Ti8H\nrpBUJekEYDGw8mjPUV4mWve5Rm9mNhkmcoWpm4F7JF0LbAIuBwghrJZ0D7AG6AOuDyH0H7UQmTJ2\ntvcQQkDSBIpkZmZD5RT0IYSHgIfi9G7g3SMsdxNw01i3W5ERPX0DdPT0Ma26IpcimZnZKFJxZmx5\nWVIM96U3M8u/dAR9Jmmucc8bM7P8S0XQVwzW6N3zxsws71IR9K7Rm5lNnlQEfaZMVJaXOejNzCZB\nKoIeoKW+ykFvZjYJ0hP0DVUe78bMbBKkJuhnNbhGb2Y2GVIT9EmN3r1uzMzyLTVBP6uhmj2dvRzo\n80XCzczyKT1BP60KgF0e3MzMLK9SE/Qt9UnQu53ezCy/UhP0gzV697wxM8uv1AR9S4Nr9GZmkyE1\nQT+zfrBG7543Zmb5lJqgr8iUMb2u0jV6M7M8S03QQ3LSlNvozczyK1VB3+KzY83M8s5Bb2ZW4lIZ\n9CGEQhfFzKxkpCroZzVUc6B/gL1dvYUuiplZyUhV0LsvvZlZ/qUq6Gc1+OxYM7N8S1XQu0ZvZpZ/\nqQr6QzV6nx1rZpYvqQr6+qpyqit8kXAzs3xKVdBLYlZDtdvozczyKFVBDz5pysws31IX9B7vxsws\nv1IX9K7Rm5nlV+qCflZDFXu7eunu7S90UczMSkLqgn6wL70vEm5mlh+pC/pZDdWAz441M8uX1AX9\nYI1+Z7uD3swsH1IX9INnx7a66cbMLC9SF/TT6yqRoLXdwyCYmeVD6oK+PFPGjLoq1+jNzPIkdUEP\nSTu92+jNzPIjlUE/q8E1ejOzfBk16CVVS1op6VlJqyV9Os6fLukBSevjbXPWOjdI2iBpnaSLcy2U\na/RmZvkzlhp9D3BRCOFM4CzgEknnAp8EVoQQFgMr4n0knQpcAZwGXALcLimTS6FmNVSxa18PAwO+\nSLiZ2USNGvQhsS/erYh/AbgUWBbnLwMui9OXAneHEHpCCK8CG4BzcilUS0MVfQOBPZ0HclnNzMyG\nMaY2ekkZSc8AO4EHQgiPA7NDCNviIt
uB2XF6PrA5a/Utcd6YDZ4d63Z6M7OJG1PQhxD6QwhnAQuA\ncySdPuTxQFLLHzNJ10laJWlVa2vrYY/57Fgzs/zJqddNCKEN+AlJ2/sOSXMB4u3OuNhWYGHWagvi\nvKHbuiOEsCSEsKSlpeWwx2b5IuFmZnkzll43LZKa4nQN8F7gRWA5sDQuthS4N04vB66QVCXpBGAx\nsDKXQh2s0TvozcwmrHwMy8wFlsWeM2XAPSGE+yQ9Btwj6VpgE3A5QAhhtaR7gDVAH3B9CCGnweXr\nqsqpq8y4Rm9mlgejBn0I4Tng7GHm7wbePcI6NwE3TaRgLQ1V7OzweDdmZhOVyjNjIel54xq9mdnE\npTbofe1YM7P8cNCbmZW4VAd9R08fXQd8kXAzs4lIbdC7L72ZWX6kNugP9aV3zxszs4lIbdAfHO/G\nNXozswlJbdD77Fgzs/xIbdBPr6skUybX6M3MJii1QZ8pEzPqKt1Gb2Y2QakNeoBZ09yX3sxsolId\n9C31VW6jNzOboFQH/ayGage9mdkEpTro5zYlA5v19PnsWDOz8Up10M9vqgFgW5sPyJqZjVe6g745\nCfqtbV0FLomZWfFKddAvbK4FYOseB72Z2XilOujnNFZTJtjiGr2Z2bilOugrMmXMnlbtGr2Z2QSk\nOughOSC7ta2z0MUwMyta6Q/65hofjDUzm4D0B31TDdvauukfCIUuiplZUUp/0DfX0DcQPLiZmdk4\npT/o40lTPiBrZjY+qQ/6BT5pysxsQlIf9PNijX6La/RmZuOS+qCvrSxnel2la/RmZuOU+qCH2Jfe\nNXozs3EpnqB3jd7MbFyKI+ibkxp9CO5Lb2aWq+II+qYaunr7eX3/gUIXxcys6BRH0LuLpZnZuBVH\n0PukKTOzcSuKoPdJU2Zm41cUQd9YU0FdZcYnTZmZjUNRBL0kD1dsZjZORRH04JOmzMzGq3iC3jV6\nM7NxKZ6gb6plb1cv+3r6Cl0UM7OiMmrQS1oo6SeS1khaLeljcf50SQ9IWh9vm7PWuUHSBknrJF2c\nj4Ie7Evv5hszs5yMpUbfB/yPEMKpwLnA9ZJOBT4JrAghLAZWxPvEx64ATgMuAW6XlJloQQ/2pfeF\nws3McjJq0IcQtoUQnorTHcBaYD5wKbAsLrYMuCxOXwrcHULoCSG8CmwAzploQRe4Rm9mNi45tdFL\nWgScDTwOzA4hbIsPbQdmx+n5wOas1bbEeRPSUl9FZaaMLT4ga2aWkzEHvaR64F+APwkhtGc/FpJh\nJXMaWlLSdZJWSVrV2to6ekHLxNymatfozcxyNKagl1RBEvJ3hRC+F2fvkDQ3Pj4X2BnnbwUWZq2+\nIM47TAjhjhDCkhDCkpaWljEV1uPSm5nlbiy9bgTcCawNIXw+66HlwNI4vRS4N2v+FZKqJJ0ALAZW\n5qOwPmnKzCx35WNY5jzgSuB5Sc/EeX8O3AzcI+laYBNwOUAIYbWke4A1JD12rg8h9OejsPOba9jZ\n0UNPXz9V5RPuyGNmdkwYNehDCD8FNMLD7x5hnZuAmyZQrmENdrHc1tbNopl1+d68mVlJKpozY8EX\nIDEzG4+iCvoFTbWA+9KbmeWiqIJ+TmM1Eu5Lb2aWg6IK+sryMmY3uC+9mVkuiiroYXC4Yo93Y2Y2\nVsUX9D5pyswsJ8UX9M01bGvrpn8gpxEXzMyOWcUX9E019A0EdnZ0F7ooZmZFofiC3sMVm5nlpOiC\nfkGTT5oyM8tF0QX9YI1+i2v0ZmZjUnRBX1tZTnNthWv0ZmZjVHRBD7EvvWv0ZmZjUpxB7770ZmZj\nVqRBX8vWPV0kVzA0M7OjKc6gb66hq7efPZ29hS6KmVnqFWfQN7kvvZnZWBVl0C84eAESD25mZjaa\nogz6wRq9+9KbmY2uKIO+qbaC2sqMe96YmY1BUQa9pKSLpWv0ZmajKsqgB3jDnAae2dzmLpZmZqMo\n2q
C/cHELOzt6eGnHvkIXxcws1Yo26M9fPBOAR9e3FrgkZmbpVrRBP6+phpNa6nh0/a5CF8XMLNWK\nNugBLljcwuOv7qa7t7/QRTEzS62iDvoLT5lJd+8AT27aU+iimJmlVlEH/dtOmEFFRjzidnozsxEV\nddDXVZXzluOa+anb6c3MRlTUQQ9w4SktrH6tnV37egpdFDOzVCr6oL8gdrP82QbX6s3MhlP0QX/a\nvEaaayt45CUHvZnZcIo+6DNl4h0nz+TR9a0eDsHMbBhFH/QAFy6eyc6OHtbv9HAIZmZDlUTQn7+4\nBYBHXnI3SzOzoUoi6Od7OAQzsxGVRNCDh0MwMxtJCQV9MhzCUx4OwczsMCUT9OeeODgcgptvzMyy\nlUzQDw6H4PHpzcwON2rQS/q6pJ2SXsiaN13SA5LWx9vmrMdukLRB0jpJF09WwYdzweKZHg7BzGyI\nsdTovwlcMmTeJ4EVIYTFwIp4H0mnAlcAp8V1bpeUyVtpR3FB7Gbp4RDMzA4ZNehDCI8Arw+ZfSmw\nLE4vAy7Lmn93CKEnhPAqsAE4J09lHdXp8xtpqq1wN0szsyzjbaOfHULYFqe3A7Pj9Hxgc9ZyW+K8\nI0i6TtIqSataW/PTrp4pE+d5OAQzs8NM+GBsSBI151QNIdwRQlgSQljS0tIy0WIcdMHJM9nR7uEQ\nzMwGjTfod0iaCxBvd8b5W4GFWcstiPOmzPlx2OLP3r+Of3tuG1v2dLp2b2bHtPJxrrccWArcHG/v\nzZr/HUmfB+YBi4GVEy1kLhY01/KBM+fxw9XbeWDNDgBm1ldy5oImzlyY/C05vpm6qvG+dDOz4jJq\n2kn6LvAuYKakLcCnSAL+HknXApuAywFCCKsl3QOsAfqA60MIUz4mwS2/ezYH+gZYt72DZ7a08ezm\nNp7Z3MaD63YSAlRmynjrCc2885QW3nnKLE6ZXY+kqS6mmdmUUBqaNZYsWRJWrVo16c/T3t3Ls5vb\neHT9Lh5e18q6HR0AzJlWzTtPaeHdb5rFe0+d7dA3s6Ig6ckQwpJRlzuWgn6obXu7eHhdKw+/1MpP\n1++io6ePP3znSXzy19445WUxM8vVWIP+mG6onttYwxXnHMcV5xxHb/8ANy5fzVcefpl5TdVc9fZF\nhS6emVleHNNBn60iU8ZfXXo6O9p7+NTy1cxqqOaS0+cUulhmZhNWMoOa5UOmTNz6u2dz5oImPnb3\n0zy5aegJwWZmxcdBP0RNZYY7ly5hbmM11y5bxcutPvHKzIqbg34YM+qrWHbNOZSXiaVfX8nOju5C\nF8nMbNwc9CM4fkYddy59K7v3HeCabz7Bvp6+QhfJzGxcHPRHcebCJm778Nms3dbBf122iqd/ucfD\nKZhZ0XHQj+KiN87mMx88gyd/uYffuv3nvOtzD/H5H61jgwdNM7MicUyfMJWLvV293L96O8ufeY2f\nv7yLgQCnzZvGZWfN59Kz5jFrWnWhi2hmxxifGTuJdrZ386/PbWP5M1t5dsteGqrL+da1b+OshU2F\nLpqZHUPGGvRuuhmHWdOqufb8E7j3I+fzoz+9kKbaCq782uM89cs9hS6amdkRHPQTdMrsBv7purcz\nvb6Sq+5c6ZOszCx1HPR5MK+phruvO5eWhiquunMlT2x02JtZejjo82RuYxL2s6dVs/TrK3n8ld2F\nLpKZGeCgz6vZ06q5+7pzmdtYzdXfeILHXnbYm1nhOejzbNa0au6+7u0saK7h97+5kjt/+io/fGEb\nj728m7Xb2tm2t4vu3im/6JaZHcM8TPEkaGmo4rvXncuVd67kr+9bM+wyVeVlNNVW0FRTSWNtBU01\nFcn92koaB6drKmmqrTh0v7aSusqMr4BlZjlx0E+SmfVV3PfR89nR3k1bZy9tXQfY29lLW1cvezoP\n0NbZG+8n0798vZPntiT3u3sHRtxumZLhlIfTWFPJ1e84nqXvWERD
dcVkvTQzKzIO+kmUKRPzmmqY\n11ST03rdvf3s7epNviA6D9DWdehLob2rj4ERTnJbu62dz/3oJe545BWuPf9Erj5vEY01DnyzY52D\nPoWqKzJUV2SYPY5hFZ7fspdbHlzP//3xS3ztp6/w++edwDXnLaKptnISSmpmxcBDIJSo1a/t5dYV\nG/jh6u3UV5XzvtNmU1We67F3ce6J0/mNN88bsbnIzArHY90YAC9ub+fWBzfwxKu5n8R1oH+Ats5e\nTpxZx0cuOpkPnDmP8ow7apmlhYPeJmxgIHD/6u18ccV6XtzewaIZtVz/qydz2dnzqXDgmxWcg97y\nZmAg8MDaHdyyYj2rX2tn4fQarn/Xyfz2ryxw4JsVkEevtLwpKxMXnzaH+z56Pl+7agnNtZV88nvP\nc/EXHmHF2h2+6pZZyjnobcwk8Z5TZ3Pv9efx1auWQIBrl63iyjtXsnZbe6GLZ2YjcNBbziTx3lNn\n88M/uZBP/eapPL91L++/5VFu+N5z7OzoLnTxzGwIt9HbhLV1HuCWFRv4x8c2UlVexh+88yTOPXEG\ni2bW0lJf5SEbzCaJD8balHuldR+f+fcXeWDNjoPzaiszHD+jjkUzajl+Rh0Lp9cwvXZwfJ/KOIZP\nBTUVHsPHLFdjDXqfGWt5c2JLPV+9agmbX+/k5dZ9bNrdycbd+9m0u5N1Ozr48dod9PYPX7GozJTR\n0lDF8fELYfCLYdHMWo6fXkdNZWaKX41Z6XDQW94tnF7Lwum1R8zvHwi0dvQcGtQtDujWFsf12dHe\nzabd+7l/9XZe33/gsHWb4gifjbWVh0b6jPdHOuNXgpNa6jlrYdO4hpMwKxUOepsymTIxp7GaOY2j\nh+7erl5+GX8RbNy1n9Z9PVlfCgfYuHs/bZ29tHf3MpbWxznTqjlzYSNnLWzmzIWNnDG/0SN82jHD\nQW+p1FhTwRkLGjljQeNRl+sfCPQPDJ/0fQMDvLi9g2c3t/HM5jae3dzG/asPHT+orig7csz/mkrq\nq8sZz9A+jTUVsdmpjuNn1jLNXySWEg56K2qZMo044FolZbzluGbeclzzwXl79h/gua17WfNae2xC\nOtR8tHFXJ21dbXR09+VcjhCga8iVw6bXVXL8jFpOmFF36HhDPP7g0URtKrnXjVmedB3oZ9Pr+9m4\nq5NNu/ezcXe83bWfbe3dhzUxNdZUsGhGciyjumL4A81lgmnVyS+Nw49NHP1XR01lxt1ajxHudWM2\nxWoqM7xxzjTeOGfaEY919/az+fXOQ+EfeyM9v3UvfSP0ROofCLR399J5IPdrDA/t1rpoRi2LZtbR\nVFuByO0LQIL6qnJ3gy1iDnqzKVBdkWHx7AYWz27Ied2evuSKY3uzeih1dPeOuHx7Vy+bXu8cU7fW\nXFVmyg67xnFDdQVlIwR/eZkOHvsYet5ES33VUX/NWH456M1Srqo8w6yGDLMaxtdFtH8g8FpbFxt3\n7x/X8YeBENjX3XfwS+Zgt9jYJXak1t++gQH2dvWyp7OXA31HXgdZgrnTqo84ftFYU0muPxoE1FeX\n0xSbuGor/csj26QFvaRLgC8CGeBrIYSbJ+u5zGxkmTKNeG7DVOnu7Y8HvQ+wZ//gOROHmrF+tHoH\nu4ecOzERFRnRWFNJc20FDdXlIx+wLy/juOmHN2+V4gl6kxL0kjLAbcB7gS3AE5KWhxDWTMbzmVm6\nVVdkmNOYOeo5FO3dvWza1UlHz8jNUiMJATq6+444Ca+t88BRz7XY39M/7JfM7GlVzGuqoTn+Qhja\n9FRVnvsXgQQNVeXJtqb4l8dk1ejPATaEEF4BkHQ3cCngoDezYU2rrhj1vInJ0t59+Al6G3d3sm1v\nFzs7unlpRwd7O3vp6Mm92Ws0g8c8GqrLyUxi4E9W0M8HNmfd3wK8LXsBSdcB1wEcd9xxk1QMM7PR\nTauu4PT5jZw+f+Qvmt7+AdqP
csxhNAMhsK+nb5jhPw7Q3tVHIPcD5j8e43IFOxgbQrgDuAOSfvSF\nKoeZ2VhUZMqYUV/FjPqqQhfloC//3tiWm6wLj2wFFmbdXxDnmZnZFJusoH8CWCzpBEmVwBXA8kl6\nLjMzO4pJaboJIfRJ+ghwP0n3yq+HEFZPxnOZmdnRTVobfQjhB8APJmv7ZmY2Nr44uJlZiXPQm5mV\nOAe9mVmJc9CbmZW4VFx4RFIHsK7Q5UiZmcCuQhciRbw/Duf9caRjcZ8cH0JoGW2htAxTvG4sV0k5\nlkha5X1yiPfH4bw/juR9MjI33ZiZlTgHvZlZiUtL0N9R6AKkkPfJ4bw/Duf9cSTvkxGk4mCsmZlN\nnrTU6M3MbJKkNuglPSSpaI6gS1ok6YUpeq4fSGqKf380Fc9ZCiT9eaHLcDSSbpT08UKXI+0kXS3p\nS3H6DyVdlTV/3hjW/2tJz0l6RtKPxrJOsUtt0NuRlCgLIfx6CKENaAJyCvrBbUxOCVMv1UGfK0lp\n6R5dMCGEr4QQ/jHevRoYS2h/NoTw5hDCWcB9wF9OVvnSYsIf+FiTfVHSXZLWSvp/kmol/aWkJyS9\nIOkOxSvgxpr630laKeklSRfE+TWS7o7b+D5Qk/UcX5a0StJqSZ/Omn+zpDXx2/lzE30tObzmt8bn\nrJZUJ2k1UJ/1+Gnx9T0Tl1s83DqSTpd0m6QPxPW+L+nrcfoaSTfF/btO0j8CLwALJW2UNBO4GTgp\nPs9n43qfiPv9ucF9Ndw2pmg/jfTe+BVJD0t6UtL9kuZKKo/lfldc9zOSborTGyX9vaTn4349Oc5v\nkfQvcb0nJJ0X59dL+kZc/jlJvy3pZqAm7qu7puL1Z+2Hq2I5npX0rbhfHozzVkg64lqaks6S9Iu4\nzPclNcf5D0n6gqRVwMck/aakxyU9LenHkmbH5W6UtEzSo5I2Sfpg1j78oaSKuNywn9OpFj8T/xb3\n0QuSfkfSJfH985SkWyTdN8x6N0r6uKQPAUuAu+L/uGa49xlACKE9axN1MI5r+BWbEMKE/oBFJDvq\nvHj/68DHgelZy3wL+M04/RDwf+L0rwM/jtP/nWTceoA3A33Aknh/erzNxPXfDMwgOZt28IBy00Rf\nS46v+2+AzwG3ATfE/fBCfOxW4MNxuhKoGW6dOO8KkhoGwErgF3H6G8DFcbsDwLlZz72R5CzAg88Z\n57+PpOeBSL7E7wMuHG4bU7SPhntvfAL4OdAS5/1O1v/9NGAt8B7gaaAy6/X+RZy+CrgvTn8HOD9O\nHwesjdN/B3whqxzN8XbfVL7+rNf0EjBz8L0M/CuwNN6/Bvj/cfpG4ONx+jngnXH6rwZfT3z/3579\n2rI+A/+FQ5+tG4GfAhXAmUAn8Gvxse8Dl2V/toZ+Tguwn34b+GrW/UaS604vju/ne7L+71cDXxpm\nnz3EocyoGOl9Fu/fFLf/wuAypfyXr5/wm0MIP4vT3wbOB3411jSeBy4iecMP+l68fZIkDCAJpG8D\nhBCeI3mjD7pc0lMkH/7TgFOBvUA3cKekD5K8kafSXwHvJalF/P2Qxx4D/lzS/yQ5RbnrKOs8Clwg\n6VRgDbAj1jzeTvJGBdgUQvjFGMr0vvj3NPAU8EaSD0ou28i3oe+Ni4HTgQckPQP8L5JLTRKSi9N8\ni+QL6poQwoGs7Xw36/btcfo9wJfidpYD0yTVx/m3Da4YQtgzGS9sjC4C/jmEsCuW5XWS8n8nPv4t\nks/LQZIaSSouD8dZy0g+H4P+KWt6AXB//Jx9gsM/Z/8eQugFniepJP0wzn+eQ5+7o31Op9LzwHuV\n/Nq/ADgBeDWEsD4kyfztHLf3BkZ4nwGEEP4ihLAQuAv4SF5eQYrlq41v6E+fANxO8u26WdKNQHXW\n4z3xtn+0Mkg6geQXwltDCHskfROoDslVrM4B3g18iOSfddFEX0gOZpA011Rw+GsjhPAdSY8D7w
d+\nIOkPQggPDrPO/hDCVklNwCXAIyQ1vstJap8dkmYA+8dYJgGfCSH8w2EzpUU5bCPfhr43OoDVIYS3\nD7cwcAbQBsw6ynYGp8tIfqV0Zy9YoNaHqZT9v7wV+HwIYXls9rox67EegBDCgKTeGJiQ/Lorl1TN\n0T+nUyaE8JKkt5D8yv8bYMUENymO/j4bdBfJBZI+NcHnS7V81eiPkzS4Q/8zyU9GgF2xhvWhMWzj\nkbgukk4naZ4BmEbyxt4b2x9/LS5TDzSG5EpWf0ry83Qq/QPwv0neKH+X/YCkE4FXQgi3APdy6LWM\ntM4vgD8h2QePknyxPTqGMnQADVn37weuifsGSfMlDQ3MqTb0vfELoGVwnqQKSafF6Q+SfNFdCNwa\nvwAH/U7W7WNx+kfARwcXkHRWnHwAuD5rfnOc7B1sm55CDwL/KX5hI2k6yS+1K+LjH2bI/zqEsBfY\nE2u2AFcCDzO8RmBrnF6aY9kGQz2Xz+mkUNLzpTOE8G3gs8A7gEWSToqL/O4YNpP9eVjHyO+zxVnr\nXAq8mIeXkGr5qtGvA65XciBxDfBlkrbDF4DtJBcLH82XgW9IWkvSTvskQAjhWUlPk/wzNgODzQAN\nwL2xViKSNv4poaQ7V2+suWdIPrjZvyYuB66U1Evy+v92uHUkXRRr+o8C7wshbJC0iSTsRg36EMJu\nST9T0q3z30MIn5D0JuCxWKvdB/weyS+nQhn63riV5AvplthEUQ58QdIOkoPL7461yy8BX+RQeDVL\neo6kljr4of9j4LY4v5zki/IPSWqEt8X90g98mqS58A7gOUlPhRA+PNkvHJLmKCUHlR+W1E/SrPZR\nkvf6J4BW4PeHWXUp8BVJtcArIywDSQ3+nyXtIflSOSGHsrVJ+iq5fU4nyxnAZyUNAL3AfyM5DvVv\nkjpJPg8NR1kf4Jsk+6yLpHnsQwx5nwGrgZslvYHkl80mkvdMSZvwmbGxWeC+EMLp+SiQlY58vTck\nbSRpXjjWhqC1KDZLfTyE8BuFLksxOlb7U5uZHTM81o2ZWYlzjd7MrMQ56M3MSpyD3sysxDnozYaQ\n9K7hxlUxK1YOejOzEuegt5Ki3EdTPVnJqI/PKhkl8aQh23urkpEhT5J0jqTH4v2fx5NuiNu/R8lI\nqt+PY8csiY+9L67zlKR/Hjxr2WwqOeitFL2BZITHNwHtJGP2fymE8NZ48lYNMHjizV3AbSGEM0lO\nu982uBFJ7wC+AlwaQniZ5OzsC0IIZ5OMYf63cdE/AvaEEE4lGeLiV+L6M0kG03pPCOEtwCqm8Axu\ns0HH/IWIP/vZAAABaUlEQVQLrCQNHTHzj4FXJf0ZUEsyxMRqSQ8B80MI3wcYHBwtVvbfRDJkwvtC\nCK/FbTUCy+JYKYFkcDpIRp/8YtzGC3FIBoBzSUZa/VncZiWHxukxmzIOeitFuY6mOpxtcZmzgcGg\n/2vgJyGE34rDOzw0yjYEPBBCGMuAXGaTxk03VorGNJpqCKED2CLpMgBJVXEQMUiGSn4/8Jk4zgoc\nPlLk1VnP9zOSgexQcl2BM+L8XwDn6dAVseoknZKvF2k2Vg56K0WDI2auJRlF9cvA4CiN93P4KI1X\nAn8cm1t+DswZfCCEsIOkLf82SW8juVjMZ+Joqtm/hm8nGRJ3DcnImauBvSGEVpIvhO/G7T9GcjEY\nsynlsW6spBRiNNU47HRFCKE79tr5MfCGIVfIMisYt9GbTVwt8JN4URMBf+SQtzRxjd7MrMS5jd7M\nrMQ56M3MSpyD3sysxDnozcxKnIPezKzEOejNzErcfwBMMvAaOv+OJwAAAABJRU5ErkJggg==\n",
250 | "text/plain": [
251 | ""
252 | ]
253 | },
254 | "metadata": {},
255 | "output_type": "display_data"
256 | }
257 | ],
258 | "source": [
259 | "pack_series.sort_values(ascending=False)[:50].plot();"
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": 9,
265 | "metadata": {},
266 | "outputs": [
267 | {
268 | "data": {
269 | "text/plain": [
270 | "package\n",
271 | "pandas 406\n",
272 | "IPython 263\n",
273 | "prompt_toolkit 146\n",
274 | "matplotlib 92\n",
275 | "_pytest 91\n",
276 | "numpy 87\n",
277 | "ipykernel 71\n",
278 | "zmq 66\n",
279 | "jupyter_client 52\n",
280 | "py 51\n",
281 | "xlsxwriter 50\n",
282 | "email 49\n",
283 | "pygments 49\n",
284 | "ipywidgets 42\n",
285 | "pkg_resources 42\n",
286 | "traitlets 42\n",
287 | "dateutil 35\n",
288 | "bottleneck 27\n",
289 | "numexpr 25\n",
290 | "unittest 24\n",
291 | "dtype: int64"
292 | ]
293 | },
294 | "execution_count": 9,
295 | "metadata": {},
296 | "output_type": "execute_result"
297 | }
298 | ],
299 | "source": [
300 | "pack_series.sort_values(ascending=False)[:20]"
301 | ]
302 | },
303 | {
304 | "cell_type": "markdown",
305 | "metadata": {},
306 | "source": [
307 | "### 3.2 Anaconda 2 中模块信息"
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": null,
313 | "metadata": {
314 | "scrolled": false
315 | },
316 | "outputs": [
317 | {
318 | "name": "stdout",
319 | "output_type": "stream",
320 | "text": [
321 | "\n",
322 | "Please wait a moment while I gather a list of all available modules...\n",
323 | "\n"
324 | ]
325 | },
326 | {
327 | "name": "stderr",
328 | "output_type": "stream",
329 | "text": [
330 | "/root/anaconda2/lib/python2.7/site-packages/IPython/kernel/__init__.py:13: ShimWarning: The `IPython.kernel` package has been deprecated since IPython 4.0.You should import from ipykernel or jupyter_client instead.\n",
331 | " \"You should import from ipykernel or jupyter_client instead.\", ShimWarning)\n",
332 | "/root/anaconda2/lib/python2.7/site-packages/odo/backends/pandas.py:94: FutureWarning: pandas.tslib is deprecated and will be removed in a future version.\n",
333 | "You can access NaTType as type(pandas.NaT)\n",
334 | " @convert.register((pd.Timestamp, pd.Timedelta), (pd.tslib.NaTType, type(None)))\n",
335 | "/root/anaconda2/lib/python2.7/site-packages/blaze/server/server.py:17: ExtDeprecationWarning: Importing flask.ext.cors is deprecated, use flask_cors instead.\n",
336 | " from flask.ext.cors import cross_origin\n",
337 | "/root/anaconda2/lib/python2.7/site-packages/bokeh/util/deprecation.py:34: BokehDeprecationWarning: MPL compatibility can no longer be successfully maintained, and is now deprecated. All MPL compat functions will be removed completely on the release of Bokeh 1.0. See http://bokeh.pydata.org/en/latest/docs/releases/0.12.5.html for more information\n",
338 | " warn(message)\n",
339 | "/root/anaconda2/lib/python2.7/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The matplotlib.delaunay module was deprecated in version 1.4. Use matplotlib.tri.Triangulation instead.\n",
340 | " warnings.warn(message, mplDeprecation, stacklevel=1)\n",
341 | "/root/anaconda2/lib/python2.7/site-packages/nltk/twitter/__init__.py:20: UserWarning: The twython library has not been installed. Some functionality from the twitter package will not be available.\n",
342 | " warnings.warn(\"The twython library has not been installed. \"\n",
343 | "/root/anaconda2/lib/python2.7/site-packages/skimage/viewer/utils/core.py:10: UserWarning: Recommended matplotlib backend is `Agg` for full skimage.viewer functionality.\n",
344 | " warn(\"Recommended matplotlib backend is `Agg` for full \"\n",
345 | "/root/anaconda2/lib/python2.7/site-packages/qtawesome/iconic_font.py:268: UserWarning: You need to have a running QApplication to use QtAwesome!\n",
346 | " warnings.warn(\"You need to have a running \"\n",
347 | "/root/anaconda2/lib/python2.7/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.\n",
348 | " from pandas.core import datetools\n"
349 | ]
350 | },
351 | {
352 | "name": "stdout",
353 | "output_type": "stream",
354 | "text": [
355 | "BaseHTTPServer bs4 jupyter_core scripts\n",
356 | "Bastion bsddb keyword seaborn\n",
357 | "CDROM bz2 lazy_object_proxy select\n",
358 | "CGIHTTPServer cPickle lib2to3 sets\n",
359 | "Canvas cProfile linecache setuptools\n",
360 | "ConfigParser cStringIO linuxaudiodev sgmllib\n",
361 | "Cookie cairo llvmlite sha\n",
362 | "Crypto calendar locale shelve\n",
363 | "Cython cdecimal locket shlex\n",
364 | "DLFCN cffi logging shutil\n",
365 | "Dialog cgi lxml signal\n",
366 | "DocXMLRPCServer cgitb macpath simplegeneric\n",
367 | "FileDialog chardet macurl2path singledispatch\n",
368 | "FixTk chunk mailbox singledispatch_helpers\n",
369 | "HTMLParser click mailcap sip\n",
370 | "IN cloudpickle markupbase sipconfig\n",
371 | "IPython clyent markupsafe sipdistutils\n",
372 | "MimeWriter cmath marshal site\n",
373 | "OleFileIO_PL cmd math six\n",
374 | "OpenSSL code matplotlib skimage\n",
375 | "PIL codecs md5 sklearn\n",
376 | "PyQt5 codeop mhlib smtpd\n",
377 | "Queue collections mimetools smtplib\n",
378 | "ScrolledText colorama mimetypes sndhdr\n",
379 | "SimpleDialog colorsys mimify snowballstemmer\n",
380 | "SimpleHTTPServer command mistune socket\n",
381 | "SimpleXMLRPCServer commands mkl sortedcollections\n",
382 | "SocketServer compileall mmap sortedcontainers\n",
383 | "StringIO compiler modulefinder sphinx\n",
384 | "TYPES concurrent mpl_toolkits spwd\n",
385 | "Tix conda mpmath spyder\n",
386 | "Tkconstants conda_env msgpack spyder_breakpoints\n",
387 | "Tkdnd configparser multifile spyder_io_dcm\n",
388 | "Tkinter contextlib multipledispatch spyder_io_hdf5\n",
389 | "UserDict contextlib2 multiprocessing spyder_profiler\n",
390 | "UserList cookielib mutex spyder_pylint\n",
391 | "UserString copy navigator_updater sqlalchemy\n",
392 | "_LWPCookieJar copy_reg nbconvert sqlite3\n",
393 | "_MozillaCookieJar crypt nbformat sre\n",
394 | "__builtin__ cryptography netrc sre_compile\n",
395 | "__future__ csv networkx sre_constants\n",
396 | "_abcoll ctypes new sre_parse\n",
397 | "_ast curl nis ssl\n",
398 | "_bisect curses nltk stat\n",
399 | "_cffi_backend cycler nntplib statsmodels\n",
400 | "_codecs cython nose statvfs\n",
401 | "_codecs_cn cythonmagic notebook storemagic\n",
402 | "_codecs_hk cytoolz ntpath string\n",
403 | "_codecs_iso2022 dask nturl2path stringold\n",
404 | "_codecs_jp datashape numba stringprep\n",
405 | "_codecs_kr datetime numbers strop\n",
406 | "_codecs_tw dateutil numexpr struct\n",
407 | "_collections dbhash numpy subprocess\n",
408 | "_csv decimal numpydoc subprocess32\n",
409 | "_ctypes decorator odo sunau\n",
410 | "_ctypes_test difflib olefile sunaudio\n",
411 | "_curses dircache opcode symbol\n",
412 | "_curses_panel dis openpyxl sympy\n",
413 | "_elementtree distributed operator sympyprinting\n",
414 | "_functools distutils optparse symtable\n",
415 | "_hashlib doctest os sys\n",
416 | "_heapq docutils os2emxpath sysconfig\n",
417 | "_hotshot dumbdbm ossaudiodev syslog\n",
418 | "_io dummy_thread packaging tables\n",
419 | "_json dummy_threading pandas tabnanny\n",
420 | "_license easy_install pandocfilters tarfile\n",
421 | "_locale email parser tblib\n",
422 | "_lsprof encodings partd telnetlib\n",
423 | "_multibytecodec entrypoints path tempfile\n",
424 | "_multiprocessing enum pathlib2 terminado\n",
425 | "_osx_support errno patsy termios\n",
426 | "_posixsubprocess et_xmlfile pdb test_path\n",
427 | "_pyio exceptions pep8 test_pycosat\n",
428 | "_pytest extern pexpect testpath\n",
429 | "_random fastcache pickle tests\n",
430 | "_scandir fcntl pickleshare textwrap\n",
431 | "_socket filecmp pickletools this\n",
432 | "_sqlite3 fileinput pip thread\n",
433 | "_sre flask pipes threading\n",
434 | "_ssl flask_cors pkg_resources time\n",
435 | "_strptime fnmatch pkgutil timeit\n",
436 | "_struct formatter platform tkColorChooser\n",
437 | "_symtable fpformat plistlib tkCommonDialog\n",
438 | "_sysconfigdata fractions ply tkFileDialog\n",
439 | "_testcapi ftplib popen2 tkFont\n",
440 | "_threading_local funcsigs poplib tkMessageBox\n",
441 | "_tkinter functools posix tkSimpleDialog\n",
442 | "_vendor functools32 posixfile tlz\n",
443 | "_warnings future_builtins posixpath toaiff\n",
444 | "_weakref gc pprint token\n",
445 | "_weakrefset genericpath profile tokenize\n",
446 | "_yaml getopt prompt_toolkit toolz\n",
447 | "abc getpass pstats tornado\n",
448 | "aifc gettext psutil trace\n",
449 | "alabaster gevent pty traceback\n",
450 | "anaconda_navigator glob ptyprocess traitlets\n",
451 | "anaconda_project greenlet pwd ttk\n",
452 | "antigravity grin py tty\n",
453 | "anydbm grp py_compile turtle\n",
454 | "argparse gzip pyclbr types\n",
455 | "array h5py pycosat unicodecsv\n",
456 | "asn1crypto hashlib pycparser unicodedata\n",
457 | "ast heapdict pycurl unittest\n",
458 | "astroid heapq pydoc urllib\n",
459 | "astropy hmac pydoc_data urllib2\n",
460 | "asynchat hotshot pyexpat urlparse\n",
461 | "asyncore html5lib pyflakes user\n",
462 | "atexit htmlentitydefs pygments uu\n",
463 | "audiodev htmllib pylab uuid\n",
464 | "audioop httplib pylint warnings\n",
465 | "autoreload idlelib pyodbc wave\n",
466 | "babel idna pyparsing wcwidth\n",
467 | "backports ihooks pytest weakref\n",
468 | "backports_abc imagesize pytz webbrowser\n",
469 | "base64 imaplib pywt werkzeug\n",
470 | "bdb imghdr pyximport wheel\n",
471 | "binascii imp qtawesome whichdb\n",
472 | "binhex importlib qtconsole widgetsnbextension\n",
473 | "binstar_client imputil qtpy wrapt\n",
474 | "bisect inspect quopri wsgiref\n",
475 | "bitarray io random xdrlib\n",
476 | "blaze ipaddress re xlrd\n",
477 | "bleach ipykernel readline xlsxwriter\n",
478 | "bokeh ipykernel_launcher repr xlwt\n",
479 | "boto ipython_genutils requests xml\n",
480 | "bottleneck ipywidgets resource xmllib\n",
481 | "brain_builtin_inference isort rexec xmlrpclib\n",
482 | "brain_dateutil itertools rfc822 xxsubtype\n",
483 | "brain_gi itsdangerous rlcompleter yaml\n",
484 | "brain_mechanize jdcal rmagic zict\n",
485 | "brain_nose jedi robotparser zipfile\n",
486 | "brain_numpy jinja2 rope zipimport\n",
487 | "brain_pytest json ruamel_yaml zlib\n",
488 | "brain_qt jsonschema runpy zmq\n",
489 | "brain_six jupyter scandir \n",
490 | "brain_ssl jupyter_client sched \n",
491 | "brain_stdlib jupyter_console scipy \n",
492 | "\n",
493 | "Enter any module name to get more help. Or, type \"modules spam\" to search\n",
494 | "for modules whose descriptions contain the word \"spam\".\n",
495 | "\n"
496 | ]
497 | }
498 | ],
499 | "source": [
500 | "help(\"modules\")"
501 | ]
502 | }
503 | ],
504 | "metadata": {
505 | "kernelspec": {
506 | "display_name": "Python 2",
507 | "language": "python",
508 | "name": "python2.7"
509 | },
510 | "language_info": {
511 | "codemirror_mode": {
512 | "name": "ipython",
513 | "version": 2
514 | },
515 | "file_extension": ".py",
516 | "mimetype": "text/x-python",
517 | "name": "python",
518 | "nbconvert_exporter": "python",
519 | "pygments_lexer": "ipython2",
520 | "version": "2.7.13"
521 | }
522 | },
523 | "nbformat": 4,
524 | "nbformat_minor": 2
525 | }
526 |
--------------------------------------------------------------------------------
/Anaconda3_infos_analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "### Python 在数据科学方面需要用到的库:\n",
8 | "\n",
9 | "- numpy:科学计算库。提供矩阵运算的库。\n",
10 | "\n",
11 | "- pandas:数据分析处理库\n",
12 | "\n",
13 | "- scipy:数值计算库。提供数值积分和常微分方程组求解等算法,并提供了一个非常丰富的特殊函数集合。\n",
14 | "\n",
15 | "- Matplotlib:数据可视化库\n"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "### 1 当前路径信息(内核Python 3.6)"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 1,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "data": {
32 | "text/plain": [
33 | "['',\n",
34 | " '/root/anaconda3/lib/python36.zip',\n",
35 | " '/root/anaconda3/lib/python3.6',\n",
36 | " '/root/anaconda3/lib/python3.6/lib-dynload',\n",
37 | " '/root/anaconda3/lib/python3.6/site-packages',\n",
38 | " '/root/anaconda3/lib/python3.6/site-packages/Sphinx-1.5.6-py3.6.egg',\n",
39 | " '/root/anaconda3/lib/python3.6/site-packages/setuptools-27.2.0-py3.6.egg',\n",
40 | " '/root/anaconda3/lib/python3.6/site-packages/IPython/extensions',\n",
41 | " '/root/.ipython']"
42 | ]
43 | },
44 | "execution_count": 1,
45 | "metadata": {},
46 | "output_type": "execute_result"
47 | }
48 | ],
49 | "source": [
50 | "import sys\n",
51 | "\n",
52 | "sys.path"
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {},
58 | "source": [
59 | "### 2 科学计算常用包版本及路径信息"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 3,
65 | "metadata": {},
66 | "outputs": [
67 | {
68 | "data": {
69 | "text/html": [
70 | "\n",
71 | "\n",
84 | "
\n",
85 | " \n",
86 | " \n",
87 | " | \n",
88 | " pack_name | \n",
89 | " version | \n",
90 | " path | \n",
91 | "
\n",
92 | " \n",
93 | " \n",
94 | " \n",
95 | " 0 | \n",
96 | " numpy | \n",
97 | " 1.12.1 | \n",
98 | " /root/anaconda3/lib/python3.6/site-packages/numpy/__init__.py | \n",
99 | "
\n",
100 | " \n",
101 | " 1 | \n",
102 | " matplotlib | \n",
103 | " 2.0.2 | \n",
104 | " /root/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py | \n",
105 | "
\n",
106 | " \n",
107 | " 2 | \n",
108 | " pandas | \n",
109 | " 0.20.1 | \n",
110 | " /root/anaconda3/lib/python3.6/site-packages/pandas/__init__.py | \n",
111 | "
\n",
112 | " \n",
113 | " 3 | \n",
114 | " scipy | \n",
115 | " 0.19.0 | \n",
116 | " /root/anaconda3/lib/python3.6/site-packages/scipy/__init__.py | \n",
117 | "
\n",
118 | " \n",
119 | "
\n",
120 | "
"
121 | ],
122 | "text/plain": [
123 | " pack_name version \\\n",
124 | "0 numpy 1.12.1 \n",
125 | "1 matplotlib 2.0.2 \n",
126 | "2 pandas 0.20.1 \n",
127 | "3 scipy 0.19.0 \n",
128 | "\n",
129 | " path \n",
130 | "0 /root/anaconda3/lib/python3.6/site-packages/numpy/__init__.py \n",
131 | "1 /root/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py \n",
132 | "2 /root/anaconda3/lib/python3.6/site-packages/pandas/__init__.py \n",
133 | "3 /root/anaconda3/lib/python3.6/site-packages/scipy/__init__.py "
134 | ]
135 | },
136 | "execution_count": 3,
137 | "metadata": {},
138 | "output_type": "execute_result"
139 | }
140 | ],
141 | "source": [
142 | "import numpy\n",
143 | "import matplotlib\n",
144 | "import pandas as pd\n",
145 | "import scipy\n",
146 | "\n",
147 | "packs = [\n",
148 | " (\"numpy\", numpy.__version__, numpy.__file__),\n",
149 | " (\"matplotlib\", matplotlib.__version__, matplotlib.__file__),\n",
150 | " (\"pandas\", pd.__version__, pd.__file__),\n",
151 | " (\"scipy\", scipy.__version__, scipy.__file__)\n",
152 | "]\n",
153 | "\n",
154 | "pd_packages = pd.DataFrame(packs, columns=[\"pack_name\", \"version\", \"path\"])\n",
155 | "pd.set_option(\"max_colwidth\", 120)\n",
156 | "pd_packages"
157 | ]
158 | },
159 | {
160 | "cell_type": "markdown",
161 | "metadata": {},
162 | "source": [
163 | "### 3 Anaconda3 集成包简单分析"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 4,
169 | "metadata": {
170 | "collapsed": true
171 | },
172 | "outputs": [],
173 | "source": [
174 | "import sys\n",
175 | "%matplotlib inline\n",
176 | "\n",
177 | "packages = [pack.split(\".\")[0] for pack in sys.modules.keys()]\n",
178 | "pd_packages = pd.DataFrame(packages, columns=[\"package\"])\n",
179 | "pack_series = pd_packages.groupby(by=\"package\").size()"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 5,
185 | "metadata": {},
186 | "outputs": [
187 | {
188 | "data": {
189 | "text/plain": [
190 | "count 229.000000\n",
191 | "mean 4.978166\n",
192 | "std 16.714697\n",
193 | "min 1.000000\n",
194 | "25% 1.000000\n",
195 | "50% 1.000000\n",
196 | "75% 1.000000\n",
197 | "max 169.000000\n",
198 | "dtype: float64"
199 | ]
200 | },
201 | "execution_count": 5,
202 | "metadata": {},
203 | "output_type": "execute_result"
204 | }
205 | ],
206 | "source": [
207 | "pack_series.describe()"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": 6,
213 | "metadata": {},
214 | "outputs": [
215 | {
216 | "data": {
217 | "text/plain": [
218 | "229"
219 | ]
220 | },
221 | "execution_count": 6,
222 | "metadata": {},
223 | "output_type": "execute_result"
224 | }
225 | ],
226 | "source": [
227 | "pack_series.index.duplicated()\n",
228 | "pack_temp = pack_series.index.drop_duplicates()\n",
229 | "len(pack_temp)"
230 | ]
231 | },
232 | {
233 | "cell_type": "markdown",
234 | "metadata": {},
235 | "source": [
236 | "### 3.1 根据 Anaconda 3 包中的模块数量倒序绘图"
237 | ]
238 | },
239 | {
240 | "cell_type": "code",
241 | "execution_count": 10,
242 | "metadata": {},
243 | "outputs": [
244 | {
245 | "data": {
246 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEKCAYAAAAcgp5RAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8XNV99/HPV5slS/IuS7bB2AYvEIIdMCalQKAJNElD\ngDShEJKGJg1JkzZb8/RputK0adOmCc3zEEiAUHhSAll5QillLUsDhGAIttlkbDBgW5JlbGzJi6zl\n1z/ulT02siVrZjSj0ff9eumlO2fuvXPmSvPV0bnnnquIwMzMSldZoStgZmb55aA3MytxDnozsxLn\noDczK3EOejOzEuegNzMrcQ56M7MS56A3Mytxgwa9pOslbZL0dEbZDyQ9lX6tk/RUWj5H0q6M576d\nz8qbmdngKoawzg3AlcD/6y+IiN/pX5b0dWBbxvprI2LJ4VRi2rRpMWfOnMPZxMxszHviiSc2R0TD\nYOsNGvQR8ZCkOQM9J0nAhcBvHG4FM82ZM4fly5dnswszszFH0stDWS/bPvrTgbaIeCGjbG7abfOg\npNOz3L+ZmWVpKF03h3IxcHPG4xZgdkS8Jukk4P9LelNEbD9wQ0mXAZcBzJ49O8tqmJnZwQy7RS+p\nAngf8IP+sojoiojX0uUngLXAgoG2j4hrImJpRCxtaBi0i8nMzIYpm66bdwDPR8T6/gJJDZLK0+V5\nwHzgxeyqaGZm2RjK8MqbgUeBhZLWS/pY+tRF7N9tA3AGsDIdbvlj4JMRsSWXFTYzs8MzlFE3Fx+k\n/NIByn4C/CT7apmZWa74ylgzsxJXFEHfum03vX2+paGZWT4URdC3d3bR3tFV6GqYmZWkogh6gI3b\ndhW6CmZmJalogr7l9d2FroKZWUkqnqB3i97MLC+KIujLJDa6RW9mlhdFEfSV5XKL3swsT4ok6MvY\nuM0tejOzfCiaoG953S16M7N8KJKgF+2dXezp6St0VczMSk5RBH1VeRkR0Lbd3TdmZrlWFEFfWZ5U\no8X99GZmOVccQV/RH/Tupzczy7XiCPpyAXgsvZlZHhRF0JdJTKiucIvezCwPiiLoAWZOqnGL3sws\nD4om6GdMrHaL3swsD4on6CfVeNSNmVkeFE3Qz5xYzZYde9jd3VvoqpiZlZSiCfoZE2sAj6U3M8u1\nQYNe0vWSNkl6OqPsckkbJD2Vfr0747kvSVojqVnSbw61IjMmVQN4zhszsxwbSov+BuCdA5RfERFL\n0q87ACQdB1wEvCnd5ipJ5UOpyMy0Re9ZLM3McmvQoI+Ih4AtQ9zfecAtEdEVES8Ba4BlQ9mwaaJb\n9GZm+ZBNH/0fSVqZdu1MTstmAa9mrLM+LRtUdWU5U2ur3KI3M8ux4Qb91cA8YAnQAnz9cHcg6TJJ\nyyUtb29vB5J+eo+lNzPLrWEFfUS0RURvRPQB17Kve2YDcGTGqkekZQPt45qIWBoRSxsaGoBk5E2L\nr441M8upYQW9pBkZDy8A+kfk3AZcJGmcpLnAfOCXQ93vzInVbHSL3swspyoGW0HSzcCZwDRJ64G/\nBs6UtAQIYB3wCYCIeEbSD4FngR7g0xEx5CugZkyqoWN3D51dPdSNG7RqZmY2BIOmaURcPEDxdw+x\n/leArwynMjMyRt7Mb6wfzi7MzOwARXNlLCQzWILH0puZ5VJRBf0Mj6U3M8u5ogr6xgnVSG7Rm5nl\nUlEFfWV5GdPrx7lFb2aWQ0UV9JCOpXeL3swsZ4ow6D2W3swsl4ow6JOrYyOi0FUxMysJRRf0MydV\ns6u7l227ugtdFTOzklB0Qd9/p6mNnvPGzCwnii/o0ztNtW53P72ZWS4UXdDPdIvezCynii7oG+rH\nUVEmz0tvZpYjRRf05WWicUK156U3M8uRogt68Fh6M7NcKs6gn+SrY83McqUog37mxGpatvmiKTOz\nXCjKoJ8xsZo9PX28tmNPoatiZjbqFW
fQpzcg8QlZM7PsFWXQ7x1L7xOyZmZZK8qg77861vPSm5ll\nryiDfmptFVUVZR55Y2aWA4MGvaTrJW2S9HRG2dckPS9ppaRbJU1Ky+dI2iXpqfTr28OplKR0LL2D\n3swsW0Np0d8AvPOAsnuA4yPiBGA18KWM59ZGxJL065PDrdiMidXuujEzy4FBgz4iHgK2HFB2d0T0\npA9/ARyR64rN9C0FzcxyIhd99B8F/jPj8dy02+ZBSacfbCNJl0laLml5e3v7G56fMama1u276e3z\nRVNmZtnIKugl/TnQA9yUFrUAsyNiCfAF4PuSJgy0bURcExFLI2JpQ0PDG56fMbGG3r6gvaMrmyqa\nmY15ww56SZcC7wEuiXSugojoiojX0uUngLXAguHsf2Y6xNJj6c3MsjOsoJf0TuBPgPdGxM6M8gZJ\n5enyPGA+8OJwXqP/loK+OtbMLDsVg60g6WbgTGCapPXAX5OMshkH3CMJ4BfpCJszgC9L6gb6gE9G\nxJYBdzyI/qtjfQMSM7PsDBr0EXHxAMXfPci6PwF+km2lACbUVDC+qty3FDQzy1JRXhkL+y6acove\nzCw7RRv0ADMn1fjqWDOzLBV30E+sYcNWt+jNzLJR1EF/9PRaNnd2sdU3IDEzG7aiDvqFTcm1Vs1t\nHQWuiZnZ6FXUQb+oqR6A5lYHvZnZcBV10E+vH8fEmkqed9CbmQ1bUQe9JBY21dPcur3QVTEzG7WK\nOugh6b5Z3dZJOp2OmZkdpqIP+oVN9XR29bDBNyExMxuWog96n5A1M8tO0Qf9/MYk6H1C1sxseIo+\n6CdUVzJrUo1b9GZmw1T0QQ9JP/1qXzRlZjYsoybo17Z30t3bV+iqmJmNOqMj6Bvr6e4NXmzfUeiq\nmJmNOqMj6Jv6T8j6wikzs8M1KoL+6IY6KsrkE7JmZsMwKoK+qqKMeQ21PiFrZjYMoyLoIZmy2GPp\nzcwO36BBL+l6SZskPZ1RNkXSPZJeSL9PznjuS5LWSGqW9Ju5qujCxjrWb91FZ1dPrnZpZjYmDKVF\nfwPwzgPK/hS4LyLmA/elj5F0HHAR8KZ0m6skleeiontvQuJWvZnZYRk06CPiIWDLAcXnATemyzcC\n52eU3xIRXRHxErAGWJaLivbPeeN+ejOzwzPcPvrGiGhJl1uBxnR5FvBqxnrr07KszZpUQ21VuVv0\nZmaHKeuTsZFMFH/Yk8VLukzScknL29vbB12/rEzMb6z3WHozs8M03KBvkzQDIP2+KS3fAByZsd4R\nadkbRMQ1EbE0IpY2NDQM6UUXNdXT3Nrhm5CYmR2G4Qb9bcBH0uWPAD/LKL9I0jhJc4H5wC+zq+I+\nC5vq2bqzm/aOrlzt0sys5A1leOXNwKPAQknrJX0M+CpwtqQXgHekj4mIZ4AfAs8CdwKfjojeXFW2\nfyqEZp+QNTMbsorBVoiIiw/y1NsPsv5XgK9kU6mDWdi4725Tp88fWnePmdlYN2qujAWYWjeOaXXj\nfIWsmdlhGFVBD/tOyJqZ2dCMuqDvv9tUb59H3piZDcWoDPqunj5e2bKz0FUxMxsVRl/Q7z0h6wun\nzMyGYtQF/YLGeiR8QtbMbIhGXdDXVJVz1JTxPiFrZjZEoy7oIemn90VTZmZDM0qDfgLrNu9gd3fO\nLro1MytZozPoG+vpC1izqbPQVTEzK3qjM+jTOW98QtbMbHCjMujnTB1PVUWZh1iamQ3BqAz6ivIy\nFjXVc+uvNnDLL1+hu7ev0FUyMytaozLoAf7+gjdzxOTx/OlPV3H2Nx7kZ09toM/TIpiZvcGoDfrj\nZ03k1k+dynW/u5TqynI+e8tTvOub/81dz7T6DlRmZhlGbdADSOIdxzVyx2dO5/9e/Ba6e/v4xPee\n4PxvPex7y5qZpUZ10PcrKxPnLp7J3Z8/g3/67RNYv3UX//vHK92yNzOjRIK+X0V5GReefCR/fM5C\nVq
zfxs/XbC50lczMCq6kgr7fb580i6YJ1Vz5X2sKXRUzs4IryaAfV1HOx8+Yx2MvbeHxdVsKXR0z\ns4IqyaAHuHjZkUyprXKr3szGvGEHvaSFkp7K+Nou6XOSLpe0IaP83bms8FCNr6rgY6fN5cHV7axa\nv60QVTAzKwrDDvqIaI6IJRGxBDgJ2Ancmj59Rf9zEXFHLio6HB/+taOor67gW/e7VW9mY1euum7e\nDqyNiJdztL+cmFBdyaWnzuHOZ1p5wfPXm9kYlaugvwi4OePxH0laKel6SZNz9BrD8nu/PpeaynKu\nemBtIathZlYwWQe9pCrgvcCP0qKrgXnAEqAF+PpBtrtM0nJJy9vb27OtxkFNqa3iklNmc9uKjbzy\n2s68vY6ZWbHKRYv+XcCTEdEGEBFtEdEbEX3AtcCygTaKiGsiYmlELG1oaMhBNQ7u42fMo1zi6gfd\nqjezsScXQX8xGd02kmZkPHcB8HQOXiMrjROq+cDSI/jxE6/Ssm1XoatjZjaisgp6SbXA2cBPM4r/\nSdIqSSuBs4DPZ/MaufLJtx1NX8A1D71Y6KqYmY2oimw2jogdwNQDyj6cVY3y5Mgp4zl/ySxu/uUr\nfPqsY5hWN67QVTIzGxEle2XsQD75tnns7u7jP1a2FLoqZmYjZkwF/THT65g0vtI3FTezMWVMBb0k\nFkyv98VTZjamjKmgB1jQVEdzW4dvSmJmY8bYC/rGejp299C2vavQVTEzGxFjMugBmt19Y2ZjxJgN\nevfTm9lYMeaCfkptFdPqxtHskTdmNkaMuaAHWNBYx+pNnYWuhpnZiBijQZ8Msezr88gbMyt9Yzbo\nd+7pZcPrnuDMzErfmAz6hU11AKz2CVkzGwPGZNAfMz0ZebO6zf30Zlb6xmTQT6ypZMbEarfozWxM\nGJNBDzC/sd5Bb2ZjwpgN+oWNdazZ1EmvR96YWYkbs0E/v7Gerp4+XtniG4abWWkbs0HfPxWCu2/M\nrNSN2aCfPz0dYumpEMysxI3ZoK8dV8ERk2s8FYKZlbysbg4uaR3QAfQCPRGxVNIU4AfAHGAdcGFE\nbM2umvmxsLHeLXozK3m5aNGfFRFLImJp+vhPgfsiYj5wX/q4KM1vrOfFzZ109/YVuipmZnmTj66b\n84Ab0+UbgfPz8Bo5sbCpju7eYN3mHYWuiplZ3mQb9AHcK+kJSZelZY0R0ZIutwKNWb5G3sz3VAhm\nNgZk1UcPnBYRGyRNB+6R9HzmkxERkga8Iin9w3AZwOzZs7OsxvAcM72OMiW3FfwtZhSkDmZm+ZZV\niz4iNqTfNwG3AsuANkkzANLvmw6y7TURsTQiljY0NGRTjWGrriznqKm1vq2gmZW0YQe9pFpJ9f3L\nwDnA08BtwEfS1T4C/CzbSubTgsY63yjczEpaNl03jcCtkvr38/2IuFPS48APJX0MeBm4MPtq5s+C\nxnrufW4Tu7t7qa4sL3R1zMxybthBHxEvAosHKH8NeHs2lRpJCxrr6e0LXmzfwXEzJxS6OmZmOTdm\nr4zt1z/nzQub3H1jZqVpzAf93Gm1VJSJZl8ha2YlaswHfVVFGXOn1XosvZmVrDEf9AALmny3KTMr\nXQ56YMH0el7dupNde3oLXRUzs5xz0JOMpY+ANZ6y2MxKkIOepOsG8IVTZlaSHPTAUVPGU1Ve5qkQ\nzKwkOeiBivIyjp7uqRDMrDQ56FMLGut4wUMszawEOehTCxrr2fD6Ljp2dxe6KmZmOeWgTx07Izkh\n+/W7V9PV42GWZlY6HPSpty2YziWnzOaGR9Zx3pUP8+zG7YWukplZTjjoU+Vl4isXvJl/vfRkXtux\nh/O+9XOufmAtvX0D3iDLzGzUcNAf4KxF07nrc2fwjmMb+cc7n+eiax7l1S07C10tM7Nhc9APYEpt\nFVddciLfuHAxz7d08M5/eYgbHn6Jte2d9PT2Fbp6ZmaHJdubg5cs
SbzvxCNYNncKX/zRCi7/92eB\nZLbL+dPrWNhYz4KmehY21fOWIycxaXxVgWtsZjYwRRS+D3rp0qWxfPnyQlfjoPr6gmdbtvN8awer\n2zpoTr+3bNsNwKTxldz88bdy7AzfocrMRo6kJyJi6WDruUU/BGVl4vhZEzl+1sT9yrft7OaZjdv4\n4x+t4EPXPcYPPvFWjpleX6BampkNzH30WZg4vpJTj5nGTb9/CmVl4oPXPsa6zTsKXS0zs/046HNg\nXkMdN/3+KXT39nHJdY+x4fVdha6Smdleww56SUdKul/Ss5KekfTZtPxySRskPZV+vTt31S1eCxrr\n+d7HTmH77m4+eO0vaNu+u9BVMjMDsmvR9wB/HBHHAW8FPi3puPS5KyJiSfp1R9a1HCWOnzWRGz+6\njM0dXVxy3WNs7uwqdJXMzIYf9BHREhFPpssdwHPArFxVbLQ6cfZkvnvpyazfupMPXfcYLdt2sX13\n94BfxTDiycxKX06GV0qaAzwEHA98Afg9YBuwnKTVv/VQ2xf78MrheGh1O79/43L2HOICq9lTxnPu\n4hm8d/EsFjZ5tI6ZHZ6hDq/MOugl1QEPAl+JiJ9KagQ2AwH8LTAjIj46wHaXAZcBzJ49+6SXX345\nq3oUoxWvvs7j67YM+FxPX/Dwms08svY1evuCBY11vHfxTM5dPJOjptaOcE3NbDQakaCXVAncDtwV\nEd8Y4Pk5wO0Rcfyh9lOKLfqh2tzZxX+uauG2FRt5fF3yj8/iIyby0dPmcu4JMykrU4FraGbFKu9B\nL0nAjcCWiPhcRvmMiGhJlz8PnBIRFx1qX2M56DNtfH0Xt6/cyI+Wr+eFTZ0saqrnC2cv4OzjGkkO\nt5nZPiMR9KcB/w2sAvo7ov8MuBhYQtJ1sw74RH/wH4yDfn99fcHtq1q44p7VvLR5B4uPnMQXz1nA\nacdMc+Cb2V4j1kefCw76gfX09vGTJ9fzzXtfYOO23ZwydwpfOHsBS+dModxdOmZjnoO+hHT19HLz\nY69w5f1r2dzZxbiKMo5JZ9Bc2JTOotlYz4yJ1W7xm40hDvoStHNPD3c+3cpzLdtpbutkdWsHrRlX\n4NZXV+ydPnlRUz0LGpPvnkLZrDR59soSNL6qgvedeMR+Za/v3MPqtk6a2zpobt3O6tZObl+xke8/\n1rN3nen141iYtvr7W//zG+sYX+Ufv9lY4E/6KDdpfBXL5k5h2dwpe8sigrbtXTS3dbC6tYPnWzto\nbtvO937xMl09yXlzKblga0FjPSfPmcx7TpjJzEk1hXobZpZH7roZQ3r7gle27KS5dd/NU55r3c6L\n7cnUyifPmcx7F8/kXW+ewbS6cQWurZkNxn30NmTrNu/g9pUbuW3FRla3dVJeJk49eirnLp7Jb76p\niYk1lYWuopkNwEFvw9Lc2sFtKzbw7ytaeGXLTqrKyzhzYQPnLp7JO45tpKaqvNBVNLOUg96yEhGs\nWL+N257ayO0rN7Kpo4vxVeWcfVwj554wkzMWNFBV4fvWmBWSg95yprcveOyl1/j3FRu5Y1Ur23Z1\nU19dwdTagYdtVleWc/Gy2Vy07EjGVfg/ALN8cdBbXuzp6ePna9q559lN7NzTM+A6r27ZyZOvvM6s\nSTV89u3zed+Js6god+vfLNcc9FYwEcHP12zmn+9qZsX6bcybVsvnzl7Ae948w7NxmuWQg94KLiK4\n+9k2vnH3aprbOljUVM+nzzqGk46a7OkazHLAQW9Fo7cvuH3lRq64ZzXrXtsJDDxdw6xJNRws+yeP\nr6J2nK/vM8vkoLei093bx5Mvb2V1W0c6ZUNy1W7H7oH7+g90xOSavX8UFjYlX/Om1Xn0j41ZnuvG\nik5leRmnzJvKKfOm7i2LCFq376a5tYNNHV0DbxiwqWM3zW2dNLdu54Hmdnr6kgZKRZmYO612v7l8\nFjXVc+Tk8T4fYJZy0FtBSWLG
xBpmTBz6PDt7evp4cXPnflM5rFj/Orev3Hd/m5rKco6eXnvQidv6\n/0Bk/ofgWT6tVDnobdSpqihjUdMEFjVN2K+8s6uHF9qS4H++tYO17TvY09M74D52dfdy24qN3JQx\ny2fjhHEsaKxnztTanN/YpXFCdfJHpamemT4RbSPMQW8lo25cBW+ZPZm3zJ48pPUzu42aW/edN1i5\nfhu5PHcVAR1d+/6g1I+rSKaLbqpn/vQ6aot9umjBkZPHs6ipnskHuUjOiluR/4aZ5U9mt9GZC6fn\n9bW27exO/pCkU0c3t3bwHytb2LarO6+vm2sN9eP2dXc11nP09Fqqyg//6ufaceXMnjLeF9KNEAe9\n2QiYOL5ywPsGbO7cw57evgLWbHA9vX2se23n3nsbrG7r4KbHXmZ3d3b1rqooY/4Bt8Rc1FTPtLpx\njFTHVpk0Jk7aO+jNCkQSDfWjY97/o6bW8rYFDXsf9/YFr27ZyUubd9Dbd/jdXFt27uGFtg6a2zp5\neO1mfvqrDbms7pBVlot50+r2Dtft/6Mza1JNSf0ByFvQS3on8E2gHLguIr6ar9cys5FVXibmTKtl\nzrTanOxv7y0xW7fz+s6R687q7OrhhU2dPPHyVm5bsXFveW1VOTMm1YzYfxb5lpegl1QOfAs4G1gP\nPC7ptoh4Nh+vZ2aj20C3xBxpHbu7Wd3WmVzQ19rBpo7dBavLUN07xPXy1aJfBqyJiBcBJN0CnAc4\n6M2sKNVXV3LSUZM56aihjdoqBld/aGjr5euU9yzg1YzH69OyvSRdJmm5pOXt7e15qoaZmRVsbFNE\nXBMRSyNiaUNDw+AbmJnZsOQr6DcAR2Y8PiItMzOzEZavoH8cmC9prqQq4CLgtjy9lpmZHUJeTsZG\nRI+kPwTuIhleeX1EPJOP1zIzs0PL2zj6iLgDuCNf+zczs6HxRBNmZiXOQW9mVuKK4laCkjqA5kLX\no8hMAzYXuhJFxMdjfz4ebzQWj8lRETHo+PRimdSseSj3PRxLJC33MdnHx2N/Ph5v5GNycO66MTMr\ncQ56M7MSVyxBf02hK1CEfEz25+OxPx+PN/IxOYiiOBlrZmb5UywtejMzy5OiDXpJD0gq6jPokh4Z\n5nZzJD2d6/oM8pqXS/riSL7m4dZB0vmSjsti/3MkfTDj8VJJ/yddvlTSlcPd92hRDD/noZD0OUnj\nC12PsaJog340iIhTR/o1JRXLkNh8OB8YdtADc4C9QR8RyyPiM9lWyvLic0BOgj69o50dQtZBn7ai\nnpd0k6TnJP1Y0nhJfyXpcUlPS7pGktL1H5D0j5J+KWm1pNPT8hpJt6T7uBWoyXiNq9OblDwj6W8y\nyr8q6VlJKyX9c7bvZRjvvTP9fqakhyT9h6RmSd+WVCbpo5L+JWP9j0u64oB9zJP0K0knSyqX9LX0\nuK2U9ImM/f+3pNuAZ9Nj/pyka9NjcrekmnTdoyXdKemJdJtFI3hI3kDSn6c/558DC9Oyj6fvcYWk\nn6S/L6cC7wW+Jump9H0M+F4k3SDp/Rmv0ZkufhU4Pd3+8+lxu32E33LW0t+FlZKqJdWmP+M/lPSg\npJ9JejH93b8k/RytknR0oes9kIPkw2eAmcD9ku4/2OfkYNmSrrMuzZEngQ9IWiLpF+lxu1XS5HS9\nYyTdm/6uPdl/nCT9r4zP2d+kZbXpZ3iFktz6nbS8oDmTExGR1RdJKyqAX08fXw98EZiSsc73gHPT\n5QeAr6fL7wbuTZe/QDLLJcAJQA+wNH08Jf1enm5/AjCV5Gra/hPKk7J9L8N4753p9zOB3cC8tI73\nAO8H6oC1QGW63iPAm9Nj9jRJ8P0KWJw+fxnwF+nyOGA5MDfd/w5gbsYx7wGWpI9/CHwoXb4PmJ8u\nnwL8V7p8OfDFET4+JwGrSFpuE4A16e/G1Ix1/g74o3T5BuD9Gc8d7L0cuF7mz+H2jPK9j4FLgS
tH\n+ncki2P3d8A/k9x7+Uvpe3kdmJH+bmwA/iZd97PAvxTq5zzI+5jDwPmwDpiWlh3qc/KGbdPldcCf\nZLzOSuBt6fKXM47HY8AF6XJ1+rt4DskIHZE0dm8HzgB+G7g2Y58TKYKcycVXrrpuXo2Ih9PlfwNO\nA86S9JikVcBvAG/KWP+n6fcnSH6YpAf63wAiYmX6g+t3YfqX+1fpfo4DtpGE63clvQ/YmaP3Mly/\njIgXI6IXuBk4LSI6gf8C3pO2RisjYlW6fgPwM+CSiFiRlp0D/K6kp0h+QacC8zP2/1LG670UEU+l\ny08AcyTVAacCP0r38R2SYCiU04FbI2JnRGxn3z0Jjk9b6KuAS9j/dwOAInwvI+3LwNnAUuCf0rLH\nI6IlIrpIgvHutHwV+z5HxWigfNhrkM/Jobb9AYCkiSQB/GBafiNwhqR6YFZE3Jq+zu6I2EnyOTuH\nJE+eBBaRfM5WAWen/ymcHhHbKL6cGZZc9fceOEYzgKtIWuSvSrqc5K9pv670e+9gdZA0l6QFcHJE\nbJV0A1AdyZz3y4C3k7Se/5DkD0qhDHQMAK4D/gx4HvjXjOe3Aa+Q/OL23zRdJK3buzJ3JOlMkhZ9\npq6M5V6Srq4y4PWIWDK8tzBibgDOj4gVki4laa0e6FDvpSd9HkllQFV+qllQU0laupXs++xk/sz7\nMh73UTzTmQzkYJ+NTAf7nBxq2wM/E0Ml4B8i4jtveEI6kaSn4e8k3RcRXy6ynBmWXLXoZ0v6tXT5\ng8DP0+XNacvs/QNvtp+H0m2RdDxJ9wwk//LvALZJagTela5TB0yMZN77zwOLc/FGsrBMyR21yoDf\nIT0GEfEYyW0VP0jS0u+3B7iApAXffwLxLuAPJFUCSFogqXaoFUhbzS9J+kC6vSQV8rg8BJyv5PxL\nPXBuWl4PtKTv85KM9TvS5wZ7L+tIuoUg6devPHD7EvAd4C+Bm4B/LHBdsjVQPuz3szrE5+Rg2ULG\nttuArUrP9wEfBh6MiA5gvaTzASSNS/v47wI+mmYIkmZJmi5pJrAzIv4N+BpwYhHmzLDkqhXQDHxa\n0vUkrdOrgckk/dCtJLcWHMzVwL9Keg54jqQ7grTV9yuSv/SvAv3/xtUDP5NUTfIX+gs5ei/D9Thw\nJXAMcD9wa8ZzPyTpT9+auUFE7JD0HuAeJScUryP5F/xJSQLaSUaiHI5LgKsl/QVJAN4CrDj0JvkR\nEU9K+kH6+pvY93vwlyRdU+3p9/4P/C3AtenJuvdz8PdyLcnPfgVwJ/tadiuB3rT8BpJ/zUcdSb8L\ndEfE95XMb72VAAACs0lEQVSMKHmEfd2do9FA+bAHuFPSxog4K11voM/JQNsO5CPAt9MgfxH4vbT8\nw8B3JH0Z6AY+EBF3SzoWeDT5mNEJfIjks/s1SX3pun9A8eXMsGR9ZaykOSQnvI7PRYVGo7Rr5YsR\n8Z6DPH87cEVE3DeiFTMrsMPJhwM/J86W3PE4+jySNEnSamCXQ95sYP6c5J/nujEzK3Fu0ZuZlTgH\nvZlZiXPQm5mVOAe92QE0SufIMTsYB72ZWYlz0FtJ0eHPpjrg7IYZ+ztZyeyiR0taJunR9PEjkvpn\n4xwv6YdKZji8VckcT0vT585Jt3lS0o/6r8Y0G0kOeitFC4GrIuJYYDvwKZKZK09OL76pAfovbrsJ\n+FZELCaZRK2lfydKpk7+NnBeRKwluTr79Ih4C/BXwN+nq34K2BoRx5Fc9XtSuv004C+Ad0TEiSSz\nkY7KKyttdCvmiZDMhuvAGQ8/QzJvzp+QTFM7BXhG0gMcMLshQNrYP5ZkKttzImJjuq+JwI2S5pNM\nrtU/x85pwDfTfTwtqX/m1beSzLT6cLrPKuDRfLxhs0Nx0FspOtzZVAfSkq7zFqA/6P8WuD8iLkgv\nz39gkH0IuCciLh5yzc3ywF03VoqGNJvqIWY3hOQmH78F/E
M6lxEkLfoN6fKlGa/3MHBhuo/jSG6a\nAfAL4NclHZM+VytpQa7epNlQOeitFPXPePgcySyqV5PMePk0yRS1mbOpfhj4TNrd8gjQ1P9ERLSR\n9OV/S9IpJDcA+Yd0NtXM/4avAhokPUtyZ6hngG0R0U7yB+HmdP+PktzkwmxEea4bKymFmPEwnUq4\nMiJ2p6N27gUWRsSekaqD2aG4j94se+NJbnRdSdIv/ymHvBUTt+jNzEqc++jNzEqcg97MrMQ56M3M\nSpyD3sysxDnozcxKnIPezKzE/Q/9/LoiMkkCBQAAAABJRU5ErkJggg==\n",
247 | "text/plain": [
248 | ""
249 | ]
250 | },
251 | "metadata": {},
252 | "output_type": "display_data"
253 | }
254 | ],
255 | "source": [
256 | "pack_series.sort_values(ascending=False)[:50].plot();"
257 | ]
258 | },
259 | {
260 | "cell_type": "code",
261 | "execution_count": 11,
262 | "metadata": {},
263 | "outputs": [
264 | {
265 | "data": {
266 | "text/plain": [
267 | "package\n",
268 | "pandas 169\n",
269 | "IPython 118\n",
270 | "matplotlib 85\n",
271 | "numpy 82\n",
272 | "prompt_toolkit 70\n",
273 | "_pytest 36\n",
274 | "xlsxwriter 32\n",
275 | "zmq 31\n",
276 | "ipywidgets 25\n",
277 | "py 25\n",
278 | "ipykernel 23\n",
279 | "pygments 20\n",
280 | "jupyter_client 19\n",
281 | "pkg_resources 18\n",
282 | "bottleneck 17\n",
283 | "dtype: int64"
284 | ]
285 | },
286 | "execution_count": 11,
287 | "metadata": {},
288 | "output_type": "execute_result"
289 | }
290 | ],
291 | "source": [
292 | "pack_series.sort_values(ascending=False)[:15]"
293 | ]
294 | },
295 | {
296 | "cell_type": "markdown",
297 | "metadata": {},
298 | "source": [
299 | "### 3.2 Anaconda 3 中模块信息"
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": 14,
305 | "metadata": {
306 | "scrolled": false
307 | },
308 | "outputs": [
309 | {
310 | "name": "stdout",
311 | "output_type": "stream",
312 | "text": [
313 | "\n",
314 | "Please wait a moment while I gather a list of all available modules...\n",
315 | "\n"
316 | ]
317 | },
318 | {
319 | "name": "stderr",
320 | "output_type": "stream",
321 | "text": [
322 | "/root/anaconda3/lib/python3.6/site-packages/IPython/kernel/__init__.py:13: ShimWarning: The `IPython.kernel` package has been deprecated since IPython 4.0.You should import from ipykernel or jupyter_client instead.\n",
323 | " \"You should import from ipykernel or jupyter_client instead.\", ShimWarning)\n",
324 | "/root/anaconda3/lib/python3.6/site-packages/odo/backends/pandas.py:94: FutureWarning: pandas.tslib is deprecated and will be removed in a future version.\n",
325 | "You can access NaTType as type(pandas.NaT)\n",
326 | " @convert.register((pd.Timestamp, pd.Timedelta), (pd.tslib.NaTType, type(None)))\n",
327 | "/root/anaconda3/lib/python3.6/site-packages/blaze/server/server.py:17: ExtDeprecationWarning: Importing flask.ext.cors is deprecated, use flask_cors instead.\n",
328 | " from flask.ext.cors import cross_origin\n",
329 | "/root/anaconda3/lib/python3.6/site-packages/bokeh/util/deprecation.py:34: BokehDeprecationWarning: MPL compatibility can no longer be successfully maintained, and is now deprecated. All MPL compat functions will be removed completely on the release of Bokeh 1.0. See http://bokeh.pydata.org/en/latest/docs/releases/0.12.5.html for more information\n",
330 | " warn(message)\n",
331 | "/root/anaconda3/lib/python3.6/site-packages/matplotlib/cbook.py:136: MatplotlibDeprecationWarning: The matplotlib.delaunay module was deprecated in version 1.4. Use matplotlib.tri.Triangulation instead.\n",
332 | " warnings.warn(message, mplDeprecation, stacklevel=1)\n",
333 | "/root/anaconda3/lib/python3.6/site-packages/nltk/twitter/__init__.py:20: UserWarning: The twython library has not been installed. Some functionality from the twitter package will not be available.\n",
334 | " warnings.warn(\"The twython library has not been installed. \"\n",
335 | "/root/anaconda3/lib/python3.6/site-packages/skimage/viewer/utils/core.py:10: UserWarning: Recommended matplotlib backend is `Agg` for full skimage.viewer functionality.\n",
336 | " warn(\"Recommended matplotlib backend is `Agg` for full \"\n",
337 | "/root/anaconda3/lib/python3.6/site-packages/qtawesome/iconic_font.py:268: UserWarning: You need to have a running QApplication to use QtAwesome!\n",
338 | " warnings.warn(\"You need to have a running \"\n",
339 | "/root/anaconda3/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.\n",
340 | " from pandas.core import datetools\n"
341 | ]
342 | },
343 | {
344 | "name": "stdout",
345 | "output_type": "stream",
346 | "text": [
347 | "Crypto brain_nose jupyter_core scipy\n",
348 | "Cython brain_numpy keyword scripts\n",
349 | "IPython brain_pytest lazy_object_proxy seaborn\n",
350 | "OleFileIO_PL brain_qt lib2to3 secrets\n",
351 | "OpenSSL brain_six linecache select\n",
352 | "PIL brain_ssl llvmlite selectors\n",
353 | "PyQt5 brain_stdlib locale setuptools\n",
354 | "__future__ bs4 locket shelve\n",
355 | "_ast builtins logging shlex\n",
356 | "_asyncio bz2 lxml shutil\n",
357 | "_bisect cProfile lzma signal\n",
358 | "_blake2 calendar macpath simplegeneric\n",
359 | "_bootlocale cffi macurl2path singledispatch\n",
360 | "_bz2 cgi mailbox singledispatch_helpers\n",
361 | "_cffi_backend cgitb mailcap sip\n",
362 | "_codecs chardet markupsafe sipconfig\n",
363 | "_codecs_cn chunk marshal sipdistutils\n",
364 | "_codecs_hk click math site\n",
365 | "_codecs_iso2022 cloudpickle matplotlib six\n",
366 | "_codecs_jp clyent mimetypes skimage\n",
367 | "_codecs_kr cmath mistune sklearn\n",
368 | "_codecs_tw cmd mkl smtpd\n",
369 | "_collections code mmap smtplib\n",
370 | "_collections_abc codecs modulefinder sndhdr\n",
371 | "_compat_pickle codeop mpl_toolkits snowballstemmer\n",
372 | "_compression collections mpmath socket\n",
373 | "_crypt colorama msgpack socketserver\n",
374 | "_csv colorsys multipledispatch sortedcollections\n",
375 | "_ctypes compileall multiprocessing sortedcontainers\n",
376 | "_ctypes_test concurrent navigator_updater sphinx\n",
377 | "_curses conda nbconvert spwd\n",
378 | "_curses_panel conda_env nbformat spyder\n",
379 | "_datetime configparser netrc spyder_breakpoints\n",
380 | "_decimal contextlib networkx spyder_io_dcm\n",
381 | "_dummy_thread contextlib2 nis spyder_io_hdf5\n",
382 | "_elementtree copy nltk spyder_profiler\n",
383 | "_functools copyreg nntplib spyder_pylint\n",
384 | "_hashlib crypt nose sqlalchemy\n",
385 | "_heapq cryptography notebook sqlite3\n",
386 | "_imp csv ntpath sre_compile\n",
387 | "_io ctypes nturl2path sre_constants\n",
388 | "_json curl numba sre_parse\n",
389 | "_license curses numbers ssl\n",
390 | "_locale cycler numexpr stat\n",
391 | "_lsprof cython numpy statistics\n",
392 | "_lzma cythonmagic numpydoc statsmodels\n",
393 | "_markupbase cytoolz odo storemagic\n",
394 | "_md5 dask olefile string\n",
395 | "_multibytecodec datashape opcode stringprep\n",
396 | "_multiprocessing datetime openpyxl struct\n",
397 | "_opcode dateutil operator subprocess\n",
398 | "_operator dbm optparse sunau\n",
399 | "_osx_support decimal os symbol\n",
400 | "_pickle decorator ossaudiodev sympy\n",
401 | "_posixsubprocess difflib packaging sympyprinting\n",
402 | "_pydecimal dis pandas symtable\n",
403 | "_pyio distributed pandocfilters sys\n",
404 | "_pytest distutils parser sysconfig\n",
405 | "_random doctest partd syslog\n",
406 | "_sha1 docutils path tables\n",
407 | "_sha256 dummy_threading pathlib tabnanny\n",
408 | "_sha3 easy_install pathlib2 tarfile\n",
409 | "_sha512 email patsy tblib\n",
410 | "_signal encodings pdb telnetlib\n",
411 | "_sitebuiltins entrypoints pep8 tempfile\n",
412 | "_socket enum pexpect terminado\n",
413 | "_sqlite3 errno pickle termios\n",
414 | "_sre et_xmlfile pickleshare test_path\n",
415 | "_ssl fastcache pickletools test_pycosat\n",
416 | "_stat faulthandler pip testpath\n",
417 | "_string fcntl pipes tests\n",
418 | "_strptime filecmp pkg_resources textwrap\n",
419 | "_struct fileinput pkgutil this\n",
420 | "_symtable flask platform threading\n",
421 | "_sysconfigdata_m_linux_x86_64-linux-gnu flask_cors plistlib time\n",
422 | "_testbuffer fnmatch ply timeit\n",
423 | "_testcapi formatter poplib tkinter\n",
424 | "_testimportmultiple fractions posix tlz\n",
425 | "_testmultiphase ftplib posixpath token\n",
426 | "_thread functools pprint tokenize\n",
427 | "_threading_local gc profile toolz\n",
428 | "_tkinter genericpath prompt_toolkit tornado\n",
429 | "_tracemalloc getopt pstats trace\n",
430 | "_warnings getpass psutil traceback\n",
431 | "_weakref gettext pty tracemalloc\n",
432 | "_weakrefset gevent ptyprocess traitlets\n",
433 | "_yaml glob pwd tty\n",
434 | "abc greenlet py turtle\n",
435 | "aifc grp py_compile turtledemo\n",
436 | "alabaster gzip pyclbr types\n",
437 | "anaconda_navigator h5py pycosat typing\n",
438 | "anaconda_project hashlib pycparser unicodecsv\n",
439 | "antigravity heapdict pycurl unicodedata\n",
440 | "argparse heapq pydoc unittest\n",
441 | "array hmac pydoc_data urllib\n",
442 | "asn1crypto html pyexpat uu\n",
443 | "ast html5lib pyflakes uuid\n",
444 | "astroid http pygments venv\n",
445 | "astropy idlelib pylab warnings\n",
446 | "asynchat idna pylint wave\n",
447 | "asyncio imagesize pyodbc wcwidth\n",
448 | "asyncore imaplib pyparsing weakref\n",
449 | "atexit imghdr pytest webbrowser\n",
450 | "audioop imp pytz werkzeug\n",
451 | "autoreload importlib pywt wheel\n",
452 | "babel inspect pyximport widgetsnbextension\n",
453 | "backports io qtawesome wrapt\n",
454 | "base64 ipaddress qtconsole wsgiref\n",
455 | "bdb ipykernel qtpy xdrlib\n",
456 | "binascii ipykernel_launcher queue xlrd\n",
457 | "binhex ipython_genutils quopri xlsxwriter\n",
458 | "binstar_client ipywidgets random xlwt\n",
459 | "bisect isort re xml\n",
460 | "bitarray itertools readline xmlrpc\n",
461 | "blaze itsdangerous reprlib xxlimited\n",
462 | "bleach jdcal requests xxsubtype\n",
463 | "bokeh jedi resource yaml\n",
464 | "boto jinja2 rlcompleter zict\n",
465 | "bottleneck json rmagic zipapp\n",
466 | "brain_builtin_inference jsonschema rope zipfile\n",
467 | "brain_dateutil jupyter ruamel_yaml zipimport\n",
468 | "brain_gi jupyter_client runpy zlib\n",
469 | "brain_mechanize jupyter_console sched zmq\n",
470 | "\n",
471 | "Enter any module name to get more help. Or, type \"modules spam\" to search\n",
472 | "for modules whose name or summary contain the string \"spam\".\n",
473 | "\n"
474 | ]
475 | }
476 | ],
477 | "source": [
478 | "help(\"modules\")"
479 | ]
480 | }
481 | ],
482 | "metadata": {
483 | "kernelspec": {
484 | "display_name": "Python 3",
485 | "language": "python",
486 | "name": "python3"
487 | },
488 | "language_info": {
489 | "codemirror_mode": {
490 | "name": "ipython",
491 | "version": 3
492 | },
493 | "file_extension": ".py",
494 | "mimetype": "text/x-python",
495 | "name": "python",
496 | "nbconvert_exporter": "python",
497 | "pygments_lexer": "ipython3",
498 | "version": "3.6.1"
499 | }
500 | },
501 | "nbformat": 4,
502 | "nbformat_minor": 2
503 | }
504 |
--------------------------------------------------------------------------------
/Anaconda环境安装以及搭建Python多内核环境.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kdotm/Python_Series/c686d48797b7266934958b09247aa3ffe783c026/Anaconda环境安装以及搭建Python多内核环境.docx
--------------------------------------------------------------------------------
/Python_PyODPS_HTML_to_PDF.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## 爬取 PyODPS[latest] 并转换为 PDF\n",
8 | "- 爬取主链接\n",
9 | "- 根据主链接爬取子链接\n",
10 | "- 参考子链接爬取HTML并转换为PDF\n",
11 | "- 将所有 PDF 整合为一个PDF\n",
12 | "---\n",
13 | "- 注 :\n",
14 | " - PyOdps PDF在线最新版本\n",
15 | " - 0.3.12"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": 15,
21 | "metadata": {
22 | "collapsed": true
23 | },
24 | "outputs": [],
25 | "source": [
26 | "import re\n",
27 | "import pdfkit\n",
28 | "import pandas as pd\n",
29 | "from urllib import urlopen\n",
30 | "from bs4 import BeautifulSoup\n",
31 | "\n",
32 | "# 设置 pandas 显示参数\n",
33 | "pd.set_option('display.width',200)\n",
34 | "pd.set_option('display.max_rows',1000)\n",
35 | "pd.set_option('display.max_columns',50)\n",
36 | "pd.set_option('display.max_colwidth',500)"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "### 爬取主链接\n",
44 | "#### 爬取PyODPS Docs主页面"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 9,
50 | "metadata": {
51 | "collapsed": true
52 | },
53 | "outputs": [],
54 | "source": [
55 | "url='http://pyodps.readthedocs.io/zh_CN/latest/index.html'\n",
56 | "html=urlopen(url).read().decode('utf8')\n",
57 | "soup=BeautifulSoup(html,'lxml')"
58 | ]
59 | },
60 | {
61 | "cell_type": "markdown",
62 | "metadata": {},
63 | "source": [
64 | "#### 获取最新文档首页 API 及标题"
65 | ]
66 | },
67 | {
68 | "cell_type": "code",
69 | "execution_count": 10,
70 | "metadata": {
71 | "collapsed": true
72 | },
73 | "outputs": [],
74 | "source": [
75 | "# 主链接 (API)\n",
76 | "api=soup.find(name='link', attrs={'rel':'canonical'}).get('href')\n",
77 | "# 获取文档标题\n",
78 | "title=soup.find('link',attrs={\"href\":\"#\",\"rel\":\"top\"}).get('title').replace(' ','_')\n",
79 | "\n",
80 | "# 获取首页超链接 (href)\n",
81 | "hrefs=[]\n",
82 | "div_s=soup.find_all(name='div',attrs={'aria-label':'main navigation','role':'navigation'})[0]\n",
83 | "for tag_a in div_s.find_all(name='a',attrs={'class':'reference internal'}):\n",
84 | " content_name=tag_a.get_text()\n",
85 | " url=api+tag_a.get('href')\n",
86 | " hrefs.append([content_name,url])"
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {},
92 | "source": [
93 | "#### 美化 DataFrame 显示效果函数"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 20,
99 | "metadata": {
100 | "collapsed": true
101 | },
102 | "outputs": [],
103 | "source": [
104 | "'''\n",
105 | "设置悬停效果\n",
106 | "'''\n",
107 | "def hover(hover_color=\"#ffff99\"):\n",
108 | " return dict(selector=\"tr:hover\",\n",
109 | " props=[(\"background-color\", \"%s\" % hover_color)])\n",
110 | "'''\n",
111 | "美化DataFrame显示效果\n",
112 | "'''\n",
113 | "def display_prettify(df):\n",
114 | " from IPython.display import HTML\n",
115 | "\n",
116 | " styles = [\n",
117 | " hover(),\n",
118 | " dict(selector=\"th\", props=[(\"font-size\", \"100%\"),\n",
119 | " (\"text-align\", \"center\")]),\n",
120 | " dict(selector=\"td\", props=[(\"text-align\", \"left\")]),\n",
121 | " dict(selector=\"caption\", props=[(\"caption-side\", \"left\")])\n",
122 | " ]\n",
123 | " return df.style.set_table_styles(styles).set_caption(\"Hover to highlight.\")"
124 | ]
125 | },
126 | {
127 | "cell_type": "markdown",
128 | "metadata": {},
129 | "source": [
130 | "#### 首页超链接(href)打印显示"
131 | ]
132 | },
133 | {
134 | "cell_type": "code",
135 | "execution_count": 13,
136 | "metadata": {},
137 | "outputs": [
138 | {
139 | "data": {
140 | "text/html": [
141 | " \n",
152 | "Hover to highlight. \n",
153 | " \n",
154 | " | \n",
155 | " content_name | \n",
156 | " href | \n",
157 | "
\n",
158 | " \n",
159 | " 0 | \n",
160 | " 安装指南 | \n",
161 | " http://pyodps.readthedocs.io/zh_CN/latest/installation-pub-zh.html | \n",
162 | "
\n",
163 | " 1 | \n",
164 | " 基本操作 | \n",
165 | " http://pyodps.readthedocs.io/zh_CN/latest/base-zh.html | \n",
166 | "
\n",
167 | " 2 | \n",
168 | " DataFrame | \n",
169 | " http://pyodps.readthedocs.io/zh_CN/latest/df-zh.html | \n",
170 | "
\n",
171 | " 3 | \n",
172 | " 机器学习 | \n",
173 | " http://pyodps.readthedocs.io/zh_CN/latest/ml-zh.html | \n",
174 | "
\n",
175 | " 4 | \n",
176 | " 交互体验增强 | \n",
177 | " http://pyodps.readthedocs.io/zh_CN/latest/interactive-zh.html | \n",
178 | "
\n",
179 | " 5 | \n",
180 | " 配置选项 | \n",
181 | " http://pyodps.readthedocs.io/zh_CN/latest/options-zh.html | \n",
182 | "
\n",
183 | " 6 | \n",
184 | " API Reference | \n",
185 | " http://pyodps.readthedocs.io/zh_CN/latest/api.html | \n",
186 | "
\n",
187 | "
"
188 | ],
189 | "text/plain": [
190 | ""
191 | ]
192 | },
193 | "execution_count": 13,
194 | "metadata": {},
195 | "output_type": "execute_result"
196 | }
197 | ],
198 | "source": [
199 | "df=pd.DataFrame(hrefs, columns=['content_name','href'])\n",
200 | "\n",
201 | "display_prettify(df)"
202 | ]
203 | },
204 | {
205 | "cell_type": "markdown",
206 | "metadata": {},
207 | "source": [
208 | "### 根据主链接爬取子链接"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": null,
214 | "metadata": {
215 | "collapsed": true
216 | },
217 | "outputs": [],
218 | "source": [
219 | "hrefs_2=[] # 有序列表,存储主、子链接并与文档目录层次结构保持一致性\n",
220 | "\n",
221 | "for name,url in hrefs:\n",
222 | " if url not in [hf[1] for hf in hrefs_2]: # href 不在 hrefs_2中,则追加\n",
223 | " hrefs_2.append([name,url])\n",
224 | " t_html=urlopen(url).read().decode('utf8')\n",
225 | " \n",
226 | " # 根据正则表达式 查找当前目录主题\n",
227 | " f_re=''\n",
228 | " if len(re.findall(f_re, t_html, re.I|re.S|re.M)) !=0 :\n",
229 | " target_s = re.findall(f_re, t_html, re.I|re.S|re.M)[0]\n",
230 | "\n",
231 | " # 根据正则表达式 获取当前子主题链接\n",
232 | " t_re='(.*?)'\n",
233 | " for href,name in re.findall(t_re, target_s, re.I|re.S|re.M):\n",
234 | " if href.strip().endswith('.html'):\n",
235 | " hrefs_2.append([name,api+href])"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 22,
241 | "metadata": {},
242 | "outputs": [
243 | {
244 | "data": {
245 | "text/html": [
246 | " \n",
257 | "Hover to highlight. \n",
258 | " \n",
259 | " | \n",
260 | " 0 | \n",
261 | " 1 | \n",
262 | "
\n",
263 | " \n",
264 | " 0 | \n",
265 | " 安装指南 | \n",
266 | " http://pyodps.readthedocs.io/zh_CN/latest/installation-pub-zh.html | \n",
267 | "
\n",
268 | " 1 | \n",
269 | " 基本操作 | \n",
270 | " http://pyodps.readthedocs.io/zh_CN/latest/base-zh.html | \n",
271 | "
\n",
272 | "
"
273 | ],
274 | "text/plain": [
275 | ""
276 | ]
277 | },
278 | "execution_count": 22,
279 | "metadata": {},
280 | "output_type": "execute_result"
281 | }
282 | ],
283 | "source": [
284 | "display_prettify(pd.DataFrame(hrefs_2))"
285 | ]
286 | },
287 | {
288 | "cell_type": "markdown",
289 | "metadata": {},
290 | "source": [
291 | "#### 显示PyODPS 所有链接"
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": 105,
297 | "metadata": {},
298 | "outputs": [
299 | {
300 | "data": {
301 | "text/html": [
302 | "\n",
303 | "\n",
316 | "
\n",
317 | " \n",
318 | " \n",
319 | " | \n",
320 | " 0 | \n",
321 | " 1 | \n",
322 | "
\n",
323 | " \n",
324 | " \n",
325 | " \n",
326 | " 0 | \n",
327 | " 安装指南 | \n",
328 | " http://pyodps.readthedocs.io/zh_CN/latest/installation-pub-zh.html | \n",
329 | "
\n",
330 | " \n",
331 | " 1 | \n",
332 | " 基本操作 | \n",
333 | " http://pyodps.readthedocs.io/zh_CN/latest/base-zh.html | \n",
334 | "
\n",
335 | " \n",
336 | " 2 | \n",
337 | " 项目空间 | \n",
338 | " http://pyodps.readthedocs.io/zh_CN/latest/base-projects-zh.html | \n",
339 | "
\n",
340 | " \n",
341 | " 3 | \n",
342 | " 表 | \n",
343 | " http://pyodps.readthedocs.io/zh_CN/latest/base-tables-zh.html | \n",
344 | "
\n",
345 | " \n",
346 | " 4 | \n",
347 | " SQL | \n",
348 | " http://pyodps.readthedocs.io/zh_CN/latest/base-sql-zh.html | \n",
349 | "
\n",
350 | " \n",
351 | " 5 | \n",
352 | " 任务实例 | \n",
353 | " http://pyodps.readthedocs.io/zh_CN/latest/base-instances-zh.html | \n",
354 | "
\n",
355 | " \n",
356 | " 6 | \n",
357 | " 资源 | \n",
358 | " http://pyodps.readthedocs.io/zh_CN/latest/base-resources-zh.html | \n",
359 | "
\n",
360 | " \n",
361 | " 7 | \n",
362 | " 函数 | \n",
363 | " http://pyodps.readthedocs.io/zh_CN/latest/base-functions-zh.html | \n",
364 | "
\n",
365 | " \n",
366 | " 8 | \n",
367 | " 模型 | \n",
368 | " http://pyodps.readthedocs.io/zh_CN/latest/base-models-zh.html | \n",
369 | "
\n",
370 | " \n",
371 | " 9 | \n",
372 | " DataFrame | \n",
373 | " http://pyodps.readthedocs.io/zh_CN/latest/df-zh.html | \n",
374 | "
\n",
375 | " \n",
376 | " 10 | \n",
377 | " 快速开始 | \n",
378 | " http://pyodps.readthedocs.io/zh_CN/latest/df-quickstart-zh.html | \n",
379 | "
\n",
380 | " \n",
381 | " 11 | \n",
382 | " 基本概念 | \n",
383 | " http://pyodps.readthedocs.io/zh_CN/latest/df-basic-zh.html | \n",
384 | "
\n",
385 | " \n",
386 | " 12 | \n",
387 | " 列运算 | \n",
388 | " http://pyodps.readthedocs.io/zh_CN/latest/df-element-zh.html | \n",
389 | "
\n",
390 | " \n",
391 | " 13 | \n",
392 | " 聚合操作 | \n",
393 | " http://pyodps.readthedocs.io/zh_CN/latest/df-agg-zh.html | \n",
394 | "
\n",
395 | " \n",
396 | " 14 | \n",
397 | " 排序、去重、采样、数据变换 | \n",
398 | " http://pyodps.readthedocs.io/zh_CN/latest/df-sort-distinct-apply-zh.html | \n",
399 | "
\n",
400 | " \n",
401 | " 15 | \n",
402 | " 数据合并 | \n",
403 | " http://pyodps.readthedocs.io/zh_CN/latest/df-merge-zh.html | \n",
404 | "
\n",
405 | " \n",
406 | " 16 | \n",
407 | " 窗口函数 | \n",
408 | " http://pyodps.readthedocs.io/zh_CN/latest/df-window-zh.html | \n",
409 | "
\n",
410 | " \n",
411 | " 17 | \n",
412 | " 绘图 | \n",
413 | " http://pyodps.readthedocs.io/zh_CN/latest/df-plot-zh.html | \n",
414 | "
\n",
415 | " \n",
416 | " 18 | \n",
417 | " 调试指南 | \n",
418 | " http://pyodps.readthedocs.io/zh_CN/latest/df-debug-instruction-zh.html | \n",
419 | "
\n",
420 | " \n",
421 | " 19 | \n",
422 | " 机器学习 | \n",
423 | " http://pyodps.readthedocs.io/zh_CN/latest/ml-zh.html | \n",
424 | "
\n",
425 | " \n",
426 | " 20 | \n",
427 | " 快速开始 | \n",
428 | " http://pyodps.readthedocs.io/zh_CN/latest/ml-quickstart-zh.html | \n",
429 | "
\n",
430 | " \n",
431 | " 21 | \n",
432 | " 基本概念 | \n",
433 | " http://pyodps.readthedocs.io/zh_CN/latest/ml-basic-zh.html | \n",
434 | "
\n",
435 | " \n",
436 | " 22 | \n",
437 | " 调用算法 | \n",
438 | " http://pyodps.readthedocs.io/zh_CN/latest/ml-algo-pub-zh.html | \n",
439 | "
\n",
440 | " \n",
441 | " 23 | \n",
442 | " 结果评估 | \n",
443 | " http://pyodps.readthedocs.io/zh_CN/latest/ml-assess-zh.html | \n",
444 | "
\n",
445 | " \n",
446 | " 24 | \n",
447 | " 交互体验增强 | \n",
448 | " http://pyodps.readthedocs.io/zh_CN/latest/interactive-zh.html | \n",
449 | "
\n",
450 | " \n",
451 | " 25 | \n",
452 | " 配置选项 | \n",
453 | " http://pyodps.readthedocs.io/zh_CN/latest/options-zh.html | \n",
454 | "
\n",
455 | " \n",
456 | " 26 | \n",
457 | " API Reference | \n",
458 | " http://pyodps.readthedocs.io/zh_CN/latest/api.html | \n",
459 | "
\n",
460 | " \n",
461 | " 27 | \n",
462 | " Definitions | \n",
463 | " http://pyodps.readthedocs.io/zh_CN/latest/api-def.html | \n",
464 | "
\n",
465 | " \n",
466 | " 28 | \n",
467 | " DataFrame Reference | \n",
468 | " http://pyodps.readthedocs.io/zh_CN/latest/api-df.html | \n",
469 | "
\n",
470 | " \n",
471 | " 29 | \n",
472 | " ML References | \n",
473 | " http://pyodps.readthedocs.io/zh_CN/latest/api-ml.html | \n",
474 | "
\n",
475 | " \n",
476 | "
\n",
477 | "
"
478 | ],
479 | "text/plain": [
480 | " 0 1\n",
481 | "0 安装指南 http://pyodps.readthedocs.io/zh_CN/latest/installation-pub-zh.html\n",
482 | "1 基本操作 http://pyodps.readthedocs.io/zh_CN/latest/base-zh.html\n",
483 | "2 项目空间 http://pyodps.readthedocs.io/zh_CN/latest/base-projects-zh.html\n",
484 | "3 表 http://pyodps.readthedocs.io/zh_CN/latest/base-tables-zh.html\n",
485 | "4 SQL http://pyodps.readthedocs.io/zh_CN/latest/base-sql-zh.html\n",
486 | "5 任务实例 http://pyodps.readthedocs.io/zh_CN/latest/base-instances-zh.html\n",
487 | "6 资源 http://pyodps.readthedocs.io/zh_CN/latest/base-resources-zh.html\n",
488 | "7 函数 http://pyodps.readthedocs.io/zh_CN/latest/base-functions-zh.html\n",
489 | "8 模型 http://pyodps.readthedocs.io/zh_CN/latest/base-models-zh.html\n",
490 | "9 DataFrame http://pyodps.readthedocs.io/zh_CN/latest/df-zh.html\n",
491 | "10 快速开始 http://pyodps.readthedocs.io/zh_CN/latest/df-quickstart-zh.html\n",
492 | "11 基本概念 http://pyodps.readthedocs.io/zh_CN/latest/df-basic-zh.html\n",
493 | "12 列运算 http://pyodps.readthedocs.io/zh_CN/latest/df-element-zh.html\n",
494 | "13 聚合操作 http://pyodps.readthedocs.io/zh_CN/latest/df-agg-zh.html\n",
495 | "14 排序、去重、采样、数据变换 http://pyodps.readthedocs.io/zh_CN/latest/df-sort-distinct-apply-zh.html\n",
496 | "15 数据合并 http://pyodps.readthedocs.io/zh_CN/latest/df-merge-zh.html\n",
497 | "16 窗口函数 http://pyodps.readthedocs.io/zh_CN/latest/df-window-zh.html\n",
498 | "17 绘图 http://pyodps.readthedocs.io/zh_CN/latest/df-plot-zh.html\n",
499 | "18 调试指南 http://pyodps.readthedocs.io/zh_CN/latest/df-debug-instruction-zh.html\n",
500 | "19 机器学习 http://pyodps.readthedocs.io/zh_CN/latest/ml-zh.html\n",
501 | "20 快速开始 http://pyodps.readthedocs.io/zh_CN/latest/ml-quickstart-zh.html\n",
502 | "21 基本概念 http://pyodps.readthedocs.io/zh_CN/latest/ml-basic-zh.html\n",
503 | "22 调用算法 http://pyodps.readthedocs.io/zh_CN/latest/ml-algo-pub-zh.html\n",
504 | "23 结果评估 http://pyodps.readthedocs.io/zh_CN/latest/ml-assess-zh.html\n",
505 | "24 交互体验增强 http://pyodps.readthedocs.io/zh_CN/latest/interactive-zh.html\n",
506 | "25 配置选项 http://pyodps.readthedocs.io/zh_CN/latest/options-zh.html\n",
507 | "26 API Reference http://pyodps.readthedocs.io/zh_CN/latest/api.html\n",
508 | "27 Definitions http://pyodps.readthedocs.io/zh_CN/latest/api-def.html\n",
509 | "28 DataFrame Reference http://pyodps.readthedocs.io/zh_CN/latest/api-df.html\n",
510 | "29 ML References http://pyodps.readthedocs.io/zh_CN/latest/api-ml.html"
511 | ]
512 | },
513 | "execution_count": 105,
514 | "metadata": {},
515 | "output_type": "execute_result"
516 | }
517 | ],
518 | "source": [
519 | "pd.DataFrame(hrefs_2)"
520 | ]
521 | },
522 | {
523 | "cell_type": "markdown",
524 | "metadata": {
525 | "code_folding": []
526 | },
527 | "source": [
528 | "### 参考子链接爬取HTML并转换为PDF"
529 | ]
530 | },
531 | {
532 | "cell_type": "code",
533 | "execution_count": 24,
534 | "metadata": {},
535 | "outputs": [
536 | {
537 | "name": "stdout",
538 | "output_type": "stream",
539 | "text": [
540 | "Loading pages (1/6)\n",
541 | "QFont::setPixelSize: Pixel size <= 0 (0) ] 35%\n",
542 | "Counting pages (2/6) \n",
543 | "Resolving links (4/6) \n",
544 | "Loading headers and footers (5/6) \n",
545 | "Printing pages (6/6)\n",
546 | "Done \n",
547 | "Loading pages (1/6)\n",
548 | "QFont::setPixelSize: Pixel size <= 0 (0) ] 26%\n",
549 | "Counting pages (2/6) \n",
550 | "Resolving links (4/6) \n",
551 | "Loading headers and footers (5/6) \n",
552 | "Printing pages (6/6)\n",
553 | "Done \n"
554 | ]
555 | }
556 | ],
557 | "source": [
558 | "for name,href in hrefs_2:\n",
559 | " pdfkit.from_url(href,'./tmp/'+name+'.pdf')"
560 | ]
561 | },
562 | {
563 | "cell_type": "markdown",
564 | "metadata": {},
565 | "source": [
566 | "### 整合所有 PDF"
567 | ]
568 | },
569 | {
570 | "cell_type": "code",
571 | "execution_count": 114,
572 | "metadata": {
573 | "collapsed": true
574 | },
575 | "outputs": [],
576 | "source": [
577 | "from PyPDF2 import PdfFileMerger\n",
578 | "\n",
579 | "# 创建 PdfFileMerger 对象,合并PDFs\n",
580 | "merger = PdfFileMerger()\n",
581 | "for name, url in hrefs_2:\n",
582 | " t_input = open('./tmp/'+name+'.pdf', 'rb')\n",
583 | " merger.append(t_input)\n",
584 | "\n",
585 | "# 流输出\n",
586 | "output = open(title+\".pdf\", \"wb\")\n",
587 | "merger.write(output)\n",
588 | "\n",
589 | "# 关闭文件流\n",
590 | "output.close()\n",
591 | "merger.close()"
592 | ]
593 | }
594 | ],
595 | "metadata": {
596 | "kernelspec": {
597 | "display_name": "Python 2",
598 | "language": "python",
599 | "name": "python2"
600 | },
601 | "language_info": {
602 | "codemirror_mode": {
603 | "name": "ipython",
604 | "version": 2
605 | },
606 | "file_extension": ".py",
607 | "mimetype": "text/x-python",
608 | "name": "python",
609 | "nbconvert_exporter": "python",
610 | "pygments_lexer": "ipython2",
611 | "version": "2.7.13"
612 | },
613 | "toc": {
614 | "nav_menu": {},
615 | "number_sections": true,
616 | "sideBar": true,
617 | "skip_h1_title": false,
618 | "toc_cell": false,
619 | "toc_position": {
620 | "height": "454px",
621 | "left": "0px",
622 | "right": "745.925px",
623 | "top": "107px",
624 | "width": "290px"
625 | },
626 | "toc_section_display": "block",
627 | "toc_window_display": true
628 | }
629 | },
630 | "nbformat": 4,
631 | "nbformat_minor": 2
632 | }
633 |
--------------------------------------------------------------------------------
/Python操作Mysql实例教程手册_代码.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "#### 设置样式显示效果"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 15,
13 | "metadata": {
14 | "collapsed": true
15 | },
16 | "outputs": [],
17 | "source": [
18 | "\n",
19 | "'''\n",
20 | "设置悬停效果\n",
21 | "'''\n",
22 | "def hover(hover_color=\"#ffff99\"):\n",
23 | " return dict(selector=\"tr:hover\",\n",
24 | " props=[(\"background-color\", \"%s\" % hover_color)])\n",
25 | "'''\n",
26 | "美化DataFrame显示效果\n",
27 | "'''\n",
28 | "def display_prettify(df):\n",
29 | " from IPython.display import HTML\n",
30 | "\n",
31 | " styles = [\n",
32 | " hover(),\n",
33 | " dict(selector=\"th\", props=[(\"font-size\", \"100%\"),\n",
34 | " (\"text-align\", \"center\")]),\n",
35 | " dict(selector=\"td\", props=[(\"text-align\", \"left\")]),\n",
36 | " dict(selector=\"caption\", props=[(\"caption-side\", \"left\")])\n",
37 | " ]\n",
38 | " return df.style.set_table_styles(styles).set_caption(\"Hover to highlight.\")"
39 | ]
40 | },
41 | {
42 | "cell_type": "markdown",
43 | "metadata": {},
44 | "source": [
45 | "### 取得 MariaDB 版本"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 18,
51 | "metadata": {},
52 | "outputs": [],
53 | "source": [
54 | "ip,user,pwd,dnname,port='192.168.182.131','root','root','mysql',3306\n",
55 | "\n",
56 | "import MySQLdb as mdb\n",
57 | "\n",
58 | "conn = None\n",
59 | "try:\n",
60 | " '''\n",
61 | " 1 connect mysql\n",
62 | " type : MySQLdb.connections.Connection\n",
63 | " func : connect(ip,user,password,dbname)\n",
64 | " '''\n",
65 | " conn = mdb.connect(ip,user,pwd,dnname,port)\n",
66 | " cursor = conn.cursor()\n",
67 | " \n",
68 | " '''\n",
69 | " 2 fetchone\n",
70 | " 单条结果,返回单个元组 \n",
71 | " '''\n",
72 | " cursor.execute(\"SELECT VERSION();\") # 执行查询\n",
73 | " row = cursor.fetchone()\n",
74 | " \n",
75 | " '''\n",
76 | " 3 fetchall\n",
77 | " 所有结果,返回二维元组 \n",
78 | " '''\n",
79 | " cursor.execute(\"select * from user;\") # 执行查询\n",
80 | " rows = cursor.fetchall()\n",
81 | "finally:\n",
82 | " if conn:\n",
83 | " conn.close() # 关闭连接\n",
84 | " cursor.close()"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 20,
90 | "metadata": {
91 | "scrolled": false
92 | },
93 | "outputs": [
94 | {
95 | "data": {
96 | "text/html": [
97 | " \n",
108 | "Hover to highlight. \n",
109 | " \n",
110 | " | \n",
111 | " 0 | \n",
112 | " 1 | \n",
113 | " 2 | \n",
114 | " 3 | \n",
115 | " 4 | \n",
116 | " 5 | \n",
117 | " 6 | \n",
118 | " 7 | \n",
119 | " 8 | \n",
120 | " 9 | \n",
121 | " 10 | \n",
122 | " 11 | \n",
123 | " 12 | \n",
124 | " 13 | \n",
125 | " 14 | \n",
126 | " 15 | \n",
127 | " 16 | \n",
128 | " 17 | \n",
129 | " 18 | \n",
130 | " 19 | \n",
131 | " 20 | \n",
132 | " 21 | \n",
133 | " 22 | \n",
134 | " 23 | \n",
135 | " 24 | \n",
136 | " 25 | \n",
137 | " 26 | \n",
138 | " 27 | \n",
139 | " 28 | \n",
140 | " 29 | \n",
141 | " 30 | \n",
142 | " 31 | \n",
143 | " 32 | \n",
144 | " 33 | \n",
145 | " 34 | \n",
146 | " 35 | \n",
147 | " 36 | \n",
148 | " 37 | \n",
149 | " 38 | \n",
150 | " 39 | \n",
151 | " 40 | \n",
152 | " 41 | \n",
153 | " 42 | \n",
154 | " 43 | \n",
155 | " 44 | \n",
156 | " 45 | \n",
157 | "
\n",
158 | " \n",
159 | " 0 | \n",
160 | " localhost | \n",
161 | " root | \n",
162 | " *81F5E21E35407D884A6CD4A731AEBFB6AF209E1B | \n",
163 | " Y | \n",
164 | " Y | \n",
165 | " Y | \n",
166 | " Y | \n",
167 | " Y | \n",
168 | " Y | \n",
169 | " Y | \n",
170 | " Y | \n",
171 | " Y | \n",
172 | " Y | \n",
173 | " Y | \n",
174 | " Y | \n",
175 | " Y | \n",
176 | " Y | \n",
177 | " Y | \n",
178 | " Y | \n",
179 | " Y | \n",
180 | " Y | \n",
181 | " Y | \n",
182 | " Y | \n",
183 | " Y | \n",
184 | " Y | \n",
185 | " Y | \n",
186 | " Y | \n",
187 | " Y | \n",
188 | " Y | \n",
189 | " Y | \n",
190 | " Y | \n",
191 | " Y | \n",
192 | " | \n",
193 | " | \n",
194 | " | \n",
195 | " | \n",
196 | " 0 | \n",
197 | " 0 | \n",
198 | " 0 | \n",
199 | " 0 | \n",
200 | " | \n",
201 | " *81F5E21E35407D884A6CD4A731AEBFB6AF209E1B | \n",
202 | " N | \n",
203 | " N | \n",
204 | " | \n",
205 | " 0.000000 | \n",
206 | "
\n",
207 | " 1 | \n",
208 | " 127.0.0.1 | \n",
209 | " root | \n",
210 | " *81F5E21E35407D884A6CD4A731AEBFB6AF209E1B | \n",
211 | " Y | \n",
212 | " Y | \n",
213 | " Y | \n",
214 | " Y | \n",
215 | " Y | \n",
216 | " Y | \n",
217 | " Y | \n",
218 | " Y | \n",
219 | " Y | \n",
220 | " Y | \n",
221 | " Y | \n",
222 | " Y | \n",
223 | " Y | \n",
224 | " Y | \n",
225 | " Y | \n",
226 | " Y | \n",
227 | " Y | \n",
228 | " Y | \n",
229 | " Y | \n",
230 | " Y | \n",
231 | " Y | \n",
232 | " Y | \n",
233 | " Y | \n",
234 | " Y | \n",
235 | " Y | \n",
236 | " Y | \n",
237 | " Y | \n",
238 | " Y | \n",
239 | " Y | \n",
240 | " | \n",
241 | " | \n",
242 | " | \n",
243 | " | \n",
244 | " 0 | \n",
245 | " 0 | \n",
246 | " 0 | \n",
247 | " 0 | \n",
248 | " | \n",
249 | " *81F5E21E35407D884A6CD4A731AEBFB6AF209E1B | \n",
250 | " N | \n",
251 | " N | \n",
252 | " | \n",
253 | " 0.000000 | \n",
254 | "
\n",
255 | " 2 | \n",
256 | " ::1 | \n",
257 | " root | \n",
258 | " *81F5E21E35407D884A6CD4A731AEBFB6AF209E1B | \n",
259 | " Y | \n",
260 | " Y | \n",
261 | " Y | \n",
262 | " Y | \n",
263 | " Y | \n",
264 | " Y | \n",
265 | " Y | \n",
266 | " Y | \n",
267 | " Y | \n",
268 | " Y | \n",
269 | " Y | \n",
270 | " Y | \n",
271 | " Y | \n",
272 | " Y | \n",
273 | " Y | \n",
274 | " Y | \n",
275 | " Y | \n",
276 | " Y | \n",
277 | " Y | \n",
278 | " Y | \n",
279 | " Y | \n",
280 | " Y | \n",
281 | " Y | \n",
282 | " Y | \n",
283 | " Y | \n",
284 | " Y | \n",
285 | " Y | \n",
286 | " Y | \n",
287 | " Y | \n",
288 | " | \n",
289 | " | \n",
290 | " | \n",
291 | " | \n",
292 | " 0 | \n",
293 | " 0 | \n",
294 | " 0 | \n",
295 | " 0 | \n",
296 | " | \n",
297 | " | \n",
298 | " N | \n",
299 | " N | \n",
300 | " | \n",
301 | " 0.000000 | \n",
302 | "
\n",
303 | " 3 | \n",
304 | " % | \n",
305 | " root | \n",
306 | " *81F5E21E35407D884A6CD4A731AEBFB6AF209E1B | \n",
307 | " N | \n",
308 | " N | \n",
309 | " N | \n",
310 | " N | \n",
311 | " N | \n",
312 | " N | \n",
313 | " N | \n",
314 | " N | \n",
315 | " N | \n",
316 | " N | \n",
317 | " N | \n",
318 | " N | \n",
319 | " N | \n",
320 | " N | \n",
321 | " N | \n",
322 | " N | \n",
323 | " N | \n",
324 | " N | \n",
325 | " N | \n",
326 | " N | \n",
327 | " N | \n",
328 | " N | \n",
329 | " N | \n",
330 | " N | \n",
331 | " N | \n",
332 | " N | \n",
333 | " N | \n",
334 | " N | \n",
335 | " N | \n",
336 | " | \n",
337 | " | \n",
338 | " | \n",
339 | " | \n",
340 | " 0 | \n",
341 | " 0 | \n",
342 | " 0 | \n",
343 | " 0 | \n",
344 | " | \n",
345 | " | \n",
346 | " N | \n",
347 | " N | \n",
348 | " | \n",
349 | " 0.000000 | \n",
350 | "
\n",
351 | " 4 | \n",
352 | " localhost | \n",
353 | " root1 | \n",
354 | " *81F5E21E35407D884A6CD4A731AEBFB6AF209E1B | \n",
355 | " Y | \n",
356 | " Y | \n",
357 | " Y | \n",
358 | " Y | \n",
359 | " Y | \n",
360 | " Y | \n",
361 | " Y | \n",
362 | " Y | \n",
363 | " Y | \n",
364 | " Y | \n",
365 | " N | \n",
366 | " Y | \n",
367 | " Y | \n",
368 | " Y | \n",
369 | " Y | \n",
370 | " Y | \n",
371 | " Y | \n",
372 | " Y | \n",
373 | " Y | \n",
374 | " Y | \n",
375 | " Y | \n",
376 | " Y | \n",
377 | " Y | \n",
378 | " Y | \n",
379 | " Y | \n",
380 | " Y | \n",
381 | " Y | \n",
382 | " Y | \n",
383 | " Y | \n",
384 | " | \n",
385 | " | \n",
386 | " | \n",
387 | " | \n",
388 | " 0 | \n",
389 | " 0 | \n",
390 | " 0 | \n",
391 | " 0 | \n",
392 | " | \n",
393 | " | \n",
394 | " N | \n",
395 | " N | \n",
396 | " | \n",
397 | " 0.000000 | \n",
398 | "
\n",
399 | " 5 | \n",
400 | " hostname | \n",
401 | " username | \n",
402 | " *81F5E21E35407D884A6CD4A731AEBFB6AF209E1B | \n",
403 | " Y | \n",
404 | " Y | \n",
405 | " Y | \n",
406 | " Y | \n",
407 | " Y | \n",
408 | " Y | \n",
409 | " Y | \n",
410 | " Y | \n",
411 | " Y | \n",
412 | " Y | \n",
413 | " Y | \n",
414 | " Y | \n",
415 | " Y | \n",
416 | " Y | \n",
417 | " Y | \n",
418 | " Y | \n",
419 | " Y | \n",
420 | " Y | \n",
421 | " Y | \n",
422 | " Y | \n",
423 | " Y | \n",
424 | " Y | \n",
425 | " Y | \n",
426 | " Y | \n",
427 | " Y | \n",
428 | " Y | \n",
429 | " Y | \n",
430 | " Y | \n",
431 | " Y | \n",
432 | " | \n",
433 | " | \n",
434 | " | \n",
435 | " | \n",
436 | " 0 | \n",
437 | " 0 | \n",
438 | " 0 | \n",
439 | " 0 | \n",
440 | " | \n",
441 | " | \n",
442 | " N | \n",
443 | " N | \n",
444 | " | \n",
445 | " 0.000000 | \n",
446 | "
\n",
447 | " 6 | \n",
448 | " 192.168.% | \n",
449 | " root | \n",
450 | " *81F5E21E35407D884A6CD4A731AEBFB6AF209E1B | \n",
451 | " Y | \n",
452 | " Y | \n",
453 | " Y | \n",
454 | " Y | \n",
455 | " Y | \n",
456 | " Y | \n",
457 | " Y | \n",
458 | " Y | \n",
459 | " Y | \n",
460 | " Y | \n",
461 | " N | \n",
462 | " Y | \n",
463 | " Y | \n",
464 | " Y | \n",
465 | " Y | \n",
466 | " Y | \n",
467 | " Y | \n",
468 | " Y | \n",
469 | " Y | \n",
470 | " Y | \n",
471 | " Y | \n",
472 | " Y | \n",
473 | " Y | \n",
474 | " Y | \n",
475 | " Y | \n",
476 | " Y | \n",
477 | " Y | \n",
478 | " Y | \n",
479 | " Y | \n",
480 | " | \n",
481 | " | \n",
482 | " | \n",
483 | " | \n",
484 | " 0 | \n",
485 | " 0 | \n",
486 | " 0 | \n",
487 | " 0 | \n",
488 | " | \n",
489 | " | \n",
490 | " N | \n",
491 | " N | \n",
492 | " | \n",
493 | " 0.000000 | \n",
494 | "
\n",
495 | "
"
496 | ],
497 | "text/plain": [
498 | ""
499 | ]
500 | },
501 | "execution_count": 20,
502 | "metadata": {},
503 | "output_type": "execute_result"
504 | }
505 | ],
506 | "source": [
507 | "import pandas as pd\n",
508 | "\n",
509 | "rw_df=pd.DataFrame([t for t in rows])\n",
510 | "display_prettify(rw_df)"
511 | ]
512 | },
513 | {
514 | "cell_type": "markdown",
515 | "metadata": {},
516 | "source": [
517 | "### 创建一个表并且插入数据"
518 | ]
519 | },
520 | {
521 | "cell_type": "code",
522 | "execution_count": 27,
523 | "metadata": {},
524 | "outputs": [],
525 | "source": [
526 | "import MySQLdb as mdb\n",
527 | "\n",
528 | "# conn 创建全局连接\n",
529 | "conn = mdb.connect(ip,user,pwd,dnname,port);\n",
530 | "with conn:\n",
531 | " curs = conn.cursor() # 获取 cursor\n",
532 | " # 创建数据表 writers(id,name)\n",
533 | " curs.execute(\"CREATE TABLE IF NOT EXISTS \\\n",
534 | " Writers(Id INT PRIMARY KEY AUTO_INCREMENT, Name VARCHAR(25))\")\n",
535 | " # 插入 5 条数据\n",
536 | " curs.execute(\"INSERT INTO Writers(Name) VALUES('Jack London')\")\n",
537 | " curs.execute(\"INSERT INTO Writers(Name) VALUES('Honore de Balzac')\")\n",
538 | " curs.execute(\"INSERT INTO Writers(Name) VALUES('Lion Feuchtwanger')\")\n",
539 | " curs.execute(\"INSERT INTO Writers(Name) VALUES('Emile Zola')\")\n",
540 | " curs.execute(\"INSERT INTO Writers(Name) VALUES('Truman Capote')\")\n",
541 | " \n",
542 | " # conn.commit()\n",
543 | " \n",
544 | "curs.execute('SELECT * FROM Writers;')\n",
545 | "rows=curs.fetchall()\n",
546 | "conn.close()\n",
547 | "curs.close()"
548 | ]
549 | },
550 | {
551 | "cell_type": "code",
552 | "execution_count": 28,
553 | "metadata": {},
554 | "outputs": [
555 | {
556 | "data": {
557 | "text/html": [
558 | " \n",
569 | "Hover to highlight. \n",
570 | " \n",
571 | " | \n",
572 | " 0 | \n",
573 | " 1 | \n",
574 | "
\n",
575 | " \n",
576 | " 0 | \n",
577 | " 1 | \n",
578 | " Jack London | \n",
579 | "
\n",
580 | " 1 | \n",
581 | " 2 | \n",
582 | " Honore de Balzac | \n",
583 | "
\n",
584 | " 2 | \n",
585 | " 3 | \n",
586 | " Lion Feuchtwanger | \n",
587 | "
\n",
588 | " 3 | \n",
589 | " 4 | \n",
590 | " Emile Zola | \n",
591 | "
\n",
592 | " 4 | \n",
593 | " 5 | \n",
594 | " Truman Capote | \n",
595 | "
\n",
596 | "
"
597 | ],
598 | "text/plain": [
599 | ""
600 | ]
601 | },
602 | "execution_count": 28,
603 | "metadata": {},
604 | "output_type": "execute_result"
605 | }
606 | ],
607 | "source": [
608 | "display_prettify(pd.DataFrame([rw for rw in rows]))"
609 | ]
610 | },
611 | {
612 | "cell_type": "markdown",
613 | "metadata": {},
614 | "source": [
615 | "### python 获取表数据并取行数据中单个数据"
616 | ]
617 | },
618 | {
619 | "cell_type": "code",
620 | "execution_count": 53,
621 | "metadata": {},
622 | "outputs": [
623 | {
624 | "name": "stdout",
625 | "output_type": "stream",
626 | "text": [
627 | "1 Jack London\n",
628 | "2 Honore de Balzac\n",
629 | "3 Guy de Maupasant\n",
630 | "4 Guy de Maupasant\n",
631 | "5 Truman Capote\n",
632 | "-----使用字典游标获取结果集,并通过字段名访问值-----\n",
633 | "1 Jack London\n",
634 | "2 Honore de Balzac\n",
635 | "3 Guy de Maupasant\n",
636 | "4 Guy de Maupasant\n",
637 | "5 Truman Capote\n"
638 | ]
639 | }
640 | ],
641 | "source": [
642 | "import MySQLdb as mdb\n",
643 | "\n",
644 | "# conn 创建全局连接\n",
645 | "conn = mdb.connect(ip,user,pwd,dnname,port);\n",
646 | "\n",
647 | "with conn:\n",
648 | " curs = conn.cursor() # 获取 cursor\n",
649 | " curs.execute(\"SELECT * FROM Writers\")\n",
650 | " \n",
651 | " numrows = int(curs.rowcount) # 获取行数\n",
652 | " for i in range(numrows): # 循环取行数据\n",
653 | " row = curs.fetchone() # 每次取出一行数据,同时指针下移\n",
654 | " print row[0], row[1]\n",
655 | " \n",
656 | " print ('-----使用字典游标获取结果集,并通过字段名访问值-----')\n",
657 | " curs = conn.cursor(mdb.cursors.DictCursor)\n",
658 | " curs.execute(\"SELECT * FROM Writers\")\n",
659 | " rows3=curs.fetchall()\n",
660 | " for row in rows3:\n",
661 | " print \"%s %s\" % (row[\"Id\"], row[\"Name\"])\n",
662 | "\n",
663 | "conn.close()\n",
664 | "curs.close()"
665 | ]
666 | },
667 | {
668 | "cell_type": "markdown",
669 | "metadata": {},
670 | "source": [
671 | "### Prepared Statements 查询(更安全方便)"
672 | ]
673 | },
674 | {
675 | "cell_type": "code",
676 | "execution_count": 56,
677 | "metadata": {},
678 | "outputs": [
679 | {
680 | "name": "stdout",
681 | "output_type": "stream",
682 | "text": [
683 | "Number of rows updated: 1\n"
684 | ]
685 | }
686 | ],
687 | "source": [
688 | "import MySQLdb as mdb\n",
689 | "\n",
690 | "conn = mdb.connect(ip,user,pwd,dnname,port);\n",
691 | "with conn:\n",
692 | " curs = conn.cursor()\n",
693 | " # 通过参数化 SQL(%s 占位符)执行更新操作,参数由驱动负责转义\n",
694 | " curs.execute(\"UPDATE Writers SET Name = %s WHERE Id = %s\",(\"Lion Feuchtwanger\", \"3\"))\n",
695 | " # 获取影响多少行\n",
696 | " print \"Number of rows updated: %d\" % curs.rowcount\n",
697 | "\n",
698 | "conn.commit() \n",
699 | "conn.close()\n",
700 | "curs.close()"
701 | ]
702 | },
703 | {
704 | "cell_type": "markdown",
705 | "metadata": {},
706 | "source": [
707 | "### 把图片用二进制存入 MySQL\n",
708 | "- BLOB (Binary Large Object),即较大的二进制对象字段\n",
709 | "- escape_string \n",
710 | " - 字符转义函数\n",
711 | "- CREATE TABLE Images(Id INT PRIMARY KEY AUTO_INCREMENT, Data MEDIUMBLOB);"
712 | ]
713 | },
714 | {
715 | "cell_type": "code",
716 | "execution_count": 64,
717 | "metadata": {},
718 | "outputs": [
719 | {
720 | "name": "stderr",
721 | "output_type": "stream",
722 | "text": [
723 | "/root/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:9: Warning: Table 'Images' already exists\n"
724 | ]
725 | }
726 | ],
727 | "source": [
728 | "import MySQLdb as mdb\n",
729 | "import sys\n",
730 | "\n",
731 | "# conn 创建全局连接\n",
732 | "conn = mdb.connect(ip,user,pwd,dnname,port);\n",
733 | "with conn:\n",
734 | " curs = conn.cursor() # 获取 cursor\n",
735 | " # 创建数据表 Images(Id, Data)\n",
736 | " curs.execute(\"CREATE TABLE if not exists \\\n",
737 | " Images(Id INT PRIMARY KEY AUTO_INCREMENT, Data MEDIUMBLOB);\")\n",
738 | "curs.close()\n",
739 | "\n",
740 | "try:\n",
741 | " fin = open(\"/home/synway/kngines1.jpg\")\n",
742 | " img = fin.read() # 读入文件流\n",
743 | " fin.close()\n",
744 | "except IOError, e:\n",
745 | " print \"Error %d: %s\" % (e.args[0],e.args[1])\n",
746 | " sys.exit(1)\n",
747 | "try:\n",
748 | " cursor = conn.cursor()\n",
749 | " cursor.execute(\"INSERT INTO Images SET Data='%s'\" % mdb.escape_string(img))\n",
750 | " conn.commit() # 提交数据\n",
751 | " cursor.close()\n",
752 | " conn.close()\n",
753 | "except mdb.Error, e:\n",
754 | " print \"Error %d: %s\" % (e.args[0],e.args[1])\n",
755 | " sys.exit(1)"
756 | ]
757 | },
758 | {
759 | "cell_type": "markdown",
760 | "metadata": {},
761 | "source": [
762 | "### 从数据库中把图片读出来"
763 | ]
764 | },
765 | {
766 | "cell_type": "code",
767 | "execution_count": 65,
768 | "metadata": {
769 | "collapsed": true
770 | },
771 | "outputs": [],
772 | "source": [
773 | "import MySQLdb as mdb\n",
774 | "import sys\n",
775 | "try:\n",
776 | " conn = mdb.connect(ip,user,pwd,dnname,port);\n",
777 | " cursor = conn.cursor() \n",
778 | " cursor.execute(\"SELECT Data FROM Images LIMIT 1\") # 执行查询\n",
779 | " \n",
780 | " fout = open('image.png','wb') # 使用二进制进行写文件\n",
781 | " fout.write(cursor.fetchone()[0])\n",
782 | " \n",
783 | " fout.close() # 关闭输出流\n",
784 | " cursor.close() # 关闭游标\n",
785 | " conn.close() # 关闭连接\n",
786 | "except IOError, e:\n",
787 | " print \"Error %d: %s\" % (e.args[0],e.args[1])\n",
788 | " sys.exit(1)"
789 | ]
790 | },
791 | {
792 | "cell_type": "markdown",
793 | "metadata": {},
794 | "source": [
795 | "### Transaction,即事务(手动提交,自动回滚)"
796 | ]
797 | },
798 | {
799 | "cell_type": "markdown",
800 | "metadata": {},
801 | "source": [
802 | "#### 查询默认引擎信息\n",
803 | "- MyISAM\n",
804 | " - 不支持事务;\n",
805 | " - 内部的复杂机制很少,特别适应于读多写少的应用;\n",
806 | "- InnoDB\n",
807 | " - 事务型存储引擎,适合处理大量的短期事务;"
808 | ]
809 | },
810 | {
811 | "cell_type": "code",
812 | "execution_count": 74,
813 | "metadata": {},
814 | "outputs": [
815 | {
816 | "data": {
817 | "text/html": [
818 | " \n",
829 | "Hover to highlight. \n",
830 | " \n",
831 | " | \n",
832 | " Engine_name | \n",
833 | " Support | \n",
834 | " Comment | \n",
835 | " Transactions | \n",
836 | " XA | \n",
837 | " Savepoints | \n",
838 | "
\n",
839 | " \n",
840 | " 0 | \n",
841 | " MRG_MyISAM | \n",
842 | " YES | \n",
843 | " Collection of identical MyISAM tables | \n",
844 | " NO | \n",
845 | " NO | \n",
846 | " NO | \n",
847 | "
\n",
848 | " 1 | \n",
849 | " CSV | \n",
850 | " YES | \n",
851 | " CSV storage engine | \n",
852 | " NO | \n",
853 | " NO | \n",
854 | " NO | \n",
855 | "
\n",
856 | " 2 | \n",
857 | " Aria | \n",
858 | " YES | \n",
859 | " Crash-safe tables with MyISAM heritage | \n",
860 | " NO | \n",
861 | " NO | \n",
862 | " NO | \n",
863 | "
\n",
864 | " 3 | \n",
865 | " MyISAM | \n",
866 | " YES | \n",
867 | " MyISAM storage engine | \n",
868 | " NO | \n",
869 | " NO | \n",
870 | " NO | \n",
871 | "
\n",
872 | " 4 | \n",
873 | " MEMORY | \n",
874 | " YES | \n",
875 | " Hash based, stored in memory, useful for temporary tables | \n",
876 | " NO | \n",
877 | " NO | \n",
878 | " NO | \n",
879 | "
\n",
880 | " 5 | \n",
881 | " InnoDB | \n",
882 | " DEFAULT | \n",
883 | " Supports transactions, row-level locking, foreign keys and encryption for tables | \n",
884 | " YES | \n",
885 | " YES | \n",
886 | " YES | \n",
887 | "
\n",
888 | " 6 | \n",
889 | " SEQUENCE | \n",
890 | " YES | \n",
891 | " Generated tables filled with sequential values | \n",
892 | " YES | \n",
893 | " NO | \n",
894 | " YES | \n",
895 | "
\n",
896 | " 7 | \n",
897 | " PERFORMANCE_SCHEMA | \n",
898 | " YES | \n",
899 | " Performance Schema | \n",
900 | " NO | \n",
901 | " NO | \n",
902 | " NO | \n",
903 | "
\n",
904 | "
"
905 | ],
906 | "text/plain": [
907 | ""
908 | ]
909 | },
910 | "execution_count": 74,
911 | "metadata": {},
912 | "output_type": "execute_result"
913 | }
914 | ],
915 | "source": [
916 | "import MySQLdb as mdb\n",
917 | "import sys\n",
918 | "\n",
919 | "conn = mdb.connect(ip,user,pwd,dnname,port);\n",
920 | "cursor = conn.cursor()\n",
921 | "cursor.execute('show engines;')\n",
922 | "rows=cursor.fetchall()\n",
923 | "\n",
924 | "conn.close()\n",
925 | "cursor.close()\n",
926 | "\n",
927 | "engines_df=pd.DataFrame([rw for rw in rows], columns=['Engine_name','Support','Comment','Transactions','XA','Savepoints'])\n",
928 | "display_prettify(engines_df)"
929 | ]
930 | },
931 | {
932 | "cell_type": "code",
933 | "execution_count": 79,
934 | "metadata": {},
935 | "outputs": [],
936 | "source": [
937 | "import MySQLdb as mdb\n",
938 | "import sys\n",
939 | "try:\n",
940 | " conn = mdb.connect(ip,user,pwd,dnname,port);\n",
941 | " cursor = conn.cursor()\n",
942 | " \n",
943 | " # 如果某个数据库支持事务,会自动开启\n",
944 | " # MariaDB,自动开启事务(若是 MyISAM 引擎则不会)\n",
945 | " cursor.execute(\"UPDATE Writers SET Name = %s WHERE Id = %s\",(\"Leo Tolstoy\", \"1\"))\n",
946 | " cursor.execute(\"UPDATE Writers SET Name = %s WHERE Id = %s\",(\"Boris Pasternak\", \"2\"))\n",
947 | " cursor.execute(\"UPDATE Writer SET Name = %s WHERE Id = %s\",(\"Leonid Leonov\", \"3\"))\n",
948 | " \n",
949 | " # 事务的特性 1、原子性的手动提交\n",
950 | " conn.commit()\n",
951 | " cursor.close()\n",
952 | " conn.close()\n",
953 | "except mdb.Error, e:\n",
954 | " # 若出现错误,则回滚,即上面三条语句要么执行,要么不执行\n",
955 | " conn.rollback()\n",
956 | " print \"Error %d: %s\" % (e.args[0],e.args[1])"
957 | ]
958 | },
959 | {
960 | "cell_type": "markdown",
961 | "metadata": {},
962 | "source": [
963 | "注:
\n",
964 | "(1)因为不存在 writer 表(SQL 第三条语句),所以出现错误:Error 1146: Table ‘test.writer’ doesn’t exist
\n",
965 | "(2)出现错误,触发异常处理, 3 条语句的前两条会自动变成没有执行,结果不变.
\n",
966 | "(3)如果本代码放到一个 MyISAM 引擎表,前两句会执行,第三句不会;如果是 InnoDB 引\n",
967 | "擎,则都不会执行。(事务特性)"
968 | ]
969 | }
970 | ],
971 | "metadata": {
972 | "kernelspec": {
973 | "display_name": "Python 2",
974 | "language": "python",
975 | "name": "python2"
976 | },
977 | "language_info": {
978 | "codemirror_mode": {
979 | "name": "ipython",
980 | "version": 2
981 | },
982 | "file_extension": ".py",
983 | "mimetype": "text/x-python",
984 | "name": "python",
985 | "nbconvert_exporter": "python",
986 | "pygments_lexer": "ipython2",
987 | "version": "2.7.13"
988 | },
989 | "toc": {
990 | "nav_menu": {},
991 | "number_sections": true,
992 | "sideBar": true,
993 | "skip_h1_title": false,
994 | "toc_cell": false,
995 | "toc_position": {
996 | "height": "454px",
997 | "left": "0px",
998 | "right": "831.2px",
999 | "top": "107px",
1000 | "width": "212px"
1001 | },
1002 | "toc_section_display": "block",
1003 | "toc_window_display": true
1004 | }
1005 | },
1006 | "nbformat": 4,
1007 | "nbformat_minor": 2
1008 | }
1009 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Climb_Python
2 | 目前从事大数据开发工作,日常开发使用Python作为数据分析工具,在此将比较常用的知识点或难点总结、整理出来,以此分享,谢谢!
3 | * Anaconda docs
4 | * Anaconda环境安装以及搭建Python多内核环境.docx
5 | * 简单分析脚本
6 | * Anaconda2_infos_analysis.ipynb
7 | * Anaconda3_infos_analysis.ipynb
8 |
9 | * 绘制某段时间内用户增加趋势图
10 | * users_rise_up_period.ipynb
11 | * users_rise_up_period_data
12 |
13 | * 爬取糗事百科段子信息
14 | * [博文: http://blog.csdn.net/qq_24452475/article/details/79122259](http://blog.csdn.net/qq_24452475/article/details/79122259)
15 | * spider_qiushibaike_content_datas.ipynb
16 | * spider_qiushibaike_content_datas.py
17 |
18 | * 爬取公务员招考信息(2018)
19 | * [博文: http://blog.csdn.net/qq_24452475/article/details/79156758](http://blog.csdn.net/qq_24452475/article/details/79156758)
20 | * spider_huatu_civil_servant_post_metas.ipynb
21 | * post_metas.csv
22 |
23 | * 地址转换:根据地名爬取经纬度并计算geohash值
24 | * [博文: http://blog.csdn.net/qq_24452475/article/details/79183861](http://blog.csdn.net/qq_24452475/article/details/79183861)
25 | * lng_lat_2_geohash_two_way.ipynb
26 |
27 | * 爬取 PyODPS[latest] 并转换为 PDF
28 | * [博文: http://blog.csdn.net/qq_24452475/article/details/79248953](http://blog.csdn.net/qq_24452475/article/details/79248953)
29 | * Python_PyODPS_HTML_to_PDF.ipynb
30 |
31 | * Python操作 MariaDB 实例教程手册
32 | * 包含Python操作 MariaDB、执行SQL语句、获取、遍历结果集、图片入库、事务等代码示例
33 | * Python操作Mysql实例教程手册_代码.ipynb
34 |
35 | * Jupyter 常用 魔术命令(magics)总结
36 | * Built-in magic commands
37 | * Python_Common_Magic_Samples.ipynb
38 |
--------------------------------------------------------------------------------
/lng_lat_2_geohash_two_way.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## 主要实现\n",
8 | "- 批量查询位置点 经纬度等信息\n",
9 | "- 经纬度转换为 geohash块\n",
10 | "- 目标geohash 临近geohash块\n",
11 | " - 九宫格"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "### Note\n",
19 | "- 查询位置列表, 需提前提供"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 5,
25 | "metadata": {
26 | "collapsed": true
27 | },
28 | "outputs": [],
29 | "source": [
30 | "city_lis=['北京', '上海', '郑州']"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "### 获取位置地点经纬度等信息\n",
38 | "- 获取经纬度等信息\n",
39 | "- 计算 geohash"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 6,
45 | "metadata": {
46 | "collapsed": true
47 | },
48 | "outputs": [],
49 | "source": [
50 | "import pandas as pd\n",
51 | "import mzgeohash\n",
52 | "import urllib2\n",
53 | "import urllib\n",
54 | "import json"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 23,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "def get_metas_from_baidu(city):\n",
64 | "    '''Geocode a place name with the Baidu geocoder API.\n",
65 | "\n",
66 | "    Returns the service's JSON reply decoded into Python objects.\n",
67 | "    '''\n",
68 | "    metas={}\n",
69 | "    metas['key']='f247cdb592eb43ebac6ccd27f796e2d2' # NOTE(review): hard-coded API key; prefer loading it from configuration\n",
70 | "    metas['address']=city # place name to look up\n",
71 | "    url='http://api.map.baidu.com/geocoder?output=json&'+urllib.urlencode(metas)\n",
72 | "    response=urllib2.urlopen(url)\n",
73 | "    try:\n",
74 | "        return json.loads(response.read())\n",
75 | "    finally:\n",
76 | "        response.close() # release the HTTP connection even if decoding fails\n",
77 | "\n",
78 | "city_meta_lis=[] # rows of [city, latitude, longitude, geohash]\n",
79 | "for city in city_lis:\n",
80 | "    location=get_metas_from_baidu(city)['result']['location']\n",
81 | "    latitude=location['lat']\n",
82 | "    longitude=location['lng']\n",
83 | "    t_geohash=mzgeohash.encode((longitude,latitude)) # mzgeohash is given (longitude, latitude)\n",
84 | "    city_meta_lis.append([city,latitude,longitude,t_geohash])"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 24,
90 | "metadata": {},
91 | "outputs": [
92 | {
93 | "data": {
94 | "text/html": [
95 | "\n",
96 | "\n",
109 | "
\n",
110 | " \n",
111 | " \n",
112 | " | \n",
113 | " city | \n",
114 | " latitude | \n",
115 | " longitude | \n",
116 | " geohash | \n",
117 | "
\n",
118 | " \n",
119 | " \n",
120 | " \n",
121 | " 0 | \n",
122 | " 北京 | \n",
123 | " 39.929986 | \n",
124 | " 116.395645 | \n",
125 | " wx4g0th9p0gk | \n",
126 | "
\n",
127 | " \n",
128 | " 1 | \n",
129 | " 上海 | \n",
130 | " 31.249162 | \n",
131 | " 121.487899 | \n",
132 | " wtw3u88z94p0 | \n",
133 | "
\n",
134 | " \n",
135 | " 2 | \n",
136 | " 郑州 | \n",
137 | " 34.756610 | \n",
138 | " 113.649644 | \n",
139 | " ww0vdpjputsw | \n",
140 | "
\n",
141 | " \n",
142 | "
\n",
143 | "
"
144 | ],
145 | "text/plain": [
146 | " city latitude longitude geohash\n",
147 | "0 北京 39.929986 116.395645 wx4g0th9p0gk\n",
148 | "1 上海 31.249162 121.487899 wtw3u88z94p0\n",
149 | "2 郑州 34.756610 113.649644 ww0vdpjputsw"
150 | ]
151 | },
152 | "execution_count": 24,
153 | "metadata": {},
154 | "output_type": "execute_result"
155 | }
156 | ],
157 | "source": [
158 | "# 格式化输出\n",
159 | "df=pd.DataFrame(city_meta_lis, columns=['city','latitude','longitude','geohash'])\n",
160 | "df"
161 | ]
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {},
166 | "source": [
167 | "### mzgeohash\n",
168 | "- 调用方式\n",
169 | "- 实现原理"
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {},
175 | "source": [
176 | "### list all member methods"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 23,
182 | "metadata": {},
183 | "outputs": [
184 | {
185 | "data": {
186 | "text/plain": [
187 | "['__builtins__',\n",
188 | " '__doc__',\n",
189 | " '__file__',\n",
190 | " '__name__',\n",
191 | " '__package__',\n",
192 | " '__path__',\n",
193 | " '__version__',\n",
194 | " 'adjacent',\n",
195 | " 'decode',\n",
196 | " 'encode',\n",
197 | " 'geohash',\n",
198 | " 'neighbors',\n",
199 | " 'neighborsfit']"
200 | ]
201 | },
202 | "execution_count": 23,
203 | "metadata": {},
204 | "output_type": "execute_result"
205 | }
206 | ],
207 | "source": [
208 | "dir(mzgeohash)"
209 | ]
210 | },
211 | {
212 | "cell_type": "markdown",
213 | "metadata": {},
214 | "source": [
215 | "#### 经纬度 转换为 geohash"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 31,
221 | "metadata": {},
222 | "outputs": [
223 | {
224 | "data": {
225 | "text/plain": [
226 | "'xn76urwe1g9y'"
227 | ]
228 | },
229 | "execution_count": 31,
230 | "metadata": {},
231 | "output_type": "execute_result"
232 | }
233 | ],
234 | "source": [
235 | "mzgeohash.encode([139.76608408614993, 35.681382017210126])"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {},
241 | "source": [
242 | "#### geohash 转换为 经纬度"
243 | ]
244 | },
245 | {
246 | "cell_type": "code",
247 | "execution_count": 33,
248 | "metadata": {},
249 | "outputs": [
250 | {
251 | "data": {
252 | "text/plain": [
253 | "(139.76608408614993, 35.681382017210126)"
254 | ]
255 | },
256 | "execution_count": 33,
257 | "metadata": {},
258 | "output_type": "execute_result"
259 | }
260 | ],
261 | "source": [
262 | "mzgeohash.decode('xn76urwe1g9y')"
263 | ]
264 | },
265 | {
266 | "cell_type": "markdown",
267 | "metadata": {},
268 | "source": [
269 | "#### 已知 geohash ,计算某一方向的临近geohash\n",
270 | "- 参数 1\n",
271 | " - 已知 geohash 串\n",
272 | "- 参数 2\n",
273 | " - 方向"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": 35,
279 | "metadata": {},
280 | "outputs": [
281 | {
282 | "data": {
283 | "text/plain": [
284 | "'xn76urwe1g9z'"
285 | ]
286 | },
287 | "execution_count": 35,
288 | "metadata": {},
289 | "output_type": "execute_result"
290 | }
291 | ],
292 | "source": [
293 | "mzgeohash.adjacent('xn76urwe1g9y','n') # "
294 | ]
295 | },
296 | {
297 | "cell_type": "markdown",
298 | "metadata": {},
299 | "source": [
300 | "#### 已知 geohash ,临近geohash\n",
301 | "- 九宫格,即周围 8 个geohash"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 34,
307 | "metadata": {},
308 | "outputs": [
309 | {
310 | "data": {
311 | "text/plain": [
312 | "{'c': 'xn76urwe1g9y',\n",
313 | " 'e': 'xn76urwe1gdn',\n",
314 | " 'n': 'xn76urwe1g9z',\n",
315 | " 'ne': 'xn76urwe1gdp',\n",
316 | " 'nw': 'xn76urwe1g9x',\n",
317 | " 's': 'xn76urwe1g9v',\n",
318 | " 'se': 'xn76urwe1gdj',\n",
319 | " 'sw': 'xn76urwe1g9t',\n",
320 | " 'w': 'xn76urwe1g9w'}"
321 | ]
322 | },
323 | "execution_count": 34,
324 | "metadata": {},
325 | "output_type": "execute_result"
326 | }
327 | ],
328 | "source": [
329 | "mzgeohash.neighbors('xn76urwe1g9y')"
330 | ]
331 | },
332 | {
333 | "cell_type": "markdown",
334 | "metadata": {},
335 | "source": [
336 | "#### 已知 某位置点经纬度和周围经纬度点集,计算同属 geohash"
337 | ]
338 | },
339 | {
340 | "cell_type": "code",
341 | "execution_count": 37,
342 | "metadata": {},
343 | "outputs": [
344 | {
345 | "data": {
346 | "text/plain": [
347 | "'9q9'"
348 | ]
349 | },
350 | "execution_count": 37,
351 | "metadata": {},
352 | "output_type": "execute_result"
353 | }
354 | ],
355 | "source": [
356 | "test_centroid = [-122.18472385000001, 37.7881345]\n",
357 | "test_points = [\n",
358 | "(-122.2992715, 37.9030588),\n",
359 | "(-122.396742, 37.792976),\n",
360 | "(-122.4474142, 37.72198087),\n",
361 | "(-121.9764, 37.557355),\n",
362 | "(-122.029095, 37.973737),\n",
363 | "(-122.224274, 37.774963),\n",
364 | "(-122.271604, 37.803664),\n",
365 | "(-122.126871, 37.697185),\n",
366 | "(-122.087967, 37.670399),\n",
367 | "(-122.123801, 37.893394),\n",
368 | "(-122.269029, 37.80787),\n",
369 | "(-122.265609, 37.797484),\n",
370 | "(-122.267227, 37.828415),\n",
371 | "(-122.067423, 37.905628),\n",
372 | "(-122.267227, 37.828415),\n",
373 | "(-122.38666, 37.599787),\n",
374 | "(-122.075567, 37.690754),\n",
375 | "(-122.401407, 37.789256),\n",
376 | "(-122.283451, 37.87404),\n",
377 | "(-122.269029, 37.80787),\n",
378 | "(-122.1837911, 37.87836087),\n",
379 | "(-122.419694, 37.765062),\n",
380 | "(-122.2945822, 37.80467476),\n",
381 | "(-122.21244024, 37.71297174),\n",
382 | "(-121.945154, 38.018914),\n",
383 | "(-122.466233, 37.684638),\n",
384 | "(-122.056013, 37.928403),\n",
385 | "(-122.406857, 37.784991),\n",
386 | "(-122.418466, 37.752254),\n",
387 | "(-122.26978, 37.853024),\n",
388 | "(-122.251793, 37.844601),\n",
389 | "(-121.928099, 37.699759),\n",
390 | "(-122.416038, 37.637753),\n",
391 | "(-122.1613112, 37.72261921),\n",
392 | "(-122.0575506, 37.63479954),\n",
393 | "(-122.392612, 37.616035),\n",
394 | "(-122.413756, 37.779528),\n",
395 | "(-122.353165, 37.936887),\n",
396 | "(-122.197273, 37.754006),\n",
397 | "(-122.017867, 37.591208),\n",
398 | "(-122.024597, 38.003275),\n",
399 | "(-122.4690807, 37.70612055),\n",
400 | "(-122.268045, 37.869867),\n",
401 | "(-122.444116, 37.664174),\n",
402 | "(-121.900367, 37.701695),\n",
403 | "(-122.317269, 37.925655),\n",
404 | "(-122.434092, 37.732921)\n",
405 | "]\n",
406 | "\n",
407 | "# expect = '9q9'\n",
408 | "mzgeohash.neighborsfit(test_centroid, test_points)"
409 | ]
410 | },
411 | {
412 | "cell_type": "markdown",
413 | "metadata": {},
414 | "source": [
415 | "### 使用unittest进行 mzgeohash 模块测试\n",
416 | "- 通过 TestSuite 执行测试用例\n",
417 | "- TestCase:所有测试用例的基本类,给定测试方法的名称,返回测试用例实例;\n",
418 | "- TestSuite:组织测试用例的实例,支持测试用例的添加和删除,最终将传递给 testRunner进行测试执行;\n",
419 | "- TextTestRunner:进行测试用例执行的实例,其中Text的意思是以文本形式显示测试结果。\n",
420 | " - 测试结果保存在 TextTestResult 实例中,包括运行多少测试用例,成功多少,失败多少等信息;"
421 | ]
422 | },
423 | {
424 | "cell_type": "code",
425 | "execution_count": 6,
426 | "metadata": {
427 | "collapsed": true
428 | },
429 | "outputs": [],
430 | "source": [
431 | "import unittest\n",
432 | "from mzgeohash.test_geohash import Test_encode_decode\n",
433 | "from mzgeohash.test_geohash import Test_adjacent\n",
434 | "from mzgeohash.test_geohash import Test_neighbors\n",
435 | "from mzgeohash.test_geohash import Test_neighborsfit"
436 | ]
437 | },
438 | {
439 | "cell_type": "code",
440 | "execution_count": 35,
441 | "metadata": {},
442 | "outputs": [
443 | {
444 | "name": "stderr",
445 | "output_type": "stream",
446 | "text": [
447 | "............\n",
448 | "----------------------------------------------------------------------\n",
449 | "Ran 12 tests in 0.053s\n",
450 | "\n",
451 | "OK\n"
452 | ]
453 | }
454 | ],
455 | "source": [
456 | "suite=unittest.TestSuite()\n",
457 | "suite.addTest(Test_encode_decode('test_decode'))\n",
458 | "suite.addTest(Test_encode_decode('test_encode'))\n",
459 | "suite.addTest(Test_encode_decode('test_roundtrip'))\n",
460 | "suite.addTest(Test_neighbors('test_neighbors'))\n",
461 | "suite.addTest(Test_adjacent('test_adjacent'))\n",
462 | "suite.addTest(Test_neighborsfit('test_neighborsfit'))\n",
463 | "\n",
464 | "runner=unittest.TextTestRunner()\n",
465 | "runner.run(suite);"
466 | ]
467 | }
468 | ],
469 | "metadata": {
470 | "kernelspec": {
471 | "display_name": "Python 2",
472 | "language": "python",
473 | "name": "python2"
474 | },
475 | "language_info": {
476 | "codemirror_mode": {
477 | "name": "ipython",
478 | "version": 2
479 | },
480 | "file_extension": ".py",
481 | "mimetype": "text/x-python",
482 | "name": "python",
483 | "nbconvert_exporter": "python",
484 | "pygments_lexer": "ipython2",
485 | "version": "2.7.13"
486 | },
487 | "toc": {
488 | "nav_menu": {},
489 | "number_sections": true,
490 | "sideBar": true,
491 | "skip_h1_title": false,
492 | "toc_cell": false,
493 | "toc_position": {
494 | "height": "595px",
495 | "left": "0px",
496 | "right": "1092px",
497 | "top": "107px",
498 | "width": "212px"
499 | },
500 | "toc_section_display": "block",
501 | "toc_window_display": true
502 | }
503 | },
504 | "nbformat": 4,
505 | "nbformat_minor": 2
506 | }
507 |
--------------------------------------------------------------------------------
/post_metas.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kdotm/Python_Series/c686d48797b7266934958b09247aa3ffe783c026/post_metas.csv
--------------------------------------------------------------------------------
/spider_huatu_civil_servant_post_metas.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "deletable": true,
7 | "editable": true
8 | },
9 | "source": [
10 | "### 爬取浙江公务员职位信息"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {
17 | "collapsed": true,
18 | "deletable": true,
19 | "editable": true
20 | },
21 | "outputs": [],
22 | "source": [
23 | "import urllib.request\n",
24 | "import pandas as pd\n",
25 | "\n",
26 | "from bs4 import BeautifulSoup"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "### 1 爬取页面"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 3,
39 | "metadata": {
40 | "collapsed": false,
41 | "deletable": true,
42 | "editable": true
43 | },
44 | "outputs": [],
45 | "source": [
46 | "api = 'http://zw.huatu.com'\n",
47 | "base='/2018/'\n",
48 | "url=api+base\n",
49 | "\n",
50 | "header={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3236.0 Safari/537.36'}\n",
51 | "request = urllib.request.Request(url,headers=header)\n",
52 | "response = urllib.request.urlopen(request).read()\n",
53 | "\n",
54 | "content=BeautifulSoup(response, 'lxml')"
55 | ]
56 | },
57 | {
58 | "cell_type": "markdown",
59 | "metadata": {},
60 | "source": [
61 | "### 2 内容解析\n",
62 | "- 省/市 : 对应 URL [32个]"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 5,
68 | "metadata": {
69 | "collapsed": false,
70 | "deletable": true,
71 | "editable": true
72 | },
73 | "outputs": [],
74 | "source": [
75 | "areas_txt=content.find_all('p', attrs={'id':'ydiqu'})[0]\n",
76 | "area_metas_lis=[]\n",
77 | "counter=0\n",
78 | "\n",
79 | "for i in areas_txt.find_all('a', attrs={'target':'_blank'}):\n",
80 | " url=api+i.get_attribute_list('href')[0][2:]\n",
81 | " area=i.get_text()\n",
82 | " counter+=1\n",
83 | " area_metas_lis.append([counter,area,url])"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 12,
89 | "metadata": {
90 | "collapsed": false,
91 | "deletable": true,
92 | "editable": true
93 | },
94 | "outputs": [
95 | {
96 | "data": {
97 | "text/html": [
98 | "\n",
99 | "\n",
112 | "
\n",
113 | " \n",
114 | " \n",
115 | " | \n",
116 | " no | \n",
117 | " area | \n",
118 | " url | \n",
119 | "
\n",
120 | " \n",
121 | " \n",
122 | " \n",
123 | " 0 | \n",
124 | " 1 | \n",
125 | " 安徽 | \n",
126 | " http://zw.huatu.com/2018/buweisearch/1.html | \n",
127 | "
\n",
128 | " \n",
129 | " 1 | \n",
130 | " 2 | \n",
131 | " 北京 | \n",
132 | " http://zw.huatu.com/2018/buweisearch/2.html | \n",
133 | "
\n",
134 | " \n",
135 | " 2 | \n",
136 | " 3 | \n",
137 | " 福建 | \n",
138 | " http://zw.huatu.com/2018/buweisearch/3.html | \n",
139 | "
\n",
140 | " \n",
141 | " 3 | \n",
142 | " 4 | \n",
143 | " 甘肃 | \n",
144 | " http://zw.huatu.com/2018/buweisearch/4.html | \n",
145 | "
\n",
146 | " \n",
147 | " 4 | \n",
148 | " 5 | \n",
149 | " 广东 | \n",
150 | " http://zw.huatu.com/2018/buweisearch/5.html | \n",
151 | "
\n",
152 | " \n",
153 | "
\n",
154 | "
"
155 | ],
156 | "text/plain": [
157 | " no area url\n",
158 | "0 1 安徽 http://zw.huatu.com/2018/buweisearch/1.html\n",
159 | "1 2 北京 http://zw.huatu.com/2018/buweisearch/2.html\n",
160 | "2 3 福建 http://zw.huatu.com/2018/buweisearch/3.html\n",
161 | "3 4 甘肃 http://zw.huatu.com/2018/buweisearch/4.html\n",
162 | "4 5 广东 http://zw.huatu.com/2018/buweisearch/5.html"
163 | ]
164 | },
165 | "execution_count": 12,
166 | "metadata": {},
167 | "output_type": "execute_result"
168 | }
169 | ],
170 | "source": [
171 | "df=pd.DataFrame(area_metas_lis, columns=['no','area', 'url'])\n",
172 | "df.head()"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {
178 | "deletable": true,
179 | "editable": true
180 | },
181 | "source": [
182 | "### 3 以浙江为例,通过省份查询招聘单位及具体信息访问途径(URL)"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 8,
188 | "metadata": {
189 | "collapsed": false,
190 | "deletable": true,
191 | "editable": true
192 | },
193 | "outputs": [
194 | {
195 | "data": {
196 | "text/html": [
197 | "\n",
198 | "\n",
211 | "
\n",
212 | " \n",
213 | " \n",
214 | " | \n",
215 | " no | \n",
216 | " area | \n",
217 | " url | \n",
218 | "
\n",
219 | " \n",
220 | " \n",
221 | " \n",
222 | " 30 | \n",
223 | " 31 | \n",
224 | " 浙江 | \n",
225 | " http://zw.huatu.com/2018/buweisearch/31.html | \n",
226 | "
\n",
227 | " \n",
228 | "
\n",
229 | "
"
230 | ],
231 | "text/plain": [
232 | " no area url\n",
233 | "30 31 浙江 http://zw.huatu.com/2018/buweisearch/31.html"
234 | ]
235 | },
236 | "execution_count": 8,
237 | "metadata": {},
238 | "output_type": "execute_result"
239 | }
240 | ],
241 | "source": [
242 | "query_area='浙江'\n",
243 | "df[df.area==query_area]"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": 9,
249 | "metadata": {
250 | "collapsed": false,
251 | "deletable": true,
252 | "editable": true
253 | },
254 | "outputs": [],
255 | "source": [
256 | "url2=df[df.area==query_area].url.values[0]\n",
257 | "\n",
258 | "request2 = urllib.request.Request(url2,headers=header)\n",
259 | "response2 = urllib.request.urlopen(request2).read()\n",
260 | "content2=BeautifulSoup(response2, 'lxml')\n",
261 | "\n",
262 | "tmp2=content2.find_all('table', attrs={'cellspacing':'0','width':'100%'})[0]\n",
263 | "\n",
264 | "unit_lis=[] # 存储职位列表\n",
265 | "for i in tmp2.find_all('a'):\n",
266 | " t_url=api+'/2018'+i.get_attribute_list('href')[0][2:]\n",
267 | " unit_lis.append([i.get_text().strip(),t_url]) # strip 去除空格\n",
268 | "\n",
269 | "unit_df=pd.DataFrame(unit_lis,columns=['unit_name','url'])"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 13,
275 | "metadata": {
276 | "collapsed": false,
277 | "deletable": true,
278 | "editable": true,
279 | "scrolled": false
280 | },
281 | "outputs": [
282 | {
283 | "data": {
284 | "text/html": [
285 | "\n",
286 | "\n",
299 | "
\n",
300 | " \n",
301 | " \n",
302 | " | \n",
303 | " unit_name | \n",
304 | " url | \n",
305 | "
\n",
306 | " \n",
307 | " \n",
308 | " \n",
309 | " 0 | \n",
310 | " 国家物资储备局浙江办事处 | \n",
311 | " http://zw.huatu.com/2018/buwei2018/1763.html | \n",
312 | "
\n",
313 | " \n",
314 | " 1 | \n",
315 | " 浙江海事局 | \n",
316 | " http://zw.huatu.com/2018/buwei2018/2009.html | \n",
317 | "
\n",
318 | " \n",
319 | " 2 | \n",
320 | " 杭州海关 | \n",
321 | " http://zw.huatu.com/2018/buwei2018/2105.html | \n",
322 | "
\n",
323 | " \n",
324 | " 3 | \n",
325 | " 宁波海关 | \n",
326 | " http://zw.huatu.com/2018/buwei2018/2107.html | \n",
327 | "
\n",
328 | " \n",
329 | " 4 | \n",
330 | " 浙江省国家税务局 | \n",
331 | " http://zw.huatu.com/2018/buwei2018/2175.html | \n",
332 | "
\n",
333 | " \n",
334 | "
\n",
335 | "
"
336 | ],
337 | "text/plain": [
338 | " unit_name url\n",
339 | "0 国家物资储备局浙江办事处 http://zw.huatu.com/2018/buwei2018/1763.html\n",
340 | "1 浙江海事局 http://zw.huatu.com/2018/buwei2018/2009.html\n",
341 | "2 杭州海关 http://zw.huatu.com/2018/buwei2018/2105.html\n",
342 | "3 宁波海关 http://zw.huatu.com/2018/buwei2018/2107.html\n",
343 | "4 浙江省国家税务局 http://zw.huatu.com/2018/buwei2018/2175.html"
344 | ]
345 | },
346 | "execution_count": 13,
347 | "metadata": {},
348 | "output_type": "execute_result"
349 | }
350 | ],
351 | "source": [
352 | "unit_df.head()"
353 | ]
354 | },
355 | {
356 | "cell_type": "markdown",
357 | "metadata": {
358 | "deletable": true,
359 | "editable": true
360 | },
361 | "source": [
362 | "### 4 查询所有部门的招聘岗位等详细信息"
363 | ]
364 | },
365 | {
366 | "cell_type": "code",
367 | "execution_count": 10,
368 | "metadata": {
369 | "collapsed": true,
370 | "deletable": true,
371 | "editable": true
372 | },
373 | "outputs": [],
374 | "source": [
375 | "def get_post_metas(url, row_width=10):\n",
376 | "    '''Download a unit's post listing and return its table cells grouped into rows.\n",
377 | "\n",
378 | "    url :\n",
379 | "        address of the page to fetch\n",
380 | "    row_width :\n",
381 | "        number of consecutive <td> cells forming one row (10 by default)\n",
382 | "\n",
383 | "    Returns a list of rows, each a list of cell texts; the last row may be shorter.\n",
384 | "    Relies on the notebook-level header dict for the request headers.\n",
385 | "    '''\n",
386 | "    page_request = urllib.request.Request(url,headers=header)\n",
387 | "    page_html = urllib.request.urlopen(page_request).read()\n",
388 | "    page_soup = BeautifulSoup(page_html, 'lxml')\n",
389 | "\n",
390 | "    td_lis=[cell.get_text() for cell in page_soup.find_all('td')]\n",
391 | "    # Slice the flat cell list one row at a time; stepping the range replaces the index-pair pass.\n",
392 | "    return [td_lis[start:start+row_width] for start in range(0, len(td_lis), row_width)]\n",
393 | "\n",
394 | "post_metas_lis=[]\n",
395 | "\n",
396 | "# Collect the post rows of every hiring unit; unit_url avoids rebinding the global url.\n",
397 | "for unit_name,unit_url in unit_lis:\n",
398 | "    post_metas_lis+=get_post_metas(unit_url)\n",
399 | "\n",
400 | "# Turn the collected rows into a DataFrame.\n",
401 | "\n",
402 | "# Columns, in order: department, hiring bureau, post title, required major,\n",
403 | "# openings, applicants, past cut-off scores, past competition ratio,\n",
404 | "# admission odds, compare.\n",
405 | "post_metas_df=pd.DataFrame(post_metas_lis, columns=['unit_name','employee_unit','post_name','professional','person_num','person_num2','view','view2','detail','compare'])"
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": 22,
411 | "metadata": {
412 | "collapsed": false
413 | },
414 | "outputs": [
415 | {
416 | "data": {
417 | "text/html": [
418 | "\n",
419 | "\n",
432 | "
\n",
433 | " \n",
434 | " \n",
435 | " | \n",
436 | " unit_name | \n",
437 | " employee_unit | \n",
438 | " post_name | \n",
439 | " professional | \n",
440 | " person_num | \n",
441 | " person_num2 | \n",
442 | " view | \n",
443 | " view2 | \n",
444 | " detail | \n",
445 | " compare | \n",
446 | "
\n",
447 | " \n",
448 | " \n",
449 | " \n",
450 | " 100 | \n",
451 | " 浙江省国家税务局 | \n",
452 | " 杭州市上城区国家税务局 | \n",
453 | " 纳税服务科科员(一) | \n",
454 | " 公共管理类 | \n",
455 | " 1 | \n",
456 | " 0 | \n",
457 | " 查看 | \n",
458 | " 查看 | \n",
459 | " 详情 | \n",
460 | " 对比 | \n",
461 | "
\n",
462 | " \n",
463 | " 101 | \n",
464 | " 浙江省国家税务局 | \n",
465 | " 杭州市上城区国家税务局 | \n",
466 | " 纳税服务科科员(二) | \n",
467 | " 工商管理类 | \n",
468 | " 1 | \n",
469 | " 0 | \n",
470 | " 查看 | \n",
471 | " 查看 | \n",
472 | " 详情 | \n",
473 | " 对比 | \n",
474 | "
\n",
475 | " \n",
476 | " 102 | \n",
477 | " 浙江省国家税务局 | \n",
478 | " 杭州市上城区国家税务局 | \n",
479 | " 纳税服务科科员(三) | \n",
480 | " 财政学类 | \n",
481 | " 1 | \n",
482 | " 0 | \n",
483 | " 查看 | \n",
484 | " 查看 | \n",
485 | " 详情 | \n",
486 | " 对比 | \n",
487 | "
\n",
488 | " \n",
489 | "
\n",
490 | "
"
491 | ],
492 | "text/plain": [
493 | " unit_name employee_unit post_name professional person_num person_num2 \\\n",
494 | "100 浙江省国家税务局 杭州市上城区国家税务局 纳税服务科科员(一) 公共管理类 1 0 \n",
495 | "101 浙江省国家税务局 杭州市上城区国家税务局 纳税服务科科员(二) 工商管理类 1 0 \n",
496 | "102 浙江省国家税务局 杭州市上城区国家税务局 纳税服务科科员(三) 财政学类 1 0 \n",
497 | "\n",
498 | " view view2 detail compare \n",
499 | "100 查看 查看 详情 对比 \n",
500 | "101 查看 查看 详情 对比 \n",
501 | "102 查看 查看 详情 对比 "
502 | ]
503 | },
504 | "execution_count": 22,
505 | "metadata": {},
506 | "output_type": "execute_result"
507 | }
508 | ],
509 | "source": [
510 | "post_metas_df[post_metas_df.unit_name=='浙江省国家税务局'].head(3)"
511 | ]
512 | },
513 | {
514 | "cell_type": "code",
515 | "execution_count": 249,
516 | "metadata": {
517 | "collapsed": true,
518 | "deletable": true,
519 | "editable": true
520 | },
521 | "outputs": [],
522 | "source": [
523 | "# 保存数据信息\n",
524 | "\n",
525 | "post_metas_df.to_csv('./post_metas.csv')"
526 | ]
527 | }
528 | ],
529 | "metadata": {
530 | "kernelspec": {
531 | "display_name": "Python 3",
532 | "language": "python",
533 | "name": "python3"
534 | },
535 | "language_info": {
536 | "codemirror_mode": {
537 | "name": "ipython",
538 | "version": 3
539 | },
540 | "file_extension": ".py",
541 | "mimetype": "text/x-python",
542 | "name": "python",
543 | "nbconvert_exporter": "python",
544 | "pygments_lexer": "ipython3",
545 | "version": "3.6.3"
546 | }
547 | },
548 | "nbformat": 4,
549 | "nbformat_minor": 2
550 | }
551 |
--------------------------------------------------------------------------------
/spider_qiushibaike_content_datas.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "### 爬取 糗事百科段子 内容数据\n",
8 | "---\n",
9 | "- author:\n",
10 | " - kngines\n",
11 | "- date:\n",
12 | " - 20180118"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 1,
18 | "metadata": {
19 | "collapsed": false
20 | },
21 | "outputs": [],
22 | "source": [
23 | "import re\n",
24 | "import urllib.request\n",
25 | "import bs4\n",
26 | "import pandas as pd\n",
27 | "from bs4 import BeautifulSoup\n",
28 | "\n",
29 | "page = 1 # page number to scrape\n",
30 | "\n",
31 | "url = 'http://www.qiushibaike.com/hot/page/' + str(page)\n",
32 | "header={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3236.0 Safari/537.36'}\n",
33 | "request = urllib.request.Request(url,headers=header)\n",
34 | "response = urllib.request.urlopen(request).read()\n",
35 | "\n",
36 | "content=BeautifulSoup(response, 'lxml')\n",
37 | "\n",
38 | "# 1. Joke text: the span inside every div of class content.\n",
39 | "divs = content.find_all('div', class_='content')\n",
40 | "content_lis=[div.span.get_text() for div in divs]\n",
41 | "\n",
42 | "# 2. Author nicknames: the h2 headings inside the first div of class col1.\n",
43 | "main_column=content.find_all('div', class_='col1')[0]\n",
44 | "nick_name_lis=[nick_name.get_text() for nick_name in main_column.find_all('h2')]\n",
45 | "\n",
46 | "# 3. Funny and comment counts: the i tags of class number alternate funny, comment, funny, ...\n",
47 | "number_lis=[cofu.get_text() for cofu in content.find_all('i', class_='number')]\n",
48 | "funny_lis=number_lis[0::2]   # 1st, 3rd, 5th ... value\n",
49 | "comment_lis=number_lis[1::2] # 2nd, 4th, 6th ... value"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "### 1 当前页码信息\n",
73 | "- 第 1 页\n",
74 | " - 通过简单调整,可以实现批量爬取数据"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 2,
80 | "metadata": {
81 | "collapsed": false
82 | },
83 | "outputs": [
84 | {
85 | "name": "stdout",
86 | "output_type": "stream",
87 | "text": [
88 | "\u001b[1;31m[24H] current page:\u001b[0m \u001b[0;30;43m1\u001b[0m\n"
89 | ]
90 | }
91 | ],
92 | "source": [
93 | "current_page=content.find_all('ul',class_='pagination')[0].find_all('span',class_='current')[0].get_text()\n",
94 | "print ('\\033[1;31m[24H] current page:\\033[0m \\033[0;30;43m%s\\033[0m' % (current_page.replace('\\n','')))"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "metadata": {},
100 | "source": [
101 | "### 2 转换[DataFrame]并显示"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 3,
107 | "metadata": {
108 | "collapsed": false
109 | },
110 | "outputs": [],
111 | "source": [
112 | "tdf=pd.DataFrame([nick_name_lis,content_lis,funny_lis,comment_lis])\n",
113 | "df=tdf.T\n",
114 | "df.columns=['nick_name','content','funny_cnt','comment_cnt']"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": 4,
120 | "metadata": {
121 | "collapsed": false
122 | },
123 | "outputs": [
124 | {
125 | "data": {
126 | "text/html": [
127 | "\n",
128 | "\n",
141 | "
\n",
142 | " \n",
143 | " \n",
144 | " | \n",
145 | " nick_name | \n",
146 | " content | \n",
147 | " funny_cnt | \n",
148 | " comment_cnt | \n",
149 | "
\n",
150 | " \n",
151 | " \n",
152 | " \n",
153 | " 0 | \n",
154 | " \\n我家熊孩子不熊\\n | \n",
155 | " \\n\\n\\n九月份闺蜜来我家吃晚饭。她在用减肥瘦瘦包,这个3600块钱的瘦瘦包要求三个月内晚... | \n",
156 | " 758 | \n",
157 | " 56 | \n",
158 | "
\n",
159 | " \n",
160 | " 1 | \n",
161 | " \\n道士下山会女神\\n | \n",
162 | " \\n\\n\\n银行办点事,小侄跟着去玩,见大堂圣诞树上挂满红包,小孩好奇心重,便挨个去拆开看,... | \n",
163 | " 807 | \n",
164 | " 11 | \n",
165 | "
\n",
166 | " \n",
167 | " 2 | \n",
168 | " \\n阿丹阿乐\\n | \n",
169 | " \\n\\n\\n今天我去医院,遇到一位女士,机缘巧合比较聊得来,她说她是来做人工授精的。刚好我是... | \n",
170 | " 1457 | \n",
171 | " 28 | \n",
172 | "
\n",
173 | " \n",
174 | "
\n",
175 | "
"
176 | ],
177 | "text/plain": [
178 | " nick_name content funny_cnt \\\n",
179 | "0 \\n我家熊孩子不熊\\n \\n\\n\\n九月份闺蜜来我家吃晚饭。她在用减肥瘦瘦包,这个3600块钱的瘦瘦包要求三个月内晚... 758 \n",
180 | "1 \\n道士下山会女神\\n \\n\\n\\n银行办点事,小侄跟着去玩,见大堂圣诞树上挂满红包,小孩好奇心重,便挨个去拆开看,... 807 \n",
181 | "2 \\n阿丹阿乐\\n \\n\\n\\n今天我去医院,遇到一位女士,机缘巧合比较聊得来,她说她是来做人工授精的。刚好我是... 1457 \n",
182 | "\n",
183 | " comment_cnt \n",
184 | "0 56 \n",
185 | "1 11 \n",
186 | "2 28 "
187 | ]
188 | },
189 | "execution_count": 4,
190 | "metadata": {},
191 | "output_type": "execute_result"
192 | }
193 | ],
194 | "source": [
195 | "df.head(3)"
196 | ]
197 | },
198 | {
199 | "cell_type": "markdown",
200 | "metadata": {},
201 | "source": [
202 | "### 3 格式化显示数据"
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": 5,
208 | "metadata": {
209 | "collapsed": false
210 | },
211 | "outputs": [],
212 | "source": [
213 | "pd.set_option('display.width',200)\n",
214 | "pd.set_option('display.max_columns',20)\n",
215 | "pd.set_option('display.max_rows',50)\n",
216 | "pd.set_option('display.max_colwidth',200)"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 6,
222 | "metadata": {
223 | "collapsed": false,
224 | "scrolled": true
225 | },
226 | "outputs": [
227 | {
228 | "data": {
229 | "text/html": [
230 | "\n",
231 | "\n",
244 | "
\n",
245 | " \n",
246 | " \n",
247 | " | \n",
248 | " nick_name | \n",
249 | " content | \n",
250 | " funny_cnt | \n",
251 | " comment_cnt | \n",
252 | "
\n",
253 | " \n",
254 | " \n",
255 | " \n",
256 | " 0 | \n",
257 | " \\n我家熊孩子不熊\\n | \n",
258 | " \\n\\n\\n九月份闺蜜来我家吃晚饭。她在用减肥瘦瘦包,这个3600块钱的瘦瘦包要求三个月内晚餐不能吃米饭,不能喝水,不能吃任何荤菜和水果……我说,如果是这样子的话,我不用任何药可以瘦成闪电。闺蜜:不不不,我不行,必须靠药物。三个月过去了,现在的她比原来还胖了10斤,她说是自己没忍住吃了几次晚餐……这货的智商是不是都被脂肪吞噬了![捂脸][捂脸][捂脸]\\n\\n | \n",
259 | " 758 | \n",
260 | " 56 | \n",
261 | "
\n",
262 | " \n",
263 | " 1 | \n",
264 | " \\n道士下山会女神\\n | \n",
265 | " \\n\\n\\n银行办点事,小侄跟着去玩,见大堂圣诞树上挂满红包,小孩好奇心重,便挨个去拆开看,结果一无所获,小孩失望的念叨:还大银行,都特么骗人。于是又去摘了个小布娃娃,大堂美女上去阻止,小屁孩居然说:亲我一下我就放回去,来都来了总不能让我空手回去吧!!\\n\\n | \n",
266 | " 807 | \n",
267 | " 11 | \n",
268 | "
\n",
269 | " \n",
270 | " 2 | \n",
271 | " \\n阿丹阿乐\\n | \n",
272 | " \\n\\n\\n今天我去医院,遇到一位女士,机缘巧合比较聊得来,她说她是来做人工授精的。刚好我是去捐精的,于是我们决定省略中间环节,直接离开医院到她住的地方来了。情况就是这样,我真的没有骗人啊警官!\\n\\n | \n",
273 | " 1457 | \n",
274 | " 28 | \n",
275 | "
\n",
276 | " \n",
277 | "
\n",
278 | "
"
279 | ],
280 | "text/plain": [
281 | " nick_name content \\\n",
282 | "0 \\n我家熊孩子不熊\\n \\n\\n\\n九月份闺蜜来我家吃晚饭。她在用减肥瘦瘦包,这个3600块钱的瘦瘦包要求三个月内晚餐不能吃米饭,不能喝水,不能吃任何荤菜和水果……我说,如果是这样子的话,我不用任何药可以瘦成闪电。闺蜜:不不不,我不行,必须靠药物。三个月过去了,现在的她比原来还胖了10斤,她说是自己没忍住吃了几次晚餐……这货的智商是不是都被脂肪吞噬了![捂脸][捂脸][捂脸]\\n\\n \n",
283 | "1 \\n道士下山会女神\\n \\n\\n\\n银行办点事,小侄跟着去玩,见大堂圣诞树上挂满红包,小孩好奇心重,便挨个去拆开看,结果一无所获,小孩失望的念叨:还大银行,都特么骗人。于是又去摘了个小布娃娃,大堂美女上去阻止,小屁孩居然说:亲我一下我就放回去,来都来了总不能让我空手回去吧!!\\n\\n \n",
284 | "2 \\n阿丹阿乐\\n \\n\\n\\n今天我去医院,遇到一位女士,机缘巧合比较聊得来,她说她是来做人工授精的。刚好我是去捐精的,于是我们决定省略中间环节,直接离开医院到她住的地方来了。情况就是这样,我真的没有骗人啊警官!\\n\\n \n",
285 | "\n",
286 | " funny_cnt comment_cnt \n",
287 | "0 758 56 \n",
288 | "1 807 11 \n",
289 | "2 1457 28 "
290 | ]
291 | },
292 | "execution_count": 6,
293 | "metadata": {},
294 | "output_type": "execute_result"
295 | }
296 | ],
297 | "source": [
298 | "df.head(3)"
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": null,
304 | "metadata": {
305 | "collapsed": true
306 | },
307 | "outputs": [],
308 | "source": [
309 | "import re\n",
310 | "import urllib\n",
311 | "import bs4\n",
312 | "import pandas as pd\n",
313 | "from bs4 import BeautifulSoup\n",
314 | "\n",
315 | "page = 1 # 第 1 页\n",
316 | "\n",
317 | "url = 'http://www.qiushibaike.com/hot/page/' + str(page)\n",
318 | "header={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3236.0 Safari/537.36'}\n",
319 | "request = urllib.request.Request(url,headers=header)\n",
320 | "response = urllib.request.urlopen(request).read()\n",
321 | "\n",
322 | "content=BeautifulSoup(response, 'lxml')\n",
323 | "\n",
324 | "'''\n",
325 | "1 获取段子内容\n",
326 | "'''\n",
327 | "divs = content.find_all('div', class_='content')\n",
328 | "content_lis=[]\n",
329 | "for div in divs:\n",
330 | " content_lis.append(div.span.get_text())\n",
331 | "\n",
332 | "'''\n",
333 | "2 获取用户昵称\n",
334 | "''' \n",
335 | "tmp=content.find_all('div', class_='col1')[0]\n",
336 | "nick_name_lis=[]\n",
337 | "for nick_name in tmp.find_all('h2'):\n",
338 | " nick_name_lis.append(nick_name.get_text())\n",
339 | "\n",
340 | "'''\n",
341 | "3 获取好笑、评论数量\n",
342 | "'''\n",
343 | "counter=0\n",
344 | "funny_lis=[]\n",
345 | "comment_lis=[]\n",
346 | "for cofu in content.find_all('i', class_='number'):\n",
347 | " counter+=1\n",
348 | " if counter%2!=0:\n",
349 | " funny_lis.append(cofu.get_text())\n",
350 | " else :\n",
351 | " comment_lis.append(cofu.get_text()) \n",
352 | "\n",
353 | "'''\n",
354 | "4 打印当前页\n",
355 | "'''\n",
356 | "current_page=content.find_all('ul',class_='pagination')[0].find_all('span',class_='current')[0].get_text()\n",
357 | "print ('\\033[1;31m[24H] current page:\\033[0m \\033[0;30;43m%s\\033[0m' % (current_page.replace('\\n','')))\n",
358 | "\n",
359 | "'''\n",
360 | "5 转换为 DF并显示\n",
361 | "'''\n",
362 | "tdf=pd.DataFrame([nick_name_lis,content_lis,funny_lis,comment_lis])\n",
363 | "df=tdf.T\n",
364 | "df.columns=['nick_name','content','funny_cnt','comment_cnt']\n",
365 | "df.head(3)"
366 | ]
367 | }
368 | ],
369 | "metadata": {
370 | "kernelspec": {
371 | "display_name": "Python 3",
372 | "language": "python",
373 | "name": "python3"
374 | },
375 | "language_info": {
376 | "codemirror_mode": {
377 | "name": "ipython",
378 | "version": 3
379 | },
380 | "file_extension": ".py",
381 | "mimetype": "text/x-python",
382 | "name": "python",
383 | "nbconvert_exporter": "python",
384 | "pygments_lexer": "ipython3",
385 | "version": "3.6.3"
386 | }
387 | },
388 | "nbformat": 4,
389 | "nbformat_minor": 2
390 | }
391 |
--------------------------------------------------------------------------------
/spider_qiushibaike_content_datas.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | # ### 爬取 糗事百科段子 内容数据
5 | # ---
6 | # - author:
7 | # - kngines
8 | # - date:
9 | # - 20180118
10 |
11 | # In[1]:
12 |
13 |
# Cell 1: download one "hot" page of qiushibaike.com and extract the joke
# texts, the author nicknames and the funny/comment counters from its HTML.
import re
import urllib.request  # plain `import urllib` does not load the `request` submodule
import bs4
import pandas as pd
from bs4 import BeautifulSoup

page = 1  # page number to fetch (first page)

url = 'http://www.qiushibaike.com/hot/page/' + str(page)
# The request is sent with a desktop-Chrome User-Agent header.
header={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3236.0 Safari/537.36'}
request = urllib.request.Request(url,headers=header)
# Use the response as a context manager so the connection is closed as soon
# as the body has been read, and bound the wait with an explicit timeout.
with urllib.request.urlopen(request, timeout=10) as http_response:
    response = http_response.read()

content=BeautifulSoup(response, 'lxml')

# 1. Joke text: the text of the <span> inside every <div class="content">.
divs = content.find_all('div', class_='content')
content_lis = [div.span.get_text() for div in divs]

# 2. Author nicknames: every <h2> below the first <div class="col1">.
col1_div = content.find_all('div', class_='col1')[0]
nick_name_lis = [h2.get_text() for h2 in col1_div.find_all('h2')]

# 3. Counters: the <i class="number"> tags are read in document order and
#    split alternately - 1st, 3rd, 5th, ... go to the "funny" list and
#    2nd, 4th, 6th, ... go to the "comment" list.
numbers = [tag.get_text() for tag in content.find_all('i', class_='number')]
funny_lis = numbers[0::2]
comment_lis = numbers[1::2]
57 |
58 |
59 | # ### 1 当前页码信息
60 | # - 第 1 页
61 | # - 通过简单调整,可以实现批量爬取数据
62 |
63 | # In[2]:
64 |
65 |
# Read the text of the first <span class="current"> inside the first
# <ul class="pagination"> and print it, with newlines removed, between
# ANSI colour escape sequences.
pagination_ul = content.find_all('ul', class_='pagination')[0]
current_span = pagination_ul.find_all('span', class_='current')[0]
current_page = current_span.get_text()
page_label = current_page.replace('\n', '')
print('\033[1;31m[24H] current page:\033[0m \033[0;30;43m%s\033[0m' % page_label)
68 |
69 |
70 | # ### 2 转换[DataFrame]并显示
71 |
72 | # In[3]:
73 |
74 |
# Stack the four scraped lists as rows, then transpose so that each list
# becomes one named column of the final frame.
column_names = ['nick_name', 'content', 'funny_cnt', 'comment_cnt']
tdf = pd.DataFrame([nick_name_lis, content_lis, funny_lis, comment_lis])
df = tdf.transpose()
df.columns = column_names
78 |
79 |
80 | # In[4]:
81 |
82 |
# Preview the first three rows using the current pandas display settings.
df.head(n=3)
84 |
85 |
86 | # ### 3 格式化显示数据
87 |
88 | # In[5]:
89 |
90 |
# Set the pandas display options (overall width, column/row limits and
# per-cell text width) used when frames are rendered afterwards.
display_options = (
    ('display.width', 200),
    ('display.max_columns', 20),
    ('display.max_rows', 50),
    ('display.max_colwidth', 200),
)
for option_name, option_value in display_options:
    pd.set_option(option_name, option_value)
95 |
96 |
97 | # In[6]:
98 |
99 |
# Show the same three rows again, now rendered with the display options
# that are in effect at this point.
df.head(n=3)
101 |
102 |
103 | # In[ ]:
104 |
105 |
# Consolidated cell: fetch one "hot" page of qiushibaike.com, extract joke
# texts, nicknames and counters, print the current page label and build a
# DataFrame from the scraped lists.
import re
import urllib.request  # plain `import urllib` does not load the `request` submodule
import bs4
import pandas as pd
from bs4 import BeautifulSoup

page = 1  # page number to fetch (first page)

url = 'http://www.qiushibaike.com/hot/page/' + str(page)
# The request is sent with a desktop-Chrome User-Agent header.
header={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3236.0 Safari/537.36'}
request = urllib.request.Request(url,headers=header)
# Use the response as a context manager so the connection is closed as soon
# as the body has been read, and bound the wait with an explicit timeout.
with urllib.request.urlopen(request, timeout=10) as http_response:
    response = http_response.read()

content=BeautifulSoup(response, 'lxml')

# 1. Joke text: the text of the <span> inside every <div class="content">.
divs = content.find_all('div', class_='content')
content_lis = [div.span.get_text() for div in divs]

# 2. Author nicknames: every <h2> below the first <div class="col1">.
col1_div = content.find_all('div', class_='col1')[0]
nick_name_lis = [h2.get_text() for h2 in col1_div.find_all('h2')]

# 3. Counters: the <i class="number"> tags are read in document order and
#    split alternately - 1st, 3rd, 5th, ... go to the "funny" list and
#    2nd, 4th, 6th, ... go to the "comment" list.
numbers = [tag.get_text() for tag in content.find_all('i', class_='number')]
funny_lis = numbers[0::2]
comment_lis = numbers[1::2]

# 4. Print the text of the first <span class="current"> inside the first
#    <ul class="pagination">, with newlines removed.
current_page=content.find_all('ul',class_='pagination')[0].find_all('span',class_='current')[0].get_text()
print ('\033[1;31m[24H] current page:\033[0m \033[0;30;43m%s\033[0m' % (current_page.replace('\n','')))

# 5. Stack the four lists as rows, transpose so each list becomes a column,
#    name the columns and preview the first three rows.
tdf=pd.DataFrame([nick_name_lis,content_lis,funny_lis,comment_lis])
df=tdf.T
df.columns=['nick_name','content','funny_cnt','comment_cnt']
df.head(3)
163 |
164 |
--------------------------------------------------------------------------------
/users_rise_up_period_data:
--------------------------------------------------------------------------------
1 | 20170725, 410
2 | 20170726, 586
3 | 20170727, 802
4 | 20170728, 997
5 | 20170729,1187
6 | 20170730,1380
7 | 20170731,1590
8 | 20170801,1790
9 | 20170802,1975
10 | 20170803,2135
11 | 20170804,2330
12 | 20170805,2560
13 | 20170806,2742
14 | 20170807,2852
15 | 20170808,3093
16 | 20170809,3331
17 | 20170810,3541
18 | 20170811,3721
19 | 20170812,3901
20 | 20170813,4088
21 | 20170814,4266
22 | 20170815,4477
23 | 20170816,4662
24 | 20170817,4839
25 | 20170818,5042
26 | 20170819,5237
27 | 20170820,5550
28 | 20170821,5736
29 | 20170822,5870
30 | 20170823,5984
31 |
--------------------------------------------------------------------------------