├── 2+Clustering+-K+Means.ipynb ├── All+CSV+Files+in+a+Folder.ipynb ├── Bokeh.ipynb ├── Classification Models v 2.ipynb ├── Hierarchical+Clustering (1).ipynb ├── Intro+R+June+2019.ipynb ├── Kmeans+with+scaling.ipynb ├── LICENSE ├── Logistic Regression.ipynb ├── Logistic Regression.pptx ├── ML+and+Data+Viz.ipynb ├── ML+part+2.ipynb ├── ML+part+3.ipynb ├── Machine Learning.pdf ├── Machine+Learning++Part+1 (1).ipynb ├── Machine+Learning++Part+1.ipynb ├── Naive+Bayes.ipynb ├── Python+with+Postgres (3).ipynb ├── Quarterly+Time+Series+of+the+Number+of+Australian+Residents.ipynb ├── README.md ├── Regression Models v 2.ipynb ├── Splitting+Dataset+in+control+and+validation.ipynb ├── Tweepy.ipynb ├── Web+Scraping+Yelp+with+Beautiful+Soup.ipynb ├── Web+Scraping.ipynb ├── _config.yml ├── anscombe+dataset.ipynb ├── autosklearn+iris.ipynb ├── chi+square+test.ipynb ├── class+exercise+data+viz.ipynb ├── computer-vision ├── FirstDetection.py ├── computer vision.ipynb ├── image.jpeg └── imagenew.jpeg ├── data+exploration.ipynb ├── data+manipulation.ipynb ├── data+munging+again.ipynb ├── data+viz.ipynb ├── data+wrangling+titanic+dataset.ipynb ├── data ├── RFM part2.xlsx ├── stats DAP.xlsx ├── stats for data scientists.csv └── weather.csv ├── decisiontree.ipynb ├── descriptive+stats+in+Python.ipynb ├── german credit ├── introductory+python.ipynb ├── iris2.ipynb ├── lambda+functions.ipynb ├── linear regression using statsmodel and scikit.ipynb ├── matplotlib+cars.ipynb ├── matplotlib+line+graph.ipynb ├── multiple+file+concat+in+pandas.ipynb ├── my+first+class+in+python.ipynb ├── nltk.ipynb ├── pandas+11.ipynb ├── pandas+analysis+1.ipynb ├── pandas+data+manipulation.ipynb ├── pyspark.ipynb ├── python+intro.ipynb ├── python+with+postgres (1).ipynb ├── python+with+postgres.ipynb ├── reg+model.ipynb ├── regression.ipynb ├── scrape+amazon.ipynb ├── simple+matplot+graph.ipynb ├── test+web+scraping.ipynb ├── text+mining.ipynb ├── time+series.ipynb ├── titanic forked.ipynb ├── tpot.ipynb └── 
trial+time+series.ipynb /All+CSV+Files+in+a+Folder.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import os\n", 12 | "import glob" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 6, 18 | "metadata": { 19 | "collapsed": false 20 | }, 21 | "outputs": [ 22 | { 23 | "data": { 24 | "text/plain": [ 25 | "'C:\\\\Users\\\\Dell'" 26 | ] 27 | }, 28 | "execution_count": 6, 29 | "metadata": {}, 30 | "output_type": "execute_result" 31 | } 32 | ], 33 | "source": [ 34 | "os.getcwd()" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 7, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "path = 'C:\\\\Users\\\\Dell\\\\Downloads'" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 8, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "extension = 'csv'\n", 57 | "os.chdir(path)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 9, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "['AirPassengers.csv', 'BigDiamonds.csv', 'Boston (1).csv', 'Boston.csv', 'ccFraud.csv', 'class2.csv', 'data1.csv', 'datasets.csv', 'Diamond (1).csv', 'Diamond (2).csv', 'Diamond (3).csv', 'Diamond (4).csv', 'Diamond (5).csv', 'Diamond (6).csv', 'Diamond.csv', 'Hdma.csv', 'Hedonic.csv', 'pgd.csv', 'protein.csv', 'RidingMowers.csv', 'sales-of-shampoo-over-a-three-ye.csv', 'telecom.csv']\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "result = [i for i in glob.glob('*.{}'.format(extension))]\n", 77 | "print(result)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": true 85 | 
}, 86 | "outputs": [], 87 | "source": [] 88 | } 89 | ], 90 | "metadata": { 91 | "anaconda-cloud": {}, 92 | "kernelspec": { 93 | "display_name": "Python [default]", 94 | "language": "python", 95 | "name": "python3" 96 | }, 97 | "language_info": { 98 | "codemirror_mode": { 99 | "name": "ipython", 100 | "version": 3 101 | }, 102 | "file_extension": ".py", 103 | "mimetype": "text/x-python", 104 | "name": "python", 105 | "nbconvert_exporter": "python", 106 | "pygments_lexer": "ipython3", 107 | "version": "3.5.2" 108 | } 109 | }, 110 | "nbformat": 4, 111 | "nbformat_minor": 1 112 | } 113 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /Logistic Regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn.datasets import load_iris" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "iris = load_iris()" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": { 29 | "collapsed": false 30 | }, 31 | "outputs": [ 32 | { 33 | "data": { 34 | "text/plain": [ 35 | "array([[ 5.1, 3.5, 1.4, 0.2],\n", 36 | " [ 4.9, 3. , 1.4, 0.2],\n", 37 | " [ 4.7, 3.2, 1.3, 0.2],\n", 38 | " [ 4.6, 3.1, 1.5, 0.2],\n", 39 | " [ 5. 
, 3.6, 1.4, 0.2],\n", 40 | " [ 5.4, 3.9, 1.7, 0.4],\n", 41 | " [ 4.6, 3.4, 1.4, 0.3],\n", 42 | " [ 5. , 3.4, 1.5, 0.2],\n", 43 | " [ 4.4, 2.9, 1.4, 0.2],\n", 44 | " [ 4.9, 3.1, 1.5, 0.1],\n", 45 | " [ 5.4, 3.7, 1.5, 0.2],\n", 46 | " [ 4.8, 3.4, 1.6, 0.2],\n", 47 | " [ 4.8, 3. , 1.4, 0.1],\n", 48 | " [ 4.3, 3. , 1.1, 0.1],\n", 49 | " [ 5.8, 4. , 1.2, 0.2],\n", 50 | " [ 5.7, 4.4, 1.5, 0.4],\n", 51 | " [ 5.4, 3.9, 1.3, 0.4],\n", 52 | " [ 5.1, 3.5, 1.4, 0.3],\n", 53 | " [ 5.7, 3.8, 1.7, 0.3],\n", 54 | " [ 5.1, 3.8, 1.5, 0.3],\n", 55 | " [ 5.4, 3.4, 1.7, 0.2],\n", 56 | " [ 5.1, 3.7, 1.5, 0.4],\n", 57 | " [ 4.6, 3.6, 1. , 0.2],\n", 58 | " [ 5.1, 3.3, 1.7, 0.5],\n", 59 | " [ 4.8, 3.4, 1.9, 0.2],\n", 60 | " [ 5. , 3. , 1.6, 0.2],\n", 61 | " [ 5. , 3.4, 1.6, 0.4],\n", 62 | " [ 5.2, 3.5, 1.5, 0.2],\n", 63 | " [ 5.2, 3.4, 1.4, 0.2],\n", 64 | " [ 4.7, 3.2, 1.6, 0.2],\n", 65 | " [ 4.8, 3.1, 1.6, 0.2],\n", 66 | " [ 5.4, 3.4, 1.5, 0.4],\n", 67 | " [ 5.2, 4.1, 1.5, 0.1],\n", 68 | " [ 5.5, 4.2, 1.4, 0.2],\n", 69 | " [ 4.9, 3.1, 1.5, 0.1],\n", 70 | " [ 5. , 3.2, 1.2, 0.2],\n", 71 | " [ 5.5, 3.5, 1.3, 0.2],\n", 72 | " [ 4.9, 3.1, 1.5, 0.1],\n", 73 | " [ 4.4, 3. , 1.3, 0.2],\n", 74 | " [ 5.1, 3.4, 1.5, 0.2],\n", 75 | " [ 5. , 3.5, 1.3, 0.3],\n", 76 | " [ 4.5, 2.3, 1.3, 0.3],\n", 77 | " [ 4.4, 3.2, 1.3, 0.2],\n", 78 | " [ 5. , 3.5, 1.6, 0.6],\n", 79 | " [ 5.1, 3.8, 1.9, 0.4],\n", 80 | " [ 4.8, 3. , 1.4, 0.3],\n", 81 | " [ 5.1, 3.8, 1.6, 0.2],\n", 82 | " [ 4.6, 3.2, 1.4, 0.2],\n", 83 | " [ 5.3, 3.7, 1.5, 0.2],\n", 84 | " [ 5. , 3.3, 1.4, 0.2],\n", 85 | " [ 7. , 3.2, 4.7, 1.4],\n", 86 | " [ 6.4, 3.2, 4.5, 1.5],\n", 87 | " [ 6.9, 3.1, 4.9, 1.5],\n", 88 | " [ 5.5, 2.3, 4. , 1.3],\n", 89 | " [ 6.5, 2.8, 4.6, 1.5],\n", 90 | " [ 5.7, 2.8, 4.5, 1.3],\n", 91 | " [ 6.3, 3.3, 4.7, 1.6],\n", 92 | " [ 4.9, 2.4, 3.3, 1. ],\n", 93 | " [ 6.6, 2.9, 4.6, 1.3],\n", 94 | " [ 5.2, 2.7, 3.9, 1.4],\n", 95 | " [ 5. , 2. , 3.5, 1. ],\n", 96 | " [ 5.9, 3. , 4.2, 1.5],\n", 97 | " [ 6. , 2.2, 4. , 1. 
],\n", 98 | " [ 6.1, 2.9, 4.7, 1.4],\n", 99 | " [ 5.6, 2.9, 3.6, 1.3],\n", 100 | " [ 6.7, 3.1, 4.4, 1.4],\n", 101 | " [ 5.6, 3. , 4.5, 1.5],\n", 102 | " [ 5.8, 2.7, 4.1, 1. ],\n", 103 | " [ 6.2, 2.2, 4.5, 1.5],\n", 104 | " [ 5.6, 2.5, 3.9, 1.1],\n", 105 | " [ 5.9, 3.2, 4.8, 1.8],\n", 106 | " [ 6.1, 2.8, 4. , 1.3],\n", 107 | " [ 6.3, 2.5, 4.9, 1.5],\n", 108 | " [ 6.1, 2.8, 4.7, 1.2],\n", 109 | " [ 6.4, 2.9, 4.3, 1.3],\n", 110 | " [ 6.6, 3. , 4.4, 1.4],\n", 111 | " [ 6.8, 2.8, 4.8, 1.4],\n", 112 | " [ 6.7, 3. , 5. , 1.7],\n", 113 | " [ 6. , 2.9, 4.5, 1.5],\n", 114 | " [ 5.7, 2.6, 3.5, 1. ],\n", 115 | " [ 5.5, 2.4, 3.8, 1.1],\n", 116 | " [ 5.5, 2.4, 3.7, 1. ],\n", 117 | " [ 5.8, 2.7, 3.9, 1.2],\n", 118 | " [ 6. , 2.7, 5.1, 1.6],\n", 119 | " [ 5.4, 3. , 4.5, 1.5],\n", 120 | " [ 6. , 3.4, 4.5, 1.6],\n", 121 | " [ 6.7, 3.1, 4.7, 1.5],\n", 122 | " [ 6.3, 2.3, 4.4, 1.3],\n", 123 | " [ 5.6, 3. , 4.1, 1.3],\n", 124 | " [ 5.5, 2.5, 4. , 1.3],\n", 125 | " [ 5.5, 2.6, 4.4, 1.2],\n", 126 | " [ 6.1, 3. , 4.6, 1.4],\n", 127 | " [ 5.8, 2.6, 4. , 1.2],\n", 128 | " [ 5. , 2.3, 3.3, 1. ],\n", 129 | " [ 5.6, 2.7, 4.2, 1.3],\n", 130 | " [ 5.7, 3. , 4.2, 1.2],\n", 131 | " [ 5.7, 2.9, 4.2, 1.3],\n", 132 | " [ 6.2, 2.9, 4.3, 1.3],\n", 133 | " [ 5.1, 2.5, 3. , 1.1],\n", 134 | " [ 5.7, 2.8, 4.1, 1.3],\n", 135 | " [ 6.3, 3.3, 6. , 2.5],\n", 136 | " [ 5.8, 2.7, 5.1, 1.9],\n", 137 | " [ 7.1, 3. , 5.9, 2.1],\n", 138 | " [ 6.3, 2.9, 5.6, 1.8],\n", 139 | " [ 6.5, 3. , 5.8, 2.2],\n", 140 | " [ 7.6, 3. , 6.6, 2.1],\n", 141 | " [ 4.9, 2.5, 4.5, 1.7],\n", 142 | " [ 7.3, 2.9, 6.3, 1.8],\n", 143 | " [ 6.7, 2.5, 5.8, 1.8],\n", 144 | " [ 7.2, 3.6, 6.1, 2.5],\n", 145 | " [ 6.5, 3.2, 5.1, 2. ],\n", 146 | " [ 6.4, 2.7, 5.3, 1.9],\n", 147 | " [ 6.8, 3. , 5.5, 2.1],\n", 148 | " [ 5.7, 2.5, 5. , 2. ],\n", 149 | " [ 5.8, 2.8, 5.1, 2.4],\n", 150 | " [ 6.4, 3.2, 5.3, 2.3],\n", 151 | " [ 6.5, 3. , 5.5, 1.8],\n", 152 | " [ 7.7, 3.8, 6.7, 2.2],\n", 153 | " [ 7.7, 2.6, 6.9, 2.3],\n", 154 | " [ 6. , 2.2, 5. 
, 1.5],\n", 155 | " [ 6.9, 3.2, 5.7, 2.3],\n", 156 | " [ 5.6, 2.8, 4.9, 2. ],\n", 157 | " [ 7.7, 2.8, 6.7, 2. ],\n", 158 | " [ 6.3, 2.7, 4.9, 1.8],\n", 159 | " [ 6.7, 3.3, 5.7, 2.1],\n", 160 | " [ 7.2, 3.2, 6. , 1.8],\n", 161 | " [ 6.2, 2.8, 4.8, 1.8],\n", 162 | " [ 6.1, 3. , 4.9, 1.8],\n", 163 | " [ 6.4, 2.8, 5.6, 2.1],\n", 164 | " [ 7.2, 3. , 5.8, 1.6],\n", 165 | " [ 7.4, 2.8, 6.1, 1.9],\n", 166 | " [ 7.9, 3.8, 6.4, 2. ],\n", 167 | " [ 6.4, 2.8, 5.6, 2.2],\n", 168 | " [ 6.3, 2.8, 5.1, 1.5],\n", 169 | " [ 6.1, 2.6, 5.6, 1.4],\n", 170 | " [ 7.7, 3. , 6.1, 2.3],\n", 171 | " [ 6.3, 3.4, 5.6, 2.4],\n", 172 | " [ 6.4, 3.1, 5.5, 1.8],\n", 173 | " [ 6. , 3. , 4.8, 1.8],\n", 174 | " [ 6.9, 3.1, 5.4, 2.1],\n", 175 | " [ 6.7, 3.1, 5.6, 2.4],\n", 176 | " [ 6.9, 3.1, 5.1, 2.3],\n", 177 | " [ 5.8, 2.7, 5.1, 1.9],\n", 178 | " [ 6.8, 3.2, 5.9, 2.3],\n", 179 | " [ 6.7, 3.3, 5.7, 2.5],\n", 180 | " [ 6.7, 3. , 5.2, 2.3],\n", 181 | " [ 6.3, 2.5, 5. , 1.9],\n", 182 | " [ 6.5, 3. , 5.2, 2. ],\n", 183 | " [ 6.2, 3.4, 5.4, 2.3],\n", 184 | " [ 5.9, 3. 
, 5.1, 1.8]])" 185 | ] 186 | }, 187 | "execution_count": 3, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "#Print Iris data\n", 194 | "iris.data" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 7, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [ 204 | { 205 | "data": { 206 | "text/plain": [ 207 | "['sepal length (cm)',\n", 208 | " 'sepal width (cm)',\n", 209 | " 'petal length (cm)',\n", 210 | " 'petal width (cm)']" 211 | ] 212 | }, 213 | "execution_count": 7, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | } 217 | ], 218 | "source": [ 219 | "# names of the features (or print() to print)\n", 220 | "iris.feature_names" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 6, 226 | "metadata": { 227 | "collapsed": false 228 | }, 229 | "outputs": [ 230 | { 231 | "data": { 232 | "text/plain": [ 233 | "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 234 | " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 235 | " 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 236 | " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", 237 | " 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", 238 | " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", 239 | " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])" 240 | ] 241 | }, 242 | "execution_count": 6, 243 | "metadata": {}, 244 | "output_type": "execute_result" 245 | } 246 | ], 247 | "source": [ 248 | "#Values of target values\n", 249 | "iris.target" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 8, 255 | "metadata": { 256 | "collapsed": false 257 | }, 258 | "outputs": [ 259 | { 260 | "data": { 261 | "text/plain": [ 262 | "array(['setosa', 'versicolor', 'virginica'], \n", 263 | " dtype='\n" 128 | ] 129 | } 130 | ], 131 | "source": 
[ 132 | "print(insp)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 9, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [ 142 | { 143 | "data": { 144 | "text/plain": [ 145 | "['weather', 'cities', 'sales', 'sales23', 'sales77']" 146 | ] 147 | }, 148 | "execution_count": 9, 149 | "metadata": {}, 150 | "output_type": "execute_result" 151 | } 152 | ], 153 | "source": [ 154 | "engine.table_names()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 11, 160 | "metadata": { 161 | "collapsed": true 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "data3= pd.read_sql_query('select * from \"sales77\" limit 10',con=engine)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 12, 171 | "metadata": { 172 | "collapsed": false 173 | }, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | " customer_id sales date product_id\n", 180 | "0 10001 845 2017-07-05 407\n", 181 | "1 10002 2370 2015-11-18 617\n", 182 | "2 10003 5744 2017-02-10 928\n", 183 | "3 10004 3230 2017-01-13 500\n", 184 | "4 10005 8781 2017-04-23 555\n", 185 | "5 10006 2544 2016-01-14 316\n", 186 | "6 10007 217 2015-06-21 187\n", 187 | "7 10008 306 2015-02-27 880\n", 188 | "8 10009 8720 2015-09-03 900\n", 189 | "9 10010 6137 2016-06-08 110\n" 190 | ] 191 | } 192 | ], 193 | "source": [ 194 | "print(data3)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "collapsed": true 202 | }, 203 | "outputs": [], 204 | "source": [] 205 | } 206 | ], 207 | "metadata": { 208 | "anaconda-cloud": {}, 209 | "kernelspec": { 210 | "display_name": "Python [conda root]", 211 | "language": "python", 212 | "name": "conda-root-py" 213 | }, 214 | "language_info": { 215 | "codemirror_mode": { 216 | "name": "ipython", 217 | "version": 3 218 | }, 219 | "file_extension": ".py", 220 | "mimetype": "text/x-python", 221 | "name": 
"python", 222 | "nbconvert_exporter": "python", 223 | "pygments_lexer": "ipython3", 224 | "version": "3.5.2" 225 | } 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 1 229 | } 230 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PySpark https://nbviewer.jupyter.org/github/decisionstats/python_for_datascience/blob/master/pyspark.ipynb 2 | 3 | Bokeh - https://github.com/decisionstats/python_for_datascience/blob/master/Bokeh.ipynb 4 | 5 | See https://decisionstats.github.io/python_for_datascience/ 6 | 7 | Classification Models https://nbviewer.jupyter.org/github/decisionstats/python_for_datascience/blob/master/Classification%20Models%20v%202.ipynb 8 | 9 | # Python for Data Science 10 | 11 | 12 | Notebooks for Python for R Users: A Data Science Approach 13 | 14 | 15 | # CLASSROOM 16 | https://nbviewer.jupyter.org/gist/decisionstats/2c99a54b0a61c082b8814c1e1466ad62 17 | 18 | ## Interfaces 19 | - Command Line 20 | - Rodeo 21 | - IDLE 22 | - Jupyter 23 | - Beaker 24 | 25 | ## Cloud 26 | 27 | ## Basic Python 28 | - Revised Intro to Python Dec 2017 https://github.com/decisionstats/pythonfordatascience/blob/master/python%2Bintro.ipynb 29 | - Introductory Python https://nbviewer.jupyter.org/gist/decisionstats/ce2c16ee98abcf328177 30 | - Strings, Lists and Tuples ,Dicts https://nbviewer.jupyter.org/gist/decisionstats/752ff727101cf6fc13225bd94eef358a 31 | - variables in strings in python https://nbviewer.jupyter.org/gist/decisionstats/b9edb29ae440b45799f4e8d273269228 32 | - Selecting Data in Pandas https://nbviewer.jupyter.org/gist/decisionstats/01fc540363f1081c5358 33 | - numpy to pandas http://nbviewer.jupyter.org/gist/decisionstats/0a752d23e94708c6ddbaea478ecd9a81 34 | - using re.sub for cleaning data https://nbviewer.jupyter.org/gist/decisionstats/42b3fc90ae6fa537a19a08017e0336cb 35 | - using re.search and bool for searching for strings 
https://nbviewer.jupyter.org/gist/decisionstats/612116b1b8147cfb3808f5ac3c791eba 36 | - using os package for file operations https://nbviewer.jupyter.org/gist/decisionstats/29f3adfb6980db52a61130aa8c8f9166 37 | - data transformations https://nbviewer.jupyter.org/gist/decisionstats/b818917b37807fa0ded41522928f26af 38 | - data manipulation https://github.com/decisionstats/pythonfordatascience/blob/master/data%2Bmanipulation.ipynb 39 | 40 | ## Data Input 41 | ### Web Scraping 42 | - Yelp with Beautiful Soup http://nbviewer.ipython.org/gist/decisionstats/3385dc84c39109f49b83 43 | - Scraping Rotten Tomatoes by Beautiful Soup https://github.com/decisionstats/pythonfordatascience/blob/master/Web%2BScraping.ipynb 44 | - Using PyCurl for Web Scraping 45 | - Using Scrapy for Web Scraping 46 | - Social Media Scraping 47 | - Cricket Analysis 48 | 1 49 | ### Databases 50 | - MySQL 51 | - PostGres https://nbviewer.jupyter.org/gist/decisionstats/d3cf51e145b581480a42348a8a16177e 52 | https://nbviewer.jupyter.org/gist/decisionstats/e283591acf4b51ba3c47e0bcfe331c05 53 | - MongoDB 54 | - HDFS 55 | - Spark 56 | 57 | ## Data Manipulation 58 | - Using SQL for Groupby https://nbviewer.jupyter.org/gist/decisionstats/284a86d0541d06489e92 59 | - Using For Loops https://nbviewer.jupyter.org/gist/decisionstats/ce2c16ee98abcf328177 60 | - Apply and Lambda 61 | - Converting data from one format to another ( str) 62 | - Using grepl and gsub 63 | - Subset of a DataFrame and List 64 | - Conditional Manipulation 65 | 66 | ## Data Exploration 67 | - Adult DataSet http://nbviewer.ipython.org/gist/decisionstats/4142e98375445c5e4174 and https://nbviewer.jupyter.org/gist/ajayteach/eed37262e64de78f4b209c5eb4a7ed23 68 | - Big Diamonds Dataset 69 | - Iris Dataset 70 | 71 | 72 | 73 | ## Data Visualization 74 | - Basic Plots using MatplotLib 75 | - Advanced Plots using Seaborn https://github.com/decisionstats/pythonfordatascience/blob/master/ML%2Band%2BData%2BViz.ipynb 76 | - Data Visualization using GGPlot 
http://nbviewer.ipython.org/gist/decisionstats/df98ff9df42e7764d600 77 | - Plots using Bokeh 78 | - Anscombe Dataset http://nbviewer.jupyter.org/gist/decisionstats/3737642751895f470d5c07194302f53e 79 | 80 | ## Regression Modeling 81 | - Using Statsmodels Iris Dataset https://github.com/decisionstats/pythonfordatascience/blob/master/regression.ipynb 82 | - Using Boston Dataset with statsmodels and scikit-learn https://github.com/decisionstats/pythonfordatascience/blob/master/linear%20regression%20using%20statsmodel%20and%20scikit.ipynb 83 | - Using Pandas 84 | - Using Scikit-learn 85 | 86 | ## Data Mining 87 | - Decision Trees https://nbviewer.jupyter.org/gist/decisionstats/8b762caa7b7deebb68e3f275daf02a9d 88 | - Decision Tree with Weather Dataset from Rattle https://nbviewer.jupyter.org/gist/decisionstats/47a2324b14ebfd22657b40ec1ae5b480 89 | - Association Analysis 90 | - Clustering Kmeans and Hierarchical kmeans https://nbviewer.jupyter.org/gist/decisionstats/a1554207a7583bad6f53825905e72289 91 | - Neural Networks 92 | - Naive Bayes https://github.com/decisionstats/pythonfordatascience/blob/master/Naive%2BBayes.ipynb 93 | 94 | ## Classification 95 | - Classification https://nbviewer.jupyter.org/github/decisionstats/python_for_datascience/blob/master/Classification%20Models%20v%202.ipynb 96 | 97 | ## Time Series Forecasting 98 | https://nbviewer.jupyter.org/gist/decisionstats/341f890827f73a3ba24ac8861803cab8 99 | - ETS Models 100 | - Arima Models 101 | 102 | ## Optimizing Code 103 | - Measuring Code Speed timeit 104 | - Measuring Code Performance 105 | 106 | ## Machine Learning 107 | Automated Machine Learning TPot https://github.com/decisionstats/pythonfordatascience/blob/master/tpot.ipynb 108 | 109 | ## Text Mining 110 | - Tokens and TDM https://github.com/decisionstats/pythonfordatascience/blob/master/text%2Bmining.ipynb 111 | - Word Cloud (corpus,stopwords,association,tdm) 112 | - Sentiment Analysis on Tweets 
https://github.com/decisionstats/pythonfordatascience/blob/master/Tweepy.ipynb 113 | 114 | 115 | 116 | ## Complete Tutorial 117 | - Diamonds Dataset http://nbviewer.ipython.org/gist/decisionstats/c1684daaeecf62dd4bf4 118 | - StatisticsViews Data Science Tutorial http://www.statisticsviews.com/details/feature/8868901/A-Tutorial-on-Python.html 119 | 120 | 121 | 122 | # Current To Dos 123 | Spatial Data using Python 124 | http://sensitivecities.com/so-youd-like-to-make-a-map-using-python-EN.html#.V4EneVgrJ-8 125 | http://nbviewer.jupyter.org/gist/urschrei/74c6223d9f6a5dea4e75 126 | http://spatialdemography.org/essential-python-geospatial-libraries/ 127 | 128 | # New Version of Python ggplot 129 | http://ggplot.yhathq.com/ 130 | 131 | 132 | # Datasets 133 | http://www.gunviolencearchive.org/ 134 | Washington Post https://github.com/washingtonpost/data-police-shootings 135 | Titanic Dataset - Kaggle (forked from Kaggle https://github.com/decisionstats/pythonfordatascience/blob/master/titanic%20forked.ipynb ) 136 | 137 | 138 | -------------------------------------------------------------------------------- /Tweepy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#http://www.geeksforgeeks.org/twitter-sentiment-analysis-using-python/" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Requirement already satisfied: tweepy in c:\\users\\kogentix\\anaconda3\\lib\\site-packages\n", 24 | "Requirement already satisfied: requests-oauthlib>=0.4.1 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tweepy)\n", 25 | "Requirement already satisfied: six>=1.7.3 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tweepy)\n", 26 
| "Requirement already satisfied: requests>=2.4.3 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tweepy)\n", 27 | "Requirement already satisfied: oauthlib>=0.6.2 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from requests-oauthlib>=0.4.1->tweepy)\n" 28 | ] 29 | } 30 | ], 31 | "source": [ 32 | "!pip install tweepy" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "Requirement already satisfied: textblob in c:\\users\\kogentix\\anaconda3\\lib\\site-packages\n", 45 | "Requirement already satisfied: nltk>=3.1 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from textblob)\n", 46 | "Requirement already satisfied: six in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from nltk>=3.1->textblob)\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "!pip install textblob" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 10, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "Positive tweets percentage: 42.42424242424242 %\n", 64 | "Negative tweets percentage: 16.666666666666668 %\n", 65 | "Neutral tweets percentage: 40.90909090909091 %\n", 66 | "\n", 67 | "\n", 68 | "Positive tweets:\n", 69 | "RT @ProudResister: Thank you, Brave Women.\n", 70 | "Thank you, Black Voters.\n", 71 | "Thank you, Doug Jones.\n", 72 | "\n", 73 | "F+ck you, Donald Trump.\n", 74 | "F+ck you, Steve Bannon.…\n", 75 | "RT @RVAwonk: I'd like to extend my congratulations to Steve Bannon for masterfully humiliating himself, the Republican Party, AND Donald Tr…\n", 76 | "RT @tonyschwartz: It is over for Donald Trump. He has lost in one of the most Republican states in America. God Bless 51 per cent of Alabam…\n", 77 | "RT @keithboykin: Doug Jones wins! Donald Trump is on an epic losing streak this year. 
#AlabamaSenateElection https://t.co/gamrQSCXK4\n", 78 | "RT @Fun_Beard: Given his track record, if Donald Trump wants republicans to win, maybe he should start backing democrats?\n", 79 | "RT @colbertlateshow: People Magazine released a photo of Natasha Stoynoff, one of Trump’s accusers standing right next to Donald Trump. In…\n", 80 | "RT @AngryBlackLady: Ok, who wrote this tweet. Because it sure as shit wasn’t donald j. trump. https://t.co/8NkNtInOnJ\n", 81 | "RT @PoliticalShort: Co-founder of Trump dossier firm Fusion GPS confirmed in court filings on Tuesday that he met last year with Justice De…\n", 82 | "RT @Amy_Siskind: Wouldn’t it be powerful if tomorrow the 33 US Senators who called on Al Franken to resign called a press conference and ca…\n", 83 | "RT @SethAbramson: Congress must immediately launch a bipartisan investigation into the 20+ allegations of sexual assault against Donald Tru…\n", 84 | "\n", 85 | "\n", 86 | "Negative tweets:\n", 87 | "RT @ShaunKing: I’ve counted at least 25 times where Steve Bannon said in speeches that “This election is a referendum on Donald Trump. A vo…\n", 88 | "RT @AhmedBaba_: Alabama is further proof that people like Donald Trump and Roy Moore may try and bring out the worst in America but the bes…\n", 89 | "RT @funder: I’m so tired of Donald Trump. He’s pure evil.\n", 90 | "RT @SenFeinstein: Another disgusting tweet from Thin-Skinned Donald Trump. This man has a problem, plain and simple. He lashes out at women…\n", 91 | "@11thHour I know in my heart of hearts, a majority of Americans favor decency. The politicians got it wrong. Once a… https://t.co/P1gMp8znRO\n", 92 | "Donald Trump’s staff caught posting ridiculously phony tweet on his account https://t.co/BlwOTN1LPz\n", 93 | "RT @ashleyfeinberg: donald trump is hog tied in a bathroom somewhere desperately chewing trying to chew through the door https://t.co/Y70Tm…\n", 94 | "Donald Trump & his Crew summed up in one video. 
This is insane man 🤦🏾‍♂️ This is who YOU guys voted for. https://t.co/1Ht1DqpPbN\n", 95 | "Doug Jones declared winner of Alabama Senate race! Donald Trump and child molester the big losers https://t.co/F6bRRuM9ew\n", 96 | "poop donald trump Black foot socks by bwet13 https://t.co/SHPlwdpE9s #bwet #bwet13 #etsy\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "import re\n", 102 | "import tweepy\n", 103 | "from tweepy import OAuthHandler\n", 104 | "from textblob import TextBlob\n", 105 | " \n", 106 | "class TwitterClient(object):\n", 107 | " '''\n", 108 | " Generic Twitter Class for sentiment analysis.\n", 109 | " '''\n", 110 | " def __init__(self):\n", 111 | " '''\n", 112 | " Class constructor or initialization method.\n", 113 | " '''\n", 114 | " # keys and tokens from the Twitter Dev Console\n", 115 | " consumer_key = 'cUqEqaAhL8iFsowJj50Tf8mwM'\n", 116 | " consumer_secret = 'WQYMRAjZdTC4DcZZrxyi5QFbyerVTzoTirmAy0J4lcBLlSrJMc'\n", 117 | " access_token = '50995744-4dZaS1J21jUhGUBiH5P7EVZI0q5gbQI45caGmjUMk'\n", 118 | " access_token_secret = 'gY1zXKey05klmPFGZzf2R5lcMvSadOWvROcLjQhwxmhyi'\n", 119 | " \n", 120 | " # attempt authentication\n", 121 | " try:\n", 122 | " # create OAuthHandler object\n", 123 | " self.auth = OAuthHandler(consumer_key, consumer_secret)\n", 124 | " # set access token and secret\n", 125 | " self.auth.set_access_token(access_token, access_token_secret)\n", 126 | " # create tweepy API object to fetch tweets\n", 127 | " self.api = tweepy.API(self.auth)\n", 128 | " except:\n", 129 | " print(\"Error: Authentication Failed\")\n", 130 | " \n", 131 | " def clean_tweet(self, tweet):\n", 132 | " '''\n", 133 | " Utility function to clean tweet text by removing links, special characters\n", 134 | " using simple regex statements.\n", 135 | " '''\n", 136 | " return ' '.join(re.sub(\"(@[A-Za-z0-9]+)|([^0-9A-Za-z \\t])|(\\w+:\\/\\/\\S+)\", \" \", tweet).split())\n", 137 | " \n", 138 | " def get_tweet_sentiment(self, tweet):\n", 139 | " '''\n", 140 | " Utility 
function to classify sentiment of passed tweet\n", 141 | " using textblob's sentiment method\n", 142 | " '''\n", 143 | " # create TextBlob object of passed tweet text\n", 144 | " analysis = TextBlob(self.clean_tweet(tweet))\n", 145 | " # set sentiment\n", 146 | " if analysis.sentiment.polarity > 0:\n", 147 | " return 'positive'\n", 148 | " elif analysis.sentiment.polarity == 0:\n", 149 | " return 'neutral'\n", 150 | " else:\n", 151 | " return 'negative'\n", 152 | " \n", 153 | " def get_tweets(self, query, count = 10):\n", 154 | " '''\n", 155 | " Main function to fetch tweets and parse them.\n", 156 | " '''\n", 157 | " # empty list to store parsed tweets\n", 158 | " tweets = []\n", 159 | " \n", 160 | " try:\n", 161 | " # call twitter api to fetch tweets\n", 162 | " fetched_tweets = self.api.search(q = query, count = count)\n", 163 | " \n", 164 | " # parsing tweets one by one\n", 165 | " for tweet in fetched_tweets:\n", 166 | " # empty dictionary to store required params of a tweet\n", 167 | " parsed_tweet = {}\n", 168 | " \n", 169 | " # saving text of tweet\n", 170 | " parsed_tweet['text'] = tweet.text\n", 171 | " # saving sentiment of tweet\n", 172 | " parsed_tweet['sentiment'] = self.get_tweet_sentiment(tweet.text)\n", 173 | " \n", 174 | " # appending parsed tweet to tweets list\n", 175 | " if tweet.retweet_count > 0:\n", 176 | " # if tweet has retweets, ensure that it is appended only once\n", 177 | " if parsed_tweet not in tweets:\n", 178 | " tweets.append(parsed_tweet)\n", 179 | " else:\n", 180 | " tweets.append(parsed_tweet)\n", 181 | " \n", 182 | " # return parsed tweets\n", 183 | " return tweets\n", 184 | " \n", 185 | " except tweepy.TweepError as e:\n", 186 | " # print error (if any)\n", 187 | " print(\"Error : \" + str(e))\n", 188 | " \n", 189 | "def main():\n", 190 | " # creating object of TwitterClient Class\n", 191 | " api = TwitterClient()\n", 192 | " # calling function to get tweets\n", 193 | " tweets = api.get_tweets(query = 'Donald Trump', count = 
200)\n", 194 | " \n", 195 | " # picking positive tweets from tweets\n", 196 | " ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive']\n", 197 | " # percentage of positive tweets\n", 198 | " print(\"Positive tweets percentage: {} %\".format(100*len(ptweets)/len(tweets)))\n", 199 | " # picking negative tweets from tweets\n", 200 | " ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative']\n", 201 | " # percentage of negative tweets\n", 202 | " print(\"Negative tweets percentage: {} %\".format(100*len(ntweets)/len(tweets)))\n", 203 | " # percentage of neutral tweets\n", 204 | " print(\"Neutral tweets percentage: {} %\".format(100*(len(tweets) - len(ntweets) - len(ptweets))/len(tweets)))\n", 205 | " \n", 206 | " # printing first 10 positive tweets\n", 207 | " print(\"\\n\\nPositive tweets:\")\n", 208 | " for tweet in ptweets[:10]:\n", 209 | " print(tweet['text'])\n", 210 | " \n", 211 | " # printing first 10 negative tweets\n", 212 | " print(\"\\n\\nNegative tweets:\")\n", 213 | " for tweet in ntweets[:10]:\n", 214 | " print(tweet['text'])\n", 215 | " \n", 216 | "if __name__ == \"__main__\":\n", 217 | " # calling main function\n", 218 | " main()" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "collapsed": true 226 | }, 227 | "outputs": [], 228 | "source": [] 229 | } 230 | ], 231 | "metadata": { 232 | "kernelspec": { 233 | "display_name": "Python 3", 234 | "language": "python", 235 | "name": "python3" 236 | }, 237 | "language_info": { 238 | "codemirror_mode": { 239 | "name": "ipython", 240 | "version": 3 241 | }, 242 | "file_extension": ".py", 243 | "mimetype": "text/x-python", 244 | "name": "python", 245 | "nbconvert_exporter": "python", 246 | "pygments_lexer": "ipython3", 247 | "version": "3.6.1" 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 2 252 | } 253 | -------------------------------------------------------------------------------- /_config.yml: 
-------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /autosklearn+iris.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import autosklearn.classification\n", 10 | "import sklearn.model_selection\n", 11 | "import sklearn.datasets\n", 12 | "import sklearn.metrics" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "X, y = sklearn.datasets.load_iris(return_X_y=True)\n", 24 | "X_train, X_test, y_train, y_test = \\\n", 25 | " sklearn.model_selection.train_test_split(X, y, random_state=1)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 4, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "data": { 35 | "text/plain": [ 36 | "(array([[ 6.5, 2.8, 4.6, 1.5],\n", 37 | " [ 6.7, 2.5, 5.8, 1.8],\n", 38 | " [ 6.8, 3. , 5.5, 2.1],\n", 39 | " [ 5.1, 3.5, 1.4, 0.3],\n", 40 | " [ 6. , 2.2, 5. , 1.5],\n", 41 | " [ 6.3, 2.9, 5.6, 1.8],\n", 42 | " [ 6.6, 2.9, 4.6, 1.3],\n", 43 | " [ 7.7, 2.6, 6.9, 2.3],\n", 44 | " [ 5.7, 3.8, 1.7, 0.3],\n", 45 | " [ 5. , 3.6, 1.4, 0.2],\n", 46 | " [ 4.8, 3. , 1.4, 0.3],\n", 47 | " [ 5.2, 2.7, 3.9, 1.4],\n", 48 | " [ 5.1, 3.4, 1.5, 0.2],\n", 49 | " [ 5.5, 3.5, 1.3, 0.2],\n", 50 | " [ 7.7, 3.8, 6.7, 2.2],\n", 51 | " [ 6.9, 3.1, 5.4, 2.1],\n", 52 | " [ 7.3, 2.9, 6.3, 1.8],\n", 53 | " [ 6.4, 2.8, 5.6, 2.2],\n", 54 | " [ 6.2, 2.8, 4.8, 1.8],\n", 55 | " [ 6. , 3.4, 4.5, 1.6],\n", 56 | " [ 7.7, 2.8, 6.7, 2. ],\n", 57 | " [ 5.7, 3. , 4.2, 1.2],\n", 58 | " [ 4.8, 3.4, 1.6, 0.2],\n", 59 | " [ 5.7, 2.5, 5. , 2. ],\n", 60 | " [ 6.3, 2.7, 4.9, 1.8],\n", 61 | " [ 4.8, 3. 
, 1.4, 0.1],\n", 62 | " [ 4.7, 3.2, 1.3, 0.2],\n", 63 | " [ 6.5, 3. , 5.8, 2.2],\n", 64 | " [ 4.6, 3.4, 1.4, 0.3],\n", 65 | " [ 6.1, 3. , 4.9, 1.8],\n", 66 | " [ 6.5, 3.2, 5.1, 2. ],\n", 67 | " [ 6.7, 3.1, 4.4, 1.4],\n", 68 | " [ 5.7, 2.8, 4.5, 1.3],\n", 69 | " [ 6.7, 3.3, 5.7, 2.5],\n", 70 | " [ 6. , 3. , 4.8, 1.8],\n", 71 | " [ 5.1, 3.8, 1.6, 0.2],\n", 72 | " [ 6. , 2.2, 4. , 1. ],\n", 73 | " [ 6.4, 2.9, 4.3, 1.3],\n", 74 | " [ 6.5, 3. , 5.5, 1.8],\n", 75 | " [ 5. , 2.3, 3.3, 1. ],\n", 76 | " [ 6.3, 3.3, 6. , 2.5],\n", 77 | " [ 5.5, 2.5, 4. , 1.3],\n", 78 | " [ 5.4, 3.7, 1.5, 0.2],\n", 79 | " [ 4.9, 3.1, 1.5, 0.1],\n", 80 | " [ 5.2, 4.1, 1.5, 0.1],\n", 81 | " [ 6.7, 3.3, 5.7, 2.1],\n", 82 | " [ 4.4, 3. , 1.3, 0.2],\n", 83 | " [ 6. , 2.7, 5.1, 1.6],\n", 84 | " [ 6.4, 2.7, 5.3, 1.9],\n", 85 | " [ 5.9, 3. , 5.1, 1.8],\n", 86 | " [ 5.2, 3.5, 1.5, 0.2],\n", 87 | " [ 5.1, 3.3, 1.7, 0.5],\n", 88 | " [ 5.8, 2.7, 4.1, 1. ],\n", 89 | " [ 4.9, 3.1, 1.5, 0.1],\n", 90 | " [ 7.4, 2.8, 6.1, 1.9],\n", 91 | " [ 6.2, 2.9, 4.3, 1.3],\n", 92 | " [ 7.6, 3. , 6.6, 2.1],\n", 93 | " [ 6.7, 3. , 5.2, 2.3],\n", 94 | " [ 6.3, 2.3, 4.4, 1.3],\n", 95 | " [ 6.2, 3.4, 5.4, 2.3],\n", 96 | " [ 7.2, 3.6, 6.1, 2.5],\n", 97 | " [ 5.6, 2.9, 3.6, 1.3],\n", 98 | " [ 5.7, 4.4, 1.5, 0.4],\n", 99 | " [ 5.8, 2.7, 3.9, 1.2],\n", 100 | " [ 4.5, 2.3, 1.3, 0.3],\n", 101 | " [ 5.5, 2.4, 3.8, 1.1],\n", 102 | " [ 6.9, 3.1, 4.9, 1.5],\n", 103 | " [ 5. , 3.4, 1.6, 0.4],\n", 104 | " [ 6.8, 2.8, 4.8, 1.4],\n", 105 | " [ 5. , 3.5, 1.6, 0.6],\n", 106 | " [ 4.8, 3.4, 1.9, 0.2],\n", 107 | " [ 6.3, 3.4, 5.6, 2.4],\n", 108 | " [ 5.6, 2.8, 4.9, 2. ],\n", 109 | " [ 6.8, 3.2, 5.9, 2.3],\n", 110 | " [ 5. , 3.3, 1.4, 0.2],\n", 111 | " [ 5.1, 3.7, 1.5, 0.4],\n", 112 | " [ 5.9, 3.2, 4.8, 1.8],\n", 113 | " [ 4.6, 3.1, 1.5, 0.2],\n", 114 | " [ 5.8, 2.7, 5.1, 1.9],\n", 115 | " [ 4.8, 3.1, 1.6, 0.2],\n", 116 | " [ 6.5, 3. , 5.2, 2. 
],\n", 117 | " [ 4.9, 2.5, 4.5, 1.7],\n", 118 | " [ 4.6, 3.2, 1.4, 0.2],\n", 119 | " [ 6.4, 3.2, 5.3, 2.3],\n", 120 | " [ 4.3, 3. , 1.1, 0.1],\n", 121 | " [ 5.6, 3. , 4.1, 1.3],\n", 122 | " [ 4.4, 2.9, 1.4, 0.2],\n", 123 | " [ 5.5, 2.4, 3.7, 1. ],\n", 124 | " [ 5. , 2. , 3.5, 1. ],\n", 125 | " [ 5.1, 3.5, 1.4, 0.2],\n", 126 | " [ 4.9, 3. , 1.4, 0.2],\n", 127 | " [ 4.9, 2.4, 3.3, 1. ],\n", 128 | " [ 4.6, 3.6, 1. , 0.2],\n", 129 | " [ 5.9, 3. , 4.2, 1.5],\n", 130 | " [ 6.1, 2.9, 4.7, 1.4],\n", 131 | " [ 5. , 3.4, 1.5, 0.2],\n", 132 | " [ 6.7, 3.1, 4.7, 1.5],\n", 133 | " [ 5.7, 2.9, 4.2, 1.3],\n", 134 | " [ 6.2, 2.2, 4.5, 1.5],\n", 135 | " [ 7. , 3.2, 4.7, 1.4],\n", 136 | " [ 5.8, 2.7, 5.1, 1.9],\n", 137 | " [ 5.4, 3.4, 1.7, 0.2],\n", 138 | " [ 5. , 3. , 1.6, 0.2],\n", 139 | " [ 6.1, 2.6, 5.6, 1.4],\n", 140 | " [ 6.1, 2.8, 4. , 1.3],\n", 141 | " [ 7.2, 3. , 5.8, 1.6],\n", 142 | " [ 5.7, 2.6, 3.5, 1. ],\n", 143 | " [ 6.3, 2.8, 5.1, 1.5],\n", 144 | " [ 6.4, 3.1, 5.5, 1.8],\n", 145 | " [ 6.3, 2.5, 4.9, 1.5],\n", 146 | " [ 6.7, 3.1, 5.6, 2.4],\n", 147 | " [ 4.9, 3.1, 1.5, 0.1]]),\n", 148 | " array([1, 2, 2, 0, 2, 2, 1, 2, 0, 0, 0, 1, 0, 0, 2, 2, 2, 2, 2, 1, 2, 1, 0,\n", 149 | " 2, 2, 0, 0, 2, 0, 2, 2, 1, 1, 2, 2, 0, 1, 1, 2, 1, 2, 1, 0, 0, 0, 2,\n", 150 | " 0, 1, 2, 2, 0, 0, 1, 0, 2, 1, 2, 2, 1, 2, 2, 1, 0, 1, 0, 1, 1, 0, 1,\n", 151 | " 0, 0, 2, 2, 2, 0, 0, 1, 0, 2, 0, 2, 2, 0, 2, 0, 1, 0, 1, 1, 0, 0, 1,\n", 152 | " 0, 1, 1, 0, 1, 1, 1, 1, 2, 0, 0, 2, 1, 2, 1, 2, 2, 1, 2, 0]))" 153 | ] 154 | }, 155 | "execution_count": 4, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "X_train,y_train" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 5, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "(array([[ 5.8, 4. , 1.2, 0.2],\n", 173 | " [ 5.1, 2.5, 3. , 1.1],\n", 174 | " [ 6.6, 3. 
, 4.4, 1.4],\n", 175 | " [ 5.4, 3.9, 1.3, 0.4],\n", 176 | " [ 7.9, 3.8, 6.4, 2. ],\n", 177 | " [ 6.3, 3.3, 4.7, 1.6],\n", 178 | " [ 6.9, 3.1, 5.1, 2.3],\n", 179 | " [ 5.1, 3.8, 1.9, 0.4],\n", 180 | " [ 4.7, 3.2, 1.6, 0.2],\n", 181 | " [ 6.9, 3.2, 5.7, 2.3],\n", 182 | " [ 5.6, 2.7, 4.2, 1.3],\n", 183 | " [ 5.4, 3.9, 1.7, 0.4],\n", 184 | " [ 7.1, 3. , 5.9, 2.1],\n", 185 | " [ 6.4, 3.2, 4.5, 1.5],\n", 186 | " [ 6. , 2.9, 4.5, 1.5],\n", 187 | " [ 4.4, 3.2, 1.3, 0.2],\n", 188 | " [ 5.8, 2.6, 4. , 1.2],\n", 189 | " [ 5.6, 3. , 4.5, 1.5],\n", 190 | " [ 5.4, 3.4, 1.5, 0.4],\n", 191 | " [ 5. , 3.2, 1.2, 0.2],\n", 192 | " [ 5.5, 2.6, 4.4, 1.2],\n", 193 | " [ 5.4, 3. , 4.5, 1.5],\n", 194 | " [ 6.7, 3. , 5. , 1.7],\n", 195 | " [ 5. , 3.5, 1.3, 0.3],\n", 196 | " [ 7.2, 3.2, 6. , 1.8],\n", 197 | " [ 5.7, 2.8, 4.1, 1.3],\n", 198 | " [ 5.5, 4.2, 1.4, 0.2],\n", 199 | " [ 5.1, 3.8, 1.5, 0.3],\n", 200 | " [ 6.1, 2.8, 4.7, 1.2],\n", 201 | " [ 6.3, 2.5, 5. , 1.9],\n", 202 | " [ 6.1, 3. , 4.6, 1.4],\n", 203 | " [ 7.7, 3. , 6.1, 2.3],\n", 204 | " [ 5.6, 2.5, 3.9, 1.1],\n", 205 | " [ 6.4, 2.8, 5.6, 2.1],\n", 206 | " [ 5.8, 2.8, 5.1, 2.4],\n", 207 | " [ 5.3, 3.7, 1.5, 0.2],\n", 208 | " [ 5.5, 2.3, 4. 
, 1.3],\n", 209 | " [ 5.2, 3.4, 1.4, 0.2]]),\n", 210 | " array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1,\n", 211 | " 0, 2, 1, 0, 0, 1, 2, 1, 2, 1, 2, 2, 0, 1, 0]))" 212 | ] 213 | }, 214 | "execution_count": 5, 215 | "metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "X_test,y_test" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 10, 226 | "metadata": {}, 227 | "outputs": [ 228 | { 229 | "data": { 230 | "text/plain": [ 231 | "38" 232 | ] 233 | }, 234 | "execution_count": 10, 235 | "metadata": {}, 236 | "output_type": "execute_result" 237 | } 238 | ], 239 | "source": [ 240 | "len(y_test)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 9, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "38" 252 | ] 253 | }, 254 | "execution_count": 9, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [ 260 | "len(X_test)" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 7, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "data": { 270 | "text/plain": [ 271 | "112" 272 | ] 273 | }, 274 | "execution_count": 7, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": [ 280 | "len(X_train)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 8, 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "data": { 290 | "text/plain": [ 291 | "112" 292 | ] 293 | }, 294 | "execution_count": 8, 295 | "metadata": {}, 296 | "output_type": "execute_result" 297 | } 298 | ], 299 | "source": [ 300 | "len(y_train)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 11, 306 | "metadata": { 307 | "collapsed": true 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "automl = autosklearn.classification.AutoSklearnClassifier()\n", 312 | "automl.fit(X_train, y_train)" 313 | 
] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 13, 318 | "metadata": { 319 | "collapsed": true 320 | }, 321 | "outputs": [], 322 | "source": [ 323 | "y_hat = automl.predict(X_test)" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 16, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "name": "stdout", 333 | "output_type": "stream", 334 | "text": [ 335 | "Accuracy score 0.947368421053\n" 336 | ] 337 | } 338 | ], 339 | "source": [ 340 | "print(\"Accuracy score\", sklearn.metrics.accuracy_score(y_test, y_hat))" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "collapsed": true 348 | }, 349 | "outputs": [], 350 | "source": [] 351 | } 352 | ], 353 | "metadata": { 354 | "kernelspec": { 355 | "display_name": "Python 3", 356 | "language": "python", 357 | "name": "python3" 358 | }, 359 | "language_info": { 360 | "codemirror_mode": { 361 | "name": "ipython", 362 | "version": 3 363 | }, 364 | "file_extension": ".py", 365 | "mimetype": "text/x-python", 366 | "name": "python", 367 | "nbconvert_exporter": "python", 368 | "pygments_lexer": "ipython3", 369 | "version": "3.6.3" 370 | } 371 | }, 372 | "nbformat": 4, 373 | "nbformat_minor": 2 374 | } 375 | -------------------------------------------------------------------------------- /chi+square+test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "with code from http://rpubs.com/newajay/chisquaretest" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 11, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "from scipy.stats import chi2_contingency\n", 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 13, 25 | "metadata": { 26 | "collapsed": false 27 | }, 28 | "outputs": [], 29 
| "source": [ 30 | "obs = np.array([[7, 87, 12,9], [4, 102, 7,8]])\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 15, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "chi2, p, dof, expected = chi2_contingency(obs)\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 16, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "0.357103080041\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "print (p)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 17, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": "stream", 73 | "text": [ 74 | "3.23281822261\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "print (chi2)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 18, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [ 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "3\n" 94 | ] 95 | } 96 | ], 97 | "source": [ 98 | "print (dof)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 19, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [ 108 | { 109 | "name": "stdout", 110 | "output_type": "stream", 111 | "text": [ 112 | "[[ 5.36016949 92.09745763 9.25847458 8.28389831]\n", 113 | " [ 5.63983051 96.90254237 9.74152542 8.71610169]]\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "print (expected)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": true 126 | }, 127 | "outputs": [], 128 | "source": [] 129 | } 130 | ], 131 | "metadata": { 132 | "anaconda-cloud": {}, 133 | "kernelspec": { 134 | "display_name": "Python [conda root]", 135 | "language": "python", 136 | "name": "conda-root-py" 137 | }, 138 | "language_info": 
{ 139 | "codemirror_mode": { 140 | "name": "ipython", 141 | "version": 3 142 | }, 143 | "file_extension": ".py", 144 | "mimetype": "text/x-python", 145 | "name": "python", 146 | "nbconvert_exporter": "python", 147 | "pygments_lexer": "ipython3", 148 | "version": "3.5.2" 149 | } 150 | }, 151 | "nbformat": 4, 152 | "nbformat_minor": 1 153 | } 154 | -------------------------------------------------------------------------------- /computer-vision/FirstDetection.py: -------------------------------------------------------------------------------- 1 | from imageai.Detection import ObjectDetection 2 | import os 3 | 4 | execution_path = os.getcwd() 5 | 6 | detector = ObjectDetection() 7 | detector.setModelTypeAsRetinaNet() 8 | detector.setModelPath( os.path.join(execution_path , "resnet50_coco_best_v2.0.1.h5")) 9 | detector.loadModel() 10 | detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , "image.jpeg"), output_image_path=os.path.join(execution_path , "imagenew.jpeg")) 11 | 12 | for eachObject in detections: 13 | print(eachObject["name"] , " : " , eachObject["percentage_probability"] ) 14 | -------------------------------------------------------------------------------- /computer-vision/computer vision.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Citation-https://towardsdatascience.com/object-detection-with-10-lines-of-code-d6cb4d86f606" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "Requirement already satisfied: tensorflow in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (1.6.0)\n", 20 | "Requirement already satisfied: tensorboard<1.7.0,>=1.6.0 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorflow) (1.6.0)\n", 21 | "Requirement 
already satisfied: grpcio>=1.8.6 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorflow) (1.10.0)\n", 22 | "Requirement already satisfied: numpy>=1.13.3 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorflow) (1.15.0)\n", 23 | "Requirement already satisfied: six>=1.10.0 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorflow) (1.11.0)\n", 24 | "Requirement already satisfied: astor>=0.6.0 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorflow) (0.6.2)\n", 25 | "Requirement already satisfied: absl-py>=0.1.6 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorflow) (0.1.11)\n", 26 | "Requirement already satisfied: termcolor>=1.1.0 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorflow) (1.1.0)\n", 27 | "Requirement already satisfied: protobuf>=3.4.0 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorflow) (3.5.2)\n", 28 | "Requirement already satisfied: gast>=0.2.0 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorflow) (0.2.0)\n", 29 | "Requirement already satisfied: wheel>=0.26 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorflow) (0.30.0)\n", 30 | "Requirement already satisfied: bleach==1.5.0 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorboard<1.7.0,>=1.6.0->tensorflow) (1.5.0)\n", 31 | "Requirement already satisfied: markdown>=2.6.8 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorboard<1.7.0,>=1.6.0->tensorflow) (2.6.11)\n", 32 | "Requirement already satisfied: html5lib==0.9999999 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorboard<1.7.0,>=1.6.0->tensorflow) (0.9999999)\n", 33 | "Requirement already satisfied: werkzeug>=0.11.10 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from tensorboard<1.7.0,>=1.6.0->tensorflow) (0.14.1)\n", 34 | "Requirement already satisfied: setuptools in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from 
protobuf>=3.4.0->tensorflow) (38.5.2)\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "! pip install tensorflow\n", 40 | "\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "Requirement already up-to-date: pip in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (18.0)\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "! pip install --upgrade pip" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "conda install numpy" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 3, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "Collecting numpy\n", 77 | " Using cached https://files.pythonhosted.org/packages/53/d1/2499797c88de95ea3239ad7f6e6a47895fe51aad1aa2a116f50ec9e0ee74/numpy-1.15.0-cp36-none-win_amd64.whl\n", 78 | "Installing collected packages: numpy\n", 79 | "Successfully installed numpy-1.15.0\n" 80 | ] 81 | }, 82 | { 83 | "name": "stderr", 84 | "output_type": "stream", 85 | "text": [ 86 | "kmodes 0.7 has requirement numpy==1.12.1, but you'll have numpy 1.15.0 which is incompatible.\n", 87 | "kmodes 0.7 has requirement scikit-learn==0.18.1, but you'll have scikit-learn 0.19.1 which is incompatible.\n", 88 | "kmodes 0.7 has requirement scipy==0.19.0, but you'll have scipy 1.0.0 which is incompatible.\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "! 
pip install numpy -I" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 4, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "import numpy.core.multiarray" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 5, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "Requirement already satisfied: spacy in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (2.0.11)\n", 115 | "Requirement already satisfied: numpy>=1.7 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from spacy) (1.15.0)\n", 116 | "Requirement already satisfied: murmurhash<0.29,>=0.28 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from spacy) (0.28.0)\n", 117 | "Requirement already satisfied: cymem<1.32,>=1.30 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from spacy) (1.31.2)\n", 118 | "Requirement already satisfied: preshed<2.0.0,>=1.0.0 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from spacy) (1.0.0)\n", 119 | "Requirement already satisfied: thinc<6.11.0,>=6.10.1 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from spacy) (6.10.2)\n", 120 | "Requirement already satisfied: plac<1.0.0,>=0.9.6 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from spacy) (0.9.6)\n", 121 | "Requirement already satisfied: pathlib in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from spacy) (1.0.1)\n", 122 | "Requirement already satisfied: ujson>=1.35 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from spacy) (1.35)\n", 123 | "Requirement already satisfied: dill<0.3,>=0.2 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from spacy) (0.2.7.1)\n", 124 | "Requirement already satisfied: regex==2017.4.5 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from spacy) (2017.4.5)\n", 125 | "Requirement already satisfied: wrapt in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from thinc<6.11.0,>=6.10.1->spacy) (1.10.10)\n", 
126 | "Requirement already satisfied: tqdm<5.0.0,>=4.10.0 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from thinc<6.11.0,>=6.10.1->spacy) (4.23.0)\n", 127 | "Requirement already satisfied: cytoolz<0.9,>=0.8 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from thinc<6.11.0,>=6.10.1->spacy) (0.8.2)\n", 128 | "Requirement already satisfied: six<2.0.0,>=1.10.0 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from thinc<6.11.0,>=6.10.1->spacy) (1.11.0)\n", 129 | "Requirement already satisfied: termcolor in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from thinc<6.11.0,>=6.10.1->spacy) (1.1.0)\n", 130 | "Requirement already satisfied: msgpack-python in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from thinc<6.11.0,>=6.10.1->spacy) (0.4.8)\n", 131 | "Requirement already satisfied: msgpack-numpy==0.4.1 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from thinc<6.11.0,>=6.10.1->spacy) (0.4.1)\n", 132 | "Requirement already satisfied: pyreadline>=1.7.1 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from dill<0.3,>=0.2->spacy) (2.1)\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "!pip install spacy" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 7, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Requirement already satisfied: scipy in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (1.0.0)\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "! 
pip install scipy\n" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 8, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "Requirement already satisfied: opencv-python in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (3.4.2.17)\n", 167 | "Requirement already satisfied: numpy>=1.11.3 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from opencv-python) (1.15.0)\n" 168 | ] 169 | } 170 | ], 171 | "source": [ 172 | "! pip install opencv-python\n", 173 | "\n" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 9, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "name": "stdout", 183 | "output_type": "stream", 184 | "text": [ 185 | "Requirement already satisfied: pillow in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (5.0.0)\n" 186 | ] 187 | } 188 | ], 189 | "source": [ 190 | "! pip install pillow\n", 191 | "\n" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 10, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "Requirement already satisfied: matplotlib in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (2.2.2)\n", 204 | "Requirement already satisfied: numpy>=1.7.1 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from matplotlib) (1.15.0)\n", 205 | "Requirement already satisfied: cycler>=0.10 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from matplotlib) (0.10.0)\n", 206 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from matplotlib) (2.1.4)\n", 207 | "Requirement already satisfied: python-dateutil>=2.1 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from matplotlib) (2.6.1)\n", 208 | "Requirement already satisfied: pytz in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from matplotlib) (2017.2)\n", 209 | 
"Requirement already satisfied: six>=1.10 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from matplotlib) (1.11.0)\n", 210 | "Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from matplotlib) (1.0.1)\n", 211 | "Requirement already satisfied: setuptools in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from kiwisolver>=1.0.1->matplotlib) (38.5.2)\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "! pip install matplotlib\n", 217 | "\n" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 11, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "name": "stdout", 227 | "output_type": "stream", 228 | "text": [ 229 | "Requirement already satisfied: h5py in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (2.7.1)\n", 230 | "Requirement already satisfied: numpy>=1.7 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from h5py) (1.15.0)\n", 231 | "Requirement already satisfied: six in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from h5py) (1.11.0)\n" 232 | ] 233 | } 234 | ], 235 | "source": [ 236 | "! 
pip install h5py\n", 237 | "\n" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 12, 243 | "metadata": {}, 244 | "outputs": [ 245 | { 246 | "name": "stdout", 247 | "output_type": "stream", 248 | "text": [ 249 | "Requirement already satisfied: keras in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (2.1.5)\n", 250 | "Requirement already satisfied: pyyaml in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from keras) (3.12)\n", 251 | "Requirement already satisfied: scipy>=0.14 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from keras) (1.0.0)\n", 252 | "Requirement already satisfied: six>=1.9.0 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from keras) (1.11.0)\n", 253 | "Requirement already satisfied: numpy>=1.9.1 in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (from keras) (1.15.0)\n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "! pip install keras\n", 259 | "\n" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 13, 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "Requirement already satisfied: imageai==2.0.2 from https://github.com/OlafenwaMoses/ImageAI/releases/download/2.0.2/imageai-2.0.2-py3-none-any.whl in c:\\users\\kogentix\\anaconda3\\lib\\site-packages (2.0.2)\n" 272 | ] 273 | } 274 | ], 275 | "source": [ 276 | "! 
pip3 install https://github.com/OlafenwaMoses/ImageAI/releases/download/2.0.2/imageai-2.0.2-py3-none-any.whl" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "Download the RetinaNet model file that will be used for object detection via this link\n", 284 | "https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/resnet50_coco_best_v2.0.1.h5" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 14, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "import os as os" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 15, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "data": { 303 | "text/plain": [ 304 | "'C:\\\\Users\\\\KOGENTIX'" 305 | ] 306 | }, 307 | "execution_count": 15, 308 | "metadata": {}, 309 | "output_type": "execute_result" 310 | } 311 | ], 312 | "source": [ 313 | "os.getcwd()" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 16, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [ 322 | "os.chdir('C:\\\\Users\\\\KOGENTIX\\\\Desktop\\\\image')" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 17, 328 | "metadata": {}, 329 | "outputs": [ 330 | { 331 | "data": { 332 | "text/plain": [ 333 | "'C:\\\\Users\\\\KOGENTIX\\\\Desktop\\\\image'" 334 | ] 335 | }, 336 | "execution_count": 17, 337 | "metadata": {}, 338 | "output_type": "execute_result" 339 | } 340 | ], 341 | "source": [ 342 | "os.getcwd()" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 23, 348 | "metadata": {}, 349 | "outputs": [ 350 | { 351 | "name": "stdout", 352 | "output_type": "stream", 353 | "text": [ 354 | "person : 56.95698857307434\n", 355 | "person : 52.80917286872864\n", 356 | "person : 70.20386457443237\n", 357 | "person : 76.8346905708313\n", 358 | "person : 78.70020866394043\n", 359 | "bicycle : 79.7773540019989\n", 360 | "person : 83.55741500854492\n", 361 | 
"person : 89.43805694580078\n", 362 | "truck : 60.933083295822144\n", 363 | "person : 69.52632069587708\n", 364 | "bus : 98.00646901130676\n", 365 | "truck : 83.69445204734802\n", 366 | "car : 71.7008650302887\n" 367 | ] 368 | } 369 | ], 370 | "source": [ 371 | "from imageai.Detection import ObjectDetection\n", 372 | "import os\n", 373 | "\n", 374 | "execution_path = os.getcwd()\n", 375 | "\n", 376 | "detector = ObjectDetection()\n", 377 | "detector.setModelTypeAsRetinaNet()\n", 378 | "detector.setModelPath( os.path.join(execution_path , \"resnet50_coco_best_v2.0.1.h5\"))\n", 379 | "detector.loadModel()\n", 380 | "detections = detector.detectObjectsFromImage(input_image=os.path.join(execution_path , \"image.jpeg\"), output_image_path=os.path.join(execution_path , \"imagenew.jpeg\"))\n", 381 | "\n", 382 | "for eachObject in detections:\n", 383 | " print(eachObject[\"name\"] , \" : \" , eachObject[\"percentage_probability\"] )" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [] 392 | } 393 | ], 394 | "metadata": { 395 | "kernelspec": { 396 | "display_name": "Python 3", 397 | "language": "python", 398 | "name": "python3" 399 | }, 400 | "language_info": { 401 | "codemirror_mode": { 402 | "name": "ipython", 403 | "version": 3 404 | }, 405 | "file_extension": ".py", 406 | "mimetype": "text/x-python", 407 | "name": "python", 408 | "nbconvert_exporter": "python", 409 | "pygments_lexer": "ipython3", 410 | "version": "3.6.6" 411 | } 412 | }, 413 | "nbformat": 4, 414 | "nbformat_minor": 2 415 | } 416 | -------------------------------------------------------------------------------- /computer-vision/image.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decisionstats/python_for_datascience/1cc8909d850d77410b41cd256d274166392b88a1/computer-vision/image.jpeg 
-------------------------------------------------------------------------------- /computer-vision/imagenew.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decisionstats/python_for_datascience/1cc8909d850d77410b41cd256d274166392b88a1/computer-vision/imagenew.jpeg -------------------------------------------------------------------------------- /data/RFM part2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decisionstats/python_for_datascience/1cc8909d850d77410b41cd256d274166392b88a1/data/RFM part2.xlsx -------------------------------------------------------------------------------- /data/stats DAP.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/decisionstats/python_for_datascience/1cc8909d850d77410b41cd256d274166392b88a1/data/stats DAP.xlsx -------------------------------------------------------------------------------- /descriptive+stats+in+Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import numpy as np" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 17, 28 | "metadata": { 29 | "collapsed": false 30 | }, 31 | "outputs": [ 32 | { 33 | "data": { 34 | "text/html": [ 35 | "
\n", 36 | "\n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | "
crimzninduschasnoxrmagedisradtaxptratioblacklstatmedv
00.0063218.02.3100.5386.57565.24.0900129615.3396.904.9824.0
10.027310.07.0700.4696.42178.94.9671224217.8396.909.1421.6
20.027290.07.0700.4697.18561.14.9671224217.8392.834.0334.7
30.032370.02.1800.4586.99845.86.0622322218.7394.632.9433.4
40.069050.02.1800.4587.14754.26.0622322218.7396.905.3336.2
\n", 144 | "
" 145 | ], 146 | "text/plain": [ 147 | " crim zn indus chas nox rm age dis rad tax ptratio \\\n", 148 | "0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 \n", 149 | "1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 \n", 150 | "2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 \n", 151 | "3 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 \n", 152 | "4 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 \n", 153 | "\n", 154 | " black lstat medv \n", 155 | "0 396.90 4.98 24.0 \n", 156 | "1 396.90 9.14 21.6 \n", 157 | "2 392.83 4.03 34.7 \n", 158 | "3 394.63 2.94 33.4 \n", 159 | "4 396.90 5.33 36.2 " 160 | ] 161 | }, 162 | "execution_count": 17, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "df=pd.read_csv(\"http://vincentarelbundock.github.io/Rdatasets/csv/MASS/Boston.csv\")\n", 169 | "df2=df.iloc[:,1:]\n", 170 | "df2.head() " 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 18, 176 | "metadata": { 177 | "collapsed": false 178 | }, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/html": [ 183 | "
\n", 184 | "\n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | 
" \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 
| " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | "
LCUCmax_valmean_valmin_valnnmissoutlier_flagp1p10p5p90p95p99q1q2q3sd
crim-22.16560029.39264888.97623.6135240.00632506010.013610.0381950.02791010.753015.7891541.370330.0820450.256513.6770828.593041
zn-58.53455181.261823100.000011.3636360.00000506010.000000.0000000.00000042.500080.0000090.000000.0000000.0000012.50000023.299396
indus-9.42393331.69749027.740011.1367790.46000506011.253502.9100002.18000019.580021.8900025.650005.1900009.6900018.1000006.853571
chas-0.6920590.8303991.00000.0691700.00000506010.000000.0000000.0000000.00001.000001.000000.0000000.000000.0000000.253743
nox0.2074060.9019840.87100.5546950.38500506010.398000.4270000.4092500.71300.740000.871000.4490000.538000.6240000.115763
rm4.1788678.3904028.78006.2846343.56100506014.524455.5935005.3140007.15157.587508.335005.8855006.208506.6235000.701923
age-15.788197152.937999100.000068.5749012.90000506016.6100026.95000017.72500098.8000100.00000100.0000045.02500077.5000094.07500028.121033
dis-2.51584210.10592812.12653.7950431.12960506011.206541.6283001.4619756.81667.827809.222772.1001753.207455.1884252.103628
rad-16.54654635.64536024.00009.5494071.00000506011.000003.0000002.00000024.000024.0000024.000004.0000005.0000024.0000008.698651
tax-96.874331913.348639711.0000408.237154187.0000050601188.00000233.000000222.000000666.0000666.00000666.00000279.000000330.00000666.000000168.370495
ptratio11.96711824.94394922.000018.45553412.600005060113.0000014.75000014.70000020.900021.0000021.2000017.40000019.0500020.2000002.162805
black83.060209630.287854396.9000356.6740320.32000506016.73000290.27000084.590000396.9000396.90000396.90000375.377500391.44000396.22500091.204607
lstat-8.74894234.05506837.970012.6530631.73000506012.883004.6800003.70750023.035026.8075033.918506.95000011.3600016.9550007.134002
medv-5.03122850.09684150.000022.5328065.00000506017.0100012.75000010.20000034.800043.4000050.0000017.02500021.2000025.0000009.188012
\n", 505 | "
" 506 | ], 507 | "text/plain": [ 508 | " LC UC max_val mean_val min_val n nmiss \\\n", 509 | "crim -22.165600 29.392648 88.9762 3.613524 0.00632 506 0 \n", 510 | "zn -58.534551 81.261823 100.0000 11.363636 0.00000 506 0 \n", 511 | "indus -9.423933 31.697490 27.7400 11.136779 0.46000 506 0 \n", 512 | "chas -0.692059 0.830399 1.0000 0.069170 0.00000 506 0 \n", 513 | "nox 0.207406 0.901984 0.8710 0.554695 0.38500 506 0 \n", 514 | "rm 4.178867 8.390402 8.7800 6.284634 3.56100 506 0 \n", 515 | "age -15.788197 152.937999 100.0000 68.574901 2.90000 506 0 \n", 516 | "dis -2.515842 10.105928 12.1265 3.795043 1.12960 506 0 \n", 517 | "rad -16.546546 35.645360 24.0000 9.549407 1.00000 506 0 \n", 518 | "tax -96.874331 913.348639 711.0000 408.237154 187.00000 506 0 \n", 519 | "ptratio 11.967118 24.943949 22.0000 18.455534 12.60000 506 0 \n", 520 | "black 83.060209 630.287854 396.9000 356.674032 0.32000 506 0 \n", 521 | "lstat -8.748942 34.055068 37.9700 12.653063 1.73000 506 0 \n", 522 | "medv -5.031228 50.096841 50.0000 22.532806 5.00000 506 0 \n", 523 | "\n", 524 | " outlier_flag p1 p10 p5 p90 p95 \\\n", 525 | "crim 1 0.01361 0.038195 0.027910 10.7530 15.78915 \n", 526 | "zn 1 0.00000 0.000000 0.000000 42.5000 80.00000 \n", 527 | "indus 1 1.25350 2.910000 2.180000 19.5800 21.89000 \n", 528 | "chas 1 0.00000 0.000000 0.000000 0.0000 1.00000 \n", 529 | "nox 1 0.39800 0.427000 0.409250 0.7130 0.74000 \n", 530 | "rm 1 4.52445 5.593500 5.314000 7.1515 7.58750 \n", 531 | "age 1 6.61000 26.950000 17.725000 98.8000 100.00000 \n", 532 | "dis 1 1.20654 1.628300 1.461975 6.8166 7.82780 \n", 533 | "rad 1 1.00000 3.000000 2.000000 24.0000 24.00000 \n", 534 | "tax 1 188.00000 233.000000 222.000000 666.0000 666.00000 \n", 535 | "ptratio 1 13.00000 14.750000 14.700000 20.9000 21.00000 \n", 536 | "black 1 6.73000 290.270000 84.590000 396.9000 396.90000 \n", 537 | "lstat 1 2.88300 4.680000 3.707500 23.0350 26.80750 \n", 538 | "medv 1 7.01000 12.750000 10.200000 34.8000 43.40000 \n", 539 | 
"\n", 540 | " p99 q1 q2 q3 sd \n", 541 | "crim 41.37033 0.082045 0.25651 3.677082 8.593041 \n", 542 | "zn 90.00000 0.000000 0.00000 12.500000 23.299396 \n", 543 | "indus 25.65000 5.190000 9.69000 18.100000 6.853571 \n", 544 | "chas 1.00000 0.000000 0.00000 0.000000 0.253743 \n", 545 | "nox 0.87100 0.449000 0.53800 0.624000 0.115763 \n", 546 | "rm 8.33500 5.885500 6.20850 6.623500 0.701923 \n", 547 | "age 100.00000 45.025000 77.50000 94.075000 28.121033 \n", 548 | "dis 9.22277 2.100175 3.20745 5.188425 2.103628 \n", 549 | "rad 24.00000 4.000000 5.00000 24.000000 8.698651 \n", 550 | "tax 666.00000 279.000000 330.00000 666.000000 168.370495 \n", 551 | "ptratio 21.20000 17.400000 19.05000 20.200000 2.162805 \n", 552 | "black 396.90000 375.377500 391.44000 396.225000 91.204607 \n", 553 | "lstat 33.91850 6.950000 11.36000 16.955000 7.134002 \n", 554 | "medv 50.00000 17.025000 21.20000 25.000000 9.188012 " 555 | ] 556 | }, 557 | "execution_count": 18, 558 | "metadata": {}, 559 | "output_type": "execute_result" 560 | } 561 | ], 562 | "source": [ 563 | "def my_descriptive_stats(df):\n", 564 | " my_stats_dict=dict()\n", 565 | " y=list(df.columns)\n", 566 | " nmiss=[]\n", 567 | " n=[]\n", 568 | " sd=[]\n", 569 | " mean_val=[]\n", 570 | " min_val=[]\n", 571 | " p1=[]\n", 572 | " p5=[]\n", 573 | " p10=[]\n", 574 | " p90=[]\n", 575 | " p95=[]\n", 576 | " p99=[]\n", 577 | " q1=[]\n", 578 | " q2=[]\n", 579 | " q3=[]\n", 580 | " max_val=[]\n", 581 | " UC=[]\n", 582 | " LC=[]\n", 583 | " outlier_flag=[]\n", 584 | " \n", 585 | " \n", 586 | " for i in range(len(df.columns)):\n", 587 | " col_name=y[i]\n", 588 | " nmiss.append(sum(df[col_name ].isnull()))\n", 589 | " x=df[df[col_name].notnull()][col_name]\n", 590 | " mean_val.append(np.mean(x))\n", 591 | " n.append(len(x))\n", 592 | " sd.append(np.std(x))\n", 593 | " min_val.append(min(x))\n", 594 | " p1.append(x.quantile(.01))\n", 595 | " p5.append(x.quantile(.05))\n", 596 | " p10.append(x.quantile(.1))\n", 597 | " 
q1.append(x.quantile(.25))\n", 598 | " q2.append(x.quantile(.5))\n", 599 | " q3.append(x.quantile(.75))\n", 600 | " p90.append(x.quantile(.90))\n", 601 | " p95.append(x.quantile(.95))\n", 602 | " p99.append(x.quantile(.99))\n", 603 | " max_val.append(max(x))\n", 604 | " UC.append(np.mean(x)+3*np.std(x))\n", 605 | " LC.append(np.mean(x)-3*np.std(x))\n", 606 | " outlier_flag.append(np.sum(max_val>UC or min_val\n", 36 | "\n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | "
012345
0NaNSepal.LengthSepal.WidthPetal.LengthPetal.WidthSpecies
11.05.13.51.40.2setosa
22.04.931.40.2setosa
33.04.73.21.30.2setosa
44.04.63.11.50.2setosa
\n", 96 | "" 97 | ], 98 | "text/plain": [ 99 | " 0 1 2 3 4 5\n", 100 | "0 NaN Sepal.Length Sepal.Width Petal.Length Petal.Width Species\n", 101 | "1 1.0 5.1 3.5 1.4 0.2 setosa\n", 102 | "2 2.0 4.9 3 1.4 0.2 setosa\n", 103 | "3 3.0 4.7 3.2 1.3 0.2 setosa\n", 104 | "4 4.0 4.6 3.1 1.5 0.2 setosa" 105 | ] 106 | }, 107 | "execution_count": 8, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "iris.head()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 10, 119 | "metadata": { 120 | "collapsed": false 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "iris2=iris.iloc[1:,1:]" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 11, 130 | "metadata": { 131 | "collapsed": false 132 | }, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/html": [ 137 | "
\n", 138 | "\n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | "
12345
15.13.51.40.2setosa
24.931.40.2setosa
34.73.21.30.2setosa
44.63.11.50.2setosa
553.61.40.2setosa
\n", 192 | "
" 193 | ], 194 | "text/plain": [ 195 | " 1 2 3 4 5\n", 196 | "1 5.1 3.5 1.4 0.2 setosa\n", 197 | "2 4.9 3 1.4 0.2 setosa\n", 198 | "3 4.7 3.2 1.3 0.2 setosa\n", 199 | "4 4.6 3.1 1.5 0.2 setosa\n", 200 | "5 5 3.6 1.4 0.2 setosa" 201 | ] 202 | }, 203 | "execution_count": 11, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | } 207 | ], 208 | "source": [ 209 | "iris2.head()" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 12, 215 | "metadata": { 216 | "collapsed": true 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "import os as os" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 13, 226 | "metadata": { 227 | "collapsed": false 228 | }, 229 | "outputs": [ 230 | { 231 | "data": { 232 | "text/plain": [ 233 | "'C:\\\\Users\\\\Dell\\\\Documents'" 234 | ] 235 | }, 236 | "execution_count": 13, 237 | "metadata": {}, 238 | "output_type": "execute_result" 239 | } 240 | ], 241 | "source": [ 242 | "os.getcwd()" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 18, 248 | "metadata": { 249 | "collapsed": false 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "iris2.to_csv(\"iris2.csv\")" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 4, 259 | "metadata": { 260 | "collapsed": false 261 | }, 262 | "outputs": [ 263 | { 264 | "name": "stdout", 265 | "output_type": "stream", 266 | "text": [ 267 | "\n", 268 | "RangeIndex: 150 entries, 0 to 149\n", 269 | "Data columns (total 6 columns):\n", 270 | "Unnamed: 0 150 non-null int64\n", 271 | "Sepal.Length 150 non-null float64\n", 272 | "Sepal.Width 150 non-null float64\n", 273 | "Petal.Length 150 non-null float64\n", 274 | "Petal.Width 150 non-null float64\n", 275 | "Species 150 non-null object\n", 276 | "dtypes: float64(4), int64(1), object(1)\n", 277 | "memory usage: 7.1+ KB\n" 278 | ] 279 | } 280 | ], 281 | "source": [ 282 | "iris.info()" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | 
"metadata": {}, 288 | "source": [ 289 | "CREATE TABLE iris (\n", 290 | "Sepal_Length real,\n", 291 | "Sepal_Width real,\n", 292 | "Petal_Length real,\n", 293 | "Petal_Width real,\n", 294 | "Species varchar(20) \n", 295 | ");\n" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 19, 301 | "metadata": { 302 | "collapsed": false 303 | }, 304 | "outputs": [ 305 | { 306 | "data": { 307 | "text/plain": [ 308 | "'C:\\\\Users\\\\Dell\\\\Documents'" 309 | ] 310 | }, 311 | "execution_count": 19, 312 | "metadata": {}, 313 | "output_type": "execute_result" 314 | } 315 | ], 316 | "source": [ 317 | "os.getcwd()" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 20, 323 | "metadata": { 324 | "collapsed": true 325 | }, 326 | "outputs": [], 327 | "source": [ 328 | "os.chdir('C:\\\\Users\\\\Dell\\\\Desktop')" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 21, 334 | "metadata": { 335 | "collapsed": false 336 | }, 337 | "outputs": [ 338 | { 339 | "data": { 340 | "text/plain": [ 341 | "['.Rhistory',\n", 342 | " '16508797_10155115909410362_414170078812994931_n.jpg',\n", 343 | " '27032014_Duplicate_Statement.pdf',\n", 344 | " '30072015_form_du-degree.pdf',\n", 345 | " 'ACK.html',\n", 346 | " 'ACK_files',\n", 347 | " 'adult.data.txt',\n", 348 | " 'AJAY.xps',\n", 349 | " 'Basics of SQL & RDBMS _ Must Skills For Data Science Professionals.html',\n", 350 | " 'Basics of SQL & RDBMS _ Must Skills For Data Science Professionals_files',\n", 351 | " 'BigDiamonds (2).csv',\n", 352 | " 'BigDiamonds.csv',\n", 353 | " 'BigDiamonds.csv (2).zip',\n", 354 | " 'BigDiamonds2.csv',\n", 355 | " 'BLOOD REPORT.pdf',\n", 356 | " 'CAM- Ajay Ohri.pdf',\n", 357 | " 'cam.xps',\n", 358 | " 'cam2.pdf',\n", 359 | " 'cdo.jpeg',\n", 360 | " 'clustersas.html',\n", 361 | " 'dap class 4.R',\n", 362 | " 'dap_class_4.html',\n", 363 | " 'desktop.ini',\n", 364 | " 'Dropbox.lnk',\n", 365 | " 'dupform.pdf',\n", 366 | " 'DVD.csv',\n", 367 | " 
'GermanCredit.csv',\n", 368 | " 'Git Shell.lnk',\n", 369 | " 'GitHub.appref-ms',\n", 370 | " 'GoToMeeting.lnk',\n", 371 | " 'groceries.csv',\n", 372 | " 'Guidelines-CBSE.html',\n", 373 | " 'IMS proschool',\n", 374 | " 'iris2.csv',\n", 375 | " 'logistic regression - script for ppt.R',\n", 376 | " 'OnlineCardNSR.pdf',\n", 377 | " 'PaymentForm.pdf',\n", 378 | " 'Program 1-results.rtf',\n", 379 | " 'Rdatasets',\n", 380 | " 'Results_ Modeling and Forecasting.html',\n", 381 | " 'Results_ Program 5.sas.html',\n", 382 | " 'Results_ Time Series Exploration.ctk.html',\n", 383 | " 'Rplot.png',\n", 384 | " 'Rplot01.pdf',\n", 385 | " 'Rplot02.pdf',\n", 386 | " 'Rplot03.png',\n", 387 | " 'rsconnect',\n", 388 | " 'sas-university-edition-107140.pdf',\n", 389 | " 'SQL-1.png',\n", 390 | " 'sql.jpg',\n", 391 | " 'sqlcheatsheet.jpg',\n", 392 | " 'sqljoins_cheatsheet.png',\n", 393 | " 'Sunstone - Google Docs.pdf',\n", 394 | " 'test',\n", 395 | " 'Trarscript_Form.pdf']" 396 | ] 397 | }, 398 | "execution_count": 21, 399 | "metadata": {}, 400 | "output_type": "execute_result" 401 | } 402 | ], 403 | "source": [ 404 | "os.listdir()" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": { 411 | "collapsed": true 412 | }, 413 | "outputs": [], 414 | "source": [] 415 | } 416 | ], 417 | "metadata": { 418 | "anaconda-cloud": {}, 419 | "kernelspec": { 420 | "display_name": "Python [conda root]", 421 | "language": "python", 422 | "name": "conda-root-py" 423 | }, 424 | "language_info": { 425 | "codemirror_mode": { 426 | "name": "ipython", 427 | "version": 3 428 | }, 429 | "file_extension": ".py", 430 | "mimetype": "text/x-python", 431 | "name": "python", 432 | "nbconvert_exporter": "python", 433 | "pygments_lexer": "ipython3", 434 | "version": "3.5.2" 435 | } 436 | }, 437 | "nbformat": 4, 438 | "nbformat_minor": 1 439 | } 440 | -------------------------------------------------------------------------------- /lambda+functions.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "def f(x):return x**2" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/plain": [ 24 | "400" 25 | ] 26 | }, 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "output_type": "execute_result" 30 | } 31 | ], 32 | "source": [ 33 | "f(20)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "g=lambda x:x**2" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 4, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/plain": [ 57 | "400" 58 | ] 59 | }, 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "g(20)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": true 74 | }, 75 | "outputs": [], 76 | "source": [] 77 | } 78 | ], 79 | "metadata": { 80 | "kernelspec": { 81 | "display_name": "Python [conda root]", 82 | "language": "python", 83 | "name": "conda-root-py" 84 | }, 85 | "language_info": { 86 | "codemirror_mode": { 87 | "name": "ipython", 88 | "version": 3 89 | }, 90 | "file_extension": ".py", 91 | "mimetype": "text/x-python", 92 | "name": "python", 93 | "nbconvert_exporter": "python", 94 | "pygments_lexer": "ipython3", 95 | "version": "3.5.2" 96 | } 97 | }, 98 | "nbformat": 4, 99 | "nbformat_minor": 1 100 | } 101 | -------------------------------------------------------------------------------- /multiple+file+concat+in+pandas.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "dataset from https://packages.revolutionanalytics.com/datasets/mortDefault.zip" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": true 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import os as os" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "'C:\\\\Users\\\\Dell'" 32 | ] 33 | }, 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "os.getcwd()" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "collapsed": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "os.chdir('C:\\\\Users\\\\Dell\\\\Downloads\\\\mortDefault')" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 4, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "['mortDefault2000.csv',\n", 65 | " 'mortDefault2001.csv',\n", 66 | " 'mortDefault2002.csv',\n", 67 | " 'mortDefault2003.csv',\n", 68 | " 'mortDefault2004.csv',\n", 69 | " 'mortDefault2005.csv',\n", 70 | " 'mortDefault2006.csv',\n", 71 | " 'mortDefault2007.csv',\n", 72 | " 'mortDefault2008.csv',\n", 73 | " 'mortDefault2009.csv']" 74 | ] 75 | }, 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "os.listdir()" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 5, 88 | "metadata": { 89 | "collapsed": true 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "import pandas as pd" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 6, 99 | "metadata": { 100 
| "collapsed": true 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "filelist=os.listdir()" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 7, 110 | "metadata": { 111 | "collapsed": true 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "mortdefault = [pd.read_csv(i) for i in filelist]\n", 116 | "\n" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 11, 122 | "metadata": { 123 | "collapsed": false 124 | }, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "list" 130 | ] 131 | }, 132 | "execution_count": 11, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "type(mortdefault)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 27, 144 | "metadata": { 145 | "collapsed": true 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "mort11=pd.concat(mortdefault,ignore_index=True)" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 28, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "\n", 164 | "RangeIndex: 10000000 entries, 0 to 9999999\n", 165 | "Data columns (total 6 columns):\n", 166 | "creditScore int64\n", 167 | "houseAge int64\n", 168 | "yearsEmploy int64\n", 169 | "ccDebt int64\n", 170 | "year int64\n", 171 | "default int64\n", 172 | "dtypes: int64(6)\n", 173 | "memory usage: 457.8 MB\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "mort11.info()" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": { 185 | "collapsed": true 186 | }, 187 | "outputs": [], 188 | "source": [] 189 | } 190 | ], 191 | "metadata": { 192 | "kernelspec": { 193 | "display_name": "Python [conda root]", 194 | "language": "python", 195 | "name": "conda-root-py" 196 | }, 197 | "language_info": { 198 | "codemirror_mode": { 199 | "name": "ipython", 
200 | "version": 3 201 | }, 202 | "file_extension": ".py", 203 | "mimetype": "text/x-python", 204 | "name": "python", 205 | "nbconvert_exporter": "python", 206 | "pygments_lexer": "ipython3", 207 | "version": "3.5.2" 208 | } 209 | }, 210 | "nbformat": 4, 211 | "nbformat_minor": 1 212 | } 213 | -------------------------------------------------------------------------------- /my+first+class+in+python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/plain": [ 13 | "'multiple lines of comments are being shown here'" 14 | ] 15 | }, 16 | "execution_count": 3, 17 | "metadata": {}, 18 | "output_type": "execute_result" 19 | } 20 | ], 21 | "source": [ 22 | "#comments in Python\n", 23 | "'''multiple lines of comments are being shown here'''" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "# Important\n", 31 | "this is a markdown and not a code window\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 1, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/plain": [ 44 | "10" 45 | ] 46 | }, 47 | "execution_count": 1, 48 | "metadata": {}, 49 | "output_type": "execute_result" 50 | } 51 | ], 52 | "source": [ 53 | "2+3+5" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 2, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "67" 67 | ] 68 | }, 69 | "execution_count": 2, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "66-3-(-4)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 3, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | 
"96" 89 | ] 90 | }, 91 | "execution_count": 3, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "32*3" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 4, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "8" 111 | ] 112 | }, 113 | "execution_count": 4, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "2**3" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "text/plain": [ 132 | "1" 133 | ] 134 | }, 135 | "execution_count": 5, 136 | "metadata": {}, 137 | "output_type": "execute_result" 138 | } 139 | ], 140 | "source": [ 141 | "2^3" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 6, 147 | "metadata": { 148 | "collapsed": false 149 | }, 150 | "outputs": [ 151 | { 152 | "data": { 153 | "text/plain": [ 154 | "14.333333333333334" 155 | ] 156 | }, 157 | "execution_count": 6, 158 | "metadata": {}, 159 | "output_type": "execute_result" 160 | } 161 | ], 162 | "source": [ 163 | "43/3" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 7, 169 | "metadata": { 170 | "collapsed": false 171 | }, 172 | "outputs": [ 173 | { 174 | "data": { 175 | "text/plain": [ 176 | "14" 177 | ] 178 | }, 179 | "execution_count": 7, 180 | "metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [ 185 | "43//3" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 8, 191 | "metadata": { 192 | "collapsed": false 193 | }, 194 | "outputs": [ 195 | { 196 | "data": { 197 | "text/plain": [ 198 | "1" 199 | ] 200 | }, 201 | "execution_count": 8, 202 | "metadata": {}, 203 | "output_type": "execute_result" 204 | } 205 | ], 206 | "source": [ 207 | "43%3" 208 | ] 209 | }, 210 | { 
211 | "cell_type": "code", 212 | "execution_count": 9, 213 | "metadata": { 214 | "collapsed": true 215 | }, 216 | "outputs": [], 217 | "source": [ 218 | "import math as mt" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 10, 224 | "metadata": { 225 | "collapsed": false 226 | }, 227 | "outputs": [ 228 | { 229 | "data": { 230 | "text/plain": [ 231 | "7.38905609893065" 232 | ] 233 | }, 234 | "execution_count": 10, 235 | "metadata": {}, 236 | "output_type": "execute_result" 237 | } 238 | ], 239 | "source": [ 240 | "mt.exp(2)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 11, 246 | "metadata": { 247 | "collapsed": false 248 | }, 249 | "outputs": [ 250 | { 251 | "data": { 252 | "text/plain": [ 253 | "2.302585092994046" 254 | ] 255 | }, 256 | "execution_count": 11, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "mt.log(10)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 12, 268 | "metadata": { 269 | "collapsed": false 270 | }, 271 | "outputs": [ 272 | { 273 | "data": { 274 | "text/plain": [ 275 | "2.718281828459045" 276 | ] 277 | }, 278 | "execution_count": 12, 279 | "metadata": {}, 280 | "output_type": "execute_result" 281 | } 282 | ], 283 | "source": [ 284 | "mt.exp(1)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 14, 290 | "metadata": { 291 | "collapsed": false 292 | }, 293 | "outputs": [ 294 | { 295 | "data": { 296 | "text/plain": [ 297 | "3.0" 298 | ] 299 | }, 300 | "execution_count": 14, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [ 306 | "mt.log(8,2)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 15, 312 | "metadata": { 313 | "collapsed": false 314 | }, 315 | "outputs": [ 316 | { 317 | "data": { 318 | "text/plain": [ 319 | "31.622776601683793" 320 | ] 321 | }, 322 | "execution_count": 15, 323 | "metadata": {}, 324 
| "output_type": "execute_result" 325 | } 326 | ], 327 | "source": [ 328 | "mt.sqrt(1000)" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 17, 334 | "metadata": { 335 | "collapsed": false 336 | }, 337 | "outputs": [], 338 | "source": [ 339 | "import numpy as np" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 18, 345 | "metadata": { 346 | "collapsed": false 347 | }, 348 | "outputs": [ 349 | { 350 | "data": { 351 | "text/plain": [ 352 | "21.123150806638673" 353 | ] 354 | }, 355 | "execution_count": 18, 356 | "metadata": {}, 357 | "output_type": "execute_result" 358 | } 359 | ], 360 | "source": [ 361 | "np.std([23,45,67,78])" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 20, 367 | "metadata": { 368 | "collapsed": false 369 | }, 370 | "outputs": [ 371 | { 372 | "data": { 373 | "text/plain": [ 374 | "['__doc__',\n", 375 | " '__loader__',\n", 376 | " '__name__',\n", 377 | " '__package__',\n", 378 | " '__spec__',\n", 379 | " 'acos',\n", 380 | " 'acosh',\n", 381 | " 'asin',\n", 382 | " 'asinh',\n", 383 | " 'atan',\n", 384 | " 'atan2',\n", 385 | " 'atanh',\n", 386 | " 'ceil',\n", 387 | " 'copysign',\n", 388 | " 'cos',\n", 389 | " 'cosh',\n", 390 | " 'degrees',\n", 391 | " 'e',\n", 392 | " 'erf',\n", 393 | " 'erfc',\n", 394 | " 'exp',\n", 395 | " 'expm1',\n", 396 | " 'fabs',\n", 397 | " 'factorial',\n", 398 | " 'floor',\n", 399 | " 'fmod',\n", 400 | " 'frexp',\n", 401 | " 'fsum',\n", 402 | " 'gamma',\n", 403 | " 'gcd',\n", 404 | " 'hypot',\n", 405 | " 'inf',\n", 406 | " 'isclose',\n", 407 | " 'isfinite',\n", 408 | " 'isinf',\n", 409 | " 'isnan',\n", 410 | " 'ldexp',\n", 411 | " 'lgamma',\n", 412 | " 'log',\n", 413 | " 'log10',\n", 414 | " 'log1p',\n", 415 | " 'log2',\n", 416 | " 'modf',\n", 417 | " 'nan',\n", 418 | " 'pi',\n", 419 | " 'pow',\n", 420 | " 'radians',\n", 421 | " 'sin',\n", 422 | " 'sinh',\n", 423 | " 'sqrt',\n", 424 | " 'tan',\n", 425 | " 'tanh',\n", 426 | " 'trunc']" 427 
| ] 428 | }, 429 | "execution_count": 20, 430 | "metadata": {}, 431 | "output_type": "execute_result" 432 | } 433 | ], 434 | "source": [ 435 | "dir(mt)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 21, 441 | "metadata": { 442 | "collapsed": false 443 | }, 444 | "outputs": [ 445 | { 446 | "data": { 447 | "text/plain": [ 448 | "int" 449 | ] 450 | }, 451 | "execution_count": 21, 452 | "metadata": {}, 453 | "output_type": "execute_result" 454 | } 455 | ], 456 | "source": [ 457 | "type(1)" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": 22, 463 | "metadata": { 464 | "collapsed": false 465 | }, 466 | "outputs": [ 467 | { 468 | "data": { 469 | "text/plain": [ 470 | "str" 471 | ] 472 | }, 473 | "execution_count": 22, 474 | "metadata": {}, 475 | "output_type": "execute_result" 476 | } 477 | ], 478 | "source": [ 479 | "type(\"Ajay\")" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 24, 485 | "metadata": { 486 | "collapsed": true 487 | }, 488 | "outputs": [ 489 | { 490 | "data": { 491 | "text/plain": [ 492 | "list" 493 | ] 494 | }, 495 | "execution_count": 24, 496 | "metadata": {}, 497 | "output_type": "execute_result" 498 | } 499 | ], 500 | "source": [ 501 | "type([23,45,67])" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": 25, 507 | "metadata": { 508 | "collapsed": true 509 | }, 510 | "outputs": [], 511 | "source": [ 512 | "a=[23,45,67]" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": 32, 518 | "metadata": { 519 | "collapsed": false 520 | }, 521 | "outputs": [ 522 | { 523 | "data": { 524 | "text/plain": [ 525 | "3" 526 | ] 527 | }, 528 | "execution_count": 32, 529 | "metadata": {}, 530 | "output_type": "execute_result" 531 | } 532 | ], 533 | "source": [ 534 | "len(a)" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": 31, 540 | "metadata": { 541 | "collapsed": false 542 | }, 543 | "outputs": [ 544 | { 
545 | "data": { 546 | "text/plain": [ 547 | "17.962924780409974" 548 | ] 549 | }, 550 | "execution_count": 31, 551 | "metadata": {}, 552 | "output_type": "execute_result" 553 | } 554 | ], 555 | "source": [ 556 | "np.std(a)" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": 28, 562 | "metadata": { 563 | "collapsed": false 564 | }, 565 | "outputs": [ 566 | { 567 | "data": { 568 | "text/plain": [ 569 | "322.66666666666669" 570 | ] 571 | }, 572 | "execution_count": 28, 573 | "metadata": {}, 574 | "output_type": "execute_result" 575 | } 576 | ], 577 | "source": [ 578 | "np.var(a)" 579 | ] 580 | }, 581 | { 582 | "cell_type": "code", 583 | "execution_count": 30, 584 | "metadata": { 585 | "collapsed": false 586 | }, 587 | "outputs": [ 588 | { 589 | "data": { 590 | "text/plain": [ 591 | "1234567891234567766543210876543211" 592 | ] 593 | }, 594 | "execution_count": 30, 595 | "metadata": {}, 596 | "output_type": "execute_result" 597 | } 598 | ], 599 | "source": [ 600 | "123456789123456789*9999999999999999" 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "execution_count": 35, 606 | "metadata": { 607 | "collapsed": false 608 | }, 609 | "outputs": [], 610 | "source": [ 611 | "np.random??" 
612 | ] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": 37, 617 | "metadata": { 618 | "collapsed": false 619 | }, 620 | "outputs": [], 621 | "source": [ 622 | "from random import randrange,randint" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": 39, 628 | "metadata": { 629 | "collapsed": false 630 | }, 631 | "outputs": [ 632 | { 633 | "name": "stdout", 634 | "output_type": "stream", 635 | "text": [ 636 | "78\n" 637 | ] 638 | } 639 | ], 640 | "source": [ 641 | "print(randint(0,90))" 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": 42, 647 | "metadata": { 648 | "collapsed": false 649 | }, 650 | "outputs": [ 651 | { 652 | "data": { 653 | "text/plain": [ 654 | "286" 655 | ] 656 | }, 657 | "execution_count": 42, 658 | "metadata": {}, 659 | "output_type": "execute_result" 660 | } 661 | ], 662 | "source": [ 663 | "randrange(1000)" 664 | ] 665 | }, 666 | { 667 | "cell_type": "code", 668 | "execution_count": 46, 669 | "metadata": { 670 | "collapsed": false 671 | }, 672 | "outputs": [ 673 | { 674 | "name": "stdout", 675 | "output_type": "stream", 676 | "text": [ 677 | "2472965195555081\n", 678 | "6352816454724336\n", 679 | "4809973335770632\n", 680 | "5246909950815852\n", 681 | "6348106781629098\n", 682 | "2586909203145681\n", 683 | "2509370301745813\n", 684 | "4082241628288070\n", 685 | "7691514263873286\n", 686 | "8069700113941950\n" 687 | ] 688 | } 689 | ], 690 | "source": [ 691 | "for x in range(0,10):\n", 692 | " print(randrange(10000000000000000))" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": 51, 698 | "metadata": { 699 | "collapsed": false 700 | }, 701 | "outputs": [], 702 | "source": [ 703 | "def mynewfunction(x,y):\n", 704 | " taxes=((x-1000000)*0.35+100000-min(y,100000))\n", 705 | " print(taxes)" 706 | ] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": 53, 711 | "metadata": { 712 | "collapsed": false 713 | }, 714 | "outputs": [ 715 | { 
716 | "name": "stdout", 717 | "output_type": "stream", 718 | "text": [ 719 | "420000.0\n" 720 | ] 721 | } 722 | ], 723 | "source": [ 724 | "mynewfunction(2200000,300000)" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": 54, 730 | "metadata": { 731 | "collapsed": true 732 | }, 733 | "outputs": [], 734 | "source": [ 735 | "import os as os" 736 | ] 737 | }, 738 | { 739 | "cell_type": "code", 740 | "execution_count": 57, 741 | "metadata": { 742 | "collapsed": false 743 | }, 744 | "outputs": [], 745 | "source": [ 746 | "os??" 747 | ] 748 | }, 749 | { 750 | "cell_type": "code", 751 | "execution_count": 62, 752 | "metadata": { 753 | "collapsed": false 754 | }, 755 | "outputs": [ 756 | { 757 | "name": "stdout", 758 | "output_type": "stream", 759 | "text": [ 760 | "0\n", 761 | "6\n", 762 | "12\n", 763 | "18\n", 764 | "24\n" 765 | ] 766 | } 767 | ], 768 | "source": [ 769 | "for x in range(0,30,6):\n", 770 | " print(x)" 771 | ] 772 | }, 773 | { 774 | "cell_type": "code", 775 | "execution_count": 63, 776 | "metadata": { 777 | "collapsed": true 778 | }, 779 | "outputs": [], 780 | "source": [ 781 | "def mynewfunction(x,y):\n", 782 | " z=x**3+3*x*y+20*y\n", 783 | " print(z)" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": 65, 789 | "metadata": { 790 | "collapsed": false 791 | }, 792 | "outputs": [ 793 | { 794 | "name": "stdout", 795 | "output_type": "stream", 796 | "text": [ 797 | "200\n", 798 | "596\n", 799 | "2288\n", 800 | "6572\n", 801 | "14744\n" 802 | ] 803 | } 804 | ], 805 | "source": [ 806 | "for x in range(0,30,6):\n", 807 | " mynewfunction(x,10)" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": null, 813 | "metadata": { 814 | "collapsed": true 815 | }, 816 | "outputs": [], 817 | "source": [] 818 | } 819 | ], 820 | "metadata": { 821 | "anaconda-cloud": {}, 822 | "kernelspec": { 823 | "display_name": "Python [conda root]", 824 | "language": "python", 825 | "name": "conda-root-py" 826 | }, 
827 | "language_info": { 828 | "codemirror_mode": { 829 | "name": "ipython", 830 | "version": 3 831 | }, 832 | "file_extension": ".py", 833 | "mimetype": "text/x-python", 834 | "name": "python", 835 | "nbconvert_exporter": "python", 836 | "pygments_lexer": "ipython3", 837 | "version": "3.5.2" 838 | } 839 | }, 840 | "nbformat": 4, 841 | "nbformat_minor": 1 842 | } 843 | -------------------------------------------------------------------------------- /nltk.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import nltk" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 5, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [ 21 | { 22 | "name": "stdout", 23 | "output_type": "stream", 24 | "text": [ 25 | "showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml\n" 26 | ] 27 | }, 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "True" 32 | ] 33 | }, 34 | "execution_count": 5, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "nltk.download()" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 6, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "*** Introductory Examples for the NLTK Book ***\n", 55 | "Loading text1, ..., text9 and sent1, ..., sent9\n", 56 | "Type the name of the text or sentence to view it.\n", 57 | "Type: 'texts()' or 'sents()' to list the materials.\n", 58 | "text1: Moby Dick by Herman Melville 1851\n", 59 | "text2: Sense and Sensibility by Jane Austen 1811\n", 60 | "text3: The Book of Genesis\n", 61 | "text4: Inaugural Address Corpus\n", 62 | "text5: Chat Corpus\n", 63 | "text6: Monty Python and the Holy Grail\n", 64 | "text7: 
Wall Street Journal\n", 65 | "text8: Personals Corpus\n", 66 | "text9: The Man Who Was Thursday by G . K . Chesterton 1908\n" 67 | ] 68 | } 69 | ], 70 | "source": [ 71 | "from nltk.book import *" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 7, 77 | "metadata": { 78 | "collapsed": false 79 | }, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "good whale long vast sea whole living small other large dead mighty\n", 86 | "same such last more much sperm noble old\n" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "text1.similar(\"great\")" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 8, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "str" 105 | ] 106 | }, 107 | "execution_count": 8, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "from urllib import request\n", 114 | "url = \"http://www.gutenberg.org/files/2554/2554.txt\"\n", 115 | "response = request.urlopen(url)\n", 116 | "raw = response.read().decode('utf8')\n", 117 | "type(raw)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 9, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": [ 130 | "1176896" 131 | ] 132 | }, 133 | "execution_count": 9, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "len(raw) " 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 10, 145 | "metadata": { 146 | "collapsed": false 147 | }, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/plain": [ 152 | "'The Project Gutenberg EBook of Crime and Punishment, by Fyodor Dostoevsky\\r\\n'" 153 | ] 154 | }, 155 | "execution_count": 10, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "raw[:75]" 162 | ] 
163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 12, 167 | "metadata": { 168 | "collapsed": false 169 | }, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/plain": [ 174 | "list" 175 | ] 176 | }, 177 | "execution_count": 12, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "tokens = nltk.word_tokenize(raw)\n", 184 | "type(tokens)\n" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 13, 190 | "metadata": { 191 | "collapsed": false 192 | }, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "254352" 198 | ] 199 | }, 200 | "execution_count": 13, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [ 206 | "len(tokens) " 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 14, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/plain": [ 219 | "['The',\n", 220 | " 'Project',\n", 221 | " 'Gutenberg',\n", 222 | " 'EBook',\n", 223 | " 'of',\n", 224 | " 'Crime',\n", 225 | " 'and',\n", 226 | " 'Punishment',\n", 227 | " ',',\n", 228 | " 'by']" 229 | ] 230 | }, 231 | "execution_count": 14, 232 | "metadata": {}, 233 | "output_type": "execute_result" 234 | } 235 | ], 236 | "source": [ 237 | "tokens[:10]" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "http://www.cs.duke.edu/courses/spring14/compsci290/assignments/lab02.html " 245 | ] 246 | } 247 | ], 248 | "metadata": { 249 | "kernelspec": { 250 | "display_name": "Python [conda root]", 251 | "language": "python", 252 | "name": "conda-root-py" 253 | }, 254 | "language_info": { 255 | "codemirror_mode": { 256 | "name": "ipython", 257 | "version": 3 258 | }, 259 | "file_extension": ".py", 260 | "mimetype": "text/x-python", 261 | "name": "python", 262 | "nbconvert_exporter": "python", 263 | "pygments_lexer": "ipython3", 264 | "version": 
"3.5.2" 265 | } 266 | }, 267 | "nbformat": 4, 268 | "nbformat_minor": 1 269 | } 270 | -------------------------------------------------------------------------------- /pyspark.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Collecting pyspark\n", 15 | " Downloading pyspark-2.2.0.post0.tar.gz (188.3MB)\n", 16 | "Collecting py4j==0.10.4 (from pyspark)\n", 17 | " Downloading py4j-0.10.4-py2.py3-none-any.whl (186kB)\n", 18 | "Building wheels for collected packages: pyspark\n", 19 | " Running setup.py bdist_wheel for pyspark: started\n", 20 | " Running setup.py bdist_wheel for pyspark: finished with status 'done'\n", 21 | " Stored in directory: C:\\Users\\Dell\\AppData\\Local\\pip\\Cache\\wheels\\5f\\0b\\b3\\5cb16b15d28dcc32f8e7ec91a044829642874bb7586f6e6cbe\n", 22 | "Successfully built pyspark\n", 23 | "Installing collected packages: py4j, pyspark\n", 24 | "Successfully installed py4j-0.10.4 pyspark-2.2.0\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "!pip install pyspark" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "from pyspark import SparkContext,SparkConf\n", 41 | "sc=SparkContext()" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | "metadata": { 48 | "collapsed": true 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "import os" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 5, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/plain": [ 65 | "'C:\\\\Users\\\\Dell'" 66 | ] 67 | }, 68 | "execution_count": 5, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 
73 | "source": [ 74 | "os.getcwd()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 6, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "os.chdir('C:\\\\Users\\\\Dell\\\\Desktop')" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 8, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "['desktop.ini',\n", 99 | " 'dump 2582017',\n", 100 | " 'Fusion Church.html',\n", 101 | " 'Fusion Church_files',\n", 102 | " 'iris.csv',\n", 103 | " 'KOG',\n", 104 | " 'NF22997109906610.ETicket.pdf',\n", 105 | " 'R Packages',\n", 106 | " 'Telegram.lnk',\n", 107 | " 'twitter_share.jpg',\n", 108 | " 'winutils.exe',\n", 109 | " '~$avel Reimbursements.docx',\n", 110 | " '~$thonajay.docx']" 111 | ] 112 | }, 113 | "execution_count": 8, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "os.listdir()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 10, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [], 129 | "source": [ 130 | "#load data\n", 131 | "data=sc.textFile('C:\\\\Users\\\\Dell\\\\Desktop\\\\iris.csv')" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 11, 137 | "metadata": { 138 | "collapsed": false 139 | }, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/plain": [ 144 | "pyspark.rdd.RDD" 145 | ] 146 | }, 147 | "execution_count": 11, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "type(data)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 12, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "['7.9,3.8,6.4,2,\"virginica\"']" 167 | ] 168 | }, 169 | "execution_count": 12, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 
| ], 174 | "source": [ 175 | "data.top(1)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 13, 181 | "metadata": { 182 | "collapsed": false 183 | }, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "text/plain": [ 188 | "'\"Sepal.Length\",\"Sepal.Width\",\"Petal.Length\",\"Petal.Width\",\"Species\"'" 189 | ] 190 | }, 191 | "execution_count": 13, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "data.first()" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 14, 203 | "metadata": { 204 | "collapsed": true 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "from pyspark.sql import SparkSession" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 16, 214 | "metadata": { 215 | "collapsed": false 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "spark= SparkSession.builder \\\n", 220 | " .master(\"local\") \\\n", 221 | " .appName(\"Data Exploration\") \\\n", 222 | " .getOrCreate()" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 17, 228 | "metadata": { 229 | "collapsed": true 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "#load data as Spark DataFrame\n", 234 | "data2=spark.read.format(\"csv\") \\\n", 235 | " .option(\"header\",\"true\") \\\n", 236 | " .option(\"mode\",\"DROPMALFORMED\") \\\n", 237 | " .load('C:\\\\Users\\\\Dell\\\\Desktop\\\\iris.csv')" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 18, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [ 247 | { 248 | "data": { 249 | "text/plain": [ 250 | "pyspark.sql.dataframe.DataFrame" 251 | ] 252 | }, 253 | "execution_count": 18, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | } 257 | ], 258 | "source": [ 259 | "type(data2)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 19, 265 | "metadata": { 266 | "collapsed": false 267 | }, 268 | 
"outputs": [ 269 | { 270 | "name": "stdout", 271 | "output_type": "stream", 272 | "text": [ 273 | "root\n", 274 | " |-- Sepal.Length: string (nullable = true)\n", 275 | " |-- Sepal.Width: string (nullable = true)\n", 276 | " |-- Petal.Length: string (nullable = true)\n", 277 | " |-- Petal.Width: string (nullable = true)\n", 278 | " |-- Species: string (nullable = true)\n", 279 | "\n" 280 | ] 281 | } 282 | ], 283 | "source": [ 284 | "data2.printSchema()" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": 25, 290 | "metadata": { 291 | "collapsed": false 292 | }, 293 | "outputs": [ 294 | { 295 | "data": { 296 | "text/plain": [ 297 | "['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width', 'Species']" 298 | ] 299 | }, 300 | "execution_count": 25, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [ 306 | "data2.columns" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 28, 312 | "metadata": { 313 | "collapsed": false 314 | }, 315 | "outputs": [ 316 | { 317 | "data": { 318 | "text/plain": [ 319 | "['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width', 'Species']" 320 | ] 321 | }, 322 | "execution_count": 28, 323 | "metadata": {}, 324 | "output_type": "execute_result" 325 | } 326 | ], 327 | "source": [ 328 | "data2.schema.names" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 27, 334 | "metadata": { 335 | "collapsed": false 336 | }, 337 | "outputs": [], 338 | "source": [ 339 | "newColumns=['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width', 'Species']\n" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 30, 345 | "metadata": { 346 | "collapsed": true 347 | }, 348 | "outputs": [], 349 | "source": [ 350 | "from functools import reduce\n" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 32, 356 | "metadata": { 357 | "collapsed": false 358 | }, 359 | "outputs": [ 360 | { 361 
| "name": "stdout", 362 | "output_type": "stream", 363 | "text": [ 364 | "root\n", 365 | " |-- Sepal_Length: string (nullable = true)\n", 366 | " |-- Sepal_Width: string (nullable = true)\n", 367 | " |-- Petal_Length: string (nullable = true)\n", 368 | " |-- Petal_Width: string (nullable = true)\n", 369 | " |-- Species: string (nullable = true)\n", 370 | "\n", 371 | "+------------+-----------+------------+-----------+-------+\n", 372 | "|Sepal_Length|Sepal_Width|Petal_Length|Petal_Width|Species|\n", 373 | "+------------+-----------+------------+-----------+-------+\n", 374 | "| 5.1| 3.5| 1.4| 0.2| setosa|\n", 375 | "| 4.9| 3| 1.4| 0.2| setosa|\n", 376 | "| 4.7| 3.2| 1.3| 0.2| setosa|\n", 377 | "| 4.6| 3.1| 1.5| 0.2| setosa|\n", 378 | "| 5| 3.6| 1.4| 0.2| setosa|\n", 379 | "| 5.4| 3.9| 1.7| 0.4| setosa|\n", 380 | "| 4.6| 3.4| 1.4| 0.3| setosa|\n", 381 | "| 5| 3.4| 1.5| 0.2| setosa|\n", 382 | "| 4.4| 2.9| 1.4| 0.2| setosa|\n", 383 | "| 4.9| 3.1| 1.5| 0.1| setosa|\n", 384 | "| 5.4| 3.7| 1.5| 0.2| setosa|\n", 385 | "| 4.8| 3.4| 1.6| 0.2| setosa|\n", 386 | "| 4.8| 3| 1.4| 0.1| setosa|\n", 387 | "| 4.3| 3| 1.1| 0.1| setosa|\n", 388 | "| 5.8| 4| 1.2| 0.2| setosa|\n", 389 | "| 5.7| 4.4| 1.5| 0.4| setosa|\n", 390 | "| 5.4| 3.9| 1.3| 0.4| setosa|\n", 391 | "| 5.1| 3.5| 1.4| 0.3| setosa|\n", 392 | "| 5.7| 3.8| 1.7| 0.3| setosa|\n", 393 | "| 5.1| 3.8| 1.5| 0.3| setosa|\n", 394 | "+------------+-----------+------------+-----------+-------+\n", 395 | "only showing top 20 rows\n", 396 | "\n" 397 | ] 398 | } 399 | ], 400 | "source": [ 401 | "oldColumns = data2.columns\ndata2 = reduce(lambda data2, idx: data2.withColumnRenamed(oldColumns[idx], newColumns[idx]), range(len(oldColumns)), data2)\n", 402 | "data2.printSchema()\n", 403 | "data2.show()" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 33, 409 | "metadata": { 410 | "collapsed": false 411 | }, 412 | "outputs": [ 413 | { 414 | "data": { 415 | "text/plain": [ 416 | "[('Sepal_Length', 'string'),\n", 417 | " ('Sepal_Width', 
'string'),\n", 418 | " ('Petal_Length', 'string'),\n", 419 | " ('Petal_Width', 'string'),\n", 420 | " ('Species', 'string')]" 421 | ] 422 | }, 423 | "execution_count": 33, 424 | "metadata": {}, 425 | "output_type": "execute_result" 426 | } 427 | ], 428 | "source": [ 429 | "data2.dtypes" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 35, 435 | "metadata": { 436 | "collapsed": false 437 | }, 438 | "outputs": [ 439 | { 440 | "data": { 441 | "text/plain": [ 442 | "150" 443 | ] 444 | }, 445 | "execution_count": 35, 446 | "metadata": {}, 447 | "output_type": "execute_result" 448 | } 449 | ], 450 | "source": [ 451 | "data3 = data2.select('Sepal_Length', 'Sepal_Width', 'Species')\n", 452 | "data3.cache()\n", 453 | "data3.count()" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 36, 459 | "metadata": { 460 | "collapsed": false 461 | }, 462 | "outputs": [ 463 | { 464 | "name": "stdout", 465 | "output_type": "stream", 466 | "text": [ 467 | "+------------+-----------+-------+\n", 468 | "|Sepal_Length|Sepal_Width|Species|\n", 469 | "+------------+-----------+-------+\n", 470 | "| 5.1| 3.5| setosa|\n", 471 | "| 4.9| 3| setosa|\n", 472 | "| 4.7| 3.2| setosa|\n", 473 | "| 4.6| 3.1| setosa|\n", 474 | "| 5| 3.6| setosa|\n", 475 | "| 5.4| 3.9| setosa|\n", 476 | "| 4.6| 3.4| setosa|\n", 477 | "| 5| 3.4| setosa|\n", 478 | "| 4.4| 2.9| setosa|\n", 479 | "| 4.9| 3.1| setosa|\n", 480 | "| 5.4| 3.7| setosa|\n", 481 | "| 4.8| 3.4| setosa|\n", 482 | "| 4.8| 3| setosa|\n", 483 | "| 4.3| 3| setosa|\n", 484 | "| 5.8| 4| setosa|\n", 485 | "| 5.7| 4.4| setosa|\n", 486 | "| 5.4| 3.9| setosa|\n", 487 | "| 5.1| 3.5| setosa|\n", 488 | "| 5.7| 3.8| setosa|\n", 489 | "| 5.1| 3.8| setosa|\n", 490 | "+------------+-----------+-------+\n", 491 | "only showing top 20 rows\n", 492 | "\n" 493 | ] 494 | } 495 | ], 496 | "source": [ 497 | "data3.show()" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 37, 503 | "metadata": { 
504 | "collapsed": false 505 | }, 506 | "outputs": [ 507 | { 508 | "data": { 509 | "text/plain": [ 510 | "DataFrame[Sepal_Length: string, Sepal_Width: string, Species: string]" 511 | ] 512 | }, 513 | "execution_count": 37, 514 | "metadata": {}, 515 | "output_type": "execute_result" 516 | } 517 | ], 518 | "source": [ 519 | "data3.limit(5)" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 50, 525 | "metadata": { 526 | "collapsed": false 527 | }, 528 | "outputs": [ 529 | { 530 | "name": "stdout", 531 | "output_type": "stream", 532 | "text": [ 533 | "+------------+-----------+-------+\n", 534 | "|Sepal_Length|Sepal_Width|Species|\n", 535 | "+------------+-----------+-------+\n", 536 | "| 5.1| 3.5| setosa|\n", 537 | "| 4.9| 3| setosa|\n", 538 | "| 4.7| 3.2| setosa|\n", 539 | "| 4.6| 3.1| setosa|\n", 540 | "| 5| 3.6| setosa|\n", 541 | "+------------+-----------+-------+\n", 542 | "\n" 543 | ] 544 | } 545 | ], 546 | "source": [ 547 | "data3.limit(5).show()" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": 45, 553 | "metadata": { 554 | "collapsed": false 555 | }, 556 | "outputs": [ 557 | { 558 | "name": "stdout", 559 | "output_type": "stream", 560 | "text": [ 561 | "+------------+-----------+-------+\n", 562 | "|Sepal_Length|Sepal_Width|Species|\n", 563 | "+------------+-----------+-------+\n", 564 | "| 5.1| 3.5| setosa|\n", 565 | "| 4.9| 3| setosa|\n", 566 | "+------------+-----------+-------+\n", 567 | "\n" 568 | ] 569 | } 570 | ], 571 | "source": [ 572 | "data3.limit(5).limit(2).show()" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": 61, 578 | "metadata": { 579 | "collapsed": false 580 | }, 581 | "outputs": [], 582 | "source": [ 583 | "data4=data2.selectExpr('CAST(Sepal_Length AS INT) AS Sepal_Length')" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": 62, 589 | "metadata": { 590 | "collapsed": false 591 | }, 592 | "outputs": [ 593 | { 594 | "data": { 595 | 
"text/plain": [ 596 | "DataFrame[Sepal_Length: int]" 597 | ] 598 | }, 599 | "execution_count": 62, 600 | "metadata": {}, 601 | "output_type": "execute_result" 602 | } 603 | ], 604 | "source": [ 605 | "data4" 606 | ] 607 | }, 608 | { 609 | "cell_type": "code", 610 | "execution_count": 63, 611 | "metadata": { 612 | "collapsed": false 613 | }, 614 | "outputs": [], 615 | "source": [ 616 | "from pyspark.sql.functions import *" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": 65, 622 | "metadata": { 623 | "collapsed": false 624 | }, 625 | "outputs": [ 626 | { 627 | "name": "stdout", 628 | "output_type": "stream", 629 | "text": [ 630 | "+-----------------+\n", 631 | "|avg(Sepal_Length)|\n", 632 | "+-----------------+\n", 633 | "|5.386666666666667|\n", 634 | "+-----------------+\n", 635 | "\n" 636 | ] 637 | } 638 | ], 639 | "source": [ 640 | "data4.select('Sepal_Length').agg(mean('Sepal_Length')).show()" 641 | ] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "execution_count": 66, 646 | "metadata": { 647 | "collapsed": true 648 | }, 649 | "outputs": [], 650 | "source": [ 651 | "data5=data2.selectExpr('CAST(Sepal_Length AS INT) AS Sepal_Length','CAST(Petal_Width AS INT) AS Petal_Width','CAST(Sepal_Width AS INT) AS Sepal_Width','CAST(Petal_Length AS INT) AS Petal_Length','Species')" 652 | ] 653 | }, 654 | { 655 | "cell_type": "code", 656 | "execution_count": 67, 657 | "metadata": { 658 | "collapsed": false 659 | }, 660 | "outputs": [ 661 | { 662 | "data": { 663 | "text/plain": [ 664 | "DataFrame[Sepal_Length: int, Petal_Width: int, Sepal_Width: int, Petal_Length: int, Species: string]" 665 | ] 666 | }, 667 | "execution_count": 67, 668 | "metadata": {}, 669 | "output_type": "execute_result" 670 | } 671 | ], 672 | "source": [ 673 | "data5" 674 | ] 675 | }, 676 | { 677 | "cell_type": "code", 678 | "execution_count": 68, 679 | "metadata": { 680 | "collapsed": false 681 | }, 682 | "outputs": [ 683 | { 684 | "data": { 685 | "text/plain": [ 686 | 
"['Sepal_Length', 'Petal_Width', 'Sepal_Width', 'Petal_Length', 'Species']" 687 | ] 688 | }, 689 | "execution_count": 68, 690 | "metadata": {}, 691 | "output_type": "execute_result" 692 | } 693 | ], 694 | "source": [ 695 | "data5.columns" 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": 76, 701 | "metadata": { 702 | "collapsed": false 703 | }, 704 | "outputs": [ 705 | { 706 | "name": "stdout", 707 | "output_type": "stream", 708 | "text": [ 709 | "+----------+-----------------+\n", 710 | "| Species|avg(Sepal_Length)|\n", 711 | "+----------+-----------------+\n", 712 | "| virginica| 6.08|\n", 713 | "|versicolor| 5.48|\n", 714 | "| setosa| 4.6|\n", 715 | "+----------+-----------------+\n", 716 | "\n" 717 | ] 718 | } 719 | ], 720 | "source": [ 721 | "data5.select('Sepal_Length','Species').groupBy('Species').agg(mean(\"Sepal_Length\")).show()" 722 | ] 723 | }, 724 | { 725 | "cell_type": "code", 726 | "execution_count": null, 727 | "metadata": { 728 | "collapsed": true 729 | }, 730 | "outputs": [], 731 | "source": [] 732 | } 733 | ], 734 | "metadata": { 735 | "anaconda-cloud": {}, 736 | "kernelspec": { 737 | "display_name": "Python [default]", 738 | "language": "python", 739 | "name": "python3" 740 | }, 741 | "language_info": { 742 | "codemirror_mode": { 743 | "name": "ipython", 744 | "version": 3 745 | }, 746 | "file_extension": ".py", 747 | "mimetype": "text/x-python", 748 | "name": "python", 749 | "nbconvert_exporter": "python", 750 | "pygments_lexer": "ipython3", 751 | "version": "3.5.2" 752 | } 753 | }, 754 | "nbformat": 4, 755 | "nbformat_minor": 1 756 | } 757 | -------------------------------------------------------------------------------- /python+with+postgres (1).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import psycopg2\n", 
12 | "import pandas as pd\n", 13 | "import sqlalchemy as sa\n", 14 | "import time\n", 15 | "import seaborn as sns\n", 16 | "import re" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 36, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "Requirement already satisfied: pandasql in c:\\users\\dell\\anaconda3\\lib\\site-packages\n", 31 | "Requirement already satisfied: pandas in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandasql)\n", 32 | "Requirement already satisfied: sqlalchemy in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandasql)\n", 33 | "Requirement already satisfied: numpy in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandasql)\n", 34 | "Requirement already satisfied: python-dateutil>=2 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandas->pandasql)\n", 35 | "Requirement already satisfied: pytz>=2011k in c:\\users\\dell\\anaconda3\\lib\\site-packages (from pandas->pandasql)\n", 36 | "Requirement already satisfied: six>=1.5 in c:\\users\\dell\\anaconda3\\lib\\site-packages (from python-dateutil>=2->pandas->pandasql)\n" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "! pip install pandasql" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "Requirement already satisfied: psycopg2 in c:\\users\\dell\\anaconda3\\lib\\site-packages\n" 56 | ] 57 | } 58 | ], 59 | "source": [ 60 | "! 
pip install psycopg2" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "parameters = { \n", 72 | " 'username': 'postgres', \n", 73 | " 'password': 'root',\n", 74 | " 'server': 'localhost',\n", 75 | " 'database': 'ajay'\n", 76 | " }\n" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 6, 82 | "metadata": { 83 | "collapsed": true 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "connection= 'postgresql://{username}:{password}@{server}:5432/{database}'.format(**parameters)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 17, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "postgresql://postgres:root@localhost:5432/ajay\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "print (connection)\n", 107 | "\n", 108 | "\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 18, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "engine = sa.create_engine(connection, encoding=\"utf-8\")\n", 120 | "\n", 121 | "\n" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 19, 127 | "metadata": { 128 | "collapsed": true 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "insp = sa.inspect(engine)\n" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 10, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [ 142 | { 143 | "name": "stdout", 144 | "output_type": "stream", 145 | "text": [ 146 | "\n" 147 | ] 148 | } 149 | ], 150 | "source": [ 151 | "print(insp)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 11, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [ 161 | { 162 | "name": "stdout", 163 | "output_type": "stream", 164 | 
"text": [ 165 | "['information_schema', 'public']\n" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "db_list = insp.get_schema_names()\n", 171 | "print(db_list)\n" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 12, 177 | "metadata": { 178 | "collapsed": false 179 | }, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | "['iris', 'temp', 'sales']" 185 | ] 186 | }, 187 | "execution_count": 12, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "engine.table_names()" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 13, 199 | "metadata": { 200 | "collapsed": true 201 | }, 202 | "outputs": [], 203 | "source": [ 204 | "data3= pd.read_sql_query('select * from \"sales\" limit 10',con=engine)" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 14, 210 | "metadata": { 211 | "collapsed": false 212 | }, 213 | "outputs": [ 214 | { 215 | "name": "stdout", 216 | "output_type": "stream", 217 | "text": [ 218 | "\n", 219 | "RangeIndex: 10 entries, 0 to 9\n", 220 | "Data columns (total 4 columns):\n", 221 | "customer_id 10 non-null int64\n", 222 | "sales 10 non-null int64\n", 223 | "date 10 non-null object\n", 224 | "product_id 10 non-null int64\n", 225 | "dtypes: int64(3), object(1)\n", 226 | "memory usage: 400.0+ bytes\n" 227 | ] 228 | } 229 | ], 230 | "source": [ 231 | "data3.info()" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 14, 237 | "metadata": { 238 | "collapsed": false 239 | }, 240 | "outputs": [ 241 | { 242 | "data": { 243 | "text/html": [ 244 | "
\n", 245 | "\n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | "
customer_idsalesdateproduct_id
01000152302017-02-07524
11000227812017-05-12469
21000320832016-12-18917
3100042142015-01-19354
41000594072016-09-26292
51000647052015-10-17380
61000747292016-01-02469
71000877152015-09-12480
81000998982015-04-05611
91001057972015-08-13959
\n", 328 | "
" 329 | ], 330 | "text/plain": [ 331 | " customer_id sales date product_id\n", 332 | "0 10001 5230 2017-02-07 524\n", 333 | "1 10002 2781 2017-05-12 469\n", 334 | "2 10003 2083 2016-12-18 917\n", 335 | "3 10004 214 2015-01-19 354\n", 336 | "4 10005 9407 2016-09-26 292\n", 337 | "5 10006 4705 2015-10-17 380\n", 338 | "6 10007 4729 2016-01-02 469\n", 339 | "7 10008 7715 2015-09-12 480\n", 340 | "8 10009 9898 2015-04-05 611\n", 341 | "9 10010 5797 2015-08-13 959" 342 | ] 343 | }, 344 | "execution_count": 14, 345 | "metadata": {}, 346 | "output_type": "execute_result" 347 | } 348 | ], 349 | "source": [ 350 | "data3" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 29, 356 | "metadata": { 357 | "collapsed": true 358 | }, 359 | "outputs": [], 360 | "source": [ 361 | "data5= pd.read_sql_query('select * from \"sales\" limit 20',con=engine)" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 30, 367 | "metadata": { 368 | "collapsed": false 369 | }, 370 | "outputs": [ 371 | { 372 | "data": { 373 | "text/html": [ 374 | "
\n", 375 | "\n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | "
customer_idsalesdateproduct_id
01000152302017-02-07524
11000227812017-05-12469
21000320832016-12-18917
3100042142015-01-19354
41000594072016-09-26292
51000647052015-10-17380
61000747292016-01-02469
71000877152015-09-12480
81000998982015-04-05611
91001057972015-08-13959
101001112832016-04-22950
111001227512015-01-01322
121001344222017-07-11965
131001462352015-03-07783
141001573022016-04-06792
151001664082016-10-21347
161001718802015-08-23187
171001837382017-03-12222
18100199002015-07-20236
191002055162017-05-10828
\n", 528 | "
" 529 | ], 530 | "text/plain": [ 531 | " customer_id sales date product_id\n", 532 | "0 10001 5230 2017-02-07 524\n", 533 | "1 10002 2781 2017-05-12 469\n", 534 | "2 10003 2083 2016-12-18 917\n", 535 | "3 10004 214 2015-01-19 354\n", 536 | "4 10005 9407 2016-09-26 292\n", 537 | "5 10006 4705 2015-10-17 380\n", 538 | "6 10007 4729 2016-01-02 469\n", 539 | "7 10008 7715 2015-09-12 480\n", 540 | "8 10009 9898 2015-04-05 611\n", 541 | "9 10010 5797 2015-08-13 959\n", 542 | "10 10011 1283 2016-04-22 950\n", 543 | "11 10012 2751 2015-01-01 322\n", 544 | "12 10013 4422 2017-07-11 965\n", 545 | "13 10014 6235 2015-03-07 783\n", 546 | "14 10015 7302 2016-04-06 792\n", 547 | "15 10016 6408 2016-10-21 347\n", 548 | "16 10017 1880 2015-08-23 187\n", 549 | "17 10018 3738 2017-03-12 222\n", 550 | "18 10019 900 2015-07-20 236\n", 551 | "19 10020 5516 2017-05-10 828" 552 | ] 553 | }, 554 | "execution_count": 30, 555 | "metadata": {}, 556 | "output_type": "execute_result" 557 | } 558 | ], 559 | "source": [ 560 | "data5" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": 23, 566 | "metadata": { 567 | "collapsed": true 568 | }, 569 | "outputs": [], 570 | "source": [ 571 | "import pandasql as pdsql" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": 32, 577 | "metadata": { 578 | "collapsed": true 579 | }, 580 | "outputs": [], 581 | "source": [ 582 | "str1=\"select * from data5 limit 5;\"" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": 33, 588 | "metadata": { 589 | "collapsed": true 590 | }, 591 | "outputs": [], 592 | "source": [ 593 | "df11=pdsql.sqldf(str1)" 594 | ] 595 | }, 596 | { 597 | "cell_type": "code", 598 | "execution_count": 34, 599 | "metadata": { 600 | "collapsed": false 601 | }, 602 | "outputs": [ 603 | { 604 | "data": { 605 | "text/html": [ 606 | "
\n", 607 | "\n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | "
customer_idsalesdateproduct_id
01000152302017-02-07524
11000227812017-05-12469
21000320832016-12-18917
3100042142015-01-19354
41000594072016-09-26292
\n", 655 | "
" 656 | ], 657 | "text/plain": [ 658 | " customer_id sales date product_id\n", 659 | "0 10001 5230 2017-02-07 524\n", 660 | "1 10002 2781 2017-05-12 469\n", 661 | "2 10003 2083 2016-12-18 917\n", 662 | "3 10004 214 2015-01-19 354\n", 663 | "4 10005 9407 2016-09-26 292" 664 | ] 665 | }, 666 | "execution_count": 34, 667 | "metadata": {}, 668 | "output_type": "execute_result" 669 | } 670 | ], 671 | "source": [ 672 | "df11" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": 35, 678 | "metadata": { 679 | "collapsed": false 680 | }, 681 | "outputs": [ 682 | { 683 | "data": { 684 | "text/plain": [ 685 | "pandas.core.frame.DataFrame" 686 | ] 687 | }, 688 | "execution_count": 35, 689 | "metadata": {}, 690 | "output_type": "execute_result" 691 | } 692 | ], 693 | "source": [ 694 | "type(data5)" 695 | ] 696 | }, 697 | { 698 | "cell_type": "code", 699 | "execution_count": 43, 700 | "metadata": { 701 | "collapsed": true 702 | }, 703 | "outputs": [], 704 | "source": [ 705 | "data5= pd.read_sql_query('select * from \"sales\" limit 250',con=engine)" 706 | ] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": 47, 711 | "metadata": { 712 | "collapsed": false 713 | }, 714 | "outputs": [ 715 | { 716 | "data": { 717 | "text/html": [ 718 | "
\n", 719 | "\n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | "
customer_idsalesdateproduct_id
01000152302017-02-07524
11000227812017-05-12469
21000320832016-12-18917
3100042142015-01-19354
41000594072016-09-26292
\n", 767 | "
" 768 | ], 769 | "text/plain": [ 770 | " customer_id sales date product_id\n", 771 | "0 10001 5230 2017-02-07 524\n", 772 | "1 10002 2781 2017-05-12 469\n", 773 | "2 10003 2083 2016-12-18 917\n", 774 | "3 10004 214 2015-01-19 354\n", 775 | "4 10005 9407 2016-09-26 292" 776 | ] 777 | }, 778 | "execution_count": 47, 779 | "metadata": {}, 780 | "output_type": "execute_result" 781 | } 782 | ], 783 | "source": [ 784 | "data5.head()" 785 | ] 786 | }, 787 | { 788 | "cell_type": "code", 789 | "execution_count": 44, 790 | "metadata": { 791 | "collapsed": true 792 | }, 793 | "outputs": [], 794 | "source": [ 795 | "str2=\"select avg(sales) from data5 ;\"" 796 | ] 797 | }, 798 | { 799 | "cell_type": "code", 800 | "execution_count": 45, 801 | "metadata": { 802 | "collapsed": false 803 | }, 804 | "outputs": [], 805 | "source": [ 806 | "df111=pdsql.sqldf(str2)" 807 | ] 808 | }, 809 | { 810 | "cell_type": "code", 811 | "execution_count": 46, 812 | "metadata": { 813 | "collapsed": false 814 | }, 815 | "outputs": [ 816 | { 817 | "data": { 818 | "text/html": [ 819 | "
\n", 820 | "\n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | "
avg(sales)
05226.868
\n", 834 | "
" 835 | ], 836 | "text/plain": [ 837 | " avg(sales)\n", 838 | "0 5226.868" 839 | ] 840 | }, 841 | "execution_count": 46, 842 | "metadata": {}, 843 | "output_type": "execute_result" 844 | } 845 | ], 846 | "source": [ 847 | "df111" 848 | ] 849 | }, 850 | { 851 | "cell_type": "code", 852 | "execution_count": null, 853 | "metadata": { 854 | "collapsed": true 855 | }, 856 | "outputs": [], 857 | "source": [] 858 | } 859 | ], 860 | "metadata": { 861 | "anaconda-cloud": {}, 862 | "kernelspec": { 863 | "display_name": "Python [conda root]", 864 | "language": "python", 865 | "name": "conda-root-py" 866 | }, 867 | "language_info": { 868 | "codemirror_mode": { 869 | "name": "ipython", 870 | "version": 3 871 | }, 872 | "file_extension": ".py", 873 | "mimetype": "text/x-python", 874 | "name": "python", 875 | "nbconvert_exporter": "python", 876 | "pygments_lexer": "ipython3", 877 | "version": "3.5.2" 878 | } 879 | }, 880 | "nbformat": 4, 881 | "nbformat_minor": 1 882 | } 883 | -------------------------------------------------------------------------------- /python+with+postgres.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import psycopg2\n", 12 | "import pandas as pd\n", 13 | "import sqlalchemy as sa\n", 14 | "import time\n", 15 | "import seaborn as sns\n", 16 | "import re" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "Requirement already satisfied: psycopg2 in c:\\users\\dell\\anaconda3\\lib\\site-packages\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "! 
pip install psycopg2" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 5, 41 | "metadata": { 42 | "collapsed": true 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "parameters = { \n", 47 | " 'username': 'postgres', \n", 48 | " 'password': 'root',\n", 49 | " 'server': 'localhost',\n", 50 | " 'database': 'ajay'\n", 51 | " }\n" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 6, 57 | "metadata": { 58 | "collapsed": true 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "connection= 'postgresql://{username}:{password}@{server}:5432/{database}'.format(**parameters)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 7, 68 | "metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "postgresql://postgres:root@localhost:5432/ajay\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "print (connection)\n", 82 | "\n", 83 | "\n" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 9, 89 | "metadata": { 90 | "collapsed": false 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "engine = sa.create_engine(connection, encoding=\"utf-8\")\n", 95 | "\n", 96 | "\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 10, 102 | "metadata": { 103 | "collapsed": true 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "insp = sa.inspect(engine)\n" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 11, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "['information_schema', 'public']\n" 122 | ] 123 | } 124 | ], 125 | "source": [ 126 | "db_list = insp.get_schema_names()\n", 127 | "print(db_list)\n" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 12, 133 | "metadata": { 134 | "collapsed": false 135 | }, 136 | "outputs": [ 137 | { 138 | 
"data": { 139 | "text/plain": [ 140 | "['sales', 'iris']" 141 | ] 142 | }, 143 | "execution_count": 12, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "engine.table_names()" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 13, 155 | "metadata": { 156 | "collapsed": true 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "data3= pd.read_sql_query('select * from \"sales\" limit 10',con=engine)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 14, 166 | "metadata": { 167 | "collapsed": false 168 | }, 169 | "outputs": [ 170 | { 171 | "name": "stdout", 172 | "output_type": "stream", 173 | "text": [ 174 | "\n", 175 | "RangeIndex: 10 entries, 0 to 9\n", 176 | "Data columns (total 4 columns):\n", 177 | "customer_id 10 non-null int64\n", 178 | "sales 10 non-null int64\n", 179 | "date 10 non-null object\n", 180 | "product_id 10 non-null int64\n", 181 | "dtypes: int64(3), object(1)\n", 182 | "memory usage: 400.0+ bytes\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "data3.info()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 15, 193 | "metadata": { 194 | "collapsed": false 195 | }, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/html": [ 200 | "
\n", 201 | "\n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | "
customer_idsalesdateproduct_id
01000152302017-02-07524
11000227812017-05-12469
21000320832016-12-18917
3100042142015-01-19354
41000594072016-09-26292
51000647052015-10-17380
61000747292016-01-02469
71000877152015-09-12480
81000998982015-04-05611
91001057972015-08-13959
\n", 284 | "
" 285 | ], 286 | "text/plain": [ 287 | " customer_id sales date product_id\n", 288 | "0 10001 5230 2017-02-07 524\n", 289 | "1 10002 2781 2017-05-12 469\n", 290 | "2 10003 2083 2016-12-18 917\n", 291 | "3 10004 214 2015-01-19 354\n", 292 | "4 10005 9407 2016-09-26 292\n", 293 | "5 10006 4705 2015-10-17 380\n", 294 | "6 10007 4729 2016-01-02 469\n", 295 | "7 10008 7715 2015-09-12 480\n", 296 | "8 10009 9898 2015-04-05 611\n", 297 | "9 10010 5797 2015-08-13 959" 298 | ] 299 | }, 300 | "execution_count": 15, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [ 306 | "data3" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 16, 312 | "metadata": { 313 | "collapsed": true 314 | }, 315 | "outputs": [], 316 | "source": [ 317 | "data5= pd.read_sql_query('select * from \"iris\" limit 10',con=engine)" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 17, 323 | "metadata": { 324 | "collapsed": false 325 | }, 326 | "outputs": [ 327 | { 328 | "data": { 329 | "text/html": [ 330 | "
\n", 331 | "\n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
55.43.91.70.4setosa
64.63.41.40.3setosa
75.03.41.50.2setosa
84.42.91.40.2setosa
94.93.11.50.1setosa
\n", 425 | "
" 426 | ], 427 | "text/plain": [ 428 | " sepal_length sepal_width petal_length petal_width species\n", 429 | "0 5.1 3.5 1.4 0.2 setosa\n", 430 | "1 4.9 3.0 1.4 0.2 setosa\n", 431 | "2 4.7 3.2 1.3 0.2 setosa\n", 432 | "3 4.6 3.1 1.5 0.2 setosa\n", 433 | "4 5.0 3.6 1.4 0.2 setosa\n", 434 | "5 5.4 3.9 1.7 0.4 setosa\n", 435 | "6 4.6 3.4 1.4 0.3 setosa\n", 436 | "7 5.0 3.4 1.5 0.2 setosa\n", 437 | "8 4.4 2.9 1.4 0.2 setosa\n", 438 | "9 4.9 3.1 1.5 0.1 setosa" 439 | ] 440 | }, 441 | "execution_count": 17, 442 | "metadata": {}, 443 | "output_type": "execute_result" 444 | } 445 | ], 446 | "source": [ 447 | "data5" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": { 454 | "collapsed": true 455 | }, 456 | "outputs": [], 457 | "source": [] 458 | } 459 | ], 460 | "metadata": { 461 | "kernelspec": { 462 | "display_name": "Python [conda root]", 463 | "language": "python", 464 | "name": "conda-root-py" 465 | }, 466 | "language_info": { 467 | "codemirror_mode": { 468 | "name": "ipython", 469 | "version": 3 470 | }, 471 | "file_extension": ".py", 472 | "mimetype": "text/x-python", 473 | "name": "python", 474 | "nbconvert_exporter": "python", 475 | "pygments_lexer": "ipython3", 476 | "version": "3.5.2" 477 | } 478 | }, 479 | "nbformat": 4, 480 | "nbformat_minor": 1 481 | } 482 | --------------------------------------------------------------------------------