"
817 | ]
818 | },
819 | "metadata": {},
820 | "output_type": "display_data"
821 | }
822 | ],
823 | "source": [
824 | "metrics = MulticlassMetrics(predictions.select(\n",
825 | " \"prediction\", \"target_cat\").rdd)\n",
826 | "conf_matrix = metrics.confusionMatrix().toArray()\n",
827 | "plot_confusion_matrix(conf_matrix)"
828 | ]
829 | },
830 | {
831 | "cell_type": "code",
832 | "execution_count": 33,
833 | "metadata": {
834 | "collapsed": false
835 | },
836 | "outputs": [
837 | {
838 | "data": {
839 | "text/plain": [
840 | "DataFrame[duration: double, protocol_type: string, service: string, flag: string, src_bytes: double, dst_bytes: double, land: double, wrong_fragment: double, urgent: double, hot: double, num_failed_logins: double, logged_in: double, num_compromised: double, root_shell: double, su_attempted: double, num_root: double, num_file_creations: double, num_shells: double, num_access_files: double, num_outbound_cmds: double, is_host_login: double, is_guest_login: double, count: double, srv_count: double, serror_rate: double, srv_serror_rate: double, rerror_rate: double, srv_rerror_rate: double, same_srv_rate: double, diff_srv_rate: double, srv_diff_host_rate: double, dst_host_count: double, dst_host_srv_count: double, dst_host_same_srv_rate: double, dst_host_diff_srv_rate: double, dst_host_same_src_port_rate: double, dst_host_srv_diff_host_rate: double, dst_host_serror_rate: double, dst_host_srv_serror_rate: double, dst_host_rerror_rate: double, dst_host_srv_rerror_rate: double, target: string, protocol_type_cat: double, service_cat: double, flag_cat: double, target_cat: double, features: vector]"
841 | ]
842 | },
843 | "execution_count": 33,
844 | "metadata": {},
845 | "output_type": "execute_result"
846 | }
847 | ],
848 | "source": [
849 | "#cleanup\n",
850 | "bc_sample_rates.unpersist()\n",
851 | "sampled_train_df.unpersist()\n",
852 | "train.unpersist()"
853 | ]
854 | },
855 | {
856 | "cell_type": "code",
857 | "execution_count": null,
858 | "metadata": {
859 | "collapsed": true
860 | },
861 | "outputs": [],
862 | "source": []
863 | }
864 | ],
865 | "metadata": {
866 | "kernelspec": {
867 | "display_name": "Python 2",
868 | "language": "python",
869 | "name": "python2"
870 | },
871 | "language_info": {
872 | "codemirror_mode": {
873 | "name": "ipython",
874 | "version": 2
875 | },
876 | "file_extension": ".py",
877 | "mimetype": "text/x-python",
878 | "name": "python",
879 | "nbconvert_exporter": "python",
880 | "pygments_lexer": "ipython2",
881 | "version": "2.7.6"
882 | }
883 | },
884 | "nbformat": 4,
885 | "nbformat_minor": 0
886 | }
887 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Packt Publishing
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # Large Scale Machine Learning with Python
5 | This is the code repository for [Large Scale Machine Learning with Python](https://www.packtpub.com/big-data-and-business-intelligence/large-scale-machine-learning-python?utm_source=github&utm_medium=repository&utm_campaign=9781785887215), published by Packt. It contains all the supporting project files necessary to work through the book from start to finish.
6 |
7 | ## Instructions
8 | Running the code examples provided in this book requires Python 2.7 or higher, installed on macOS, Linux, or Microsoft Windows.
9 | The examples throughout the book make frequent use of Python's essential libraries for scientific and statistical computing, such as SciPy, NumPy, Scikit-learn, and StatsModels, and, to a lesser extent, matplotlib and pandas. We will also make use of an out-of-core cloud computing application called H2O.
10 | This book relies heavily on Jupyter and its Notebooks, powered by the Python kernel. We will use Jupyter 4.1, the most recent version at the time of writing.
11 | The first chapter will provide you with all the step-by-step instructions and some useful tips to set up your Python environment, these core libraries, and all the necessary tools.
12 |
13 | ## Related books
14 | - [R Machine Learning By Example](https://www.packtpub.com/big-data-and-business-intelligence/r-machine-learning-example?utm_source=github&utm_medium=repository&utm_campaign=9781784390846)
15 | - [R Machine Learning Essentials](https://www.packtpub.com/big-data-and-business-intelligence/r-machine-learning-essentials?utm_source=github&utm_medium=repository&utm_campaign=9781783987740)
16 | - [Machine Learning with R](https://www.packtpub.com/big-data-and-business-intelligence/machine-learning-r?utm_source=github&utm_medium=repository&utm_campaign=9781782162148)
17 | ### Download a free PDF
18 |
19 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link below to claim your free PDF.
20 | https://packt.link/free-ebook/9781785887215
--------------------------------------------------------------------------------
/vowpal_wabbit_for_windows/x64/vw.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Large-Scale-Machine-Learning-With-Python/681b476109470a04f354c9f4d152b8de40670eb7/vowpal_wabbit_for_windows/x64/vw.exe
--------------------------------------------------------------------------------
/vowpal_wabbit_for_windows/x86/vw.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Large-Scale-Machine-Learning-With-Python/681b476109470a04f354c9f4d152b8de40670eb7/vowpal_wabbit_for_windows/x86/vw.exe
--------------------------------------------------------------------------------