├── Chapter06 ├── Images │ ├── Readme.md │ ├── B09828_06_01.png │ ├── B09828_06_02.png │ ├── B09828_06_03.png │ ├── B09828_06_04.png │ ├── B09828_06_05.png │ ├── B09828_06_06.png │ ├── B09828_06_07.png │ ├── B09828_06_08.png │ ├── B09828_06_09.png │ ├── B09828_06_10.png │ ├── B09828_06_11.png │ ├── B09828_06_12.png │ ├── B09828_06_13.png │ └── B09828_06_14.png ├── Readme.md └── Taxi_drop-off.ipynb ├── Chapter07 ├── Images │ ├── Readme.md │ ├── B09828_07_01.png │ ├── B09828_07_02.png │ ├── B09828_07_03.png │ ├── B09828_07_04.png │ ├── B09828_07_05.png │ ├── B09828_07_06.png │ ├── B09828_07_07.png │ ├── B09828_07_08.png │ ├── B09828_07_09.png │ ├── B09828_07_10.png │ ├── B09828_07_11.png │ └── B09828_07_12.png ├── Readme.md └── loader_celebA.py ├── Chapter09 ├── Images │ ├── Readme.md │ ├── B09828_13_01.png │ ├── B09828_13_02.png │ ├── B09828_13_03.png │ ├── B09828_13_04.png │ ├── B09828_13_05.png │ ├── B09828_13_06.png │ ├── B09828_13_07.png │ ├── B09828_13_08.png │ └── B09828_13_09.png ├── Readme.md └── Heart_Disease_Prediction.ipynb ├── Chapter10 ├── Readme.md ├── B09828_14_01.png ├── B09828_14_02.png ├── B09828_14_03.png ├── B09828_14_04.png ├── B09828_14_05.png ├── B09828_14_06.png ├── B09828_14_07.png └── B09828_14_08.png ├── Chapter03 ├── Images │ ├── Readme.md │ ├── B09828_03_01.png │ ├── B09828_03_02.png │ ├── B09828_03_03.png │ ├── B09828_03_04.png │ ├── B09828_03_05.png │ ├── B09828_03_06.png │ ├── B09828_03_07.png │ ├── B09828_03_08.png │ ├── B09828_03_09.png │ ├── B09828_03_10.png │ └── B09828_03_11.png ├── Folds5x2_pp.xlsx ├── readme.md ├── LogisticRegressor.py └── Wine_quality_using_Ensemble_learning.ipynb ├── Chapter02 ├── Readme.md ├── Hdf5_with_PyTables.ipynb ├── OpenPyXL_example.ipynb ├── Txt_files.ipynb └── NoSQL with Python.ipynb ├── Chapter01 ├── Fig01.png ├── Fig02.png ├── Fig03.png ├── Fig04.png ├── Fig05.png ├── Fig06.png ├── Readme.md ├── matrix_multiplication.ipynb └── matrix_multiplication_keras.ipynb ├── Chapter12 ├── video.avi ├── B09828_09_01.png ├── B09828_09_02.png ├── B09828_09_03.png ├── B09828_09_04.png ├── B09828_09_05.png ├── B09828_09_06.png ├── B09828_09_07.png ├── B09828_09_08.png ├── B09828_09_09.png ├── B09828_09_10.png ├── Readme.md └── text_preprocessing.ipynb ├── Chapter04 ├── Folds5x2_pp.xlsx ├── images │ ├── B09828_04_01.png │ ├── B09828_04_02.png │ ├── B09828_04_03.png │ ├── B09828_04_04.png │ ├── B09828_04_05.png │ ├── B09828_04_06.png │ ├── B09828_04_07.png │ ├── B09828_04_08.png │ ├── B09828_04_09.png │ ├── B09828_04_10.png │ ├── B09828_04_12.png │ ├── B09828_04_13.png │ ├── B09828_04_14.png │ ├── B09828_04_15.png │ ├── B09828_04_16.png │ ├── B09828_04_17.png │ ├── B09828_04_18.png │ ├── B09828_04_19.png │ ├── B09828_04_20.png │ ├── B09828_04_21.png │ ├── B09828_04_22.png │ ├── B09828_04_23.png │ └── B09828_04_24.png └── Readme.md ├── Chapter05 ├── Folds5x2_pp.xlsx ├── Images │ ├── B09828_05_01.png │ ├── B09828_05_02.png │ ├── B09828_05_03.png │ ├── B09828_05_04.png │ ├── B09828_05_05.png │ ├── B09828_05_06.png │ ├── B09828_05_07.png │ ├── B09828_05_08.png │ └── B09828_05_09.png ├── Readme.md ├── dag.py ├── GuessTheWord.ipynb ├── Genetic CNN.ipynb └── Genetic_RNN.ipynb ├── Chapter11 ├── B09828_15_01.png ├── B09828_15_02.png ├── B09828_15_03.png ├── B09828_15_04.png ├── B09828_15_05.png ├── readme.md └── SF_crime_category_detection.ipynb ├── Chapter08 ├── Images │ ├── B09828_08_01.png │ ├── B09828_08_01a.png │ ├── B09828_08_02.png │ ├── B09828_08_03.png │ ├── B09828_08_04.png │ ├── B09828_08_05.png │ ├── B09828_08_06.png │ ├── 
B09828_08_07.png │ ├── B09828_08_08.png │ ├── B09828_08_09.png │ ├── B09828_08_10.png │ ├── B09828_08_11.png │ ├── B09828_08_12.png │ ├── B09828_08_13.png │ ├── B09828_08_14.png │ ├── B09828_08_15.png │ ├── B09828_08_16.png │ ├── B09828_08_17.png │ └── B09828_08_18.png ├── Readme.md └── Boston_Price_MLlib.ipynb ├── LICENSE └── README.md /Chapter06/Images/Readme.md: -------------------------------------------------------------------------------- 1 | Images 2 | -------------------------------------------------------------------------------- /Chapter07/Images/Readme.md: -------------------------------------------------------------------------------- 1 | Images 2 | -------------------------------------------------------------------------------- /Chapter09/Images/Readme.md: -------------------------------------------------------------------------------- 1 | Images 2 | -------------------------------------------------------------------------------- /Chapter10/Readme.md: -------------------------------------------------------------------------------- 1 | AI for Industrial IoT 2 | -------------------------------------------------------------------------------- /Chapter03/Images/Readme.md: -------------------------------------------------------------------------------- 1 | Images used in the chapter 2 | -------------------------------------------------------------------------------- /Chapter02/Readme.md: -------------------------------------------------------------------------------- 1 | # Data Access and Distributed Processing for IoT 2 | -------------------------------------------------------------------------------- /Chapter01/Fig01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter01/Fig01.png -------------------------------------------------------------------------------- /Chapter01/Fig02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter01/Fig02.png -------------------------------------------------------------------------------- /Chapter01/Fig03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter01/Fig03.png -------------------------------------------------------------------------------- /Chapter01/Fig04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter01/Fig04.png -------------------------------------------------------------------------------- /Chapter01/Fig05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter01/Fig05.png -------------------------------------------------------------------------------- /Chapter01/Fig06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter01/Fig06.png -------------------------------------------------------------------------------- /Chapter12/video.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter12/video.avi -------------------------------------------------------------------------------- /Chapter03/Folds5x2_pp.xlsx:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter03/Folds5x2_pp.xlsx -------------------------------------------------------------------------------- /Chapter04/Folds5x2_pp.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/Folds5x2_pp.xlsx -------------------------------------------------------------------------------- /Chapter05/Folds5x2_pp.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter05/Folds5x2_pp.xlsx -------------------------------------------------------------------------------- /Chapter10/B09828_14_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter10/B09828_14_01.png -------------------------------------------------------------------------------- /Chapter10/B09828_14_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter10/B09828_14_02.png -------------------------------------------------------------------------------- /Chapter10/B09828_14_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter10/B09828_14_03.png -------------------------------------------------------------------------------- /Chapter10/B09828_14_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter10/B09828_14_04.png -------------------------------------------------------------------------------- /Chapter10/B09828_14_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter10/B09828_14_05.png -------------------------------------------------------------------------------- /Chapter10/B09828_14_06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter10/B09828_14_06.png -------------------------------------------------------------------------------- /Chapter10/B09828_14_07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter10/B09828_14_07.png -------------------------------------------------------------------------------- /Chapter10/B09828_14_08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter10/B09828_14_08.png -------------------------------------------------------------------------------- /Chapter11/B09828_15_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter11/B09828_15_01.png -------------------------------------------------------------------------------- /Chapter11/B09828_15_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter11/B09828_15_02.png -------------------------------------------------------------------------------- 
/Chapter11/B09828_15_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter11/B09828_15_03.png -------------------------------------------------------------------------------- /Chapter11/B09828_15_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter11/B09828_15_04.png -------------------------------------------------------------------------------- /Chapter11/B09828_15_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter11/B09828_15_05.png -------------------------------------------------------------------------------- /Chapter12/B09828_09_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter12/B09828_09_01.png -------------------------------------------------------------------------------- /Chapter12/B09828_09_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter12/B09828_09_02.png -------------------------------------------------------------------------------- /Chapter12/B09828_09_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter12/B09828_09_03.png -------------------------------------------------------------------------------- /Chapter12/B09828_09_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter12/B09828_09_04.png -------------------------------------------------------------------------------- /Chapter12/B09828_09_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter12/B09828_09_05.png -------------------------------------------------------------------------------- /Chapter12/B09828_09_06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter12/B09828_09_06.png -------------------------------------------------------------------------------- /Chapter12/B09828_09_07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter12/B09828_09_07.png -------------------------------------------------------------------------------- /Chapter12/B09828_09_08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter12/B09828_09_08.png -------------------------------------------------------------------------------- /Chapter12/B09828_09_09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter12/B09828_09_09.png -------------------------------------------------------------------------------- /Chapter12/B09828_09_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter12/B09828_09_10.png 
-------------------------------------------------------------------------------- /Chapter03/Images/B09828_03_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter03/Images/B09828_03_01.png -------------------------------------------------------------------------------- /Chapter03/Images/B09828_03_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter03/Images/B09828_03_02.png -------------------------------------------------------------------------------- /Chapter03/Images/B09828_03_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter03/Images/B09828_03_03.png -------------------------------------------------------------------------------- /Chapter03/Images/B09828_03_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter03/Images/B09828_03_04.png -------------------------------------------------------------------------------- /Chapter03/Images/B09828_03_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter03/Images/B09828_03_05.png -------------------------------------------------------------------------------- /Chapter03/Images/B09828_03_06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter03/Images/B09828_03_06.png -------------------------------------------------------------------------------- /Chapter03/Images/B09828_03_07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter03/Images/B09828_03_07.png -------------------------------------------------------------------------------- /Chapter03/Images/B09828_03_08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter03/Images/B09828_03_08.png -------------------------------------------------------------------------------- /Chapter03/Images/B09828_03_09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter03/Images/B09828_03_09.png -------------------------------------------------------------------------------- /Chapter03/Images/B09828_03_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter03/Images/B09828_03_10.png -------------------------------------------------------------------------------- /Chapter03/Images/B09828_03_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter03/Images/B09828_03_11.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_01.png -------------------------------------------------------------------------------- 
/Chapter04/images/B09828_04_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_02.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_03.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_04.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_05.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_06.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_07.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_08.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_09.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_10.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_12.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_13.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_14.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_15.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_15.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_16.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_17.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_18.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_19.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_20.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_21.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_22.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_23.png -------------------------------------------------------------------------------- /Chapter04/images/B09828_04_24.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter04/images/B09828_04_24.png -------------------------------------------------------------------------------- /Chapter05/Images/B09828_05_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter05/Images/B09828_05_01.png -------------------------------------------------------------------------------- /Chapter05/Images/B09828_05_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter05/Images/B09828_05_02.png -------------------------------------------------------------------------------- /Chapter05/Images/B09828_05_03.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter05/Images/B09828_05_03.png -------------------------------------------------------------------------------- /Chapter05/Images/B09828_05_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter05/Images/B09828_05_04.png -------------------------------------------------------------------------------- /Chapter05/Images/B09828_05_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter05/Images/B09828_05_05.png -------------------------------------------------------------------------------- /Chapter05/Images/B09828_05_06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter05/Images/B09828_05_06.png -------------------------------------------------------------------------------- /Chapter05/Images/B09828_05_07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter05/Images/B09828_05_07.png -------------------------------------------------------------------------------- /Chapter05/Images/B09828_05_08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter05/Images/B09828_05_08.png -------------------------------------------------------------------------------- /Chapter05/Images/B09828_05_09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter05/Images/B09828_05_09.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_01.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_02.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_03.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_04.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_05.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_06.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_06.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_07.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_08.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_09.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_10.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_11.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_12.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_13.png -------------------------------------------------------------------------------- /Chapter06/Images/B09828_06_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter06/Images/B09828_06_14.png -------------------------------------------------------------------------------- /Chapter07/Images/B09828_07_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter07/Images/B09828_07_01.png -------------------------------------------------------------------------------- /Chapter07/Images/B09828_07_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter07/Images/B09828_07_02.png -------------------------------------------------------------------------------- /Chapter07/Images/B09828_07_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter07/Images/B09828_07_03.png -------------------------------------------------------------------------------- /Chapter07/Images/B09828_07_04.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter07/Images/B09828_07_04.png -------------------------------------------------------------------------------- /Chapter07/Images/B09828_07_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter07/Images/B09828_07_05.png -------------------------------------------------------------------------------- /Chapter07/Images/B09828_07_06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter07/Images/B09828_07_06.png -------------------------------------------------------------------------------- /Chapter07/Images/B09828_07_07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter07/Images/B09828_07_07.png -------------------------------------------------------------------------------- /Chapter07/Images/B09828_07_08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter07/Images/B09828_07_08.png -------------------------------------------------------------------------------- /Chapter07/Images/B09828_07_09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter07/Images/B09828_07_09.png -------------------------------------------------------------------------------- /Chapter07/Images/B09828_07_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter07/Images/B09828_07_10.png -------------------------------------------------------------------------------- /Chapter07/Images/B09828_07_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter07/Images/B09828_07_11.png -------------------------------------------------------------------------------- /Chapter07/Images/B09828_07_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter07/Images/B09828_07_12.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_01.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_01a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_01a.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_02.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_03.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_03.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_04.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_05.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_06.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_07.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_08.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_09.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_10.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_11.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_12.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_13.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_14.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_15.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_15.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_16.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_17.png -------------------------------------------------------------------------------- /Chapter08/Images/B09828_08_18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter08/Images/B09828_08_18.png -------------------------------------------------------------------------------- /Chapter09/Images/B09828_13_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter09/Images/B09828_13_01.png -------------------------------------------------------------------------------- /Chapter09/Images/B09828_13_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter09/Images/B09828_13_02.png -------------------------------------------------------------------------------- /Chapter09/Images/B09828_13_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter09/Images/B09828_13_03.png -------------------------------------------------------------------------------- /Chapter09/Images/B09828_13_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter09/Images/B09828_13_04.png -------------------------------------------------------------------------------- /Chapter09/Images/B09828_13_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter09/Images/B09828_13_05.png -------------------------------------------------------------------------------- /Chapter09/Images/B09828_13_06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter09/Images/B09828_13_06.png -------------------------------------------------------------------------------- /Chapter09/Images/B09828_13_07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter09/Images/B09828_13_07.png -------------------------------------------------------------------------------- /Chapter09/Images/B09828_13_08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter09/Images/B09828_13_08.png -------------------------------------------------------------------------------- /Chapter09/Images/B09828_13_09.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wikibook/ai-iot/master/Chapter09/Images/B09828_13_09.png
--------------------------------------------------------------------------------
/Chapter11/readme.md:
--------------------------------------------------------------------------------
This chapter introduces the reader to smart cities. Case studies are used to show how the concepts learned in this book can be applied in developing various smart-city components. After reading the chapter, the reader will:

* Know what a smart city is
* Learn about the essential components of a smart city
* Learn about cities across the globe that are implementing smart solutions
* Understand the challenges in building smart cities
* Write code to detect the crime category from San Francisco crime data
--------------------------------------------------------------------------------
/Chapter12/Readme.md:
--------------------------------------------------------------------------------
Now that we have understood and implemented different artificial intelligence (AI) and machine learning (ML) algorithms, it is time to bring it all together, understand which type of data is best suited to each, and learn the basic pre-processing required for each type of data. By the end of this chapter you will:

* Know the different types of data that can be fed to your model
* Learn how to process time-series data
* Learn how to pre-process textual data
* Know the different transforms that can be applied to image data
* Learn how to handle video files
* Learn how to handle speech data
* Know the available cloud computing options
--------------------------------------------------------------------------------
/Chapter08/Readme.md:
--------------------------------------------------------------------------------
Advances in distributed computing environments and the easy worldwide availability of the internet have resulted in the emergence of distributed AI. In this chapter, we will learn about two frameworks, Apache's MLlib and H2O.ai, both of which provide distributed and scalable machine learning for large, streaming data. The chapter starts with an introduction to Apache Spark, the de facto distributed data-processing system. By the end of the chapter you will:

* Know about Spark and its importance in distributed data processing
* Understand the Spark architecture
* Learn about MLlib (a minimal sketch follows this readme)
* Use MLlib in your deep learning pipeline
* Delve deep into the H2O.ai platform
--------------------------------------------------------------------------------
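To give a concrete taste of the MLlib workflow mentioned above, here is a minimal sketch. It assumes a working PySpark installation; the toy data and column names are invented for illustration and are not the book's code:

```python
# Minimal PySpark MLlib sketch: linear regression on a toy DataFrame
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression

spark = SparkSession.builder.appName("mllib-sketch").getOrCreate()

# Invented toy data: two feature columns and a label column
df = spark.createDataFrame(
    [(1.0, 2.0, 5.0), (2.0, 1.0, 4.0), (3.0, 3.0, 9.0), (4.0, 2.0, 8.0)],
    ["x1", "x2", "label"])

# MLlib estimators expect the features packed into a single vector column
assembler = VectorAssembler(inputCols=["x1", "x2"], outputCol="features")
lr = LinearRegression(featuresCol="features", labelCol="label")
model = lr.fit(assembler.transform(df))

print(model.coefficients, model.intercept)
spark.stop()
```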
/Chapter09/Readme.md:
--------------------------------------------------------------------------------
Now that you are fully equipped with machine learning and deep learning knowledge, and have learned how to apply it to big data, image tasks, text tasks and time-series data, it is time to explore some real uses of the algorithms and techniques you have learned. This chapter and the following two chapters concentrate on specific case studies; this chapter focuses on personal and home IoT use cases. After going through the chapter you will:

* Know about successful IoT applications
* Learn about wearables and their role in personal IoT
* Learn how to monitor the heart using machine learning
* Understand what makes a home a "smart home"
* Learn about some of the devices used in smart homes
* Explore the application of AI to human activity recognition
--------------------------------------------------------------------------------
/Chapter06/Readme.md:
--------------------------------------------------------------------------------
Reinforcement learning is very different from both supervised and unsupervised learning. It is the way most living beings learn: by interacting with the environment. In this chapter, we will study the different algorithms employed in reinforcement learning. As you progress through the chapter, you will:

* Know what reinforcement learning is, and how it differs from supervised and unsupervised learning
* Know the different elements of reinforcement learning
* Learn about some fascinating real-world applications of RL
* Understand the OpenAI interface for training RL agents
* Know about Q-learning and use it to train an RL agent
* Know about Deep Q-Networks and employ them to train an agent to play Atari games
* Know about the Policy Gradient algorithm and use it to train an RL agent
--------------------------------------------------------------------------------
/Chapter03/readme.md:
--------------------------------------------------------------------------------
# Machine Learning for IoT
The term "machine learning" refers to computer programs that can automatically detect meaningful patterns in data and improve with experience. Though it is not a new field, it is presently at the peak of its hype cycle. This chapter introduces the reader to the standard machine learning algorithms and their applications in the field of IoT. After reading this chapter you will:

* Know what machine learning is and the role it plays in the IoT pipeline
* Know about the supervised and unsupervised learning paradigms
* Know about regression and learn how to perform linear regression using TensorFlow/Keras (a minimal sketch follows this readme)
* Know about popular machine learning classifiers and implement them in TensorFlow/Keras
* Know about decision trees, random forests and boosting techniques, and learn how to write code for them
--------------------------------------------------------------------------------
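Here is the minimal linear-regression sketch promised above. It assumes a recent TensorFlow with the bundled Keras API; the synthetic data and layer choice are illustrative, not the chapter's code:

```python
# Minimal linear regression with Keras: one Dense unit, no activation
import numpy as np
from tensorflow import keras

# Synthetic data: y = 3x + 2 plus a little noise
X = np.random.rand(200, 1).astype(np.float32)
y = 3 * X + 2 + 0.1 * np.random.randn(200, 1).astype(np.float32)

# A single Dense(1) layer is exactly the linear model y = Wx + b
model = keras.Sequential([keras.layers.Dense(1, input_shape=(1,))])
model.compile(optimizer='sgd', loss='mse')
model.fit(X, y, epochs=100, verbose=0)

W, b = model.layers[0].get_weights()
print(W, b)  # should approach 3 and 2 respectively
```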
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Packt

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/Chapter05/Readme.md:
--------------------------------------------------------------------------------
# Genetic Algorithms for IoT
In the last chapter, we saw different deep-learning-based algorithms; these algorithms have proven successful in the recognition, detection, reconstruction and even generation of vision, speech and text data. While deep learning currently leads in both application and employability, it faces close competition from evolutionary algorithms: algorithms inspired by the natural process of evolution, the world's best optimizer. Indeed, even we are the result of years of genetic evolution. In this chapter, you will be introduced to the fascinating world of evolutionary algorithms and learn about one specific type, genetic algorithms, in more detail. After reading this chapter you will:

* Know what optimization is
* Learn different methods of solving an optimization problem
* Understand the intuition behind genetic algorithms
* Know about the advantages of genetic algorithms
* Understand and implement the processes of crossover, mutation and fitness-function selection (a toy sketch follows this readme)
* Use a genetic algorithm to find a lost password
* Learn about various uses of genetic algorithms in optimizing your models
* Learn about the DEAP genetic algorithm library
--------------------------------------------------------------------------------
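To make the selection/crossover/mutation loop mentioned above concrete, here is a toy, self-contained sketch in plain Python. All constants are invented, and this is not the chapter's code (which uses DEAP):

```python
# Toy genetic algorithm: evolve bit-strings toward all ones
import random

POP_SIZE, LENGTH, GENERATIONS, MUTATION_RATE = 20, 10, 50, 0.05

def fitness(ind):
    return sum(ind)  # fitness = number of 1-bits

population = [[random.randint(0, 1) for _ in range(LENGTH)] for _ in range(POP_SIZE)]
for _ in range(GENERATIONS):
    population.sort(key=fitness, reverse=True)
    parents = population[:POP_SIZE // 2]      # selection: keep the fitter half
    children = []
    while len(children) < POP_SIZE - len(parents):
        a, b = random.sample(parents, 2)
        cut = random.randrange(1, LENGTH)     # single-point crossover
        child = a[:cut] + b[cut:]
        # mutation: flip each bit with a small probability
        child = [bit ^ (random.random() < MUTATION_RATE) for bit in child]
        children.append(child)
    population = parents + children

print(max(population, key=fitness))  # usually [1, 1, ..., 1]
```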
/Chapter07/Readme.md:
--------------------------------------------------------------------------------
Machine learning (ML) and artificial intelligence (AI) have touched almost every field of human endeavour. Agriculture, music, health, defence: you will not find a single field where AI has not left its mark. The enormous success of AI/ML depends not only on the availability of computational power but also on the generation of significant amounts of data. The majority of the data generated is unlabelled, and thus understanding the inherent distribution of the data is an important ML task. It is here that generative models come into the picture.

In the past few years, deep generative models have shown great success in understanding data distributions and have been used in a variety of applications. Two of the most popular generative models are Variational Autoencoders (VAEs) and Generative Adversarial Networks (GANs).

In this chapter, we will learn about both VAEs and GANs and use them to generate images. After reading the chapter, you will:

* Know the difference between generative and discriminative networks
* Learn about Variational Autoencoders
* Understand the intuitive functioning of Generative Adversarial Networks
* Implement a vanilla GAN and use it to generate handwritten digits
* Know the most popular variation of the GAN, the DCGAN
* Implement a DCGAN in TensorFlow and use it to generate faces
* Know about further variations and applications of GANs
--------------------------------------------------------------------------------
/Chapter01/Readme.md:
--------------------------------------------------------------------------------
# Chapter 1: Principles and foundations of IoT and AI
This book deals with three big trends in the current business scenario: the __Internet of Things (IoT)__, __big data__ and __artificial intelligence (AI)__. The exponential growth in the number of devices connected to the internet, and the exponential volume of data they create, necessitates the analytical and predictive techniques of artificial intelligence and deep learning. This book specifically targets the third component: the various analytical and predictive methods/models available in the field of AI for the big data generated by IoT.
This chapter briefly introduces these three trends and expands on how they are interdependent. The data generated by IoT devices is uploaded to the cloud, so you will also be introduced to the various IoT cloud platforms and the data services they offer. The chapter covers the following points:

* Knowing what a "thing" is in IoT, which devices constitute things, what the different IoT platforms are, and what an IoT vertical is
* Knowing what big data is, and understanding how the amount of data generated by IoT lies in the range of big data
* Understanding how and why AI can be useful for making sense of the voluminous data generated by IoT
* Understanding, with the help of an illustration, how IoT, big data and AI together can help shape a better world
* And finally, learning about some of the tools needed to perform the analysis
--------------------------------------------------------------------------------
/Chapter04/Readme.md:
--------------------------------------------------------------------------------
# Deep Learning for IoT
In the last chapter, we learned about different machine learning algorithms; the focus of this chapter is the neural-network-based multi-layered models, a.k.a. deep learning models. They have become the buzzword of the last few years and an absolute favourite of investors in artificial-intelligence-based startups. Achieving above-human-level accuracy in object detection and defeating the world's ninth-dan Go master are some of the feats made possible by deep learning. In this chapter and a few subsequent chapters, we will learn about the different deep learning models and how to use deep learning on our IoT-generated data. We will start with a glimpse of the journey of deep learning, then learn about four popular models: the multilayer perceptron (MLP), the convolutional neural network (CNN), the recurrent neural network (RNN) and autoencoders. Specifically, you will:

* Know about the history of deep learning and the factors responsible for its present success
* Know about artificial neurons, and how they can be connected to solve non-linear problems
* Know about the backpropagation algorithm and use it to train an MLP model (a minimal sketch follows this readme)
* Learn about the different optimizers and activation functions available in TensorFlow
* Know how a convolutional neural network works, and the concepts behind kernels, padding and strides
* Use a CNN model for classification/recognition
* Know about recurrent neural networks and the modified RNNs: Long Short-Term Memory and Gated Recurrent Units
* Know the architecture and functioning of autoencoders
--------------------------------------------------------------------------------
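Here is the minimal MLP sketch promised above: two Dense layers trained end-to-end by backpropagation. It assumes a recent TensorFlow/Keras; the toy data and layer sizes are invented for illustration:

```python
# Tiny MLP trained by backpropagation (illustrative only)
import numpy as np
from tensorflow import keras

# Toy binary classification data: label is 1 when the features sum above 2
X = np.random.rand(500, 4).astype(np.float32)
y = (X.sum(axis=1) > 2.0).astype(np.float32)

model = keras.Sequential([
    keras.layers.Dense(8, activation='relu', input_shape=(4,)),  # hidden layer
    keras.layers.Dense(1, activation='sigmoid')                  # output layer
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# fit() runs forward passes and backpropagates gradients through both layers
model.fit(X, y, epochs=10, verbose=0)
print(model.evaluate(X, y, verbose=0))  # [loss, accuracy]
```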
/Chapter01/matrix_multiplication.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 1.05043888 0.57121533]\n",
      " [ 1.45642126 0.61392534]\n",
      " [ 0.52326083 0.47913069]]\n"
     ]
    }
   ],
   "source": [
    "# import libraries and modules needed for the code\n",
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "\n",
    "# Data\n",
    "# A random matrix of size [3,5]\n",
    "mat1 = np.random.rand(3,5)\n",
    "# A random matrix of size [5,2]\n",
    "mat2 = np.random.rand(5,2)\n",
    "\n",
    "\n",
    "# Computation Graph\n",
    "A = tf.placeholder(tf.float32, None, name='A')\n",
    "B = tf.placeholder(tf.float32, None, name='B')\n",
    "C = tf.matmul(A,B)\n",
    "\n",
    "\n",
    "# Execution Graph\n",
    "with tf.Session() as sess:\n",
    "    result = sess.run(C, feed_dict={A: mat1, B: mat2})\n",
    "    print(result)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:tensorflow]",
   "language": "python",
   "name": "conda-env-tensorflow-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------
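The notebook above uses the TensorFlow 1.x graph API (tf.placeholder and tf.Session). For readers on TensorFlow 2.x, where eager execution is the default and placeholders no longer exist, the equivalent is much shorter; this is my sketch, not part of the repository:

```python
# TensorFlow 2.x equivalent of the placeholder/Session matmul above
import numpy as np
import tensorflow as tf

mat1 = np.random.rand(3, 5).astype(np.float32)
mat2 = np.random.rand(5, 2).astype(np.float32)

# Eager execution: no graph construction, no session, no feed_dict
result = tf.matmul(mat1, mat2)
print(result.numpy())  # a [3, 2] matrix, as in the original notebook
```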
"python", 54 | "name": "conda-env-tensorflow-py" 55 | }, 56 | "language_info": { 57 | "codemirror_mode": { 58 | "name": "ipython", 59 | "version": 3 60 | }, 61 | "file_extension": ".py", 62 | "mimetype": "text/x-python", 63 | "name": "python", 64 | "nbconvert_exporter": "python", 65 | "pygments_lexer": "ipython3", 66 | "version": "3.5.4" 67 | } 68 | }, 69 | "nbformat": 4, 70 | "nbformat_minor": 2 71 | } 72 | -------------------------------------------------------------------------------- /Chapter03/LogisticRegressor.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | import numpy as np 4 | import pandas as pd 5 | import matplotlib.pyplot as plt 6 | from sklearn.preprocessing import MinMaxScaler 7 | from sklearn.metrics import mean_squared_error, r2_score 8 | from sklearn.model_selection import train_test_split 9 | 10 | class LogisticRegressor: 11 | def __init__(self, d, n, lr=0.001): 12 | 13 | # Place holders for input-output training data 14 | self.X = tf.placeholder(tf.float32, \ 15 | shape=[None, d], name='input') 16 | self.Y = tf.placeholder(tf.float32, \ 17 | name='output') 18 | # Variables for weight and bias 19 | self.b = tf.Variable(tf.zeros(n), dtype=tf.float32) 20 | self.W = tf.Variable(tf.random_normal([d, n]), \ 21 | dtype=tf.float32) 22 | 23 | # The Linear Regression Model 24 | h = tf.matmul(self.X, self.W) + self.b 25 | self.Ypred = tf.nn.sigmoid(h) 26 | 27 | # Loss function 28 | self.loss = tf.reduce_mean(-tf.reduce_sum(self.Y * tf.log(self.Ypred), \ 29 | reduction_indices=1), name='cross-entropy-loss') 30 | 31 | # Gradient Descent with learning 32 | # rate of 0.05 to minimize loss 33 | optimizer = tf.train.GradientDescentOptimizer(lr) 34 | self.optimize = optimizer.minimize(self.loss) 35 | 36 | # Initializing Variables 37 | init_op = tf.global_variables_initializer() 38 | self.sess = tf.Session() 39 | self.sess.run(init_op) 40 | 41 | def fit(self, X, Y, epochs=500): 42 | total = [] 43 | for i in range(epochs): 44 | _, l = self.sess.run([self.optimize, self.loss], \ 45 | feed_dict={self.X: X, self.Y: Y}) 46 | total.append(l) 47 | if i % 1000 == 0: 48 | print('Epoch {0}/{1}: Loss {2}'.format(i, epochs, l)) 49 | return total 50 | 51 | def predict(self, X): 52 | return self.sess.run(self.Ypred, feed_dict={self.X: X}) 53 | 54 | def get_weights(self): 55 | return self.sess.run([self.W, self.b]) 56 | 57 | -------------------------------------------------------------------------------- /Chapter02/Hdf5_with_PyTables.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Import module and create a random d\n", 10 | "import numpy as np\n", 11 | "arr = np.random.rand(5,4)\n", 12 | "np.savetxt('temp.csv', arr, delimiter=',')" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "arr = np.loadtxt('temp.csv', skiprows=1, usecols=(2,3),\n", 22 | " delimiter=',')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import tables\n", 32 | "h5filename = 'pytable_demo.hdf5'\n", 33 | "with tables.open_file(h5filename,mode='w') as h5file:\n", 34 | " root = h5file.root\n", 35 | " h5file.create_array(root,'global_power',arr)\n", 36 | " h5file.close()\n", 37 | " " 38 | ] 39 | }, 40 | { 41 | 
"cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | " (4, 2)\n", 50 | "[[0.66775171 0.5654945 ]\n", 51 | " [0.31355147 0.37325779]\n", 52 | " [0.25665679 0.79335901]\n", 53 | " [0.96372349 0.45597874]]\n" 54 | ] 55 | } 56 | ], 57 | "source": [ 58 | "with tables.open_file(h5filename,mode='r') as h5file:\n", 59 | " root = h5file.root\n", 60 | " for node in h5file.root:\n", 61 | " ds = node.read()\n", 62 | " print(type(ds),ds.shape)\n", 63 | " print(ds)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [] 72 | } 73 | ], 74 | "metadata": { 75 | "kernelspec": { 76 | "display_name": "Python [conda env:anaconda3]", 77 | "language": "python", 78 | "name": "conda-env-anaconda3-py" 79 | }, 80 | "language_info": { 81 | "codemirror_mode": { 82 | "name": "ipython", 83 | "version": 3 84 | }, 85 | "file_extension": ".py", 86 | "mimetype": "text/x-python", 87 | "name": "python", 88 | "nbconvert_exporter": "python", 89 | "pygments_lexer": "ipython3", 90 | "version": "3.6.5" 91 | } 92 | }, 93 | "nbformat": 4, 94 | "nbformat_minor": 2 95 | } 96 | -------------------------------------------------------------------------------- /Chapter07/loader_celebA.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | import requests 4 | import math 5 | import numpy as np 6 | from PIL import Image 7 | from tqdm import tqdm 8 | import os 9 | from glob import glob 10 | from matplotlib import pyplot 11 | from PIL import Image 12 | import numpy as np 13 | 14 | 15 | def download_celeb_a(): 16 | """ 17 | The function unzips the img_align_celeba.zip into the cwd 18 | """ 19 | dirpath = os.getcwd() 20 | data_dir = 'celebA' 21 | if os.path.exists(os.path.join(dirpath, data_dir)): 22 | print('Found Celeb-A - skip') 23 | return 24 | 25 | filename = "img_align_celeba.zip" 26 | save_path = os.path.join(dirpath, filename) 27 | 28 | if not DEBUG: 29 | zip_dir = '' 30 | with zipfile.ZipFile(save_path) as zf: 31 | zip_dir = zf.namelist()[0] 32 | zf.extractall(dirpath) 33 | 34 | # Rename the directory as celebA 35 | os.rename(os.path.join(dirpath, zip_dir), os.path.join(dirpath, data_dir)) 36 | 37 | def plot_images(images, mode='RGB'): 38 | """ 39 | Function to plot images in a square grid 40 | """ 41 | # Get maximum size for square grid of images 42 | save_size = math.floor(np.sqrt(images.shape[0])) 43 | # Scale to 0-255 44 | images = (((images - images.min()) * 255) / (images.max() - images.min())).astype(np.uint8) 45 | # Put images in a square arrangement 46 | images_in_square = np.reshape( 47 | images[:save_size*save_size], 48 | (save_size, save_size, images.shape[1], images.shape[2], images.shape[3])) 49 | # Combine images to grid image 50 | new_im = Image.new(mode, (images.shape[1] * save_size, images.shape[2] * save_size)) 51 | for col_i, col_images in enumerate(images_in_square): 52 | for image_i, image in enumerate(col_images): 53 | im = Image.fromarray(image, mode) 54 | new_im.paste(im, (col_i * images.shape[1], image_i * images.shape[2])) 55 | 56 | return new_im 57 | 58 | 59 | def get_image(image_path, width, height, mode): 60 | """ 61 | Read image from image_path 62 | """ 63 | image = Image.open(image_path) 64 | 65 | if image.size != (width, height): 66 | # Remove most pixels that aren't part of a face 67 | face_width = face_height = 108 68 | j = (image.size[0] - 
face_width) // 2 69 | i = (image.size[1] - face_height) // 2 70 | image = image.crop([j, i, j + face_width, i + face_height]) 71 | image = image.resize([width, height], Image.BILINEAR) 72 | 73 | return np.array(image.convert(mode)) 74 | 75 | def get_batch(image_files, width, height, mode='RGB'): 76 | """ 77 | Get a single batch of data as an NumPy array 78 | """ 79 | data_batch = np.array( 80 | [get_image(sample_file, width, height, mode) for sample_file in image_files]).astype(np.float32) 81 | 82 | # Make sure the images are in 4 dimensions 83 | if len(data_batch.shape) < 4: 84 | data_batch = data_batch.reshape(data_batch.shape + (1,)) 85 | 86 | return data_batch 87 | 88 | def get_batches(batch_size, shape, data_files): 89 | """ 90 | Generate batches 91 | """ 92 | IMAGE_MAX_VALUE = 255 93 | 94 | 95 | current_index = 0 96 | while current_index + batch_size <= shape[0]: 97 | data_batch = get_batch( 98 | data_files[current_index:current_index + batch_size], 99 | *shape[1:3]) 100 | 101 | current_index += batch_size 102 | 103 | yield data_batch / IMAGE_MAX_VALUE - 0.5 104 | -------------------------------------------------------------------------------- /Chapter02/OpenPyXL_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Collecting openpyxl\n", 13 | " Downloading openpyxl-2.5.0.tar.gz (169kB)\n", 14 | "\u001b[K 100% |████████████████████████████████| 174kB 393kB/s ta 0:00:01\n", 15 | "\u001b[?25hCollecting jdcal (from openpyxl)\n", 16 | " Downloading jdcal-1.3.tar.gz\n", 17 | "Collecting et_xmlfile (from openpyxl)\n", 18 | " Downloading et_xmlfile-1.0.1.tar.gz\n", 19 | "Building wheels for collected packages: openpyxl, jdcal, et-xmlfile\n", 20 | " Running setup.py bdist_wheel for openpyxl ... \u001b[?25ldone\n", 21 | "\u001b[?25h Stored in directory: /Users/am/Library/Caches/pip/wheels/a7/88/96/29c1f91ba5a9b94dfc39a9f6f72d0eb92d6f0d917cf2341a3f\n", 22 | " Running setup.py bdist_wheel for jdcal ... \u001b[?25ldone\n", 23 | "\u001b[?25h Stored in directory: /Users/am/Library/Caches/pip/wheels/0f/63/92/19ac65ed64189de4d662f269d39dd08a887258842ad2f29549\n", 24 | " Running setup.py bdist_wheel for et-xmlfile ... 
\u001b[?25ldone\n", 25 | "\u001b[?25h Stored in directory: /Users/am/Library/Caches/pip/wheels/99/f6/53/5e18f3ff4ce36c990fa90ebdf2b80cd9b44dc461f750a1a77c\n", 26 | "Successfully built openpyxl jdcal et-xmlfile\n", 27 | "Installing collected packages: jdcal, et-xmlfile, openpyxl\n", 28 | "Successfully installed et-xmlfile-1.0.1 jdcal-1.3 openpyxl-2.5.0\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "# Install if OpenPyXl is not already installed \n", 34 | "!pip install openpyxl" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 12, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "A\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "from openpyxl import Workbook\n", 52 | "from openpyxl.compat import range\n", 53 | "from openpyxl.utils import get_column_letter\n", 54 | "\n", 55 | "wb = Workbook()\n", 56 | "\n", 57 | "dest_filename = 'empty_book.xlsx'\n", 58 | "\n", 59 | "ws1 = wb.active\n", 60 | "ws1.title = \"range names\"\n", 61 | "\n", 62 | "for row in range(1, 40):\n", 63 | " ws1.append(range(0,100,5))\n", 64 | "\n", 65 | "ws2 = wb.create_sheet(title=\"Pi\")\n", 66 | "ws2['F5'] = 2 * 3.14\n", 67 | "ws2.cell(column=1, row=5, value= 3.14)\n", 68 | "\n", 69 | "ws3 = wb.create_sheet(title=\"Data\")\n", 70 | "for row in range(1, 20):\n", 71 | " for col in range(1, 15):\n", 72 | " _ = ws3.cell(column=col, row=row, value=\"{0}\".format(get_column_letter(col)))\n", 73 | "print(ws3['A10'].value)\n", 74 | "\n", 75 | "wb.save(filename = dest_filename)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 14, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "name": "stdout", 85 | "output_type": "stream", 86 | "text": [ 87 | "['range names', 'Pi', 'Data']\n", 88 | "15\n" 89 | ] 90 | }, 91 | { 92 | "name": "stderr", 93 | "output_type": "stream", 94 | "text": [ 95 | "/Users/am/anaconda3/envs/tensorflow/lib/python3.5/site-packages/ipykernel/__main__.py:4: DeprecationWarning: Call to deprecated function get_sheet_names (Use wb.sheetnames).\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "from openpyxl import load_workbook\n", 101 | "wb = load_workbook(filename = 'empty_book.xlsx')\n", 102 | "sheet_ranges = wb['range names']\n", 103 | "print(wb.get_sheet_names())\n", 104 | "print(sheet_ranges['D18'].value)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [] 113 | } 114 | ], 115 | "metadata": { 116 | "kernelspec": { 117 | "display_name": "Python [conda env:tensorflow]", 118 | "language": "python", 119 | "name": "conda-env-tensorflow-py" 120 | }, 121 | "language_info": { 122 | "codemirror_mode": { 123 | "name": "ipython", 124 | "version": 3 125 | }, 126 | "file_extension": ".py", 127 | "mimetype": "text/x-python", 128 | "name": "python", 129 | "nbconvert_exporter": "python", 130 | "pygments_lexer": "ipython3", 131 | "version": "3.5.4" 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 2 136 | } 137 | -------------------------------------------------------------------------------- /Chapter02/Txt_files.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 32, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "data_folder = '../../data/Shakespeare'\n", 10 | "data_file = 'alllines.txt'" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 37, 16 | "metadata": {}, 17 
| "outputs": [], 18 | "source": [ 19 | "import os\n", 20 | "\n", 21 | "#f = open(os.path.join(data_folder,data_file),newline='') \n", 22 | "f = open(data_file)\n", 23 | "contents = f.read()" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 44, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "name": "stdout", 33 | "output_type": "stream", 34 | "text": [ 35 | "\"ACT I\"\n", 36 | "\"SCENE I. London. The palace.\"\n", 37 | "\"Enter KING HENRY, LORD JOHN OF LANCASTER, the EARL of WESTMORELAND, SIR WALTER BLUNT, and others\"\n", 38 | "\"So shaken as we are, so wan with care,\"\n", 39 | "\"Find we a time for frighted peace to pant,\"\n", 40 | "\"And breathe short-winded accents of new broils\"\n", 41 | "\"To be commenced in strands afar remote.\"\n", 42 | "\"No more the thirsty entrance of this soil\"\n", 43 | "\"Shall daub her lips with her own children's blood,\"\n", 44 | "\"Nor more shall trenching war channel her fields,\"\n", 45 | "\"Nor bruise her flowerets with the armed hoofs\"\n", 46 | "\"Of hostile paces: those opposed eyes,\"\n", 47 | "\"Which, like the meteors of a troubled heaven,\"\n", 48 | "\"All of one nature, of one substance bred,\"\n", 49 | "\"Did lately meet in the intestine shock\"\n", 50 | "\"And furious close of civil butchery\"\n", 51 | "\"Shall now, in mutual well-beseeming ranks,\"\n", 52 | "\"March all one way and be no more opposed\"\n", 53 | "\"Against acquaintance, kindred and allies:\"\n", 54 | "\"The edge of war, like an ill-sheathed knife,\"\n", 55 | "\"No more shall cut his master. Therefore, friends,\"\n", 56 | "\"As far as to the sepulchre of Christ,\"\n", 57 | "\"Whose \n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "print(contents[:1000])\n" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 42, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "4583798" 74 | ] 75 | }, 76 | "execution_count": 42, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "len(contents)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 46, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "import numpy as np\n", 92 | "data_file = 'household_power_consumption.txt'" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 54, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "['Date;Time;Global_active_power;Global_reactive_power;Voltage;Global_intensity;Sub_metering_1;Sub_metering_2;Sub_metering_3'\n", 105 | " '16/12/2006;17:24:00;4.216;0.418;234.840;18.400;0.000;1.000;17.000'\n", 106 | " '16/12/2006;17:25:00;5.360;0.436;233.630;23.000;0.000;1.000;16.000'\n", 107 | " '16/12/2006;17:26:00;5.374;0.498;233.290;23.000;0.000;2.000;17.000'\n", 108 | " '16/12/2006;17:27:00;5.388;0.502;233.740;23.000;0.000;1.000;17.000'\n", 109 | " '16/12/2006;17:28:00;3.666;0.528;235.680;15.800;0.000;1.000;17.000'\n", 110 | " '16/12/2006;17:29:00;3.520;0.522;235.020;15.000;0.000;2.000;17.000'\n", 111 | " '16/12/2006;17:30:00;3.702;0.520;235.090;15.800;0.000;1.000;17.000'\n", 112 | " '16/12/2006;17:31:00;3.700;0.520;235.220;15.800;0.000;1.000;17.000'\n", 113 | " '16/12/2006;17:32:00;3.668;0.510;233.990;15.800;0.000;1.000;17.000']\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "data = np.genfromtxt(data_file,dtype='str')\n", 119 | "print(data[:10])" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | 
"outputs": [], 127 | "source": [] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python 3", 133 | "language": "python", 134 | "name": "python3" 135 | }, 136 | "language_info": { 137 | "codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.6.5" 147 | } 148 | }, 149 | "nbformat": 4, 150 | "nbformat_minor": 2 151 | } 152 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hands-On Artificial Intelligence for IoT 2 | 3 | Book Name 4 | 5 | This is the code repository for [Hands-On Artificial Intelligence for IoT](https://www.packtpub.com/big-data-and-business-intelligence/hands-artificial-intelligence-iot?utm_source=github&utm_medium=repository&utm_campaign=9781788836067), published by Packt. 6 | 7 | **Expert machine learning and deep learning techniques for developing smarter IoT systems** 8 | 9 | ## What is this book about? 10 | There are many applications that use data science and analytics to gain insights from terabytes of data. These apps, however, do not address the challenge of continually discovering patterns for IoT data. In Hands-On Artificial Intelligence for IoT, we cover various aspects of artificial intelligence (AI) and its implementation to make your IoT solutions smarter. 11 | This book covers the following exciting features: 12 | * Apply different AI techniques including machine learning and deep learning using TensorFlow and Keras 13 | * Access and process data from various distributed sources 14 | * Perform supervised and unsupervised machine learning for IoT data 15 | * Implement distributed processing of IoT data over Apache Spark using the MLLib and H2O.ai platforms 16 | * Forecast time-series data using deep learning methods 17 | 18 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/1788836065) today! 19 | 20 | https://www.packtpub.com/ 22 | 23 | 24 | ## Instructions and Navigations 25 | All of the code is organized into folders. For example, Chapter02. 26 | 27 | The code will look like the following: 28 | ``` 29 | A = tf.placeholder(tf.float32, None, name='A') 30 | B = tf.placeholder(tf.float32, None, name='B') 31 | ``` 32 | 33 | **Following is what you need for this book:** 34 | 35 | If you are a data science professional or a machine learning developer looking to build smart systems for IoT, Hands-On Artificial Intelligence for IoT is for you. If you want to learn how popular artificial intelligence (AI) techniques can be used in the Internet of Things domain, this book will also be of benefit. A basic understanding of machine learning concepts will be required to get the best out of this book. 36 | 37 | With the following software and hardware list you can run all code files present in the book (Chapter 1-12). 
38 | 39 | ### Software and Hardware List 40 | 41 | | Chapter | Software required | OS required | 42 | | -------- | ------------------------------------ | -----------------------------------| 43 | | 1 | TensorFlow1.x Python 3.5> Numpy 1.14>| Windows10 MacOS 10.x Ubuntu 16.04+ | 44 | | 2 | TensorFlow1.x Python 3.5> Numpy 1.14>| Windows10 MacOS 10.x Ubuntu 16.04+ | 45 | | | Keras OpenpyXL SQL | | 46 | | | HDFS H5py | | 47 | | | | | 48 | | 3-5,7,9-11| TensorFlow1.x Python 3.5> Numpy 1.14>| Windows10 MacOS 10.x Ubuntu 16.04+ | 49 | | | Keras Scikit Learn Matplotlib | | 50 | | | Pandas Scipy | | 51 | | | | | 52 | | 6 | TensorFlow1.x Python 3.5> Numpy 1.14>| MacOS 10.x Ubuntu 16.04+ | 53 | | | Keras Scikit Learn Matplotlib | | 54 | | | Pandas Open AI Gym Random | | 55 | | | | | 56 | | 8 | TensorFlow1.x Python 3.5> Numpy 1.14>| Ubuntu 16.04 | 57 | | | Keras Scikit Learn Matplotlib | | 58 | | | Scipy Pandas Kafka | | 59 | | | TensorFrames SparkDL PySpark | 60 | | | TensorFlowOnSpark | 61 | 62 | 63 | 64 | 65 | We also provide a PDF file that has color images of the screenshots/diagrams used in this book. [Click here to download it](http://www.packtpub.com/sites/default/files/downloads/9781788836067_ColorImages.pdf). 66 | 67 | ### Related products 68 | * Artificial Intelligence with Python [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/artificial-intelligence-python?utm_source=github&utm_medium=repository&utm_campaign=9781786464392) [[Amazon]](https://www.amazon.com/dp/178646439X) 69 | 70 | * Artificial Intelligence By Example [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/artificial-intelligence-example?utm_source=github&utm_medium=repository&utm_campaign=9781788990547) [[Amazon]](https://www.amazon.com/dp/1788990544) 71 | 72 | ## Get to Know the Author 73 | **Amita Kapoor** is an associate professor in the Department of Electronics, SRCASW, University of Delhi, and has been actively teaching neural networks and artificial intelligence for the last 20 years. She completed her master's in electronics in 1996 and her PhD in 2011. During her PhD she was awarded the prestigious DAAD fellowship to pursue part of her research at the Karlsruhe Institute of Technology, Karlsruhe, Germany. She was awarded the Best Presentation Award at the Photonics 2008 international conference. She is an active member of ACM, AAAI, IEEE, and INNS. She has co-authored two books. She has more than 40 publications in international journals and conferences. Her present research areas include machine learning, artificial intelligence, deep reinforcement learning, and robotics. 74 | 75 | 76 | ## Other books by the author 77 | * [TensorFlow 1.x Deep Learning Cookbook](https://www.packtpub.com/big-data-and-business-intelligence/tensorflow-1x-deep-learning-cookbook?utm_source=github&utm_medium=repository&utm_campaign=9781788293594) 78 | 79 | ### Suggestions and Feedback 80 | [Click here](https://docs.google.com/forms/d/e/1FAIpQLSdy7dATC6QmEL81FIUuymZ0Wy9vH1jHkvpY57OiMeKGqib_Ow/viewform) if you have any feedback or suggestions. 
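The requirements in the table above can be collected into one bootstrap cell. A sketch only: the pins below are read off the table and are assumptions, not commands from the book:

```
# Hypothetical setup cell matching the software table above.
!pip install "tensorflow>=1.4,<2.0" "numpy>=1.14" keras scikit-learn \
    matplotlib pandas scipy openpyxl tables gym deap pymongo
```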
81 | -------------------------------------------------------------------------------- /Chapter05/dag.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from copy import copy, deepcopy 3 | from collections import deque 4 | 5 | try: 6 | from collections import OrderedDict 7 | except: 8 | from ordereddict import OrderedDict 9 | 10 | 11 | class DAGValidationError(Exception): 12 | pass 13 | 14 | 15 | class DAG(object): 16 | """ Directed acyclic graph implementation. """ 17 | 18 | def __init__(self): 19 | """ Construct a new DAG with no nodes or edges. """ 20 | self.reset_graph() 21 | 22 | def add_node(self, node_name, graph=None): 23 | """ Add a node if it does not exist yet, or error out. """ 24 | if not graph: 25 | graph = self.graph 26 | if node_name in graph: 27 | raise KeyError('node %s already exists' % node_name) 28 | graph[node_name] = set() 29 | 30 | def add_node_if_not_exists(self, node_name, graph=None): 31 | try: 32 | self.add_node(node_name, graph=graph) 33 | except KeyError: 34 | pass 35 | 36 | def delete_node(self, node_name, graph=None): 37 | """ Deletes this node and all edges referencing it. """ 38 | if not graph: 39 | graph = self.graph 40 | if node_name not in graph: 41 | raise KeyError('node %s does not exist' % node_name) 42 | graph.pop(node_name) 43 | 44 | for node, edges in graph.items(): 45 | if node_name in edges: 46 | edges.remove(node_name) 47 | 48 | def delete_node_if_exists(self, node_name, graph=None): 49 | try: 50 | self.delete_node(node_name, graph=graph) 51 | except KeyError: 52 | pass 53 | 54 | def add_edge(self, ind_node, dep_node, graph=None): 55 | """ Add an edge (dependency) between the specified nodes. """ 56 | if not graph: 57 | graph = self.graph 58 | if ind_node not in graph or dep_node not in graph: 59 | raise KeyError('one or more nodes do not exist in graph') 60 | test_graph = deepcopy(graph) 61 | test_graph[ind_node].add(dep_node) 62 | is_valid, message = self.validate(test_graph) 63 | if is_valid: 64 | graph[ind_node].add(dep_node) 65 | else: 66 | raise DAGValidationError() 67 | 68 | def delete_edge(self, ind_node, dep_node, graph=None): 69 | """ Delete an edge from the graph. """ 70 | if not graph: 71 | graph = self.graph 72 | if dep_node not in graph.get(ind_node, []): 73 | raise KeyError('this edge does not exist in graph') 74 | graph[ind_node].remove(dep_node) 75 | 76 | def rename_edges(self, old_task_name, new_task_name, graph=None): 77 | """ Change references to a task in existing edges. """ 78 | if not graph: 79 | graph = self.graph 80 | for node, edges in graph.items(): 81 | 82 | if node == old_task_name: 83 | graph[new_task_name] = copy(edges) 84 | del graph[old_task_name] 85 | 86 | else: 87 | if old_task_name in edges: 88 | edges.remove(old_task_name) 89 | edges.add(new_task_name) 90 | 91 | def predecessors(self, node, graph=None): 92 | """ Returns a list of all predecessors of the given node """ 93 | if graph is None: 94 | graph = self.graph 95 | return [key for key in graph if node in graph[key]] 96 | 97 | def downstream(self, node, graph=None): 98 | """ Returns a list of all nodes this node has edges towards. 
""" 99 | if graph is None: 100 | graph = self.graph 101 | if node not in graph: 102 | raise KeyError('node %s is not in graph' % node) 103 | return list(graph[node]) 104 | 105 | def all_downstreams(self, node, graph=None): 106 | """Returns a list of all nodes ultimately downstream 107 | of the given node in the dependency graph, in 108 | topological order.""" 109 | if graph is None: 110 | graph = self.graph 111 | nodes = [node] 112 | nodes_seen = set() 113 | i = 0 114 | while i < len(nodes): 115 | downstreams = self.downstream(nodes[i], graph) 116 | for downstream_node in downstreams: 117 | if downstream_node not in nodes_seen: 118 | nodes_seen.add(downstream_node) 119 | nodes.append(downstream_node) 120 | i += 1 121 | return filter(lambda node: node in nodes_seen, self.topological_sort(graph=graph)) 122 | 123 | def all_leaves(self, graph=None): 124 | """ Return a list of all leaves (nodes with no downstreams) """ 125 | if graph is None: 126 | graph = self.graph 127 | return [key for key in graph if not graph[key]] 128 | 129 | def from_dict(self, graph_dict): 130 | """ Reset the graph and build it from the passed dictionary. 131 | 132 | The dictionary takes the form of {node_name: [directed edges]} 133 | """ 134 | 135 | self.reset_graph() 136 | for new_node in graph_dict.iterkeys(): 137 | self.add_node(new_node) 138 | for ind_node, dep_nodes in graph_dict.items(): 139 | if not isinstance(dep_nodes, list): 140 | raise TypeError('dict values must be lists') 141 | for dep_node in dep_nodes: 142 | self.add_edge(ind_node, dep_node) 143 | 144 | def reset_graph(self): 145 | """ Restore the graph to an empty state. """ 146 | self.graph = OrderedDict() 147 | 148 | def ind_nodes(self, graph=None): 149 | """ Returns a list of all nodes in the graph with no dependencies. """ 150 | if graph is None: 151 | graph = self.graph 152 | dependent_nodes = set(tuple(node) for dependents in graph.items() for node in dependents) 153 | return [node for node in graph.keys() if node not in dependent_nodes] 154 | 155 | def validate(self, graph=None): 156 | """ Returns (Boolean, message) of whether DAG is valid. """ 157 | graph = graph if graph is not None else self.graph 158 | if len(self.ind_nodes(graph)) == 0: 159 | return (False, 'no independent nodes detected') 160 | try: 161 | self.topological_sort(graph) 162 | except ValueError: 163 | return (False, 'failed topological sort') 164 | return (True, 'valid') 165 | 166 | def topological_sort(self, graph=None): 167 | """ Returns a topological ordering of the DAG. 168 | 169 | Raises an error if this is not possible (graph is not valid). 
170 | """ 171 | if graph is None: 172 | graph = self.graph 173 | 174 | in_degree = {} 175 | for u in graph: 176 | in_degree[u] = 0 177 | 178 | for u in graph: 179 | for v in graph[u]: 180 | in_degree[v] += 1 181 | 182 | queue = deque() 183 | for u in in_degree: 184 | if in_degree[u] == 0: 185 | queue.appendleft(u) 186 | 187 | l = [] 188 | while queue: 189 | u = queue.pop() 190 | l.append(u) 191 | for v in graph[u]: 192 | in_degree[v] -= 1 193 | if in_degree[v] == 0: 194 | queue.appendleft(v) 195 | 196 | if len(l) == len(graph): 197 | return l 198 | else: 199 | raise ValueError('graph is not acyclic') 200 | 201 | def size(self): 202 | return len(self.graph) 203 | -------------------------------------------------------------------------------- /Chapter06/Taxi_drop-off.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stderr", 12 | "output_type": "stream", 13 | "text": [ 14 | "[2018-06-29 21:51:21,422] Making new env: Taxi-v2\n" 15 | ] 16 | }, 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "+---------+\n", 22 | "|\u001b[34;1mR\u001b[0m: | : :G|\n", 23 | "| : : : : |\n", 24 | "| : :\u001b[43m \u001b[0m: : |\n", 25 | "| | : | : |\n", 26 | "|Y| : |\u001b[35mB\u001b[0m: |\n", 27 | "+---------+\n", 28 | "\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "import gym\n", 34 | "import numpy as np\n", 35 | "env = gym.make('Taxi-v2')\n", 36 | "obs = env.reset()\n", 37 | "env.render()" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": { 44 | "scrolled": true 45 | }, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "The Q-table will have 500 rows and 6 columns, resulting in total 3000 entries\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "m = env.observation_space.n # size of the state space\n", 57 | "n = env.action_space.n # size of action space\n", 58 | "\n", 59 | "print(\"The Q-table will have {} rows and {} columns, resulting in total {} entries\".\\\n", 60 | " format(m,n,m*n))" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 3, 66 | "metadata": { 67 | "scrolled": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "# Intialize the Q-table and hyperparameters\n", 72 | "Q = np.zeros([m,n])\n", 73 | "gamma = 0.97\n", 74 | "max_episode = 1000\n", 75 | "max_steps = 100\n", 76 | "alpha = 0.7\n", 77 | "epsilon = 0.3" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 4, 83 | "metadata": { 84 | "scrolled": true 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "for i in range(max_episode):\n", 89 | " # Start with new environment\n", 90 | " s = env.reset()\n", 91 | " done = False\n", 92 | " counter = 0\n", 93 | " for _ in range(max_steps):\n", 94 | " # Choose an action using epsilon greedy policy\n", 95 | " p = np.random.rand()\n", 96 | " if p > epsilon or (not np.any(Q[s,:])):\n", 97 | " a = env.action_space.sample() #explore\n", 98 | " else:\n", 99 | " a = np.argmax(Q[s,:]) #exploit\n", 100 | " \n", 101 | " s_new, r, done, _ = env.step(a)\n", 102 | " # Update Q table\n", 103 | " Q[s,a] = (1-alpha)*Q[s,a] + alpha*(r + gamma*np.max(Q[s_new,:]))\n", 104 | " #print(Q[s,a],r)\n", 105 | " s = s_new\n", 106 | " if done:\n", 107 | " break\n", 108 | " " 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 5, 114 | "metadata": { 
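For reference, the action selection and update used in the training loop above, written out plainly (a sketch, not the notebook's exact cell). Note that the test p > epsilon selects the random branch, so with epsilon = 0.3 the agent explores roughly 70% of the time; the conventional epsilon-greedy rule explores with probability epsilon:

```
# Conventional epsilon-greedy selection: explore with probability epsilon.
import numpy as np

def epsilon_greedy(Q, s, epsilon, n_actions):
    if np.random.rand() < epsilon or not Q[s, :].any():
        return np.random.randint(n_actions)   # explore
    return int(np.argmax(Q[s, :]))            # exploit

# Q-learning update, exactly as in the cell above:
#   Q[s, a] <- (1 - alpha) * Q[s, a] + alpha * (r + gamma * max(Q[s_new, :]))
```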
115 | "scrolled": false 116 | }, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "+---------+\n", 123 | "|R: | : :\u001b[35m\u001b[43mG\u001b[0m\u001b[0m|\n", 124 | "| : : : : |\n", 125 | "| : : : : |\n", 126 | "| | : | : |\n", 127 | "|\u001b[34;1mY\u001b[0m| : |B: |\n", 128 | "+---------+\n", 129 | "\n", 130 | "+---------+\n", 131 | "|R: | : :\u001b[35mG\u001b[0m|\n", 132 | "| : : : :\u001b[43m \u001b[0m|\n", 133 | "| : : : : |\n", 134 | "| | : | : |\n", 135 | "|\u001b[34;1mY\u001b[0m| : |B: |\n", 136 | "+---------+\n", 137 | " (South)\n", 138 | "+---------+\n", 139 | "|R: | : :\u001b[35mG\u001b[0m|\n", 140 | "| : : : : |\n", 141 | "| : : : :\u001b[43m \u001b[0m|\n", 142 | "| | : | : |\n", 143 | "|\u001b[34;1mY\u001b[0m| : |B: |\n", 144 | "+---------+\n", 145 | " (South)\n", 146 | "+---------+\n", 147 | "|R: | : :\u001b[35mG\u001b[0m|\n", 148 | "| : : : : |\n", 149 | "| : : :\u001b[43m \u001b[0m: |\n", 150 | "| | : | : |\n", 151 | "|\u001b[34;1mY\u001b[0m| : |B: |\n", 152 | "+---------+\n", 153 | " (West)\n", 154 | "+---------+\n", 155 | "|R: | : :\u001b[35mG\u001b[0m|\n", 156 | "| : : : : |\n", 157 | "| : :\u001b[43m \u001b[0m: : |\n", 158 | "| | : | : |\n", 159 | "|\u001b[34;1mY\u001b[0m| : |B: |\n", 160 | "+---------+\n", 161 | " (West)\n", 162 | "+---------+\n", 163 | "|R: | : :\u001b[35mG\u001b[0m|\n", 164 | "| : : : : |\n", 165 | "| :\u001b[43m \u001b[0m: : : |\n", 166 | "| | : | : |\n", 167 | "|\u001b[34;1mY\u001b[0m| : |B: |\n", 168 | "+---------+\n", 169 | " (West)\n", 170 | "+---------+\n", 171 | "|R: | : :\u001b[35mG\u001b[0m|\n", 172 | "| : : : : |\n", 173 | "|\u001b[43m \u001b[0m: : : : |\n", 174 | "| | : | : |\n", 175 | "|\u001b[34;1mY\u001b[0m| : |B: |\n", 176 | "+---------+\n", 177 | " (West)\n", 178 | "+---------+\n", 179 | "|R: | : :\u001b[35mG\u001b[0m|\n", 180 | "| : : : : |\n", 181 | "| : : : : |\n", 182 | "|\u001b[43m \u001b[0m| : | : |\n", 183 | "|\u001b[34;1mY\u001b[0m| : |B: |\n", 184 | "+---------+\n", 185 | " (South)\n", 186 | "+---------+\n", 187 | "|R: | : :\u001b[35mG\u001b[0m|\n", 188 | "| : : : : |\n", 189 | "| : : : : |\n", 190 | "| | : | : |\n", 191 | "|\u001b[34;1m\u001b[43mY\u001b[0m\u001b[0m| : |B: |\n", 192 | "+---------+\n", 193 | " (South)\n", 194 | "+---------+\n", 195 | "|R: | : :\u001b[35mG\u001b[0m|\n", 196 | "| : : : : |\n", 197 | "| : : : : |\n", 198 | "| | : | : |\n", 199 | "|\u001b[42mY\u001b[0m| : |B: |\n", 200 | "+---------+\n", 201 | " (Pickup)\n", 202 | "+---------+\n", 203 | "|R: | : :\u001b[35mG\u001b[0m|\n", 204 | "| : : : : |\n", 205 | "| : : : : |\n", 206 | "|\u001b[42m_\u001b[0m| : | : |\n", 207 | "|Y| : |B: |\n", 208 | "+---------+\n", 209 | " (North)\n", 210 | "+---------+\n", 211 | "|R: | : :\u001b[35mG\u001b[0m|\n", 212 | "| : : : : |\n", 213 | "|\u001b[42m_\u001b[0m: : : : |\n", 214 | "| | : | : |\n", 215 | "|Y| : |B: |\n", 216 | "+---------+\n", 217 | " (North)\n", 218 | "+---------+\n", 219 | "|R: | : :\u001b[35mG\u001b[0m|\n", 220 | "| : : : : |\n", 221 | "| :\u001b[42m_\u001b[0m: : : |\n", 222 | "| | : | : |\n", 223 | "|Y| : |B: |\n", 224 | "+---------+\n", 225 | " (East)\n", 226 | "+---------+\n", 227 | "|R: | : :\u001b[35mG\u001b[0m|\n", 228 | "| : : : : |\n", 229 | "| : :\u001b[42m_\u001b[0m: : |\n", 230 | "| | : | : |\n", 231 | "|Y| : |B: |\n", 232 | "+---------+\n", 233 | " (East)\n", 234 | "+---------+\n", 235 | "|R: | : :\u001b[35mG\u001b[0m|\n", 236 | "| : : : : |\n", 237 | "| : : :\u001b[42m_\u001b[0m: |\n", 238 | "| | : | : |\n", 239 | "|Y| : |B: |\n", 240 | 
"+---------+\n", 241 | " (East)\n", 242 | "+---------+\n", 243 | "|R: | : :\u001b[35mG\u001b[0m|\n", 244 | "| : : :\u001b[42m_\u001b[0m: |\n", 245 | "| : : : : |\n", 246 | "| | : | : |\n", 247 | "|Y| : |B: |\n", 248 | "+---------+\n", 249 | " (North)\n", 250 | "+---------+\n", 251 | "|R: | : :\u001b[35mG\u001b[0m|\n", 252 | "| : : : :\u001b[42m_\u001b[0m|\n", 253 | "| : : : : |\n", 254 | "| | : | : |\n", 255 | "|Y| : |B: |\n", 256 | "+---------+\n", 257 | " (East)\n", 258 | "+---------+\n", 259 | "|R: | : :\u001b[35m\u001b[42mG\u001b[0m\u001b[0m|\n", 260 | "| : : : : |\n", 261 | "| : : : : |\n", 262 | "| | : | : |\n", 263 | "|Y| : |B: |\n", 264 | "+---------+\n", 265 | " (North)\n", 266 | "+---------+\n", 267 | "|R: | : :\u001b[35m\u001b[42mG\u001b[0m\u001b[0m|\n", 268 | "| : : : : |\n", 269 | "| : : : : |\n", 270 | "| | : | : |\n", 271 | "|Y| : |B: |\n", 272 | "+---------+\n", 273 | " (Dropoff)\n" 274 | ] 275 | } 276 | ], 277 | "source": [ 278 | "s = env.reset()\n", 279 | "done = False\n", 280 | "env.render()\n", 281 | "# Test the learned Agent\n", 282 | "for i in range(max_steps):\n", 283 | " a = np.argmax(Q[s,:])\n", 284 | " s, _, done, _ = env.step(a)\n", 285 | " env.render()\n", 286 | " if done:\n", 287 | " break " 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": { 294 | "scrolled": true 295 | }, 296 | "outputs": [], 297 | "source": [] 298 | } 299 | ], 300 | "metadata": { 301 | "kernelspec": { 302 | "display_name": "Python [conda env:tensorflow]", 303 | "language": "python", 304 | "name": "conda-env-tensorflow-py" 305 | }, 306 | "language_info": { 307 | "codemirror_mode": { 308 | "name": "ipython", 309 | "version": 3 310 | }, 311 | "file_extension": ".py", 312 | "mimetype": "text/x-python", 313 | "name": "python", 314 | "nbconvert_exporter": "python", 315 | "pygments_lexer": "ipython3", 316 | "version": "3.5.4" 317 | } 318 | }, 319 | "nbformat": 4, 320 | "nbformat_minor": 2 321 | } 322 | -------------------------------------------------------------------------------- /Chapter05/GuessTheWord.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import string\n", 12 | "import random\n", 13 | "\n", 14 | "from deap import base, creator, tools" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "## Create a Finess base class which is to be minimized\n", 24 | "# weights is a tuple -sign tells to minimize, +1 to maximize\n", 25 | "\n", 26 | "creator.create(\"FitnessMax\", base.Fitness, weights=(1.0,)) \n", 27 | "\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "This will define a class ```FitnessMax``` which inherits the Fitness class of deep.base module. The attribute weight which is a tuple is used to specify whether fitness function is to be maximized (weights=1.0) or minimized weights=-1.0. The DEAP library allows multi-objective Fitness function. \n", 35 | "\n", 36 | "### Individual\n", 37 | "\n", 38 | "Next we create a ```Individual``` class, which inherits the class ```list``` and has the ```FitnessMax``` class in its Fitness attribute. 
" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "# Now we create a individual class\n", 48 | "\n", 49 | "creator.create(\"Individual\", list, fitness=creator.FitnessMax)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "# Population\n", 57 | "\n", 58 | "Once the individuals are created we need to create population and define gene pool, to do this we use DEAP toolbox. All the objects that we will need now onwards- an individual, the population, the functions, the operators and the arguments are stored in the container called ```Toolbox```\n", 59 | "\n", 60 | "We can add or remove content in the container ```Toolbox``` using ```register()``` and ```unregister()``` methods" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "toolbox = base.Toolbox()\n", 72 | "\n", 73 | "# Gene Pool\n", 74 | "toolbox.register(\"attr_string\", random.choice, string.ascii_letters + string.digits )" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "#Number of characters in word\n", 86 | "word = list('hello')\n", 87 | "N = len(word)\n", 88 | "\n", 89 | "# Initialize population\n", 90 | "toolbox.register(\"individual\", tools.initRepeat, creator.Individual, toolbox.attr_string, N )\n", 91 | "toolbox.register(\"population\",tools.initRepeat, list, toolbox.individual)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 6, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "def evalWord(individual, word):\n", 103 | " #word = list('hello')\n", 104 | " return sum(individual[i] == word[i] for i in range(len(individual))),\n" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 7, 110 | "metadata": { 111 | "collapsed": true 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "toolbox.register(\"evaluate\", evalWord, word)\n", 116 | "toolbox.register(\"mate\", tools.cxTwoPoint)\n", 117 | "toolbox.register(\"mutate\", tools.mutShuffleIndexes, indpb=0.05)\n", 118 | "toolbox.register(\"select\", tools.selTournament, tournsize=3)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "We define the other operators/functions we will need by registering them in the toolbox. This allows us to easily switch between the operators if desired.\n", 126 | "\n", 127 | "## Evolving the Population\n", 128 | "Once the representation and the genetic operators are chosen, we will define an algorithm combining all the individual parts and performing the evolution of our population until the One Max problem is solved. It is good style in programming to do so within a function, generally named main().\n", 129 | "\n", 130 | "Creating the Population\n", 131 | "First of all, we need to actually instantiate our population. But this step is effortlessly done using the population() method we registered in our toolbox earlier on." 
132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 8, 137 | "metadata": { 138 | "collapsed": true 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "def main():\n", 143 | " random.seed(64)\n", 144 | "\n", 145 | " # create an initial population of 300 individuals (where\n", 146 | " # each individual is a list of integers)\n", 147 | " pop = toolbox.population(n=300)\n", 148 | "\n", 149 | " # CXPB is the probability with which two individuals\n", 150 | " # are crossed\n", 151 | " #\n", 152 | " # MUTPB is the probability for mutating an individual\n", 153 | " CXPB, MUTPB = 0.5, 0.2\n", 154 | " \n", 155 | " print(\"Start of evolution\")\n", 156 | " \n", 157 | " # Evaluate the entire population\n", 158 | " fitnesses = list(map(toolbox.evaluate, pop))\n", 159 | " for ind, fit in zip(pop, fitnesses):\n", 160 | " #print(ind, fit)\n", 161 | " ind.fitness.values = fit\n", 162 | " \n", 163 | " print(\" Evaluated %i individuals\" % len(pop))\n", 164 | "\n", 165 | " # Extracting all the fitnesses of \n", 166 | " fits = [ind.fitness.values[0] for ind in pop]\n", 167 | "\n", 168 | " # Variable keeping track of the number of generations\n", 169 | " g = 0\n", 170 | " \n", 171 | " # Begin the evolution\n", 172 | " while max(fits) < 5 and g < 1000:\n", 173 | " # A new generation\n", 174 | " g = g + 1\n", 175 | " print(\"-- Generation %i --\" % g)\n", 176 | " \n", 177 | " # Select the next generation individuals\n", 178 | " offspring = toolbox.select(pop, len(pop))\n", 179 | " # Clone the selected individuals\n", 180 | " offspring = list(map(toolbox.clone, offspring))\n", 181 | " \n", 182 | " # Apply crossover and mutation on the offspring\n", 183 | " for child1, child2 in zip(offspring[::2], offspring[1::2]):\n", 184 | "\n", 185 | " # cross two individuals with probability CXPB\n", 186 | " if random.random() < CXPB:\n", 187 | " toolbox.mate(child1, child2)\n", 188 | "\n", 189 | " # fitness values of the children\n", 190 | " # must be recalculated later\n", 191 | " del child1.fitness.values\n", 192 | " del child2.fitness.values\n", 193 | "\n", 194 | " for mutant in offspring:\n", 195 | "\n", 196 | " # mutate an individual with probability MUTPB\n", 197 | " if random.random() < MUTPB:\n", 198 | " toolbox.mutate(mutant)\n", 199 | " del mutant.fitness.values\n", 200 | " \n", 201 | " # Evaluate the individuals with an invalid fitness\n", 202 | " invalid_ind = [ind for ind in offspring if not ind.fitness.valid]\n", 203 | " fitnesses = map(toolbox.evaluate, invalid_ind)\n", 204 | " for ind, fit in zip(invalid_ind, fitnesses):\n", 205 | " ind.fitness.values = fit\n", 206 | " \n", 207 | " print(\" Evaluated %i individuals\" % len(invalid_ind))\n", 208 | " \n", 209 | " # The population is entirely replaced by the offspring\n", 210 | " pop[:] = offspring\n", 211 | " \n", 212 | " # Gather all the fitnesses in one list and print the stats\n", 213 | " fits = [ind.fitness.values[0] for ind in pop]\n", 214 | " \n", 215 | " length = len(pop)\n", 216 | " mean = sum(fits) / length\n", 217 | " sum2 = sum(x*x for x in fits)\n", 218 | " std = abs(sum2 / length - mean**2)**0.5\n", 219 | " \n", 220 | " print(\" Min %s\" % min(fits))\n", 221 | " print(\" Max %s\" % max(fits))\n", 222 | " print(\" Avg %s\" % mean)\n", 223 | " print(\" Std %s\" % std)\n", 224 | " \n", 225 | " print(\"-- End of (successful) evolution --\")\n", 226 | " \n", 227 | " best_ind = tools.selBest(pop, 1)[0]\n", 228 | " print(\"Best individual is %s, %s\" % (''.join(best_ind), best_ind.fitness.values))" 229 | ] 230 | }, 231 | { 232 
| "cell_type": "code", 233 | "execution_count": 9, 234 | "metadata": {}, 235 | "outputs": [ 236 | { 237 | "name": "stdout", 238 | "output_type": "stream", 239 | "text": [ 240 | "Start of evolution\n", 241 | " Evaluated 300 individuals\n", 242 | "-- Generation 1 --\n", 243 | " Evaluated 178 individuals\n", 244 | " Min 0.0\n", 245 | " Max 2.0\n", 246 | " Avg 0.22\n", 247 | " Std 0.4526956299030656\n", 248 | "-- Generation 2 --\n", 249 | " Evaluated 174 individuals\n", 250 | " Min 0.0\n", 251 | " Max 2.0\n", 252 | " Avg 0.51\n", 253 | " Std 0.613650280425803\n", 254 | "-- Generation 3 --\n", 255 | " Evaluated 191 individuals\n", 256 | " Min 0.0\n", 257 | " Max 3.0\n", 258 | " Avg 0.9766666666666667\n", 259 | " Std 0.6502221842484989\n", 260 | "-- Generation 4 --\n", 261 | " Evaluated 167 individuals\n", 262 | " Min 0.0\n", 263 | " Max 4.0\n", 264 | " Avg 1.45\n", 265 | " Std 0.6934214687571574\n", 266 | "-- Generation 5 --\n", 267 | " Evaluated 191 individuals\n", 268 | " Min 0.0\n", 269 | " Max 4.0\n", 270 | " Avg 1.9833333333333334\n", 271 | " Std 0.7765665171481163\n", 272 | "-- Generation 6 --\n", 273 | " Evaluated 168 individuals\n", 274 | " Min 0.0\n", 275 | " Max 4.0\n", 276 | " Avg 2.48\n", 277 | " Std 0.7678541528180985\n", 278 | "-- Generation 7 --\n", 279 | " Evaluated 192 individuals\n", 280 | " Min 1.0\n", 281 | " Max 5.0\n", 282 | " Avg 3.013333333333333\n", 283 | " Std 0.6829999186595044\n", 284 | "-- End of (successful) evolution --\n", 285 | "Best individual is hello, (5.0,)\n" 286 | ] 287 | } 288 | ], 289 | "source": [ 290 | "main()" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": { 297 | "collapsed": true 298 | }, 299 | "outputs": [], 300 | "source": [] 301 | } 302 | ], 303 | "metadata": { 304 | "kernelspec": { 305 | "display_name": "Python [conda env:tensorflow]", 306 | "language": "python", 307 | "name": "conda-env-tensorflow-py" 308 | }, 309 | "language_info": { 310 | "codemirror_mode": { 311 | "name": "ipython", 312 | "version": 3 313 | }, 314 | "file_extension": ".py", 315 | "mimetype": "text/x-python", 316 | "name": "python", 317 | "nbconvert_exporter": "python", 318 | "pygments_lexer": "ipython3", 319 | "version": "3.5.4" 320 | } 321 | }, 322 | "nbformat": 4, 323 | "nbformat_minor": 2 324 | } 325 | -------------------------------------------------------------------------------- /Chapter03/Wine_quality_using_Ensemble_learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# Import the modules\n", 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "from sklearn.preprocessing import MinMaxScaler, LabelEncoder\n", 14 | "from sklearn.model_selection import train_test_split\n", 15 | "from sklearn.metrics import confusion_matrix, accuracy_score\n", 16 | "from sklearn.svm import SVC \n", 17 | "from sklearn.naive_bayes import GaussianNB \n", 18 | "from LogisticRegressor import LogisticRegressor \n", 19 | "from sklearn.tree import DecisionTreeClassifier\n", 20 | "from sklearn.ensemble import VotingClassifier\n", 21 | "import seaborn as sns\n", 22 | "% matplotlib inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "filename = 'winequality-red.csv' #Download the file from 
https://archive.ics.uci.edu/ml/datasets/wine+quality\n", 32 | "df = pd.read_csv(filename, sep=';')\n", 33 | "#categorize wine quality in three levels\n", 34 | "bins = (0,3.5,5.5,10)\n", 35 | "categories = pd.cut(df['quality'], bins, labels = ['bad','ok','good'])\n", 36 | "df['quality'] = categories\n", 37 | "# Preprocessing and splitting data to X and y\n", 38 | "X = df.drop(['quality'], axis = 1)\n", 39 | "scaler = MinMaxScaler()\n", 40 | "X_new = scaler.fit_transform(X)\n", 41 | "y = df['quality']\n", 42 | "from sklearn.preprocessing import LabelEncoder\n", 43 | "labelencoder_y = LabelEncoder()\n", 44 | "y = labelencoder_y.fit_transform(y)\n", 45 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 323)\n", 46 | "\n" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/plain": [ 57 | "array([2, 2, 1, ..., 2, 2, 1])" 58 | ] 59 | }, 60 | "execution_count": 3, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "y_train" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "good 855\n", 78 | "ok 734\n", 79 | "bad 10\n", 80 | "Name: quality, dtype: int64" 81 | ] 82 | }, 83 | "execution_count": 4, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "df['quality'].value_counts()" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 5, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "clf1 = SVC(random_state=22)\n", 99 | "clf2 = DecisionTreeClassifier(random_state=23)\n", 100 | "clf3 = GaussianNB()\n", 101 | "X = np.array(X_train)\n", 102 | "y = np.array(y_train)\n", 103 | "eclf = VotingClassifier(estimators=[\n", 104 | " ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')\n", 105 | "eclf = eclf.fit(X, y)\n", 106 | "y_pred = eclf.predict(X_test)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 6, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "data": { 116 | "text/plain": [ 117 | "" 118 | ] 119 | }, 120 | "execution_count": 6, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | }, 124 | { 125 | "data": { 126 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAWAAAAD8CAYAAABJsn7AAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAFvBJREFUeJzt3Xl8VPW5x/HPM5NAAAOKFIkBFRD1ulRQxAUXLFUBrUurrRv6stRUrnuvreBKrVXbq1gtVowgVYu7pbgA4latC6tyAUUsm0IIUHd2yOS5f2TEEbNMwkx+mcP3zeu8kvnN5JyHGL75+ZzfmWPujoiINL5Y6AJERLZXCmARkUAUwCIigSiARUQCUQCLiASiABYRCUQBLCISiAJYRKQGZvaAma0ys7kpY/9rZh+Y2WwzG2dmOybH9zCz9WY2K7mNrGv/CmARkZr9Fei31diLwP7u/n3gQ2BoynML3b17cruorp3nZazMmg7QrFiX2mVZzCx0CZGXH8/6PxUB1q5bss0/zJs/WZR25uS361Lr8dz9dTPbY6uxySkPpwCn16e+VJoBi4g03M+BiSmPO5vZu2b2mpkdVdcX69e6iERLZSLtl5pZCVCSMlTq7qVpfu21QAUwNjlUDuzm7p+a2cHAP8xsP3f/qqZ9KIBFJFoSFWm/NBm2aQVuKjM7HzgJ6OvJdzRz943AxuTnM81sIbAXMKOm/SiARSRS3Cuzun8z6wdcDRzj7utSxr8HfObuCTPrAnQDFtW2LwWwiERLZeYC2MweBfoA7cxsGXAjVasemgMvWtUJ8CnJFQ9HAzeZWQWQAC5y989q3X+23w9YqyCyT6sgsk+rIBpHJlZBbFr6f2lnTrNOBwb9x6OfKhGJlnqchAtNASwi0ZLlHnAmKYBFJFK8HqsgQlMAi0i0ZPAkXLYpgEUkWtSCEBEJRCfhREQC0QxYRCQQnYQTEQlEJ+FERMJwVw9YRCQM9YBFRAJRC0JEJBDNgEVEAklsDl1B2hTAIhItakGIiASiFoSISCCaAYuIBKIAFhEJw3USTkQkEPWARUQCUQtCRCQQzYBFRALRDFhEJBDNgEVEAqnInTdkj4UuoKk5qMcBvPvOS3zw/hvcOfym0OVE0k2//Q0LF0zjs0/nhy4l0v4x/kGmTJnI9BmTuevu3xOLbSf/3L0y/S2w7eS/SPruGXErgwdfzT77Hkm3PTvT74RjQ5cUOc89/xK9jzwpdBmRN/DciznssP4c0vN42rVry49/fGLokhpHZWX6W2B1tiDMbB/gFKAYcGA58Iy7z8tybY2uQ4f2FLYuZMrUmQA8PPYpTj65H5NeeDVwZdEybdo7oUvYLqxevQaAvLw8mjXLx90DV9RImsDMNl21zoDN7GrgMcCAacD05OePmtmQ7JfXuIp37UDZsvItj8uWlVO8a4eAFYlsm/HjH2LJRzNZs3ot48ZNCF1O48ihGXBdLYhBwCHufpu7/y253Qb0Sj4XKWb2nTFnO5k1SCSdcsp5dO3Si2bNm9GnzxGhy2kcEeoBVwK7VjNelHyuWmZWYmYzzGxGZeXabamvUS0rK6e4Y9GWx8Udi1i+fGXAikS23caNG5nw/EuceNJxoUtpHBUV6W+B1RXAVwAvm9lEMytNbpOAl4HLa/oidy91957u3jMWa5XJerNqxYpVrF69hkN7HQTAwHNO59lnXwhclUj9tWrVkg4dvgdAPB7n+BOO5cMPFwauqpG4p78FVutJOHefZGZ7UdVyKKaq/7sMmO65dO/nerjkkqGMHn0nLQoKmPTCq0yc9ErokiLn1luu5Wc/O5WWLVuwaOF0xox5lN/dPDx0WZHSqlVLnnhyFM2bNSMWj/Paa28x6v6xoctqHE2gt5suy/aZ0bxmxeF/zURcrJretWRWflzXLDWGteuWbPMP8/qx16edOS3O+V3Qfzz6qRKRaGkCJ9fSpQsxRCRaEon0tzqY2QNmtsrM5qaMtTWzF83s38mPOyXHzczuNrMFZjbbzA6qa/8KYBGJlsyuA/4r0G+rsSHAy+7ejaoFCV9fE9Ef6JbcSoB769q5AlhEoiWDAezurwOfbTV8CvBg8vMHgVNTxh/yKlOAHc2siFoogEUkWupxIUbqNQvJrSSNI+zi7uUAyY/tk+PFwNKU1y1LjtVIJ+FEJFK8Mv2FV+5eCpRm6NDVraiotRgFsIhES/bXAa80syJ3L0+2GFYlx5cBnVJe15GqNy+rkVoQIhItGVwFUYNngPOTn58PjE8ZPy+5GuIw4MuvWxU10QxYRKIlgzNgM3sU6AO0M7NlwI3AbcATZjYI+Bg4I/nyCcAAYAGwDrigrv0rgEUkWjIYwO5+Vg1P9a3mtQ5cXJ/9K4BFJFqawJvspEsBLCLRkkNvxqMAFpFoqccytNAUwCISLQ1f3dDoFMAiEimuFoSISCBqQYiIBJJD7wesABaRaNEMWEQkkAqdhBMRCUMtCBGRQNSCEBEJQ8vQRERC0QxYRCQQBbCISCC6FFlEJIz63BMuNAWwiESLAlhEJBCtghARCUQzYBGRQBTAIiJheEItCGlEa8teD11C5A3reV3oEiRdmgGLiIShZWgiIqEogEVEAsmdFrACWESixStyJ4EVwCISLbmTvwpgEYkWnYQTEQlFM2ARkTA0AxYRCUUzYBGRMLwidAXpUwCLSKTk0F3pFcAiEjEKYBGRMDI1AzazvYHHU4a6ADcAOwIXAv9Jjl/j7hMacgwFsIhESqYC2N3nA90BzCwOlAHjgAuAO9399m09hgJYRCLFE5aN3fYFFrr7R2aZ238sY3sSEWkCvDL9zcxKzGxGylZSw27PBB5NeXyJmc02swfMbKeG1qoAFpFI8UpLf3MvdfeeKVvp1vszs2bAycCTyaF7ga5UtSfKgTsaWqtaECISKVlYhtYfeMfdVwJ8/RHAzO4HnmvojhXAIhIp7hnvAZ9FSvvBzIrcvTz58DRgbkN3rAAWkUjJ5AzYzFoCxwG/TBn+o5l1BxxYstVz9aIAFpFIqczgKgh3XwfsvNXYwEztXwEsIpHilVlZhpYVCmARiRQFsIhIIJ47bwesABaRaNEMWEQkkCwsQ8saBbCIREoiO+8FkRUKYBGJFM2ARUQCUQ9YRCQQrYIQEQlEM+AcdlCPAxg9+k5aFBQwcdIrXPmrG0KXlJOuu2U4r785jbY77cg//jYSgNtHjOK1N6eSl59Hp+Iibr7mV7Qu3AGA+QsWc9Mf72bN2nXEYjEeG3UXzZs3C/lXyCl5zfO58PEbiDfPIxaP897Eqbx859N0OXxf+l97DvH8PMrmLmbcb0qpTOTQTdMaIFGZO++ymzuVNpJ7RtzK4MFXs8++R9Jtz870O+HY0CXlpFMHHMfI4Td/a+zwQ3ow7uGRjHvoXvboVMyoh6tut1VRkWDITX/k+l9fyvix9zFmxB/Iy4uHKDtnVWzczOizb2ZE/6GMGDCUbsccyG4HdeMndwzmsUv/zN0nXM0Xyz6hx0+ODl1q1rmnv4WmAE7RoUN7ClsXMmXqTAAeHvsUJ5/cL3
[remainder of base64-encoded PNG output (a confusion-matrix heatmap) omitted]\n", 127 | "text/plain": [ 128 | "" 129 | ] 130 | }, 131 | "metadata": {}, 132 | "output_type": "display_data" 133 | } 134 | ], 135 | "source": [ 136 | "cm = confusion_matrix(y_test, y_pred)\n", 137 | "sns.heatmap(cm,annot=True,fmt='2.0f')" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 7, 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Accuracy is 0.740625\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "print(\"Accuracy is {}\".format(accuracy_score(y_test, y_pred)))" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": {}, 161 | "outputs": [], 162 | "source": [] 163 | } 164 | ], 165 | "metadata": { 166 | "kernelspec": { 167 | "display_name": "Python [conda env:tensorflow]", 168 | "language": "python", 169 | "name": "conda-env-tensorflow-py" 170 | }, 171 | "language_info": { 172 | "codemirror_mode": { 173 | "name": "ipython", 174 | "version": 3 175 | }, 176 | "file_extension": ".py", 177 | "mimetype": "text/x-python", 178 | "name": "python", 179 | "nbconvert_exporter": "python", 180 | "pygments_lexer": "ipython3", 181 | "version": "3.5.4" 182 | } 183 | }, 184 | "nbformat": 4, 185 | "nbformat_minor": 2 186 | } 187 | -------------------------------------------------------------------------------- /Chapter02/NoSQL with Python.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Requirement already satisfied: pymongo in /Users/am/anaconda3/lib/python3.6/site-packages (3.7.1)\n", 13 | "\u001b[31mdistributed 1.21.8 requires msgpack, which is not installed.\u001b[0m\n", 14 | "\u001b[33mYou are using pip version 10.0.1, however version 18.0 is available.\n", 15 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "!pip install pymongo" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import pymongo\n", 30 | "client = pymongo.MongoClient()\n", 31 | "db = client.test" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "from sklearn.datasets import load_breast_cancer\n", 41 | "import pandas as pd\n", 42 | "\n", 43 | "cancer = 
load_breast_cancer()\n", 44 | "data = pd.DataFrame(cancer.data, columns=[cancer.feature_names])" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 4, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "import json" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 5, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/html": [ 64 | "
\n", 65 | "\n", 78 | "\n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst radiusworst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimension
017.9910.38122.801001.00.118400.277600.30010.147100.24190.07871...25.3817.33184.602019.00.16220.66560.71190.26540.46010.11890
120.5717.77132.901326.00.084740.078640.08690.070170.18120.05667...24.9923.41158.801956.00.12380.18660.24160.18600.27500.08902
219.6921.25130.001203.00.109600.159900.19740.127900.20690.05999...23.5725.53152.501709.00.14440.42450.45040.24300.36130.08758
311.4220.3877.58386.10.142500.283900.24140.105200.25970.09744...14.9126.5098.87567.70.20980.86630.68690.25750.66380.17300
420.2914.34135.101297.00.100300.132800.19800.104300.18090.05883...22.5416.67152.201575.00.13740.20500.40000.16250.23640.07678
\n", 228 | "

5 rows × 30 columns

\n", 229 | "
" 230 | ], 231 | "text/plain": [ 232 | " mean radius mean texture mean perimeter mean area mean smoothness \\\n", 233 | "0 17.99 10.38 122.80 1001.0 0.11840 \n", 234 | "1 20.57 17.77 132.90 1326.0 0.08474 \n", 235 | "2 19.69 21.25 130.00 1203.0 0.10960 \n", 236 | "3 11.42 20.38 77.58 386.1 0.14250 \n", 237 | "4 20.29 14.34 135.10 1297.0 0.10030 \n", 238 | "\n", 239 | " mean compactness mean concavity mean concave points mean symmetry \\\n", 240 | "0 0.27760 0.3001 0.14710 0.2419 \n", 241 | "1 0.07864 0.0869 0.07017 0.1812 \n", 242 | "2 0.15990 0.1974 0.12790 0.2069 \n", 243 | "3 0.28390 0.2414 0.10520 0.2597 \n", 244 | "4 0.13280 0.1980 0.10430 0.1809 \n", 245 | "\n", 246 | " mean fractal dimension ... worst radius worst texture \\\n", 247 | "0 0.07871 ... 25.38 17.33 \n", 248 | "1 0.05667 ... 24.99 23.41 \n", 249 | "2 0.05999 ... 23.57 25.53 \n", 250 | "3 0.09744 ... 14.91 26.50 \n", 251 | "4 0.05883 ... 22.54 16.67 \n", 252 | "\n", 253 | " worst perimeter worst area worst smoothness worst compactness \\\n", 254 | "0 184.60 2019.0 0.1622 0.6656 \n", 255 | "1 158.80 1956.0 0.1238 0.1866 \n", 256 | "2 152.50 1709.0 0.1444 0.4245 \n", 257 | "3 98.87 567.7 0.2098 0.8663 \n", 258 | "4 152.20 1575.0 0.1374 0.2050 \n", 259 | "\n", 260 | " worst concavity worst concave points worst symmetry worst fractal dimension \n", 261 | "0 0.7119 0.2654 0.4601 0.11890 \n", 262 | "1 0.2416 0.1860 0.2750 0.08902 \n", 263 | "2 0.4504 0.2430 0.3613 0.08758 \n", 264 | "3 0.6869 0.2575 0.6638 0.17300 \n", 265 | "4 0.4000 0.1625 0.2364 0.07678 \n", 266 | "\n", 267 | "[5 rows x 30 columns]" 268 | ] 269 | }, 270 | "execution_count": 5, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "data.head()" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 6, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [ 285 | "data_in_json = data.to_json(orient='split')" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 7, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "rows = json.loads(data_in_json)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 8, 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "name": "stderr", 304 | "output_type": "stream", 305 | "text": [ 306 | "/Users/am/anaconda3/lib/python3.6/site-packages/ipykernel/__main__.py:1: DeprecationWarning: insert is deprecated. Use insert_one or insert_many instead.\n", 307 | " if __name__ == '__main__':\n" 308 | ] 309 | }, 310 | { 311 | "data": { 312 | "text/plain": [ 313 | "ObjectId('5ba272f0d82f8a68a1fa33ab')" 314 | ] 315 | }, 316 | "execution_count": 8, 317 | "metadata": {}, 318 | "output_type": "execute_result" 319 | } 320 | ], 321 | "source": [ 322 | "db.cancer_data_2.insert(rows)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 9, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [ 331 | "cursor = db['cancer_data_2'].find({})" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 10, 337 | "metadata": {}, 338 | "outputs": [ 339 | { 340 | "name": "stdout", 341 | "output_type": "stream", 342 | "text": [ 343 | " _id \\\n", 344 | "0 5ba272f0d82f8a68a1fa33ab \n", 345 | "\n", 346 | " columns \\\n", 347 | "0 [[mean radius], [mean texture], [mean perimete... \n", 348 | "\n", 349 | " data \\\n", 350 | "0 [[17.99, 10.38, 122.8, 1001.0, 0.1184, 0.2776,... \n", 351 | "\n", 352 | " index \n", 353 | "0 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,... 
\n" 354 | ] 355 | } 356 | ], 357 | "source": [ 358 | "df = pd.DataFrame(list(cursor))\n", 359 | "print(df)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": null, 365 | "metadata": {}, 366 | "outputs": [], 367 | "source": [] 368 | } 369 | ], 370 | "metadata": { 371 | "kernelspec": { 372 | "display_name": "Python [default]", 373 | "language": "python", 374 | "name": "python3" 375 | }, 376 | "language_info": { 377 | "codemirror_mode": { 378 | "name": "ipython", 379 | "version": 3 380 | }, 381 | "file_extension": ".py", 382 | "mimetype": "text/x-python", 383 | "name": "python", 384 | "nbconvert_exporter": "python", 385 | "pygments_lexer": "ipython3", 386 | "version": "3.6.5" 387 | } 388 | }, 389 | "nbformat": 4, 390 | "nbformat_minor": 2 391 | } 392 | -------------------------------------------------------------------------------- /Chapter11/SF_crime_category_detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pyspark.ml.classification import LogisticRegression as LR\n", 10 | "from pyspark.ml.feature import RegexTokenizer, StopWordsRemover, CountVectorizer\n", 11 | "from pyspark.ml.feature import OneHotEncoder, StringIndexer, VectorAssembler\n", 12 | "from pyspark.ml.evaluation import MulticlassClassificationEvaluator\n", 13 | "from pyspark.ml import Pipeline\n", 14 | "from pyspark.sql.functions import col\n", 15 | "\n", 16 | "\n", 17 | "from pyspark.sql import SparkSession\n", 18 | "\n", 19 | "spark = SparkSession.builder \\\n", 20 | " .appName(\"Crime Category Prediction\") \\\n", 21 | " .config(\"spark.executor.memory\", \"70g\") \\\n", 22 | " .config(\"spark.driver.memory\", \"50g\") \\\n", 23 | " .config(\"spark.memory.offHeap.enabled\",True) \\\n", 24 | " .config(\"spark.memory.offHeap.size\",\"16g\") \\\n", 25 | " .getOrCreate()" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "data = spark.read.format(\"csv\"). \\\n", 35 | " options(header=\"true\", inferschema=\"true\"). 
\\\n", 36 | " load(\"sf_crime_dataset.csv\")" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/plain": [ 47 | "['Dates',\n", 48 | " 'Category',\n", 49 | " 'Descript',\n", 50 | " 'DayOfWeek',\n", 51 | " 'PdDistrict',\n", 52 | " 'Resolution',\n", 53 | " 'Address',\n", 54 | " 'X',\n", 55 | " 'Y']" 56 | ] 57 | }, 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "output_type": "execute_result" 61 | } 62 | ], 63 | "source": [ 64 | "data.columns" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 4, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "drop_list = ['Dates', 'DayOfWeek', 'PdDistrict', 'Resolution', 'Address', 'X', 'Y']\n", 74 | "\n", 75 | "data = data.select([column for column in data.columns if column not in drop_list])" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 5, 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "name": "stdout", 85 | "output_type": "stream", 86 | "text": [ 87 | "+--------------+--------------------+\n", 88 | "| Category| Descript|\n", 89 | "+--------------+--------------------+\n", 90 | "| WARRANTS| WARRANT ARREST|\n", 91 | "|OTHER OFFENSES|TRAFFIC VIOLATION...|\n", 92 | "|OTHER OFFENSES|TRAFFIC VIOLATION...|\n", 93 | "| LARCENY/THEFT|GRAND THEFT FROM ...|\n", 94 | "| LARCENY/THEFT|GRAND THEFT FROM ...|\n", 95 | "+--------------+--------------------+\n", 96 | "only showing top 5 rows\n", 97 | "\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "data.show(5)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 6, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "name": "stdout", 112 | "output_type": "stream", 113 | "text": [ 114 | "root\n", 115 | " |-- Category: string (nullable = true)\n", 116 | " |-- Descript: string (nullable = true)\n", 117 | "\n" 118 | ] 119 | } 120 | ], 121 | "source": [ 122 | "data.printSchema()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 7, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "+--------------------+------+\n", 135 | "| Category| count|\n", 136 | "+--------------------+------+\n", 137 | "| LARCENY/THEFT|174900|\n", 138 | "| OTHER OFFENSES|126182|\n", 139 | "| NON-CRIMINAL| 92304|\n", 140 | "| ASSAULT| 76876|\n", 141 | "| DRUG/NARCOTIC| 53971|\n", 142 | "| VEHICLE THEFT| 53781|\n", 143 | "| VANDALISM| 44725|\n", 144 | "| WARRANTS| 42214|\n", 145 | "| BURGLARY| 36755|\n", 146 | "| SUSPICIOUS OCC| 31414|\n", 147 | "| MISSING PERSON| 25989|\n", 148 | "| ROBBERY| 23000|\n", 149 | "| FRAUD| 16679|\n", 150 | "|FORGERY/COUNTERFE...| 10609|\n", 151 | "| SECONDARY CODES| 9985|\n", 152 | "| WEAPON LAWS| 8555|\n", 153 | "| PROSTITUTION| 7484|\n", 154 | "| TRESPASS| 7326|\n", 155 | "| STOLEN PROPERTY| 4540|\n", 156 | "|SEX OFFENSES FORC...| 4388|\n", 157 | "+--------------------+------+\n", 158 | "only showing top 20 rows\n", 159 | "\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "# By top 20 categories\n", 165 | "data.groupBy(\"Category\") \\\n", 166 | " .count() \\\n", 167 | " .orderBy(col(\"count\").desc()) \\\n", 168 | " .show()" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 8, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "+--------------------+-----+\n", 181 | "| Descript|count|\n", 182 | 
"+--------------------+-----+\n", 183 | "|GRAND THEFT FROM ...|60022|\n", 184 | "| LOST PROPERTY|31729|\n", 185 | "| BATTERY|27441|\n", 186 | "| STOLEN AUTOMOBILE|26897|\n", 187 | "|DRIVERS LICENSE, ...|26839|\n", 188 | "| WARRANT ARREST|23754|\n", 189 | "|SUSPICIOUS OCCURR...|21891|\n", 190 | "|AIDED CASE, MENTA...|21497|\n", 191 | "|PETTY THEFT FROM ...|19771|\n", 192 | "|MALICIOUS MISCHIE...|17789|\n", 193 | "| TRAFFIC VIOLATION|16471|\n", 194 | "|PETTY THEFT OF PR...|16196|\n", 195 | "|MALICIOUS MISCHIE...|15957|\n", 196 | "|THREATS AGAINST LIFE|14716|\n", 197 | "| FOUND PROPERTY|12146|\n", 198 | "|ENROUTE TO OUTSID...|11470|\n", 199 | "|GRAND THEFT OF PR...|11010|\n", 200 | "|POSSESSION OF NAR...|10050|\n", 201 | "|PETTY THEFT FROM ...|10029|\n", 202 | "|PETTY THEFT SHOPL...| 9571|\n", 203 | "+--------------------+-----+\n", 204 | "only showing top 20 rows\n", 205 | "\n" 206 | ] 207 | } 208 | ], 209 | "source": [ 210 | "# By top 20 descriptions\n", 211 | "data.groupBy(\"Descript\") \\\n", 212 | " .count() \\\n", 213 | " .orderBy(col(\"count\").desc()) \\\n", 214 | " .show()" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 9, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "# regular expression tokenizer\n", 224 | "regexTokenizer = RegexTokenizer(inputCol=\"Descript\", outputCol=\"words\", pattern=\"\\\\W\")\n", 225 | "\n", 226 | "# stop words\n", 227 | "add_stopwords = [\"http\",\"https\",\"amp\",\"rt\",\"t\",\"c\",\"the\"] \n", 228 | "\n", 229 | "stopwordsRemover = StopWordsRemover(inputCol=\"words\", outputCol=\"filtered\").setStopWords(add_stopwords)\n", 230 | "\n", 231 | "# bag of words count\n", 232 | "countVectors = CountVectorizer(inputCol=\"filtered\", outputCol=\"features\", vocabSize=10000, minDF=5)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 10, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "label_stringIdx = StringIndexer(inputCol = \"Category\", outputCol = \"label\")" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 11, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "pipeline = Pipeline(stages=[regexTokenizer, stopwordsRemover, countVectors, label_stringIdx])\n", 251 | "\n", 252 | "# Fit the pipeline to data.\n", 253 | "pipelineFit = pipeline.fit(data)\n", 254 | "dataset = pipelineFit.transform(data)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 12, 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "name": "stdout", 264 | "output_type": "stream", 265 | "text": [ 266 | "+--------------+--------------------+--------------------+--------------------+--------------------+-----+\n", 267 | "| Category| Descript| words| filtered| features|label|\n", 268 | "+--------------+--------------------+--------------------+--------------------+--------------------+-----+\n", 269 | "| WARRANTS| WARRANT ARREST| [warrant, arrest]| [warrant, arrest]|(809,[17,32],[1.0...| 7.0|\n", 270 | "|OTHER OFFENSES|TRAFFIC VIOLATION...|[traffic, violati...|[traffic, violati...|(809,[11,17,35],[...| 1.0|\n", 271 | "|OTHER OFFENSES|TRAFFIC VIOLATION...|[traffic, violati...|[traffic, violati...|(809,[11,17,35],[...| 1.0|\n", 272 | "| LARCENY/THEFT|GRAND THEFT FROM ...|[grand, theft, fr...|[grand, theft, fr...|(809,[0,2,3,4,6],...| 0.0|\n", 273 | "| LARCENY/THEFT|GRAND THEFT FROM ...|[grand, theft, fr...|[grand, theft, fr...|(809,[0,2,3,4,6],...| 0.0|\n", 274 | 
"+--------------+--------------------+--------------------+--------------------+--------------------+-----+\n", 275 | "only showing top 5 rows\n", 276 | "\n" 277 | ] 278 | } 279 | ], 280 | "source": [ 281 | "dataset.show(5)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": 13, 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "name": "stdout", 291 | "output_type": "stream", 292 | "text": [ 293 | "Training Dataset Count: 614666\n", 294 | "Test Dataset Count: 263383\n" 295 | ] 296 | } 297 | ], 298 | "source": [ 299 | "### Randomly split data into training and test data sets.\n", 300 | "(trainingData, testData) = dataset.randomSplit([0.7, 0.3], seed = 100)\n", 301 | "print(\"Training Dataset Count: \" + str(trainingData.count()))\n", 302 | "print(\"Test Dataset Count: \" + str(testData.count()))" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 14, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "# Build the model\n", 312 | "logistic_regrssor = LR(maxIter=20, regParam=0.3, elasticNetParam=0)\n", 313 | "\n", 314 | "# Train model with Training Data\n", 315 | "model = logistic_regrssor.fit(trainingData)" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 15, 321 | "metadata": {}, 322 | "outputs": [ 323 | { 324 | "name": "stdout", 325 | "output_type": "stream", 326 | "text": [ 327 | "+------------------------------+-------------+------------------------------+-----+----------+\n", 328 | "| Descript| Category| probability|label|prediction|\n", 329 | "+------------------------------+-------------+------------------------------+-----+----------+\n", 330 | "|THEFT, BICYCLE, <$50, NO SE...|LARCENY/THEFT|[0.8711581002180425,0.02115...| 0.0| 0.0|\n", 331 | "|THEFT, BICYCLE, <$50, NO SE...|LARCENY/THEFT|[0.8711581002180425,0.02115...| 0.0| 0.0|\n", 332 | "|THEFT, BICYCLE, <$50, NO SE...|LARCENY/THEFT|[0.8711581002180425,0.02115...| 0.0| 0.0|\n", 333 | "|THEFT, BICYCLE, <$50, NO SE...|LARCENY/THEFT|[0.8711581002180425,0.02115...| 0.0| 0.0|\n", 334 | "|THEFT, BICYCLE, <$50, NO SE...|LARCENY/THEFT|[0.8711581002180425,0.02115...| 0.0| 0.0|\n", 335 | "|THEFT, BICYCLE, <$50, NO SE...|LARCENY/THEFT|[0.8711581002180425,0.02115...| 0.0| 0.0|\n", 336 | "|THEFT, BICYCLE, <$50, NO SE...|LARCENY/THEFT|[0.8711581002180425,0.02115...| 0.0| 0.0|\n", 337 | "|THEFT, BICYCLE, <$50, NO SE...|LARCENY/THEFT|[0.8711581002180425,0.02115...| 0.0| 0.0|\n", 338 | "|THEFT, BICYCLE, <$50, NO SE...|LARCENY/THEFT|[0.8711581002180425,0.02115...| 0.0| 0.0|\n", 339 | "|THEFT, BICYCLE, <$50, NO SE...|LARCENY/THEFT|[0.8711581002180425,0.02115...| 0.0| 0.0|\n", 340 | "+------------------------------+-------------+------------------------------+-----+----------+\n", 341 | "only showing top 10 rows\n", 342 | "\n" 343 | ] 344 | } 345 | ], 346 | "source": [ 347 | "# Make predictions on Test Data\n", 348 | "predictions = model.transform(testData)\n", 349 | "\n", 350 | "# List top crime predictions\n", 351 | "\n", 352 | "predictions.filter(predictions['prediction'] == 0) \\\n", 353 | " .select(\"Descript\",\"Category\",\"probability\",\"label\",\"prediction\") \\\n", 354 | " .orderBy(\"probability\", ascending=False) \\\n", 355 | " .show(n = 10, truncate = 30)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 16, 361 | "metadata": {}, 362 | "outputs": [ 363 | { 364 | "data": { 365 | "text/plain": [ 366 | "0.9725282146509521" 367 | ] 368 | }, 369 | "execution_count": 16, 370 | "metadata": {}, 371 | "output_type": 
"execute_result" 372 | } 373 | ], 374 | "source": [ 375 | "evaluator = MulticlassClassificationEvaluator(predictionCol=\"prediction\")\n", 376 | "evaluator.evaluate(predictions)" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [] 385 | } 386 | ], 387 | "metadata": { 388 | "kernelspec": { 389 | "display_name": "Python [conda env:spark]", 390 | "language": "python", 391 | "name": "conda-env-spark-py" 392 | }, 393 | "language_info": { 394 | "codemirror_mode": { 395 | "name": "ipython", 396 | "version": 3 397 | }, 398 | "file_extension": ".py", 399 | "mimetype": "text/x-python", 400 | "name": "python", 401 | "nbconvert_exporter": "python", 402 | "pygments_lexer": "ipython3", 403 | "version": "3.5.6" 404 | } 405 | }, 406 | "nbformat": 4, 407 | "nbformat_minor": 2 408 | } 409 | -------------------------------------------------------------------------------- /Chapter12/text_preprocessing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "HARI SELDON-... bom In the 1 1,988th year of the Galactic Era; died 12,069. The dates are more commonly given In terms of the current Foundational Era as - 79 to the year 1 F.E. Born to middle-class parents on Flelicon, Arcturus sector (where his father, In a legend of doubtful authenticity, was a tobacco grower in the hydroponic plants of the planet), he early showed amazing ability in mathematics. Anecdotes concerning his ability are innumerable, and some are contradictory. At the age of two, he is said to have ... Undoubtedly his greatest contributions were in the field of psychohistory. Seldon found the field little more than a set of vague axioms; he left it a profound statistical science.... The best existing authority we have for the details of his life is the biography written by Gaal Dornick who. as a young man, met Seldon two years before the great mathematician's death. The story of the meeting ... 
\n" 13 | ] 14 | } 15 | ], 16 | "source": [ 17 | "f = open('foundation.txt')\n", 18 | "text = f.read()\n", 19 | "print(text)\n" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "HARI SELDON bom In the 1 1 988th year of the Galactic Era died 12 069 The dates are more commonly given In terms of the current Foundational Era as 79 to the year 1 F E Born to middle class parents on Flelicon Arcturus sector where his father In a legend of doubtful authenticity was a tobacco grower in the hydroponic plants of the planet he early showed amazing ability in mathematics Anecdotes concerning his ability are innumerable and some are contradictory At the age of two he is said to have Undoubtedly his greatest contributions were in the field of psychohistory Seldon found the field little more than a set of vague axioms he left it a profound statistical science The best existing authority we have for the details of his life is the biography written by Gaal Dornick who as a young man met Seldon two years before the great mathematician s death The story of the meeting \n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "# clean data\n", 37 | "import re\n", 38 | "# remove Punctuation\n", 39 | "text = re.sub(r\"[^a-zA-Z0-9]\", \" \", text) \n", 40 | "print(text)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "hari seldon bom in the 1 1 988th year of the galactic era died 12 069 the dates are more commonly given in terms of the current foundational era as 79 to the year 1 f e born to middle class parents on flelicon arcturus sector where his father in a legend of doubtful authenticity was a tobacco grower in the hydroponic plants of the planet he early showed amazing ability in mathematics anecdotes concerning his ability are innumerable and some are contradictory at the age of two he is said to have undoubtedly his greatest contributions were in the field of psychohistory seldon found the field little more than a set of vague axioms he left it a profound statistical science the best existing authority we have for the details of his life is the biography written by gaal dornick who as a young man met seldon two years before the great mathematician s death the story of the meeting \n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "# Normalize text\n", 58 | "# Convert to lowercase\n", 59 | "text = text.lower() \n", 60 | "print(text)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "['hari', 'seldon', 'bom', 'in', 'the', '1', '1', '988th', 'year', 'of', 'the', 'galactic', 'era', 'died', '12', '069', 'the', 'dates', 'are', 'more', 'commonly', 'given', 'in', 'terms', 'of', 'the', 'current', 'foundational', 'era', 'as', '79', 'to', 'the', 'year', '1', 'f', 'e', 'born', 'to', 'middle', 'class', 'parents', 'on', 'flelicon', 'arcturus', 'sector', 'where', 'his', 'father', 'in', 'a', 'legend', 'of', 'doubtful', 'authenticity', 'was', 'a', 'tobacco', 'grower', 'in', 'the', 'hydroponic', 'plants', 'of', 'the', 'planet', 'he', 'early', 'showed', 'amazing', 'ability', 'in', 'mathematics', 'anecdotes', 'concerning', 'his', 'ability', 'are', 'innumerable', 'and', 'some', 'are', 'contradictory', 'at', 
'the', 'age', 'of', 'two', 'he', 'is', 'said', 'to', 'have', 'undoubtedly', 'his', 'greatest', 'contributions', 'were', 'in', 'the', 'field', 'of', 'psychohistory', 'seldon', 'found', 'the', 'field', 'little', 'more', 'than', 'a', 'set', 'of', 'vague', 'axioms', 'he', 'left', 'it', 'a', 'profound', 'statistical', 'science', 'the', 'best', 'existing', 'authority', 'we', 'have', 'for', 'the', 'details', 'of', 'his', 'life', 'is', 'the', 'biography', 'written', 'by', 'gaal', 'dornick', 'who', 'as', 'a', 'young', 'man', 'met', 'seldon', 'two', 'years', 'before', 'the', 'great', 'mathematician', 's', 'death', 'the', 'story', 'of', 'the', 'meeting']\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "# Tokenize the texts\n", 78 | "words = text.split()\n", 79 | "print(words)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "name": "stderr", 89 | "output_type": "stream", 90 | "text": [ 91 | "[nltk_data] Downloading package punkt to /Users/am/nltk_data...\n" 92 | ] 93 | }, 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "['hari', 'seldon', 'bom', 'in', 'the', '1', '1', '988th', 'year', 'of', 'the', 'galactic', 'era', 'died', '12', '069', 'the', 'dates', 'are', 'more', 'commonly', 'given', 'in', 'terms', 'of', 'the', 'current', 'foundational', 'era', 'as', '79', 'to', 'the', 'year', '1', 'f', 'e', 'born', 'to', 'middle', 'class', 'parents', 'on', 'flelicon', 'arcturus', 'sector', 'where', 'his', 'father', 'in', 'a', 'legend', 'of', 'doubtful', 'authenticity', 'was', 'a', 'tobacco', 'grower', 'in', 'the', 'hydroponic', 'plants', 'of', 'the', 'planet', 'he', 'early', 'showed', 'amazing', 'ability', 'in', 'mathematics', 'anecdotes', 'concerning', 'his', 'ability', 'are', 'innumerable', 'and', 'some', 'are', 'contradictory', 'at', 'the', 'age', 'of', 'two', 'he', 'is', 'said', 'to', 'have', 'undoubtedly', 'his', 'greatest', 'contributions', 'were', 'in', 'the', 'field', 'of', 'psychohistory', 'seldon', 'found', 'the', 'field', 'little', 'more', 'than', 'a', 'set', 'of', 'vague', 'axioms', 'he', 'left', 'it', 'a', 'profound', 'statistical', 'science', 'the', 'best', 'existing', 'authority', 'we', 'have', 'for', 'the', 'details', 'of', 'his', 'life', 'is', 'the', 'biography', 'written', 'by', 'gaal', 'dornick', 'who', 'as', 'a', 'young', 'man', 'met', 'seldon', 'two', 'years', 'before', 'the', 'great', 'mathematician', 's', 'death', 'the', 'story', 'of', 'the', 'meeting']\n" 99 | ] 100 | }, 101 | { 102 | "name": "stderr", 103 | "output_type": "stream", 104 | "text": [ 105 | "[nltk_data] Package punkt is already up-to-date!\n" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "import os\n", 111 | "import nltk\n", 112 | "nltk.download('punkt') \n", 113 | "from nltk.tokenize import word_tokenize\n", 114 | "\n", 115 | "# Split text into words using NLTK\n", 116 | "words_nltk = word_tokenize(text)\n", 117 | "print(words_nltk)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 6, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', \"you're\", \"you've\", \"you'll\", \"you'd\", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', \"she's\", 'her', 'hers', 'herself', 'it', \"it's\", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', \"that'll\", 
'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', \"don't\", 'should', \"should've\", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', \"aren't\", 'couldn', \"couldn't\", 'didn', \"didn't\", 'doesn', \"doesn't\", 'hadn', \"hadn't\", 'hasn', \"hasn't\", 'haven', \"haven't\", 'isn', \"isn't\", 'ma', 'mightn', \"mightn't\", 'mustn', \"mustn't\", 'needn', \"needn't\", 'shan', \"shan't\", 'shouldn', \"shouldn't\", 'wasn', \"wasn't\", 'weren', \"weren't\", 'won', \"won't\", 'wouldn', \"wouldn't\"]\n" 130 | ] 131 | }, 132 | { 133 | "name": "stderr", 134 | "output_type": "stream", 135 | "text": [ 136 | "[nltk_data] Downloading package stopwords to /Users/am/nltk_data...\n", 137 | "[nltk_data] Package stopwords is already up-to-date!\n" 138 | ] 139 | } 140 | ], 141 | "source": [ 142 | "# List stop words\n", 143 | "from nltk.corpus import stopwords\n", 144 | "nltk.download('stopwords')\n", 145 | "print(stopwords.words(\"english\"))" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 7, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "['hari', 'seldon', 'bom', '1', '1', '988th', 'year', 'galactic', 'era', 'died', '12', '069', 'dates', 'commonly', 'given', 'terms', 'current', 'foundational', 'era', '79', 'year', '1', 'f', 'e', 'born', 'middle', 'class', 'parents', 'flelicon', 'arcturus', 'sector', 'father', 'legend', 'doubtful', 'authenticity', 'tobacco', 'grower', 'hydroponic', 'plants', 'planet', 'early', 'showed', 'amazing', 'ability', 'mathematics', 'anecdotes', 'concerning', 'ability', 'innumerable', 'contradictory', 'age', 'two', 'said', 'undoubtedly', 'greatest', 'contributions', 'field', 'psychohistory', 'seldon', 'found', 'field', 'little', 'set', 'vague', 'axioms', 'left', 'profound', 'statistical', 'science', 'best', 'existing', 'authority', 'details', 'life', 'biography', 'written', 'gaal', 'dornick', 'young', 'man', 'met', 'seldon', 'two', 'years', 'great', 'mathematician', 'death', 'story', 'meeting']\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "#Remove stop words\n", 163 | "words = [w for w in words if w not in stopwords.words(\"english\")]\n", 164 | "print(words)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 8, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "['hari', 'seldon', 'bom', '1', '1', '988th', 'year', 'galact', 'era', 'die', '12', '069', 'date', 'commonli', 'given', 'term', 'current', 'foundat', 'era', '79', 'year', '1', 'f', 'e', 'born', 'middl', 'class', 'parent', 'flelicon', 'arcturu', 'sector', 'father', 'legend', 'doubt', 'authent', 'tobacco', 'grower', 'hydropon', 'plant', 'planet', 'earli', 'show', 'amaz', 'abil', 'mathemat', 'anecdot', 'concern', 'abil', 'innumer', 'contradictori', 'age', 'two', 'said', 
'undoubtedli', 'greatest', 'contribut', 'field', 'psychohistori', 'seldon', 'found', 'field', 'littl', 'set', 'vagu', 'axiom', 'left', 'profound', 'statist', 'scienc', 'best', 'exist', 'author', 'detail', 'life', 'biographi', 'written', 'gaal', 'dornick', 'young', 'man', 'met', 'seldon', 'two', 'year', 'great', 'mathematician', 'death', 'stori', 'meet']\n" 177 | ] 178 | } 179 | ], 180 | "source": [ 181 | "from nltk.stem.porter import PorterStemmer\n", 182 | "\n", 183 | "# Reduce words to their stems\n", 184 | "stemmed = [PorterStemmer().stem(w) for w in words]\n", 185 | "print(stemmed)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 9, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | "['hari', 'seldon', 'bom', '1', '1', '988th', 'year', 'galactic', 'era', 'died', '12', '069', 'date', 'commonly', 'given', 'term', 'current', 'foundational', 'era', '79', 'year', '1', 'f', 'e', 'born', 'middle', 'class', 'parent', 'flelicon', 'arcturus', 'sector', 'father', 'legend', 'doubtful', 'authenticity', 'tobacco', 'grower', 'hydroponic', 'plant', 'planet', 'early', 'showed', 'amazing', 'ability', 'mathematics', 'anecdote', 'concerning', 'ability', 'innumerable', 'contradictory', 'age', 'two', 'said', 'undoubtedly', 'greatest', 'contribution', 'field', 'psychohistory', 'seldon', 'found', 'field', 'little', 'set', 'vague', 'axiom', 'left', 'profound', 'statistical', 'science', 'best', 'existing', 'authority', 'detail', 'life', 'biography', 'written', 'gaal', 'dornick', 'young', 'man', 'met', 'seldon', 'two', 'year', 'great', 'mathematician', 'death', 'story', 'meeting']\n" 198 | ] 199 | } 200 | ], 201 | "source": [ 202 | "from nltk.stem.wordnet import WordNetLemmatizer\n", 203 | "\n", 204 | "# Reduce words to their root form\n", 205 | "lemmed = [WordNetLemmatizer().lemmatize(w) for w in words]\n", 206 | "print(lemmed)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [] 215 | } 216 | ], 217 | "metadata": { 218 | "kernelspec": { 219 | "display_name": "Python [conda env:tensorflow]", 220 | "language": "python", 221 | "name": "conda-env-tensorflow-py" 222 | }, 223 | "language_info": { 224 | "codemirror_mode": { 225 | "name": "ipython", 226 | "version": 3 227 | }, 228 | "file_extension": ".py", 229 | "mimetype": "text/x-python", 230 | "name": "python", 231 | "nbconvert_exporter": "python", 232 | "pygments_lexer": "ipython3", 233 | "version": "3.5.6" 234 | } 235 | }, 236 | "nbformat": 4, 237 | "nbformat_minor": 2 238 | } 239 | -------------------------------------------------------------------------------- /Chapter09/Heart_Disease_Prediction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Heart Monitor\n", 8 | "Another very useful personal application of AI in IoT is in the detection of heart disease. A large number of wearables can monitor and record the heart rate, and the recorded data can be used to predict harmful heart conditions. Here we employ AI/ML tools to predict cardiac arrhythmia, a group of conditions in which the heart rate is irregular: it can be either too fast (above 100 beats per minute) or too slow (below 60 beats per minute). 
The data used is taken from the [UCI ML dataset repository](https://archive.ics.uci.edu/ml/datasets/heart+Disease). The dataset consists of 76 attributes, not all of which are required to predict the presence of disease. Each data row has an associated \"goal\" field with five possible values, 0-4: the value 0 indicates a healthy heart, and any other value indicates disease. For better accuracy, the problem can be reduced to a binary classification problem. The code is inspired by the GitHub repository of [Mohammed Rashad](https://github.com/MohammedRashad/Deep-Learning-and-Wearable-IoT-to-Monitor-and-Predict-Cardiac-Arrhytmia) and is shared under the GNU GPL 3.0 license.\n", 9 | "\n", 10 | "The first step, as always, is to import the necessary modules. Since we are now classifying patients as suffering from heart disease or not, we need a classifier. Here, for simplicity, we use the SVC classifier; you can also experiment with an MLP classifier." 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# importing required libraries\n", 20 | "import numpy as np\n", 21 | "import pandas as pd\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "\n", 24 | "from sklearn.svm import SVC\n", 25 | "from sklearn import metrics\n", 26 | "from sklearn.metrics import confusion_matrix\n", 27 | "from sklearn.model_selection import train_test_split" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "Next, read the dataset and preprocess it to select the attributes we will consider: we choose 13 of the 76 attributes. We then convert the target from a multi-class value to a binary class. Finally, the data is split into training and test sets."
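As an aside, the heart-rate thresholds quoted in the introduction (below 60 or above 100 beats per minute) already support a simple device-side check before any ML is applied. A purely illustrative sketch; the function name and messages are our own and are not part of the dataset code:

```python
# Illustrative only: flag a single wearable heart-rate sample against the
# 60-100 beats-per-minute range quoted in the introduction above.
def heart_rate_flag(bpm):
    if bpm < 60:
        return "irregular: too slow"
    if bpm > 100:
        return "irregular: too fast"
    return "normal"

print(heart_rate_flag(72))   # normal
print(heart_rate_flag(118))  # irregular: too fast
```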
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# reading the csv file and extracting the class column\n", 44 | "dataset = pd.read_csv(\"data.csv\")\n", 45 | "dataset.fillna(dataset.mean(), inplace=True)\n", 46 | "\n", 47 | "dataset_to_array = np.array(dataset)\n", 48 | "label = dataset_to_array[:,57] # \"goal\" column: values 0 (healthy) through 4\n", 49 | "label = label.astype('int')\n", 50 | "label[label>0] = 1 # When it is 0 the heart is healthy, 1 otherwise" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 3, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "The Dataset dimensions are : (617, 13) \n", 63 | "\n" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "# extracting 13 features\n", 69 | "dataset = np.column_stack((\n", 70 | "    dataset_to_array[:,4] , # pain location\n", 71 | "    dataset_to_array[:,6] , # relieved after rest\n", 72 | "    dataset_to_array[:,9] , # pain type \n", 73 | "    dataset_to_array[:,11], # resting blood pressure\n", 74 | "    dataset_to_array[:,33], # maximum heart rate achieved\n", 75 | "    dataset_to_array[:,34], # resting heart rate \n", 76 | "    dataset_to_array[:,35], # peak exercise blood pressure (first of 2 parts) \n", 77 | "    dataset_to_array[:,36], # peak exercise blood pressure (second of 2 parts) \n", 78 | "    dataset_to_array[:,38], # resting blood pressure \n", 79 | "    dataset_to_array[:,39], # exercise induced angina (1 = yes; 0 = no) \n", 80 | "    dataset.age, # age \n", 81 | "    dataset.sex , # sex\n", 82 | "    dataset.hypertension # hypertension\n", 83 | "   ))\n", 84 | "\n", 85 | "print (\"The Dataset dimensions are : \" , dataset.shape , \"\\n\")" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 4, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "# dividing data into train and test data\n", 95 | "X_train, X_test, y_train, y_test = train_test_split(dataset, label, random_state = 223)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "Now we define the model to be used. Here we use a support vector classifier; its fit function trains it on the training dataset." 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 5, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "model = SVC(kernel = 'linear').fit(X_train, y_train)\n" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "Let us see its performance on the test dataset." 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 6, 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "Accuracy of the model is : 0.7419354838709677 \n", 131 | "Approximately : 74.0 %\n", 132 | "\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "model_predictions = model.predict(X_test)\n", 138 | "# model accuracy for X_test \n", 139 | "accuracy = metrics.accuracy_score(y_test, model_predictions)\n", 140 | "print (\"Accuracy of the model is :\" , \n", 141 | "     accuracy , \"\\nApproximately : \", \n", 142 | "     round(accuracy*100) , \"%\\n\")" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "You can see that it provides an accuracy of 74%; using an MLP, we can increase it further. But do remember to normalize all the input features before using the MLP classifier (a sketch follows below). Below is the confusion matrix of our trained support vector classifier on the test dataset."
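A minimal sketch of the MLP experiment suggested above, reusing this notebook's `X_train`/`X_test` split; the scaler handles the normalization just mentioned, and the hidden-layer size and iteration cap are illustrative choices, not tuned values:

```python
# Sketch of the suggested MLP variant; scaling is required because MLPs are
# sensitive to feature magnitudes. Hyperparameters here are illustrative.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

mlp_model = make_pipeline(
    StandardScaler(),
    MLPClassifier(hidden_layer_sizes=(32,), max_iter=500, random_state=223)
)
mlp_model.fit(X_train, y_train)
print("MLP accuracy:", mlp_model.score(X_test, y_test))
```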
 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 7, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "# creating a confusion matrix\n", 159 | "cm = confusion_matrix(y_test, model_predictions)\n" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 8, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "" 171 | ] 172 | }, 173 | "execution_count": 8, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | }, 177 | { 178 | "data": { 179 | "image/png": "[base64-encoded PNG of the seaborn confusion-matrix heatmap omitted]\n", 180 | "text/plain": [ 181 | "
" 182 | ] 183 | }, 184 | "metadata": {}, 185 | "output_type": "display_data" 186 | } 187 | ], 188 | "source": [ 189 | "import pandas as pd\n", 190 | "import seaborn as sn\n", 191 | "import matplotlib.pyplot as plt\n", 192 | "%matplotlib inline\n", 193 | "df_cm = pd.DataFrame(cm, index = [i for i in \"01\"],\n", 194 | " columns = [i for i in \"01\"])\n", 195 | "plt.figure(figsize = (10,7))\n", 196 | "sn.heatmap(df_cm, annot=True)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [] 205 | } 206 | ], 207 | "metadata": { 208 | "kernelspec": { 209 | "display_name": "Python [conda env:tensorflow]", 210 | "language": "python", 211 | "name": "conda-env-tensorflow-py" 212 | }, 213 | "language_info": { 214 | "codemirror_mode": { 215 | "name": "ipython", 216 | "version": 3 217 | }, 218 | "file_extension": ".py", 219 | "mimetype": "text/x-python", 220 | "name": "python", 221 | "nbconvert_exporter": "python", 222 | "pygments_lexer": "ipython3", 223 | "version": "3.5.6" 224 | } 225 | }, 226 | "nbformat": 4, 227 | "nbformat_minor": 2 228 | } 229 | -------------------------------------------------------------------------------- /Chapter05/Genetic CNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Genetic CNN\n", 8 | "#### CNN architecture exploration using Genetic Algorithm as discussed in the following paper: Genetic CNN" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "#### Import required libraries \n", 16 | "1. DEAP for Genetic Algorithm\n", 17 | "2. py-dag for Directed Asyclic Graph (Did few changes for Python 3, check dag.py)\n", 18 | "3. Tensorflow" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "name": "stderr", 28 | "output_type": "stream", 29 | "text": [ 30 | "/home/am/anaconda3/envs/tensorflow/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. 
 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "name": "stderr", 28 | "output_type": "stream", 29 | "text": [ 30 | "/home/am/anaconda3/envs/tensorflow/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 31 | "  from ._conv import register_converters as _register_converters\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "import random\n", 37 | "import numpy as np\n", 38 | "\n", 39 | "from deap import base, creator, tools, algorithms\n", 40 | "from scipy.stats import bernoulli\n", 41 | "from dag import DAG, DAGValidationError\n", 42 | "\n", 43 | "import tensorflow as tf\n", 44 | "from tensorflow.examples.tutorials.mnist import input_data" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 2, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "name": "stdout", 54 | "output_type": "stream", 55 | "text": [ 56 | "Extracting MNIST_data/train-images-idx3-ubyte.gz\n", 57 | "Extracting MNIST_data/train-labels-idx1-ubyte.gz\n", 58 | "Extracting MNIST_data/t10k-images-idx3-ubyte.gz\n", 59 | "Extracting MNIST_data/t10k-labels-idx1-ubyte.gz\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "mnist = input_data.read_data_sets(\"MNIST_data/\", one_hot=True)\n", 65 | "train_imgs = mnist.train.images\n", 66 | "train_labels = mnist.train.labels\n", 67 | "test_imgs = mnist.test.images\n", 68 | "test_labels = mnist.test.labels\n", 69 | "\n", 70 | "train_imgs = np.reshape(train_imgs,[-1,28,28,1])\n", 71 | "test_imgs = np.reshape(test_imgs,[-1,28,28,1])" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 3, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | "19\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "STAGES = np.array([\"s1\",\"s2\",\"s3\"]) # S\n", 89 | "NUM_NODES = np.array([3,4,5]) # K\n", 90 | "\n", 91 | "L = 0 # genome length\n", 92 | "BITS_INDICES, l_bpi = np.empty((0,2),dtype = np.int32), 0 # to keep track of bits for each stage S\n", 93 | "for nn in NUM_NODES:\n", 94 | "    t = nn * (nn - 1)\n", 95 | "    BITS_INDICES = np.vstack([BITS_INDICES,[l_bpi, l_bpi + int(0.5 * t)]])\n", 96 | "    l_bpi += int(0.5 * t) # accumulate the offset; a plain assignment here would make the stage bit ranges overlap\n", 97 | "    L += t\n", 98 | "L = int(0.5 * L)\n", 99 | "\n", 100 | "print(L)\n", 101 | "\n", 102 | "TRAINING_EPOCHS = 20\n", 103 | "BATCH_SIZE = 20\n", 104 | "TOTAL_BATCHES = train_imgs.shape[0] // BATCH_SIZE" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "def weight_variable(weight_name, weight_shape):\n", 114 | "    return tf.Variable(tf.truncated_normal(weight_shape, stddev = 0.1),name = ''.join([\"weight_\", weight_name]))\n", 115 | "\n", 116 | "def bias_variable(bias_name,bias_shape):\n", 117 | "    return tf.Variable(tf.constant(0.01, shape = bias_shape),name = ''.join([\"bias_\", bias_name]))\n", 118 | "\n", 119 | "def linear_layer(x,n_hidden_units,layer_name):\n", 120 | "    n_input = int(x.get_shape()[1])\n", 121 | "    weights = weight_variable(layer_name,[n_input, n_hidden_units])\n", 122 | "    biases = bias_variable(layer_name,[n_hidden_units])\n", 123 | "    return tf.add(tf.matmul(x,weights),biases)\n", 124 | "\n", 125 | "def apply_convolution(x,kernel_height,kernel_width,num_channels,depth,layer_name):\n", 126 | "    weights = weight_variable(layer_name,[kernel_height, kernel_width, num_channels, depth])\n", 127 | "    biases = bias_variable(layer_name,[depth])\n", 128 | "    return tf.nn.relu(tf.add(tf.nn.conv2d(x, weights,[1,2,2,1],padding = \"SAME\"),biases)) \n", 129 | "\n", 130 | "def apply_pool(x,kernel_height,kernel_width,stride_size):\n", 131 | "    return tf.nn.max_pool(x, ksize=[1, kernel_height, kernel_width, 1], \n", 132 | 
" strides=[1, 1, stride_size, 1], padding = \"SAME\")\n", 133 | "\n", 134 | "def add_node(node_name, connector_node_name, h = 5, w = 5, nc = 1, d = 1):\n", 135 | " with tf.name_scope(node_name) as scope:\n", 136 | " conv = apply_convolution(tf.get_default_graph().get_tensor_by_name(connector_node_name), \n", 137 | " kernel_height = h, kernel_width = w, num_channels = nc , depth = d, \n", 138 | " layer_name = ''.join([\"conv_\",node_name]))\n", 139 | "\n", 140 | "def sum_tensors(tensor_a,tensor_b,activation_function_pattern):\n", 141 | " if not tensor_a.startswith(\"Add\"):\n", 142 | " tensor_a = ''.join([tensor_a,activation_function_pattern])\n", 143 | " \n", 144 | " return tf.add(tf.get_default_graph().get_tensor_by_name(tensor_a),\n", 145 | " tf.get_default_graph().get_tensor_by_name(''.join([tensor_b,activation_function_pattern])))\n", 146 | "\n", 147 | "def has_same_elements(x):\n", 148 | " return len(set(x)) <= 1\n", 149 | "\n", 150 | "'''This method will come handy to first generate DAG independent of Tensorflow, \n", 151 | " afterwards generated graph can be used to generate Tensorflow graph'''\n", 152 | "def generate_dag(optimal_indvidual,stage_name,num_nodes):\n", 153 | " # create nodes for the graph\n", 154 | " nodes = np.empty((0), dtype = np.str)\n", 155 | " for n in range(1,(num_nodes + 1)):\n", 156 | " nodes = np.append(nodes,''.join([stage_name,\"_\",str(n)]))\n", 157 | " \n", 158 | " # initialize directed asyclic graph (DAG) and add nodes to it\n", 159 | " dag = DAG()\n", 160 | " for n in nodes:\n", 161 | " dag.add_node(n)\n", 162 | "\n", 163 | " # split best indvidual found via GA to identify vertices connections and connect them in DAG \n", 164 | " edges = np.split(optimal_indvidual,np.cumsum(range(num_nodes - 1)))[1:]\n", 165 | " v2 = 2\n", 166 | " for e in edges:\n", 167 | " v1 = 1\n", 168 | " for i in e:\n", 169 | " if i:\n", 170 | " dag.add_edge(''.join([stage_name,\"_\",str(v1)]),''.join([stage_name,\"_\",str(v2)])) \n", 171 | " v1 += 1\n", 172 | " v2 += 1\n", 173 | "\n", 174 | " # delete nodes not connected to anyother node from DAG\n", 175 | " for n in nodes:\n", 176 | " if len(dag.predecessors(n)) == 0 and len(dag.downstream(n)) == 0:\n", 177 | " dag.delete_node(n)\n", 178 | " nodes = np.delete(nodes, np.where(nodes == n)[0][0])\n", 179 | " \n", 180 | " return dag, nodes\n", 181 | "\n", 182 | "def generate_tensorflow_graph(individual,stages,num_nodes,bits_indices):\n", 183 | " activation_function_pattern = \"/Relu:0\"\n", 184 | " \n", 185 | " tf.reset_default_graph()\n", 186 | " X = tf.placeholder(tf.float32, shape = [None,28,28,1], name = \"X\")\n", 187 | " Y = tf.placeholder(tf.float32,[None,10],name = \"Y\")\n", 188 | " \n", 189 | " d_node = X\n", 190 | " for stage_name,num_node,bpi in zip(stages,num_nodes,bits_indices):\n", 191 | " indv = individual[bpi[0]:bpi[1]]\n", 192 | "\n", 193 | " add_node(''.join([stage_name,\"_input\"]),d_node.name)\n", 194 | " pooling_layer_name = ''.join([stage_name,\"_input\",activation_function_pattern])\n", 195 | "\n", 196 | " if not has_same_elements(indv):\n", 197 | " # ------------------- Temporary DAG to hold all connections implied by GA solution ------------- # \n", 198 | "\n", 199 | " # get DAG and nodes in the graph\n", 200 | " dag, nodes = generate_dag(indv,stage_name,num_node) \n", 201 | " # get nodes without any predecessor, these will be connected to input node\n", 202 | " without_predecessors = dag.ind_nodes() \n", 203 | " # get nodes without any successor, these will be connected to output node\n", 204 | " 
205 | "\n", 206 | "            # ----------------------------------------------------------------------------------------------- #\n", 207 | "\n", 208 | "            # --------------------------- Initialize TensorFlow graph based on the DAG ---------------------- #\n", 209 | "\n", 210 | "            for wop in without_predecessors:\n", 211 | "                add_node(wop,''.join([stage_name,\"_input\",activation_function_pattern]))\n", 212 | "\n", "            # a node with several predecessors first folds their activations together with\n", "            # pairwise element-wise additions (sum_tensors), then convolves the summed tensor\n", 213 | "            for n in nodes:\n", 214 | "                predecessors = dag.predecessors(n)\n", 215 | "                if len(predecessors) == 0:\n", 216 | "                    continue\n", 217 | "                elif len(predecessors) > 1:\n", 218 | "                    first_predecessor = predecessors[0]\n", 219 | "                    for prd in range(1,len(predecessors)):\n", 220 | "                        t = sum_tensors(first_predecessor,predecessors[prd],activation_function_pattern)\n", 221 | "                        first_predecessor = t.name\n", 222 | "                    add_node(n,first_predecessor)\n", 223 | "                elif predecessors:\n", 224 | "                    add_node(n,''.join([predecessors[0],activation_function_pattern]))\n", 225 | "\n", 226 | "            if len(without_successors) > 1:\n", 227 | "                first_successor = without_successors[0]\n", 228 | "                for suc in range(1,len(without_successors)):\n", 229 | "                    t = sum_tensors(first_successor,without_successors[suc],activation_function_pattern)\n", 230 | "                    first_successor = t.name\n", 231 | "                add_node(''.join([stage_name,\"_output\"]),first_successor) \n", 232 | "            else:\n", 233 | "                add_node(''.join([stage_name,\"_output\"]),''.join([without_successors[0],activation_function_pattern])) \n", 234 | "\n", 235 | "            pooling_layer_name = ''.join([stage_name,\"_output\",activation_function_pattern])\n", 236 | "            # ------------------------------------------------------------------------------------------ #\n", 237 | "\n", 238 | "        d_node = apply_pool(tf.get_default_graph().get_tensor_by_name(pooling_layer_name), \n", 239 | "                            kernel_height = 16, kernel_width = 16,stride_size = 2)\n", 240 | "\n", 241 | "    shape = d_node.get_shape().as_list()\n", 242 | "    flat = tf.reshape(d_node, [-1, shape[1] * shape[2] * shape[3]])\n", 243 | "    logits = linear_layer(flat,10,\"logits\")\n", 244 | "    \n", 245 | "    xentropy = tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = Y)\n", 246 | "    loss_function = tf.reduce_mean(xentropy)\n", 247 | "    optimizer = tf.train.AdamOptimizer().minimize(loss_function) \n", 248 | "    accuracy = tf.reduce_mean(tf.cast( tf.equal(tf.argmax(tf.nn.softmax(logits),1), tf.argmax(Y,1)), tf.float32))\n", 249 | "    \n", 250 | "    return X, Y, optimizer, loss_function, accuracy\n", 251 | "\n", 252 | "def evaluateModel(individual):\n", 253 | "    score = 0.0\n", 254 | "    X, Y, optimizer, loss_function, accuracy = generate_tensorflow_graph(individual,STAGES,NUM_NODES,BITS_INDICES)\n", 255 | "    with tf.Session() as session:\n", 256 | "        tf.global_variables_initializer().run()\n", 257 | "        for epoch in range(TRAINING_EPOCHS):\n", 258 | "            for b in range(TOTAL_BATCHES):\n", 259 | "                offset = (b * BATCH_SIZE) % (train_labels.shape[0] - BATCH_SIZE) # step through the training set batch by batch\n", 260 | "                batch_x = train_imgs[offset:(offset + BATCH_SIZE), :, :, :]\n", 261 | "                batch_y = train_labels[offset:(offset + BATCH_SIZE), :]\n", 262 | "                _, c = session.run([optimizer, loss_function],feed_dict={X: batch_x, Y : batch_y})\n", 263 | "    \n", 264 | "        score = session.run(accuracy, feed_dict={X: test_imgs, Y: test_labels})\n", 265 | "        #print('Accuracy: ',score)\n", 266 | "    return score,\n",
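"\n", "# A quick smoke test (a sketch, not part of the original notebook): build the graph\n", "# for one random individual and count its trainable variables before launching the\n", "# full GA, e.g.:\n", "#   indv = list(bernoulli.rvs(0.5, size = L))\n", "#   X, Y, optimizer, loss_function, accuracy = generate_tensorflow_graph(indv, STAGES, NUM_NODES, BITS_INDICES)\n", "#   print(len(tf.trainable_variables()))" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": { 273 | "scrolled": false 274 | }, 275 | "outputs": [ 276 | { 277 | 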
"name": "stdout", 278 | "output_type": "stream", 279 | "text": [ 280 | "gen\tnevals\n", 281 | "0 \t20 \n", 282 | "1 \t5 \n" 283 | ] 284 | } 285 | ], 286 | "source": [ 287 | "population_size = 20\n", 288 | "num_generations = 3\n", 289 | "\n", 290 | "creator.create(\"FitnessMax\", base.Fitness, weights = (1.0,))\n", 291 | "creator.create(\"Individual\", list , fitness = creator.FitnessMax)\n", 292 | "\n", 293 | "toolbox = base.Toolbox()\n", 294 | "toolbox.register(\"binary\", bernoulli.rvs, 0.5)\n", 295 | "toolbox.register(\"individual\", tools.initRepeat, creator.Individual, toolbox.binary, n = L)\n", 296 | "toolbox.register(\"population\", tools.initRepeat, list , toolbox.individual)\n", 297 | "\n", 298 | "toolbox.register(\"mate\", tools.cxOrdered)\n", 299 | "toolbox.register(\"mutate\", tools.mutShuffleIndexes, indpb = 0.8)\n", 300 | "toolbox.register(\"select\", tools.selRoulette)\n", 301 | "toolbox.register(\"evaluate\", evaluateModel)\n", 302 | "\n", 303 | "popl = toolbox.population(n = population_size)\n", 304 | "import time\n", 305 | "t = time.time()\n", 306 | "result = algorithms.eaSimple(popl, toolbox, cxpb = 0.4, mutpb = 0.05, ngen = num_generations, verbose = True)\n", 307 | "\n", 308 | "t1 = time.time() - t\n", 309 | "print(t1)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "# print top-3 optimal solutions \n", 319 | "best_individuals = tools.selBest(popl, k = 3)\n", 320 | "for bi in best_individuals:\n", 321 | " print(bi)" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "--------------------------------------------------------------------------------------------------------------------" 329 | ] 330 | } 331 | ], 332 | "metadata": { 333 | "anaconda-cloud": {}, 334 | "kernelspec": { 335 | "display_name": "Python [conda env:tensorflow]", 336 | "language": "python", 337 | "name": "conda-env-tensorflow-py" 338 | }, 339 | "language_info": { 340 | "codemirror_mode": { 341 | "name": "ipython", 342 | "version": 3 343 | }, 344 | "file_extension": ".py", 345 | "mimetype": "text/x-python", 346 | "name": "python", 347 | "nbconvert_exporter": "python", 348 | "pygments_lexer": "ipython3", 349 | "version": "3.5.4" 350 | } 351 | }, 352 | "nbformat": 4, 353 | "nbformat_minor": 1 354 | } 355 | -------------------------------------------------------------------------------- /Chapter08/Boston_Price_MLlib.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from pyspark.ml.regression import LinearRegression as LR\n", 10 | "from pyspark.ml.feature import VectorAssembler\n", 11 | "from pyspark.ml.evaluation import RegressionEvaluator\n", 12 | "\n", 13 | "from pyspark.sql import SparkSession\n", 14 | "\n", 15 | "spark = SparkSession.builder \\\n", 16 | " .appName(\"Boston Price Prediction\") \\\n", 17 | " .config(\"spark.executor.memory\", \"70g\") \\\n", 18 | " .config(\"spark.driver.memory\", \"50g\") \\\n", 19 | " .config(\"spark.memory.offHeap.enabled\",True) \\\n", 20 | " .config(\"spark.memory.offHeap.size\",\"16g\") \\\n", 21 | " .getOrCreate()" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "house_df = spark.read.format(\"csv\"). 
\\\n", 31 | " options(header=\"true\", inferschema=\"true\"). \\\n", 32 | " load(\"boston/train.csv\")" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "+---+-------+----+-----+----+-----+-----+----+------+---+---+-------+------+-----+----+\n", 45 | "| ID| crim| zn|indus|chas| nox| rm| age| dis|rad|tax|ptratio| black|lstat|medv|\n", 46 | "+---+-------+----+-----+----+-----+-----+----+------+---+---+-------+------+-----+----+\n", 47 | "| 1|0.00632|18.0| 2.31| 0|0.538|6.575|65.2| 4.09| 1|296| 15.3| 396.9| 4.98|24.0|\n", 48 | "| 2|0.02731| 0.0| 7.07| 0|0.469|6.421|78.9|4.9671| 2|242| 17.8| 396.9| 9.14|21.6|\n", 49 | "| 4|0.03237| 0.0| 2.18| 0|0.458|6.998|45.8|6.0622| 3|222| 18.7|394.63| 2.94|33.4|\n", 50 | "+---+-------+----+-----+----+-----+-----+----+------+---+---+-------+------+-----+----+\n", 51 | "only showing top 3 rows\n", 52 | "\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "house_df.show(3)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "root\n", 70 | " |-- ID: integer (nullable = true)\n", 71 | " |-- crim: double (nullable = true)\n", 72 | " |-- zn: double (nullable = true)\n", 73 | " |-- indus: double (nullable = true)\n", 74 | " |-- chas: integer (nullable = true)\n", 75 | " |-- nox: double (nullable = true)\n", 76 | " |-- rm: double (nullable = true)\n", 77 | " |-- age: double (nullable = true)\n", 78 | " |-- dis: double (nullable = true)\n", 79 | " |-- rad: integer (nullable = true)\n", 80 | " |-- tax: integer (nullable = true)\n", 81 | " |-- ptratio: double (nullable = true)\n", 82 | " |-- black: double (nullable = true)\n", 83 | " |-- lstat: double (nullable = true)\n", 84 | " |-- medv: double (nullable = true)\n", 85 | "\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "# DataFrame Schema\n", 91 | "house_df.printSchema()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/html": [ 102 | "
" 259 | ], 260 | "text/plain": [ 261 | " 0 1 2 3 4\n", 262 | "summary count mean stddev min max\n", 263 | "ID 333 250.95195195195194 147.8594378018597 1 506\n", 264 | "crim 333 3.3603414714714708 7.352271836781104 0.00632 73.5341\n", 265 | "zn 333 10.68918918918919 22.674761796618217 0.0 100.0\n", 266 | "indus 333 11.29348348348346 6.998123104477312 0.74 27.74\n", 267 | "chas 333 0.06006006006006006 0.2379556428164483 0 1\n", 268 | "nox 333 0.557144144144145 0.11495450830289312 0.385 0.871\n", 269 | "rm 333 6.265618618618616 0.7039515757334471 3.561 8.725\n", 270 | "age 333 68.22642642642641 28.13334360562338 6.0 100.0\n", 271 | "dis 333 3.7099336336336335 1.9811230514407001 1.1296 10.7103\n", 272 | "rad 333 9.633633633633634 8.742174349631064 1 24\n", 273 | "tax 333 409.27927927927925 170.84198846058237 188 711\n", 274 | "ptratio 333 18.448048048047994 2.1518213294390836 12.6 21.2\n", 275 | "black 333 359.4660960960953 86.58456685718393 3.5 396.9\n", 276 | "lstat 333 12.515435435435432 7.0677808035857845 1.73 37.97\n", 277 | "medv 333 22.768768768768783 9.173468027315415 5.0 50.0" 278 | ] 279 | }, 280 | "execution_count": 5, 281 | "metadata": {}, 282 | "output_type": "execute_result" 283 | } 284 | ], 285 | "source": [ 286 | "house_df.describe().toPandas().transpose()" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": 6, 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "name": "stdout", 296 | "output_type": "stream", 297 | "text": [ 298 | "+--------------------+----+\n", 299 | "| features|medv|\n", 300 | "+--------------------+----+\n", 301 | "|[0.00632,18.0,2.3...|24.0|\n", 302 | "|[0.02731,0.0,7.07...|21.6|\n", 303 | "|[0.03237,0.0,2.18...|33.4|\n", 304 | "|[0.06905,0.0,2.18...|36.2|\n", 305 | "|[0.08829,12.5,7.8...|22.9|\n", 306 | "+--------------------+----+\n", 307 | "only showing top 5 rows\n", 308 | "\n" 309 | ] 310 | } 311 | ], 312 | "source": [ 313 | "vectors = VectorAssembler(inputCols = ['crim', 'zn','indus','chas',\n", 314 | " 'nox','rm','age','dis', 'rad', 'tax',\n", 315 | " 'ptratio','black', 'lstat'],\n", 316 | " outputCol = 'features')\n", 317 | "\n", 318 | "vhouse_df = vectors.transform(house_df)\n", 319 | "vhouse_df = vhouse_df.select(['features', 'medv'])\n", 320 | "vhouse_df.show(5)" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 7, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "train_df, test_df = vhouse_df.randomSplit([0.7,0.3])" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 8, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "regressor = LR(featuresCol = 'features', labelCol='medv',\\\n", 339 | " maxIter=20, regParam=0.3, elasticNetParam=0.8)\n", 340 | "\n", 341 | "model = regressor.fit(train_df)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 9, 347 | "metadata": {}, 348 | "outputs": [ 349 | { 350 | "name": "stdout", 351 | "output_type": "stream", 352 | "text": [ 353 | "Coefficients: [-0.010279413081980417,0.034113414577108085,0.0,5.6415385374198,-7.783264348644399,3.085680504353533,0.0,-0.8290283633263736,0.016467345168122184,0.0,-0.5849152858717687,0.009195354138663316,-0.5627105522578837]\n", 354 | "Intercept: 24.28872820161242\n" 355 | ] 356 | } 357 | ], 358 | "source": [ 359 | "print(\"Coefficients:\", model.coefficients)\n", 360 | "print(\"Intercept:\", model.intercept)" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 10, 366 | "metadata": {}, 367 | "outputs": [ 368 | { 
369 | "name": "stdout", 370 | "output_type": "stream", 371 | "text": [ 372 | "RMSE is 4.735492439102997 and r2 is 0.7060968639352195\n", 373 | "Number of Iterations is 21\n" 374 | ] 375 | } 376 | ], 377 | "source": [ 378 | "modelSummary = model.summary\n", 379 | "print(\"RMSE is {} and r2 is {}\".format(modelSummary.rootMeanSquaredError, modelSummary.r2))\n", 380 | "print(\"Number of Iterations is \",modelSummary.totalIterations)" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 11, 386 | "metadata": {}, 387 | "outputs": [ 388 | { 389 | "name": "stdout", 390 | "output_type": "stream", 391 | "text": [ 392 | "+--------------------+----+------------------+\n", 393 | "| features|medv| prediction|\n", 394 | "+--------------------+----+------------------+\n", 395 | "|[0.00906,90.0,2.9...|32.2| 30.33845691985376|\n", 396 | "|[0.01311,90.0,1.2...|35.4| 29.92694704599407|\n", 397 | "|[0.01501,80.0,2.0...|24.5| 27.53450962471756|\n", 398 | "|[0.01951,17.5,1.3...|33.0|24.177663554797803|\n", 399 | "|[0.01965,80.0,1.7...|20.1|20.945922220637236|\n", 400 | "+--------------------+----+------------------+\n", 401 | "only showing top 5 rows\n", 402 | "\n" 403 | ] 404 | } 405 | ], 406 | "source": [ 407 | "model_predictions = model.transform(test_df)\n", 408 | "model_predictions.select(\"features\", \"medv\", \"prediction\").show(5)" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 13, 414 | "metadata": {}, 415 | "outputs": [ 416 | { 417 | "name": "stdout", 418 | "output_type": "stream", 419 | "text": [ 420 | "R2 value on test dataset is: 0.6849428292603987\n", 421 | "RMSE value is 5.557545888924286\n" 422 | ] 423 | } 424 | ], 425 | "source": [ 426 | "model_evaluator = RegressionEvaluator(predictionCol=\"prediction\",\n", 427 | " labelCol=\"medv\", metricName=\"r2\")\n", 428 | "print(\"R2 value on test dataset is: \", model_evaluator.evaluate(model_predictions))\n", 429 | "print(\"RMSE value is\", model.evaluate(test_df).rootMeanSquaredError)" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 14, 435 | "metadata": {}, 436 | "outputs": [], 437 | "source": [ 438 | "spark.stop()" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": {}, 445 | "outputs": [], 446 | "source": [] 447 | } 448 | ], 449 | "metadata": { 450 | "kernelspec": { 451 | "display_name": "Python [default]", 452 | "language": "python", 453 | "name": "python3" 454 | }, 455 | "language_info": { 456 | "codemirror_mode": { 457 | "name": "ipython", 458 | "version": 3 459 | }, 460 | "file_extension": ".py", 461 | "mimetype": "text/x-python", 462 | "name": "python", 463 | "nbconvert_exporter": "python", 464 | "pygments_lexer": "ipython3", 465 | "version": "3.5.6" 466 | } 467 | }, 468 | "nbformat": 4, 469 | "nbformat_minor": 2 470 | } 471 | -------------------------------------------------------------------------------- /Chapter05/Genetic_RNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "/home/am/anaconda3/envs/tensorflow/lib/python3.5/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. 
In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 13 | " from ._conv import register_converters as _register_converters\n", 14 | "Using TensorFlow backend.\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import numpy as np\n", 20 | "import pandas as pd\n", 21 | "from sklearn.metrics import mean_squared_error\n", 22 | "from sklearn.model_selection import train_test_split as split\n", 23 | "\n", 24 | "from keras.layers import LSTM, Input, Dense\n", 25 | "from keras.models import Model\n", 26 | "\n", 27 | "from deap import base, creator, tools, algorithms\n", 28 | "from scipy.stats import bernoulli\n", 29 | "from bitstring import BitArray\n", 30 | "\n", 31 | "np.random.seed(1120)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "data = pd.read_csv('train.csv')\n", 41 | "data = np.reshape(np.array(data['wp1']),(len(data['wp1']),1))\n", 42 | "\n", 43 | "train_data = data[0:17257]\n", 44 | "test_data = data[17257:]" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 3, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "def prepare_dataset(data, window_size):\n", 54 | "    X, Y = np.empty((0,window_size)), np.empty((0))\n", 55 | "    for i in range(len(data)-window_size-1):\n", 56 | "        X = np.vstack([X,data[i:(i + window_size),0]])\n", 57 | "        Y = np.append(Y,data[i + window_size,0])   \n", 58 | "    X = np.reshape(X,(len(X),window_size,1))\n", 59 | "    Y = np.reshape(Y,(len(Y),1))\n", 60 | "    return X, Y\n", 61 | "\n", 62 | "def train_evaluate(ga_individual_solution):   \n", 63 | "    # Decode the GA bit string into integers for window_size and num_units (e.g. the six bits 100100 decode to window_size 36)\n", 64 | "    window_size_bits = BitArray(ga_individual_solution[0:6])\n", 65 | "    num_units_bits = BitArray(ga_individual_solution[6:]) \n", 66 | "    window_size = window_size_bits.uint\n", 67 | "    num_units = num_units_bits.uint\n", 68 | "    print('\\nWindow Size: ', window_size, ', Num of Units: ', num_units)\n", 69 | "    \n", 70 | "    # Return a poor fitness score of 100 if window_size or num_units is zero, so the GA selects against such individuals\n", 71 | "    if window_size == 0 or num_units == 0:\n", 72 | "        return 100, \n", 73 | "    \n", 74 | "    # Segment the train_data based on new window_size; split into train and validation (80/20)\n", 75 | "    X,Y = prepare_dataset(train_data,window_size)\n", 76 | "    X_train, X_val, y_train, y_val = split(X, Y, test_size = 0.20, random_state = 1120)\n", 77 | "    \n", 78 | "    # Train LSTM model and predict on validation set\n", 79 | "    inputs = Input(shape=(window_size,1))\n", 80 | "    x = LSTM(num_units, input_shape=(window_size,1))(inputs)\n", 81 | "    predictions = Dense(1, activation='linear')(x)\n", 82 | "    model = Model(inputs=inputs, outputs=predictions)\n", 83 | "    model.compile(optimizer='adam',loss='mean_squared_error')\n", 84 | "    model.fit(X_train, y_train, epochs=5, batch_size=10,shuffle=True)\n", 85 | "    y_pred = model.predict(X_val)\n", 86 | "    \n", 87 | "    # Calculate the RMSE score as fitness score for GA\n", 88 | "    rmse = np.sqrt(mean_squared_error(y_val, y_pred))\n", 89 | "    print('Validation RMSE: ', rmse,'\\n')\n", 90 | "    \n", 91 | "    return rmse,\n",
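"\n", "# Sanity check of the decoding (a sketch, not part of the original run): score one\n", "# random 10-bit individual before launching the GA, e.g.:\n", "#   train_evaluate(list(bernoulli.rvs(0.5, size = 10)))" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 4, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "name": "stdout", 101 | "output_type": "stream", 102 | "text": [ 103 | "\n", 104 | "Window Size: 36 , Num of Units: 2\n", 105 | "Epoch 1/5\n", 106 | "13776/13776 [==============================] - 34s 2ms/step - loss: 0.0188\n", 107 | "Epoch 2/5\n", 108 | "13776/13776 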
[==============================] - 34s 2ms/step - loss: 0.0089\n", 109 | "Epoch 3/5\n", 110 | "13776/13776 [==============================] - 34s 2ms/step - loss: 0.0068\n", 111 | "Epoch 4/5\n", 112 | "13776/13776 [==============================] - 34s 2ms/step - loss: 0.0060\n", 113 | "Epoch 5/5\n", 114 | "13776/13776 [==============================] - 34s 2ms/step - loss: 0.0057\n", 115 | "Validation RMSE: 0.07637882615270873 \n", 116 | "\n", 117 | "\n", 118 | "Window Size: 56 , Num of Units: 8\n", 119 | "Epoch 1/5\n", 120 | "13760/13760 [==============================] - 53s 4ms/step - loss: 0.0132\n", 121 | "Epoch 2/5\n", 122 | "13760/13760 [==============================] - 53s 4ms/step - loss: 0.0064\n", 123 | "Epoch 3/5\n", 124 | "13760/13760 [==============================] - 53s 4ms/step - loss: 0.0058\n", 125 | "Epoch 4/5\n", 126 | "13760/13760 [==============================] - 53s 4ms/step - loss: 0.0057\n", 127 | "Epoch 5/5\n", 128 | "13760/13760 [==============================] - 54s 4ms/step - loss: 0.0057\n", 129 | "Validation RMSE: 0.07416215025613665 \n", 130 | "\n", 131 | "\n", 132 | "Window Size: 60 , Num of Units: 9\n", 133 | "Epoch 1/5\n", 134 | "13756/13756 [==============================] - 57s 4ms/step - loss: 0.0185\n", 135 | "Epoch 2/5\n", 136 | "13756/13756 [==============================] - 57s 4ms/step - loss: 0.0074\n", 137 | "Epoch 3/5\n", 138 | "13756/13756 [==============================] - 57s 4ms/step - loss: 0.0060\n", 139 | "Epoch 4/5\n", 140 | "13756/13756 [==============================] - 57s 4ms/step - loss: 0.0057\n", 141 | "Epoch 5/5\n", 142 | "13756/13756 [==============================] - 57s 4ms/step - loss: 0.0057\n", 143 | "Validation RMSE: 0.07466883465012926 \n", 144 | "\n", 145 | "\n", 146 | "Window Size: 49 , Num of Units: 9\n", 147 | "Epoch 1/5\n", 148 | "13765/13765 [==============================] - 48s 3ms/step - loss: 0.0148\n", 149 | "Epoch 2/5\n", 150 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0066\n", 151 | "Epoch 3/5\n", 152 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0059\n", 153 | "Epoch 4/5\n", 154 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0058\n", 155 | "Epoch 5/5\n", 156 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0058\n", 157 | "Validation RMSE: 0.0740184976178182 \n", 158 | "\n", 159 | "\n", 160 | "Window Size: 60 , Num of Units: 9\n", 161 | "Epoch 1/5\n", 162 | "13756/13756 [==============================] - 58s 4ms/step - loss: 0.0227\n", 163 | "Epoch 2/5\n", 164 | "13756/13756 [==============================] - 58s 4ms/step - loss: 0.0066\n", 165 | "Epoch 3/5\n", 166 | "13756/13756 [==============================] - 58s 4ms/step - loss: 0.0058\n", 167 | "Epoch 4/5\n", 168 | "13756/13756 [==============================] - 58s 4ms/step - loss: 0.0057\n", 169 | "Epoch 5/5\n", 170 | "13756/13756 [==============================] - 58s 4ms/step - loss: 0.0057\n", 171 | "Validation RMSE: 0.0760438347342536 \n", 172 | "\n", 173 | "\n", 174 | "Window Size: 60 , Num of Units: 9\n", 175 | "Epoch 1/5\n", 176 | "13756/13756 [==============================] - 58s 4ms/step - loss: 0.0135\n", 177 | "Epoch 2/5\n", 178 | "13756/13756 [==============================] - 57s 4ms/step - loss: 0.0064\n", 179 | "Epoch 3/5\n", 180 | "13756/13756 [==============================] - 57s 4ms/step - loss: 0.0058\n", 181 | "Epoch 4/5\n", 182 | "13756/13756 [==============================] - 58s 4ms/step - loss: 0.0058\n", 183 | 
"Epoch 5/5\n", 184 | "13756/13756 [==============================] - 58s 4ms/step - loss: 0.0057\n", 185 | "Validation RMSE: 0.07489840863581337 \n", 186 | "\n", 187 | "\n", 188 | "Window Size: 48 , Num of Units: 9\n", 189 | "Epoch 1/5\n", 190 | "13766/13766 [==============================] - 47s 3ms/step - loss: 0.0144\n", 191 | "Epoch 2/5\n", 192 | "13766/13766 [==============================] - 47s 3ms/step - loss: 0.0068\n", 193 | "Epoch 3/5\n", 194 | "13766/13766 [==============================] - 47s 3ms/step - loss: 0.0058\n", 195 | "Epoch 4/5\n", 196 | "13766/13766 [==============================] - 47s 3ms/step - loss: 0.0057\n", 197 | "Epoch 5/5\n", 198 | "13766/13766 [==============================] - 47s 3ms/step - loss: 0.0057\n", 199 | "Validation RMSE: 0.0774579464584324 \n", 200 | "\n", 201 | "\n", 202 | "Window Size: 61 , Num of Units: 9\n", 203 | "Epoch 1/5\n", 204 | "13756/13756 [==============================] - 59s 4ms/step - loss: 0.0135\n", 205 | "Epoch 2/5\n", 206 | "13756/13756 [==============================] - 58s 4ms/step - loss: 0.0065\n", 207 | "Epoch 3/5\n", 208 | "13756/13756 [==============================] - 58s 4ms/step - loss: 0.0057\n", 209 | "Epoch 4/5\n", 210 | "13756/13756 [==============================] - 58s 4ms/step - loss: 0.0057\n", 211 | "Epoch 5/5\n", 212 | "13756/13756 [==============================] - 59s 4ms/step - loss: 0.0057\n", 213 | "Validation RMSE: 0.07473174433542637 \n", 214 | "\n", 215 | "\n", 216 | "Window Size: 49 , Num of Units: 9\n", 217 | "Epoch 1/5\n", 218 | "13765/13765 [==============================] - 48s 3ms/step - loss: 0.0116\n", 219 | "Epoch 2/5\n", 220 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0063\n", 221 | "Epoch 3/5\n", 222 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0058\n", 223 | "Epoch 4/5\n", 224 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0058\n", 225 | "Epoch 5/5\n", 226 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0058\n", 227 | "Validation RMSE: 0.07458360715108725 \n", 228 | "\n", 229 | "\n", 230 | "Window Size: 49 , Num of Units: 9\n", 231 | "Epoch 1/5\n", 232 | "13765/13765 [==============================] - 48s 3ms/step - loss: 0.0159\n", 233 | "Epoch 2/5\n", 234 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0065\n", 235 | "Epoch 3/5\n", 236 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0058\n", 237 | "Epoch 4/5\n", 238 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0058\n", 239 | "Epoch 5/5\n", 240 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0058\n", 241 | "Validation RMSE: 0.07410878901036914 \n", 242 | "\n", 243 | "\n", 244 | "Window Size: 49 , Num of Units: 9\n", 245 | "Epoch 1/5\n", 246 | "13765/13765 [==============================] - 48s 3ms/step - loss: 0.0151\n", 247 | "Epoch 2/5\n", 248 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0065\n", 249 | "Epoch 3/5\n", 250 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0059\n", 251 | "Epoch 4/5\n", 252 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0058\n", 253 | "Epoch 5/5\n", 254 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0058\n", 255 | "Validation RMSE: 0.07407377472540635 \n", 256 | "\n", 257 | "\n", 258 | "Window Size: 49 , Num of Units: 9\n", 259 | "Epoch 1/5\n", 260 | "13765/13765 
[==============================] - 48s 3ms/step - loss: 0.0176\n", 261 | "Epoch 2/5\n", 262 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0062\n", 263 | "Epoch 3/5\n", 264 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0058\n", 265 | "Epoch 4/5\n", 266 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0058\n", 267 | "Epoch 5/5\n", 268 | "13765/13765 [==============================] - 47s 3ms/step - loss: 0.0057\n", 269 | "Validation RMSE: 0.07401475117886316 \n", 270 | "\n" 271 | ] 272 | } 273 | ], 274 | "source": [ 275 | "population_size = 4\n", 276 | "num_generations = 4\n", 277 | "gene_length = 10\n", 278 | "\n", 279 | "# Since we are minimizing the RMSE score, the fitness weight is -1.0.\n", 280 | "# To maximize a metric instead (accuracy, for instance), use a weight of 1.0.\n", 281 | "creator.create('FitnessMax', base.Fitness, weights = (-1.0,))\n", 282 | "creator.create('Individual', list , fitness = creator.FitnessMax)\n", 283 | "\n", 284 | "toolbox = base.Toolbox()\n", 285 | "toolbox.register('binary', bernoulli.rvs, 0.5)\n", 286 | "toolbox.register('individual', tools.initRepeat, creator.Individual, toolbox.binary, n = gene_length)\n", 287 | "toolbox.register('population', tools.initRepeat, list , toolbox.individual)\n", 288 | "\n", 289 | "toolbox.register('mate', tools.cxOrdered)\n", 290 | "toolbox.register('mutate', tools.mutShuffleIndexes, indpb = 0.6)\n", 291 | "toolbox.register('select', tools.selRoulette)\n", 292 | "toolbox.register('evaluate', train_evaluate)\n", 293 | "\n", 294 | "population = toolbox.population(n = population_size)\n", 295 | "r = algorithms.eaSimple(population, toolbox, cxpb = 0.4, mutpb = 0.1, ngen = num_generations, verbose = False)\n" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 5, 301 | "metadata": {}, 302 | "outputs": [ 303 | { 304 | "name": "stdout", 305 | "output_type": "stream", 306 | "text": [ 307 | "\n", 308 | "Window Size: 49 , Num of Units: 9\n" 309 | ] 310 | } 311 | ], 312 | "source": [ 313 | "best_individuals = tools.selBest(population,k = 1)\n", 314 | "best_window_size = None\n", 315 | "best_num_units = None\n", 316 | "\n", 317 | "for bi in best_individuals:\n", 318 | "    window_size_bits = BitArray(bi[0:6])\n", 319 | "    num_units_bits = BitArray(bi[6:]) \n", 320 | "    best_window_size = window_size_bits.uint\n", 321 | "    best_num_units = num_units_bits.uint\n", 322 | "    print('\\nWindow Size: ', best_window_size, ', Num of Units: ', best_num_units)" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 6, 328 | "metadata": {}, 329 | "outputs": [ 330 | { 331 | "name": "stdout", 332 | "output_type": "stream", 333 | "text": [ 334 | "Epoch 1/5\n", 335 | "17207/17207 [==============================] - 60s 3ms/step - loss: 0.0147\n", 336 | "Epoch 2/5\n", 337 | "17207/17207 [==============================] - 59s 3ms/step - loss: 0.0062\n", 338 | "Epoch 3/5\n", 339 | "17207/17207 [==============================] - 59s 3ms/step - loss: 0.0058\n", 340 | "Epoch 4/5\n", 341 | "17207/17207 [==============================] - 59s 3ms/step - loss: 0.0057\n", 342 | "Epoch 5/5\n", 343 | "17207/17207 [==============================] - 59s 3ms/step - loss: 0.0057\n", 344 | "Test RMSE: 0.09183506777697063\n" 345 | ] 346 | } 347 | ], 348 | "source": [ 349 | "X_train,y_train = prepare_dataset(train_data,best_window_size)\n", 350 | "X_test, y_test = prepare_dataset(test_data,best_window_size)\n", 351 | "\n",
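"# Rebuild the same architecture as in train_evaluate, but with the best window size\n", "# and number of LSTM units found by the GA, train on the full training split, and\n", "# report the RMSE on the held-out test set:\n", 352 | "inputs = 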
Input(shape=(best_window_size,1))\n", 353 | "x = LSTM(best_num_units, input_shape=(best_window_size,1))(inputs)\n", 354 | "predictions = Dense(1, activation='linear')(x)\n", 355 | "model = Model(inputs = inputs, outputs = predictions)\n", 356 | "model.compile(optimizer='adam',loss='mean_squared_error')\n", 357 | "model.fit(X_train, y_train, epochs=5, batch_size=10,shuffle=True)\n", 358 | "y_pred = model.predict(X_test)\n", 359 | "\n", 360 | "rmse = np.sqrt(mean_squared_error(y_test, y_pred))\n", 361 | "print('Test RMSE: ', rmse)" 362 | ] 363 | } 364 | ], 365 | "metadata": { 366 | "kernelspec": { 367 | "display_name": "Python [conda env:tensorflow]", 368 | "language": "python", 369 | "name": "conda-env-tensorflow-py" 370 | }, 371 | "language_info": { 372 | "codemirror_mode": { 373 | "name": "ipython", 374 | "version": 3 375 | }, 376 | "file_extension": ".py", 377 | "mimetype": "text/x-python", 378 | "name": "python", 379 | "nbconvert_exporter": "python", 380 | "pygments_lexer": "ipython3", 381 | "version": "3.5.4" 382 | } 383 | }, 384 | "nbformat": 4, 385 | "nbformat_minor": 2 386 | } 387 | --------------------------------------------------------------------------------