├── tests
│   ├── __init__.py
│   ├── Makefile
│   ├── test_s1_kmeans.py
│   ├── test_normalization.py
│   ├── test_boston_forest.py
│   ├── test_boston_tree.py
│   ├── test_sonar_forest.py
│   ├── test_pca.py
│   ├── test_cartpole_dqn.py
│   ├── test_heart_tree.py
│   ├── test_banknote_tree.py
│   ├── test_heart.py
│   ├── test_adult_tree.py
│   ├── test_adult_forest.py
│   ├── test_iris.py
│   ├── test_iris_bayes.py
│   ├── test_search.py
│   ├── test_iris_tree.py
│   ├── test_iris_neighbor.py
│   ├── test_adult.py
│   ├── test_fashion.py
│   ├── test_heart_forest.py
│   ├── test_banknote_forest.py
│   ├── test_heart_bayes.py
│   ├── test_iris_svm.py
│   ├── test_fishlength.py
│   ├── test_banknote.py
│   ├── test_mnist_svm.py
│   └── test_functions.py
├── pykitml
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── s1clustering.py
│   │   ├── fishlength.py
│   │   ├── boston.py
│   │   ├── banknote.py
│   │   ├── heartdisease.py
│   │   ├── sonar.py
│   │   ├── mnist.py
│   │   └── iris.py
│   ├── linear_regression.py
│   ├── _exceptions.py
│   ├── __init__.py
│   ├── pklhandler.py
│   ├── logistic_regression.py
│   ├── svm.py
│   ├── smote.py
│   ├── testing.py
│   ├── nearest_neighbor.py
│   ├── _shared_array.py
│   ├── _regressor.py
│   ├── pca.py
│   ├── cross_val.py
│   ├── _single_layer_model.py
│   ├── kmeans_clustering.py
│   ├── random_search.py
│   ├── _heatmap.py
│   ├── _functions.py
│   ├── fceux.py
│   ├── normalize.py
│   ├── preprocessing.py
│   └── random_forest.py
├── docs
│   ├── requirements.txt
│   ├── SMOTE.rst
│   ├── demo_pics
│   │   ├── tree.png
│   │   ├── kmeans.png
│   │   ├── pca_compressed.png
│   │   ├── pca_uncompressed.png
│   │   ├── bayes_confusion_matrix.png
│   │   ├── linear_svm_perf_graph.png
│   │   ├── tree_confusion_matrix.png
│   │   ├── forest_confusion_matrix.png
│   │   ├── gaussian_svm_perf_graph.png
│   │   ├── neighbor_confusion_matrix.png
│   │   ├── neural_network_perf_graph.png
│   │   ├── linear_svm_confusion_matrix.png
│   │   ├── forest_heart_confusion_matrix.png
│   │   ├── gaussian_svm_confusion_matrix.png
│   │   ├── linear_regression_perf_graph.png
│   │   ├── logistic_regression_perf_graph.png
│   │   ├── gaussian_bayes_confusion_matrix.png
│   │   ├── neural_network_confusion_matrix.png
│   │   └── logistic_regression_confusion_matrix.png
│   ├── CrossValidation.rst
│   ├── SavingAndLoading.rst
│   ├── PreprocessingDatasets.rst
│   ├── KMeans.rst
│   ├── RandomSearch.rst
│   ├── LSTM.rst
│   ├── Makefile
│   ├── PrincipalComponentAnalysis.rst
│   ├── DQN.rst
│   ├── Optimizers.rst
│   ├── make.bat
│   ├── Linear Regression.rst
│   ├── GaussianNaiveBayes.rst
│   ├── index.rst
│   ├── NearestNeighbor.rst
│   ├── NaiveBayes.rst
│   ├── DecisionTree.rst
│   ├── LogisticRegression.rst
│   ├── FeedForwardNetwork.rst
│   ├── RandomForest.rst
│   ├── SVM.rst
│   ├── Datasets.rst
│   ├── Normalization.rst
│   ├── FCEUX.rst
│   └── conf.py
├── pykitml128.png
├── requirements.txt
├── pyproject.toml
├── .readthedocs.yml
├── Pipfile
├── Makefile
├── setup.py
├── LICENSE
├── .gitignore
├── README.md
└── .vscode
    └── launch.json

/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/pykitml/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | sphinx_rtd_theme
--------------------------------------------------------------------------------
/docs/SMOTE.rst:
--------------------------------------------------------------------------------
1 | SMOTE
2 | =====
3 | 
4 | .. autofunction:: pykitml.smote
--------------------------------------------------------------------------------
/pykitml128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/pykitml128.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | matplotlib
3 | tqdm
4 | graphviz
5 | sphinx-rtd-theme
6 | 
--------------------------------------------------------------------------------
/docs/demo_pics/tree.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/tree.png
--------------------------------------------------------------------------------
/docs/CrossValidation.rst:
--------------------------------------------------------------------------------
1 | Cross Validation
2 | ================
3 | 
4 | .. autofunction:: pykitml.cross_validate
--------------------------------------------------------------------------------
/docs/demo_pics/kmeans.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/kmeans.png
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel"]
3 | build-backend = "setuptools.build_meta:__legacy__"
--------------------------------------------------------------------------------
/docs/demo_pics/pca_compressed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/pca_compressed.png
--------------------------------------------------------------------------------
/docs/demo_pics/pca_uncompressed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/pca_uncompressed.png
--------------------------------------------------------------------------------
/docs/demo_pics/bayes_confusion_matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/bayes_confusion_matrix.png
--------------------------------------------------------------------------------
/docs/demo_pics/linear_svm_perf_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/linear_svm_perf_graph.png
--------------------------------------------------------------------------------
/docs/demo_pics/tree_confusion_matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/tree_confusion_matrix.png
--------------------------------------------------------------------------------
/docs/demo_pics/forest_confusion_matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/forest_confusion_matrix.png
--------------------------------------------------------------------------------
/docs/demo_pics/gaussian_svm_perf_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/gaussian_svm_perf_graph.png
--------------------------------------------------------------------------------
/docs/demo_pics/neighbor_confusion_matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/neighbor_confusion_matrix.png
--------------------------------------------------------------------------------
/docs/demo_pics/neural_network_perf_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/neural_network_perf_graph.png
--------------------------------------------------------------------------------
/docs/demo_pics/linear_svm_confusion_matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/linear_svm_confusion_matrix.png
--------------------------------------------------------------------------------
/docs/demo_pics/forest_heart_confusion_matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/forest_heart_confusion_matrix.png
--------------------------------------------------------------------------------
/docs/demo_pics/gaussian_svm_confusion_matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/gaussian_svm_confusion_matrix.png
--------------------------------------------------------------------------------
/docs/demo_pics/linear_regression_perf_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/linear_regression_perf_graph.png
--------------------------------------------------------------------------------
/docs/demo_pics/logistic_regression_perf_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/logistic_regression_perf_graph.png
--------------------------------------------------------------------------------
/docs/demo_pics/gaussian_bayes_confusion_matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/gaussian_bayes_confusion_matrix.png
--------------------------------------------------------------------------------
/docs/demo_pics/neural_network_confusion_matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/neural_network_confusion_matrix.png
--------------------------------------------------------------------------------
/docs/demo_pics/logistic_regression_confusion_matrix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainingComputers/pykitml/HEAD/docs/demo_pics/logistic_regression_confusion_matrix.png
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | build:
3 |   os: ubuntu-22.04
4 |   tools:
5 |     python: "3.10"
6 | sphinx:
7 |   configuration: docs/conf.py
8 | python:
9 |   install:
10 |     - method: pip
11 |       path: .
12 |     - requirements: requirements.txt
13 | 
--------------------------------------------------------------------------------
/docs/SavingAndLoading.rst:
--------------------------------------------------------------------------------
1 | Saving and Loading Objects/Models
2 | =================================
3 | 
4 | Saving objects to file
5 | ----------------------
6 | .. autofunction:: pykitml.save
7 | 
8 | Loading objects from file
9 | -------------------------
10 | .. autofunction:: pykitml.load
--------------------------------------------------------------------------------
/docs/PreprocessingDatasets.rst:
--------------------------------------------------------------------------------
1 | Preprocessing Datasets
2 | ======================
3 | 
4 | Dealing with categorical/one-hot values
5 | ---------------------------------------
6 | 
7 | .. autofunction:: pykitml.onehot
8 | 
9 | .. autofunction:: pykitml.onehot_cols
10 | 
11 | .. autofunction:: pykitml.onehot_cols_traintest
12 | 
13 | Generating Polynomial Features
14 | ------------------------------
15 | 
16 | .. autofunction:: pykitml.polynomial
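
A minimal sketch of how the one-hot helpers above fit together, mirroring their use in tests/test_heart.py; the array values and column index here are made up for illustration:

    import numpy as np
    import pykitml as pk

    # Column 0 is continuous, column 1 is categorical (values 0, 1 or 2)
    inputs = np.array([[1.2, 0.0], [3.4, 2.0], [2.2, 1.0]])

    # Expand the categorical column into one-hot columns
    inputs = pk.onehot_cols(inputs, [1])

    # Convert integer class labels into one-hot target rows
    outputs = pk.onehot(np.array([0, 2, 1]))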
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | name = "pypi"
3 | url = "https://pypi.org/simple"
4 | verify_ssl = true
5 | 
6 | [dev-packages]
7 | 
8 | [packages]
9 | numpy = "*"
10 | matplotlib = "*"
11 | pytest = "*"
12 | pylint = "*"
13 | radon = "*"
14 | sphinx = "*"
15 | sphinx-rtd-theme = "*"
16 | sphinx-bootstrap-theme = "*"
17 | tqdm = "*"
18 | graphviz = "*"
19 | gprof2dot = "*"
20 | gymnasium = "*"
21 | autopep8 = "*"
22 | pygame = "*"
23 | 
24 | [requires]
25 | python_version = "3.10.0"
--------------------------------------------------------------------------------
/docs/KMeans.rst:
--------------------------------------------------------------------------------
1 | K-Means Clustering
2 | ==================
3 | 
4 | K-Means Function
5 | ----------------
6 | 
7 | .. autofunction:: pykitml.kmeans
8 | 
9 | Example: S1 Dataset
10 | -------------------
11 | 
12 | **Dataset**
13 | 
14 | :ref:`s1clustering_dataset`
15 | 
16 | **Training**
17 | 
18 | .. literalinclude:: ../tests/test_s1_kmeans.py
19 |    :pyobject: test_s1_kmeans
20 |    :lines: 3-
21 |    :end-before: # Assert
22 |    :dedent: 4
23 | 
24 | **Scatter Plot**
25 | 
26 | .. image:: ./demo_pics/kmeans.png
--------------------------------------------------------------------------------
/docs/RandomSearch.rst:
--------------------------------------------------------------------------------
1 | Random Search for Hyperparameters
2 | =================================
3 | 
4 | Class Reference
5 | ---------------
6 | 
7 | .. autoclass:: pykitml.RandomSearch
8 | 
9 |     .. automethod:: search
10 | 
11 |     .. automethod:: set_cost
12 | 
13 |     .. autoattribute:: best
14 | 
15 | Example: Tuning Feed-forward network for fashion-MNIST
16 | -------------------------------------------------------
17 | 
18 | .. literalinclude:: ../tests/test_search.py
19 |    :pyobject: test_search
20 |    :lines: 3-
21 |    :end-before: # Assert
22 |    :dedent: 4
--------------------------------------------------------------------------------
/docs/LSTM.rst:
--------------------------------------------------------------------------------
1 | Long short-term memory (LSTM) Network
2 | =====================================
3 | 
4 | Class Reference
5 | ---------------
6 | 
7 | .. autoclass:: pykitml.LSTM
8 | 
9 |     .. automethod:: __init__
10 | 
11 |     .. automethod:: feed
12 | 
13 |     .. automethod:: get_output
14 | 
15 |     .. automethod:: get_output_onehot
16 | 
17 |     .. automethod:: train
18 | 
19 |     .. automethod:: reset
20 | 
21 |     .. automethod:: plot_performance
22 | 
23 |     .. automethod:: cost
24 | 
25 |     .. automethod:: accuracy
26 | 
27 |     .. automethod:: r2score
28 | 
29 |     .. automethod:: confusion_matrix
30 | 
31 |     .. autoattribute:: nlayers
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line.
5 | SPHINXOPTS = 
6 | SPHINXBUILD = sphinx-build
7 | SOURCEDIR = .
8 | BUILDDIR = _build
9 | 
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 | 
14 | .PHONY: help Makefile
15 | 
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/pykitml/linear_regression.py:
--------------------------------------------------------------------------------
1 | from ._single_layer_model import SingleLayerModel
2 | from ._regressor import Regressor
3 | from . import _functions
4 | 
5 | 
6 | class LinearRegression(SingleLayerModel, Regressor):
7 |     '''
8 |     Implements linear regression.
9 |     '''
10 | 
11 |     @property
12 |     def _activ_func(self):
13 |         return _functions.identity
14 | 
15 |     @property
16 |     def _activ_func_prime(self):
17 |         return _functions.identity_prime
18 | 
19 |     @property
20 |     def _cost_func(self):
21 |         return _functions.mse
22 | 
23 |     @property
24 |     def _cost_func_prime(self):
25 |         return _functions.mse_prime
26 | 
--------------------------------------------------------------------------------
/pykitml/_exceptions.py:
--------------------------------------------------------------------------------
1 | class InvalidFeatureType(Exception):
2 |     '''
3 |     Raised when specified feature type is invalid for the model.
4 |     '''
5 | 
6 | 
7 | class InvalidDistributionType(Exception):
8 |     '''
9 |     Raised when specified distribution type is invalid for the model.
10 |     '''
11 | 
12 | 
13 | def _valid_list(input_list, valid_items):
14 |     '''
15 |     Used to check if items in a list are valid.
16 | 
17 |     Parameters
18 |     ----------
19 |     input_list : list
20 |         The list to check/validate.
21 |     valid_items : list
22 |         List of valid items the list can contain.
23 |     '''
24 |     return all(item in valid_items for item in input_list) and len(input_list) > 0
25 | 
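
A quick sketch of what this helper returns (the item values below are made up for illustration):

    from pykitml._exceptions import _valid_list

    _valid_list(['a', 'b'], ['a', 'b', 'c'])  # True, all items are valid
    _valid_list(['a', 'd'], ['a', 'b', 'c'])  # False, 'd' is not a valid item
    _valid_list([], ['a', 'b', 'c'])          # False, empty lists are rejected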
--------------------------------------------------------------------------------
/tests/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY : help viewprofiles clean $(wildcard *.dat)
2 | help :
3 | 	@echo "clean : Remove auto-generated files."
4 | 	@echo "test : Run tests."
5 | 	@echo "viewprofiles : View all *.dat profile files."
6 | 
7 | now=$(shell date +%d-%m_%H-%M-%S)
8 | 
9 | test:
10 | 	rm -f *.pkl
11 | 	python3 -m pytest
12 | 
13 | viewprofiles: $(wildcard *.dat)
14 | 
15 | $(wildcard *.dat):
16 | 	$(eval name=$(patsubst %64,%,$(basename $(notdir $@))))
17 | 	gprof2dot -f pstats $@ | dot -Tpng -o profile_$(name)_$(now).png
18 | 	xdg-open profile_$(name)_$(now).png
19 | 
20 | clean:
21 | 	rm -f *.pkl
22 | 	rm -f -r __pycache__
23 | 	rm -f *.dat
24 | 	rm -f profile_*.png
25 | 	rm -f *.gz
26 | 	rm -f *.gv.png
27 | 	rm -f *.gv.pdf
28 | 	rm -f *.gv
--------------------------------------------------------------------------------
/docs/PrincipalComponentAnalysis.rst:
--------------------------------------------------------------------------------
1 | Principal Component Analysis
2 | ============================
3 | 
4 | Class Reference
5 | ---------------
6 | 
7 | .. autoclass:: pykitml.PCA
8 | 
9 |     .. automethod:: __init__
10 | 
11 |     .. automethod:: transform
12 | 
13 |     .. automethod:: inverse_transform
14 | 
15 |     .. autoattribute:: retention
16 | 
17 | Example: Compressing Fashion MNIST dataset
18 | ------------------------------------------
19 | 
20 | .. literalinclude:: ../tests/test_pca.py
21 |    :pyobject: test_pca_compression
22 |    :lines: 3-
23 |    :dedent: 4
24 | 
25 | **Original/Uncompressed**
26 | 
27 | .. image:: ./demo_pics/pca_uncompressed.png
28 | 
29 | **Recovered/Compressed**
30 | 
31 | .. image:: ./demo_pics/pca_compressed.png
32 | 
--------------------------------------------------------------------------------
/docs/DQN.rst:
--------------------------------------------------------------------------------
1 | Deep Q Learning
2 | ===============
3 | 
4 | DQNAgent Class
5 | --------------
6 | 
7 | .. autoclass:: pykitml.DQNAgent
8 | 
9 |     .. automethod:: __init__
10 | 
11 |     .. automethod:: train
12 | 
13 |     .. automethod:: exploit
14 | 
15 |     .. automethod:: plot_performance
16 | 
17 | .. _environment:
18 | 
19 | Environment Class
20 | -----------------
21 | 
22 | .. autoclass:: pykitml.Environment
23 | 
24 |     .. automethod:: reset
25 | 
26 |     .. automethod:: step
27 | 
28 |     .. automethod:: close
29 | 
30 |     .. automethod:: render
31 | 
32 | Example: Cartpole using gymnasium
33 | ----------------------------------
34 | 
35 | .. literalinclude:: ../tests/test_cartpole_dqn.py
36 |    :pyobject: test_cartpole
37 |    :lines: 3-
38 |    :dedent: 4
39 | 
--------------------------------------------------------------------------------
/docs/Optimizers.rst:
--------------------------------------------------------------------------------
1 | .. _optimizers:
2 | 
3 | Optimizers
4 | ==========
5 | 
6 | Gradient descent
7 | ----------------
8 | 
9 | .. autoclass:: pykitml.GradientDescent
10 | 
11 |     .. automethod:: __init__
12 | 
13 | Momentum
14 | --------
15 | 
16 | .. autoclass:: pykitml.Momentum
17 | 
18 |     .. automethod:: __init__
19 | 
20 | Nesterov momentum
21 | -----------------
22 | 
23 | .. autoclass:: pykitml.Nesterov
24 | 
25 |     .. automethod:: __init__
26 | 
27 | Adagrad
28 | -------
29 | 
30 | .. autoclass:: pykitml.Adagrad
31 | 
32 |     .. automethod:: __init__
33 | 
34 | RMSprop
35 | -------
36 | 
37 | .. autoclass:: pykitml.RMSprop
38 | 
39 |     .. automethod:: __init__
40 | 
41 | Adam
42 | ----
43 | 
44 | .. autoclass:: pykitml.Adam
45 | 
46 |     .. automethod:: __init__
47 | 
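
All of these optimizers are constructed up front and then passed to a model's train() method. A short sketch based on the call in tests/test_heart.py; the GradientDescent line assumes it takes the same learning_rate keyword:

    import pykitml as pk

    # As used in tests/test_heart.py
    adam = pk.Adam(learning_rate=0.015, decay_rate=0.99)

    # Assumed to take the same learning_rate keyword
    sgd = pk.GradientDescent(learning_rate=0.01)

    # model.train(training_data=..., targets=..., optimizer=adam, ...)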
--------------------------------------------------------------------------------
/pykitml/__init__.py:
--------------------------------------------------------------------------------
1 | '''
2 | pykitml (Python Kit for Machine Learning),
3 | a Machine Learning library written in Python and NumPy.
4 | Copyright (c) Vishnu Shankar
5 | MIT License (See LICENSE file)
6 | https://github.com/RainingComputers
7 | '''
8 | 
9 | from .network import *
10 | from .linear_regression import *
11 | from .logistic_regression import *
12 | from .svm import *
13 | from .naive_bayes import *
14 | from .decision_tree import *
15 | from .random_forest import *
16 | from .nearest_neighbor import *
17 | from .pca import *
18 | from .kmeans_clustering import *
19 | from .pklhandler import *
20 | from .normalize import *
21 | from .optimizers import *
22 | from .preprocessing import *
23 | from .cross_val import *
24 | from .lstm import *
25 | from .smote import *
26 | from .random_search import *
27 | from .fceux import *
28 | from .dqn import *
29 | 
30 | from . import testing
31 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY : help, checkmicc, clean, lint, test, gendocs
2 | help :
3 | 	@echo "checkmicc : Check maintainability-index and cyclomatic-complexity."
4 | 	@echo "clean : Remove auto-generated files."
5 | 	@echo "lint : Run pylint."
6 | 	@echo "test : Run tests."
7 | 	@echo "gendocs : Generate documentation."
8 | 	@echo "opendocs : Generate and open documentation in default browser."
9 | 
10 | checkmicc:
11 | 	python3 -m radon mi pykitml
12 | 	python3 -m radon cc pykitml
13 | 
14 | clean:
15 | 	rm -f *.pkl
16 | 	rm -f -r .pytest_cache
17 | 	rm -f -r pykitml/__pycache__
18 | 	make -C tests/ clean
19 | 	make -C docs/ clean
20 | 	rm -f -r build/
21 | 	rm -f -r dist/
22 | 
23 | lint:
24 | 	pylint pykitml tests --rcfile ./.pylintrc
25 | 
26 | test:
27 | 	make -C tests/ test
28 | 
29 | gendocs:
30 | 	make -C docs/ clean
31 | 	make -C docs/ html
32 | 
33 | opendocs: gendocs
34 | 	xdg-open docs/_build/html/index.html
35 | 
36 | 
37 | 
38 | 
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 | 
3 | pushd %~dp0
4 | 
5 | REM Command file for Sphinx documentation
6 | 
7 | if "%SPHINXBUILD%" == "" (
8 | 	set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
33 | 
34 | :end
35 | popd
36 | 
--------------------------------------------------------------------------------
/tests/test_s1_kmeans.py:
--------------------------------------------------------------------------------
1 | from pykitml.testing import pktest_graph
2 | 
3 | 
4 | @pktest_graph
5 | def test_s1_kmeans():
6 |     import os
7 | 
8 |     import pykitml as pk
9 |     from pykitml.datasets import s1clustering
10 |     import matplotlib.pyplot as plt
11 | 
12 |     # Download the dataset
13 |     if not os.path.exists('s1.pkl'):
14 |         s1clustering.get()
15 | 
16 |     # Load the dataset
17 |     train_data = s1clustering.load()
18 | 
19 |     # Run KMeans
20 |     clusters, cost = pk.kmeans(train_data, 15)
21 | 
22 |     # Plot dataset, x and y
23 |     plt.scatter(train_data[:, 0], train_data[:, 1])
24 | 
25 |     # Plot clusters, x and y
26 |     plt.scatter(clusters[:, 0], clusters[:, 1], c='red')
27 | 
28 |     # Show graph
29 |     plt.show()
30 | 
31 |     # Assert cost
32 |     assert cost <= 1790000000
33 | 
34 | 
35 | if __name__ == '__main__':
36 |     try:
37 |         test_s1_kmeans.__wrapped__()
38 |     except AssertionError:
39 |         pass
40 | 
--------------------------------------------------------------------------------
/tests/test_normalization.py:
--------------------------------------------------------------------------------
1 | # ================================================
2 | # = Test normalization/feature-scaling functions =
3 | # ================================================
4 | 
5 | 
6 | import numpy as np
7 | 
8 | import pykitml as pk
9 | 
10 | eg_array = np.array([
11 |     [0.1, 0.3434, 1.3434, 3],
12 |     [1.2, 4.54, 6.7, 3.456],
13 |     [5.678, 2.345, 2.453, 8.345],
14 |     [2.3, 6.2, 8.3, 1.2]
15 | ])
16 | 
17 | 
18 | def test_minmax():
19 |     expected_output = (np.array([0.1, 0.3434, 1.3434, 1.2]),
20 |                        np.array([5.678, 6.2, 8.3, 8.345]))
21 | 
22 |     assert np.allclose(pk.get_minmax(eg_array), expected_output)
23 | 
24 | 
25 | def test_normalize():
26 |     array_min, array_max = pk.get_minmax(eg_array)
27 | 
28 |     norm_array = pk.normalize_minmax(eg_array, array_min, array_max)
29 |     denorm_array = pk.denormalize_minmax(norm_array, array_min, array_max)
30 | 
31 |     assert np.allclose(denorm_array, eg_array)
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 | 
3 | with open('README.md', 'r') as f:
4 |     long_description = f.read()
5 | 
6 | setuptools.setup(
7 |     name='pykitml',
8 |     version='0.1.3',
9 |     author='RainingComputers',
10 |     author_email='vishnu.vish.shankar@gmail.com',
11 |     description='Machine Learning library written in Python and NumPy.',
12 |     long_description=long_description,
13 |     long_description_content_type='text/markdown',
14 |     url='https://github.com/RainingComputers/pykitml',
15 |     packages=setuptools.find_packages(exclude=['docs', 'tests']),
16 |     python_requires='>=3.10',
17 |     install_requires=[
18 |         'numpy', 'matplotlib', 'tqdm', 'graphviz'
19 |     ],
20 |     classifiers=[
21 |         'Programming Language :: Python :: 3',
22 |         'License :: OSI Approved :: MIT License',
23 |         'Operating System :: OS Independent',
24 |         'Development Status :: 3 - Alpha',
25 |         'Topic :: Scientific/Engineering :: Artificial Intelligence'
26 |     ],
27 |     keywords='pykitml'
28 | )
29 | 
--------------------------------------------------------------------------------
/docs/Linear Regression.rst:
--------------------------------------------------------------------------------
1 | Linear Regression
2 | =================
3 | 
4 | Class Reference
5 | ---------------
6 | 
7 | .. autoclass:: pykitml.LinearRegression
8 | 
9 |     .. automethod:: __init__
10 | 
11 |     .. automethod:: feed
12 | 
13 |     .. automethod:: get_output
14 | 
15 |     .. automethod:: train
16 | 
17 |     .. automethod:: plot_performance
18 | 
19 |     .. automethod:: r2score
20 | 
21 |     .. automethod:: cost
22 | 
23 | 
24 | Example: Predicting Fish Length
25 | -------------------------------
26 | **Dataset**
27 | 
28 | :ref:`fishlength_dataset`
29 | 
30 | **Training Model**
31 | 
32 | .. literalinclude:: ../tests/test_fishlength.py
33 |    :pyobject: test_fishlength
34 |    :lines: 3-
35 |    :end-before: # Assert
36 |    :dedent: 4
37 | 
38 | **Predict length of fish that is 28 days old at 25C**
39 | 
40 | .. literalinclude:: ../tests/test_fishlength.py
41 |    :pyobject: test_predict_fishlength
42 |    :lines: 3-
43 |    :dedent: 4
44 | 
45 | **Performance Graph**
46 | 
47 | .. image:: ./demo_pics/linear_regression_perf_graph.png
--------------------------------------------------------------------------------
/docs/GaussianNaiveBayes.rst:
--------------------------------------------------------------------------------
1 | Gaussian Naive Bayes
2 | ====================
3 | 
4 | Class Reference
5 | ---------------
6 | 
7 | .. autoclass:: pykitml.GaussianNaiveBayes
8 | 
9 |     .. automethod:: __init__
10 | 
11 |     .. automethod:: feed
12 | 
13 |     .. automethod:: get_output
14 | 
15 |     .. automethod:: get_output_onehot
16 | 
17 |     .. automethod:: train
18 | 
19 |     .. automethod:: accuracy
20 | 
21 |     .. automethod:: confusion_matrix
22 | 
23 | Example: Classifying Iris
24 | -------------------------
25 | 
26 | **Dataset**
27 | 
28 | :ref:`iris_dataset`
29 | 
30 | **Training**
31 | 
32 | .. literalinclude:: ../tests/test_iris_bayes.py
33 |    :pyobject: test_iris_bayes
34 |    :lines: 3-
35 |    :end-before: # Assert
36 |    :dedent: 4
37 | 
38 | **Predict type of species with sepal-length, sepal-width, petal-length, petal-width:
39 | 5.8, 2.7, 3.9, 1.2**
40 | 
41 | .. literalinclude:: ../tests/test_iris_bayes.py
42 |    :pyobject: test_predict_iris_bayes
43 |    :lines: 3-
44 |    :dedent: 4
45 | 
46 | **Confusion Matrix**
47 | 
48 | .. image:: ./demo_pics/gaussian_bayes_confusion_matrix.png
49 | 
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. pykitml documentation master file, created by
2 |    sphinx-quickstart on Thu Feb 21 17:21:36 2019.
3 | 
4 | pykitml (Python Kit for Machine Learning) Docs
5 | ==============================================
6 | 
7 | .. image:: ../pykitml128.png
8 | 
9 | Documentation and reference for pykitml, a simple Machine Learning library written in Python and NumPy.
10 | 
11 | 
12 | Installation
13 | ------------
14 | 
15 | .. code-block:: bash
16 | 
17 |     python3 -m pip install pykitml
18 | 
19 | .. toctree::
20 |    :maxdepth: 1
21 |    :caption: Package Reference:
22 | 
23 |    Linear Regression
24 |    LogisticRegression
25 |    SVM
26 |    FeedForwardNetwork
27 |    LSTM
28 |    Optimizers
29 |    NearestNeighbor
30 |    DecisionTree
31 |    RandomForest
32 |    NaiveBayes
33 |    GaussianNaiveBayes
34 |    KMeans
35 |    PrincipalComponentAnalysis
36 |    RandomSearch
37 |    Normalization
38 |    PreprocessingDatasets
39 |    SMOTE
40 |    CrossValidation
41 |    Datasets
42 |    SavingAndLoading
43 |    FCEUX
44 |    DQN
45 | 
46 | 
47 | Indices and tables
48 | ==================
49 | 
50 | * :ref:`genindex`
51 | 
--------------------------------------------------------------------------------
/docs/NearestNeighbor.rst:
--------------------------------------------------------------------------------
1 | Nearest Neighbor
2 | =================
3 | 
4 | Class Reference
5 | ---------------
6 | 
7 | .. autoclass:: pykitml.NearestNeighbor
8 | 
9 |     .. automethod:: __init__
10 | 
11 |     .. automethod:: feed
12 | 
13 |     .. automethod:: get_output
14 | 
15 |     .. automethod:: get_output_onehot
16 | 
17 |     .. automethod:: train
18 | 
19 |     .. automethod:: accuracy
20 | 
21 |     .. automethod:: r2score
22 | 
23 |     .. automethod:: confusion_matrix
24 | 
25 | Example: Classifying Iris
26 | -------------------------
27 | 
28 | **Dataset**
29 | 
30 | :ref:`iris_dataset`
31 | 
32 | **Training**
33 | 
34 | .. literalinclude:: ../tests/test_iris_neighbor.py
35 |    :pyobject: test_iris_neighbor
36 |    :lines: 3-
37 |    :end-before: # Assert
38 |    :dedent: 4
39 | 
40 | **Predict type of species with sepal-length, sepal-width, petal-length, petal-width:
41 | 5.8, 2.7, 3.9, 1.2**
42 | 
43 | .. literalinclude:: ../tests/test_iris_neighbor.py
44 |    :pyobject: test_predict_iris_neighbor
45 |    :lines: 3-
46 |    :dedent: 4
47 | 
48 | **Confusion Matrix**
49 | 
50 | .. image:: ./demo_pics/neighbor_confusion_matrix.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 B Vishnu Shankar
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/docs/NaiveBayes.rst:
--------------------------------------------------------------------------------
1 | Naive Bayes
2 | ===========
3 | 
4 | Class Reference
5 | ---------------
6 | 
7 | .. autoclass:: pykitml.NaiveBayes
8 | 
9 |     .. automethod:: __init__
10 | 
11 |     .. automethod:: feed
12 | 
13 |     .. automethod:: get_output
14 | 
15 |     .. automethod:: get_output_onehot
16 | 
17 |     .. automethod:: train
18 | 
19 |     .. automethod:: accuracy
20 | 
21 |     .. automethod:: confusion_matrix
22 | 
23 | Example: Heart Disease Prediction
24 | ---------------------------------
25 | 
26 | **Dataset**
27 | 
28 | :ref:`heart_dataset`
29 | 
30 | **Training**
31 | 
32 | .. literalinclude:: ../tests/test_heart_bayes.py
33 |    :pyobject: test_heart_bayes
34 |    :lines: 3-
35 |    :end-before: # Assert
36 |    :dedent: 4
37 | 
38 | **Predict heartdisease for a person with
39 | age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal:
40 | 67, 1, 4, 160, 286, 0, 2, 108, 1, 1.5, 2, 3, 3**
41 | 
42 | .. literalinclude:: ../tests/test_heart_bayes.py
43 |    :pyobject: test_predict_heart_bayes
44 |    :lines: 3-
45 |    :dedent: 4
46 | 
47 | **Confusion Matrix**
48 | 
49 | .. image:: ./demo_pics/bayes_confusion_matrix.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Pickle files
2 | *.pkl
3 | 
4 | # Datasets
5 | *.data
6 | *.gz
7 | *.*data
8 | 
9 | # TODO list
10 | TODO.txt
11 | 
12 | # Python
13 | __pycache__/
14 | *.py[cod]
15 | *$py.class
16 | 
17 | # Sphinx
18 | docs/_build/*
19 | docs/_static/*
20 | 
21 | # Distribution / packaging
22 | .Python
23 | build/
24 | develop-eggs/
25 | dist/
26 | downloads/
27 | eggs/
28 | .eggs/
29 | lib/
30 | lib64/
31 | parts/
32 | sdist/
33 | var/
34 | wheels/
35 | pip-wheel-metadata/
36 | share/python-wheels/
37 | *.egg-info/
38 | .installed.cfg
39 | *.egg
40 | MANIFEST
41 | 
42 | # pytest
43 | .pytest_cache/
44 | 
45 | # profiling
46 | *.dat
47 | profile_*.png
48 | 
49 | # pyenv
50 | .python-version
51 | 
52 | # Environments
53 | .env
54 | .venv
55 | env/
56 | venv/
57 | ENV/
58 | env.bak/
59 | venv.bak/
60 | 
61 | # Windows thumbnail cache files
62 | Thumbs.db
63 | ehthumbs.db
64 | ehthumbs_vista.db
65 | 
66 | # Dump file
67 | *.stackdump
68 | 
69 | # Folder config file
70 | [Dd]esktop.ini
71 | 
72 | # Recycle Bin used on file shares
73 | $RECYCLE.BIN/
74 | 
75 | # VS CODE
76 | .vscode/settings.json
77 | 
78 | # graphviz
79 | *.gv
80 | *.gv.png
81 | *.gv.pdf
82 | 
83 | # temp folder
84 | temp/
85 | 
--------------------------------------------------------------------------------
/pykitml/pklhandler.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | 
3 | '''
4 | This module contains functions for saving and
5 | loading .pkl files
6 | '''
7 | 
8 | 
9 | def save(object_, file_name):
10 |     '''
11 |     Saves an object into a file.
12 | 
13 |     Parameters
14 |     ----------
15 |     object_ : object
16 |         The object to save
17 |     file_name : str
18 |         The name of the file to save the object in.
19 | 
20 |     Raises
21 |     ------
22 |     OSError
23 |         If the file cannot be created due to a system-related error.
24 |     '''
25 |     file = open(file_name, 'wb')
26 |     pickle.dump(object_, file)
27 |     file.close()
28 | 
29 | 
30 | def load(file_name):
31 |     '''
32 |     Loads an object from file.
33 | 
34 |     Parameters
35 |     ----------
36 |     file_name : str
37 |         The name of the file to load the object from.
38 | 
39 |     Returns
40 |     -------
41 |     object
42 |         The python object stored in the file.
43 | 
44 |     Raises
45 |     ------
46 |     FileNotFoundError
47 |         If the file does not exist.
48 |     '''
49 |     file = open(file_name, 'rb')
50 |     object_ = pickle.load(file)
51 |     file.close()
52 |     return object_
53 | 
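
A round-trip sketch of the two functions above, mirroring how the tests use them (the model and file name here are arbitrary):

    import pykitml as pk

    model = pk.LinearRegression(3, 1)

    # Save the object to disk, as the tests do after training...
    pk.save(model, 'model.pkl')

    # ...and load it back later
    model = pk.load('model.pkl')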
--------------------------------------------------------------------------------
/docs/DecisionTree.rst:
--------------------------------------------------------------------------------
1 | Decision Tree
2 | =============
3 | 
4 | Class Reference
5 | ---------------
6 | 
7 | .. autoclass:: pykitml.DecisionTree
8 | 
9 |     .. automethod:: __init__
10 | 
11 |     .. automethod:: feed
12 | 
13 |     .. automethod:: get_output
14 | 
15 |     .. automethod:: get_output_onehot
16 | 
17 |     .. automethod:: train
18 | 
19 |     .. automethod:: accuracy
20 | 
21 |     .. automethod:: confusion_matrix
22 | 
23 |     .. automethod:: r2score
24 | 
25 |     .. automethod:: show_tree
26 | 
27 | Example: Classifying Iris
28 | -------------------------
29 | 
30 | **Dataset**
31 | 
32 | :ref:`iris_dataset`
33 | 
34 | **Training**
35 | 
36 | .. literalinclude:: ../tests/test_iris_tree.py
37 |    :pyobject: test_iris_tree
38 |    :lines: 3-
39 |    :end-before: # Assert
40 |    :dedent: 4
41 | 
42 | **Predict type of species with sepal-length, sepal-width, petal-length, petal-width:
43 | 5.8, 2.7, 3.9, 1.2**
44 | 
45 | .. literalinclude:: ../tests/test_iris_tree.py
46 |    :pyobject: test_predict_iris_tree
47 |    :lines: 3-
48 |    :dedent: 4
49 | 
50 | **Tree Graph**
51 | 
52 | .. image:: ./demo_pics/tree.png
53 | 
54 | **Confusion Matrix**
55 | 
56 | .. image:: ./demo_pics/tree_confusion_matrix.png
--------------------------------------------------------------------------------
/docs/LogisticRegression.rst:
--------------------------------------------------------------------------------
1 | Logistic Regression
2 | ===================
3 | 
4 | Class Reference
5 | ---------------
6 | 
7 | .. autoclass:: pykitml.LogisticRegression
8 | 
9 |     .. automethod:: __init__
10 | 
11 |     .. automethod:: feed
12 | 
13 |     .. automethod:: get_output
14 | 
15 |     .. automethod:: get_output_onehot
16 | 
17 |     .. automethod:: train
18 | 
19 |     .. automethod:: plot_performance
20 | 
21 |     .. automethod:: cost
22 | 
23 |     .. automethod:: accuracy
24 | 
25 |     .. automethod:: confusion_matrix
26 | 
27 | Example: Banknote Authentication
28 | --------------------------------
29 | 
30 | **Dataset**
31 | 
32 | :ref:`banknote_dataset`
33 | 
34 | **Training**
35 | 
36 | .. literalinclude:: ../tests/test_banknote.py
37 |    :pyobject: test_banknote
38 |    :lines: 3-
39 |    :end-before: # Assert
40 |    :dedent: 4
41 | 
42 | **Predict banknote validity with variance, skewness, curtosis, entropy:
43 | -2.3, -9.3, 9.37, -0.86**
44 | 
45 | .. literalinclude:: ../tests/test_banknote.py
46 |    :pyobject: test_predict_banknote
47 |    :lines: 3-
48 |    :dedent: 4
49 | 
50 | **Performance Graph**
51 | 
52 | .. image:: ./demo_pics/logistic_regression_perf_graph.png
53 | 
54 | **Confusion Matrix**
55 | 
56 | .. image:: ./demo_pics/logistic_regression_confusion_matrix.png
57 | 
--------------------------------------------------------------------------------
/docs/FeedForwardNetwork.rst:
--------------------------------------------------------------------------------
1 | Feed-Forward Neural Network
2 | ===========================
3 | 
4 | Class Reference
5 | ---------------
6 | 
7 | .. autoclass:: pykitml.NeuralNetwork
8 | 
9 |     .. automethod:: __init__
10 | 
11 |     .. automethod:: feed
12 | 
13 |     .. automethod:: get_output
14 | 
15 |     .. automethod:: get_output_onehot
16 | 
17 |     .. automethod:: train
18 | 
19 |     .. automethod:: plot_performance
20 | 
21 |     .. automethod:: cost
22 | 
23 |     .. automethod:: accuracy
24 | 
25 |     .. automethod:: r2score
26 | 
27 |     .. automethod:: confusion_matrix
28 | 
29 |     .. autoattribute:: nlayers
30 | 
31 | Example: Handwritten Digit Recognition (MNIST)
32 | ----------------------------------------------
33 | 
34 | **Dataset**
35 | 
36 | :ref:`mnist_dataset`
37 | 
38 | **Training**
39 | 
40 | .. literalinclude:: ../tests/test_mnist.py
41 |    :pyobject: test_adam
42 |    :lines: 3-
43 |    :end-before: # Assert
44 |    :dedent: 4
45 | 
46 | **Predicting**
47 | 
48 | .. literalinclude:: ../tests/test_mnist.py
49 |    :pyobject: test_predict_mnist_adam
50 |    :lines: 3-
51 |    :dedent: 4
52 | 
53 | **Performance Graph**
54 | 
55 | .. image:: ./demo_pics/neural_network_perf_graph.png
56 | 
57 | **Confusion Matrix**
58 | 
59 | .. image:: ./demo_pics/neural_network_confusion_matrix.png
--------------------------------------------------------------------------------
/pykitml/logistic_regression.py:
--------------------------------------------------------------------------------
1 | from ._single_layer_model import SingleLayerModel
2 | from ._classifier import Classifier
3 | from . import _functions
4 | 
5 | 
6 | class LogisticRegression(SingleLayerModel, Classifier):
7 |     '''
8 |     Implements logistic regression for classification.
9 |     '''
10 | 
11 |     def __init__(self, input_size, output_size, reg_param=0):
12 |         # Initialize base class
13 |         super(LogisticRegression, self).__init__(input_size, output_size, reg_param)
14 | 
15 |         # Choose output activation function
16 |         if output_size == 1:
17 |             # For binary classification
18 |             self._afunc = _functions.sigmoid
19 |             self._afunc_prime = _functions.sigmoid_prime
20 |         else:
21 |             # For multiclass classification
22 |             self._afunc = _functions.softmax
23 |             self._afunc_prime = _functions.softmax_prime
24 | 
25 |     @property
26 |     def _activ_func(self):
27 |         return self._afunc
28 | 
29 |     @property
30 |     def _activ_func_prime(self):
31 |         return self._afunc_prime
32 | 
33 |     @property
34 |     def _cost_func(self):
35 |         return _functions.cross_entropy
36 | 
37 |     @property
38 |     def _cost_func_prime(self):
39 |         return _functions.cross_entropy_prime
40 | 
--------------------------------------------------------------------------------
/tests/test_boston_forest.py:
--------------------------------------------------------------------------------
1 | from pykitml.testing import pktest_nograph
2 | 
3 | 
4 | @pktest_nograph
5 | def test_boston_forest():
6 |     import pykitml as pk
7 |     from pykitml.datasets import boston
8 | 
9 |     import os
10 | 
11 |     # Download the dataset
12 |     if not os.path.exists('boston.pkl'):
13 |         boston.get()
14 | 
15 |     # Load boston data set
16 |     inputs_train, outputs_train, inputs_test, outputs_test = boston.load()
17 | 
18 |     # Create model
19 |     ftypes = [
20 |         'continues', 'continues', 'continues',
21 |         'categorical', 'continues', 'continues',
22 |         'continues', 'continues', 'continues',
23 |         'continues', 'continues', 'continues', 'continues'
24 |     ]
25 |     forest_boston = pk.RandomForest(13, 1, feature_type=ftypes, max_depth=4, min_split=20, regression=True)
26 | 
27 |     # Train
28 |     forest_boston.train(inputs_train, outputs_train)
29 | 
30 |     # Print r2score
31 |     r2score_train = forest_boston.r2score(inputs_train, outputs_train)
32 |     print('Train r2score:', r2score_train)
33 |     r2score = forest_boston.r2score(inputs_test, outputs_test)
34 |     print('Test r2score:', r2score)
35 | 
36 |     # Assert r2score
37 |     assert r2score_train > 0.7
38 | 
39 | 
40 | if __name__ == '__main__':
41 |     try:
42 |         test_boston_forest.__wrapped__()
43 |     except AssertionError:
44 |         pass
45 | 
--------------------------------------------------------------------------------
/tests/test_boston_tree.py:
--------------------------------------------------------------------------------
1 | from pykitml.testing import pktest_nograph
2 | 
3 | 
4 | @pktest_nograph
5 | def test_boston_tree():
6 |     import pykitml as pk
7 |     from pykitml.datasets import boston
8 | 
9 |     import os
10 | 
11 |     # Download the dataset
12 |     if not os.path.exists('boston.pkl'):
13 |         boston.get()
14 | 
15 |     # Load boston data set
16 |     inputs_train, outputs_train, inputs_test, outputs_test = boston.load()
17 | 
18 |     # Create model
19 |     ftypes = [
20 |         'continues', 'continues', 'continues',
21 |         'categorical', 'continues', 'continues',
22 |         'continues', 'continues', 'continues',
23 |         'continues', 'continues', 'continues', 'continues'
24 |     ]
25 |     tree_boston = pk.DecisionTree(13, 1, feature_type=ftypes, max_depth=8, min_split=20, regression=True)
26 | 
27 |     # Train
28 |     tree_boston.train(inputs_train, outputs_train)
29 | 
30 |     # Print r2score
31 |     r2score_train = tree_boston.r2score(inputs_train, outputs_train)
32 |     print('Train r2score:', r2score_train)
33 |     r2score = tree_boston.r2score(inputs_test, outputs_test)
34 |     print('Test r2score:', r2score)
35 | 
36 |     # Show the tree
37 |     tree_boston.show_tree()
38 | 
39 |     # Assert r2score
40 |     assert r2score_train > 0.9
41 | 
42 | 
43 | if __name__ == '__main__':
44 |     try:
45 |         test_boston_tree.__wrapped__()
46 |     except AssertionError:
47 |         pass
48 | 
--------------------------------------------------------------------------------
/tests/test_sonar_forest.py:
--------------------------------------------------------------------------------
1 | from pykitml.testing import pktest_graph
2 | 
3 | 
4 | @pktest_graph
5 | def test_sonar_forest():
6 |     import os
7 | 
8 |     import pykitml as pk
9 |     from pykitml.datasets import sonar
10 | 
11 |     # Download the dataset
12 |     if not os.path.exists('sonar.pkl'):
13 |         sonar.get()
14 | 
15 |     # Load the sonar dataset
16 |     inputs_train, outputs_train, inputs_test, outputs_test = sonar.load()
17 |     outputs_train = pk.onehot(outputs_train)
18 |     outputs_test = pk.onehot(outputs_test)
19 | 
20 |     # Create model
21 |     forest_sonar_classifier = pk.RandomForest(60, 2, max_depth=9, feature_type=['continues']*60)
22 | 
23 |     # Train the model
24 |     forest_sonar_classifier.train(inputs_train, outputs_train, num_feature_bag=60)
25 | 
26 |     # Save it
27 |     pk.save(forest_sonar_classifier, 'forest_sonar_classifier.pkl')
28 | 
29 |     # Print accuracy
30 |     accuracy = forest_sonar_classifier.accuracy(inputs_train, outputs_train)
31 |     print('Train accuracy:', accuracy)
32 |     accuracy = forest_sonar_classifier.accuracy(inputs_test, outputs_test)
33 |     print('Test accuracy:', accuracy)
34 | 
35 |     # Plot confusion matrix
36 |     forest_sonar_classifier.confusion_matrix(inputs_test, outputs_test,
37 |                                              gnames=['False', 'True'])
38 | 
39 | 
40 | if __name__ == '__main__':
41 |     try:
42 |         test_sonar_forest.__wrapped__()
43 |     except AssertionError:
44 |         pass
45 | 
--------------------------------------------------------------------------------
/tests/test_pca.py:
--------------------------------------------------------------------------------
1 | from pykitml.testing import pktest_graph
2 | 
3 | 
4 | @pktest_graph
5 | def test_pca_compression():
6 |     import os.path
7 |     import random
8 | 
9 |     import matplotlib.pyplot as plt
10 |     import pykitml as pk
11 |     from pykitml.datasets import mnist
12 | 
13 |     # Download dataset
14 |     if not os.path.exists('mnist.pkl'):
15 |         mnist.get()
16 | 
17 |     # Load dataset
18 |     training_data, _, _, _ = mnist.load()
19 | 
20 |     # Train PCA, reduce 784 dimensions to 250 dimensions
21 |     pca = pk.PCA(training_data, 250)
22 |     print('Variance retention:', pca.retention)
23 | 
24 |     # Pick random datapoints
25 |     indices = random.sample(range(1, 1000), 16)
26 |     examples = training_data[indices]
27 | 
28 |     # Show the original images
29 |     plt.figure('Original', figsize=(10, 7))
30 |     for i in range(1, 17):
31 |         plt.subplot(4, 4, i)
32 |         plt.imshow(examples[i-1].reshape((28, 28)), cmap='gray')
33 | 
34 |     # Transform the example and compress
35 |     transformed_examples = pca.transform(examples)
36 | 
37 |     # Inverse transform and recover the examples
38 |     recovered_examples = pca.inverse_transform(transformed_examples)
39 | 
40 |     # Show the inverse transformed examples
41 |     plt.figure('Recovered', figsize=(10, 7))
42 |     for i in range(1, 17):
43 |         plt.subplot(4, 4, i)
44 |         plt.imshow(recovered_examples[i-1].reshape((28, 28)), cmap='gray')
45 | 
46 |     # Show results
47 |     plt.show()
48 | 
49 | 
50 | if __name__ == '__main__':
51 |     test_pca_compression.__wrapped__()
52 | 
--------------------------------------------------------------------------------
/tests/test_cartpole_dqn.py:
--------------------------------------------------------------------------------
1 | from pykitml.testing import pktest_graph
2 | 
3 | @pktest_graph
4 | def test_cartpole():
5 |     import gymnasium as gym
6 |     import pykitml as pk
7 | 
8 |     # Wrapper class around the environment
9 |     class Environment:
10 |         def __init__(self):
11 |             self._env = gym.make('CartPole-v1', render_mode="human")
12 | 
13 |         def reset(self):
14 |             return self._env.reset()[0]
15 | 
16 |         def step(self, action):
17 |             obs, reward, done, _, _ = self._env.step(action)
18 | 
19 |             x, _, theta, _ = obs
20 |             x_threshold = self._env.env.env.env.x_threshold
21 |             theta_threshold_radians = self._env.env.env.env.theta_threshold_radians
22 | 
23 |             # Reward function, from
24 |             # https://github.com/keon/deep-q-learning/blob/master/ddqn.py
25 |             r1 = (x_threshold - abs(x)) / x_threshold - 0.8
26 |             r2 = (theta_threshold_radians - abs(theta)) / theta_threshold_radians - 0.5
27 |             reward = r1 + r2
28 | 
29 |             return obs, reward, done
30 | 
31 |         def close(self):
32 |             self._env.close()
33 | 
34 |         def render(self):
35 |             self._env.render()
36 | 
37 |     env = Environment()
38 | 
39 |     # Create DQN agent and train it
40 |     agent = pk.DQNAgent([4, 64, 64, 2])
41 |     agent.set_save_freq(100, 'cartpole_agent')
42 |     agent.train(env, 500, pk.Adam(0.001), render=True)
43 | 
44 |     # Plot reward graph
45 |     agent.plot_performance()
46 | 
47 | if __name__ == '__main__':
48 |     test_cartpole.__wrapped__()
49 | 
--------------------------------------------------------------------------------
/tests/test_heart_tree.py:
--------------------------------------------------------------------------------
1 | from pykitml.testing import pktest_graph
2 | 
3 | 
4 | @pktest_graph
5 | def test_heart_tree():
6 |     import os.path
7 | 
8 |     import pykitml as pk
9 |     from pykitml.datasets import heartdisease
10 | 
11 |     # Download the dataset
12 |     if not os.path.exists('heartdisease.pkl'):
13 |         heartdisease.get()
14 | 
15 |     # Load heart data set
16 |     inputs, outputs = heartdisease.load()
17 |     outputs = pk.onehot(outputs)
18 | 
19 |     # Create model
20 |     ftypes = [
21 |         'continues', 'categorical', 'categorical',
22 |         'continues', 'continues', 'categorical', 'categorical',
23 |         'continues', 'categorical', 'continues', 'categorical',
24 |         'categorical', 'categorical'
25 |     ]
26 |     tree_heart_classifier = pk.DecisionTree(13, 2, max_depth=7, feature_type=ftypes)
27 | 
28 |     # Train
29 |     tree_heart_classifier.train(inputs, outputs)
30 | 
31 |     # Save it
32 |     pk.save(tree_heart_classifier, 'tree_heart_classifier.pkl')
33 | 
34 |     # Print accuracy
35 |     accuracy = tree_heart_classifier.accuracy(inputs, outputs)
36 |     print('Accuracy:', accuracy)
37 | 
38 |     # Plot confusion matrix
39 |     tree_heart_classifier.confusion_matrix(inputs, outputs,
40 |                                            gnames=['False', 'True'])
41 | 
42 |     # Plot decision tree
43 |     tree_heart_classifier.show_tree()
44 | 
45 |     # Assert accuracy
46 |     assert (tree_heart_classifier.accuracy(inputs, outputs)) >= 94
47 | 
48 | 
49 | if __name__ == '__main__':
50 |     try:
51 |         test_heart_tree.__wrapped__()
52 |     except AssertionError:
53 |         pass
54 | 
--------------------------------------------------------------------------------
/pykitml/svm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from ._single_layer_model import SingleLayerModel
4 | from ._classifier import Classifier
5 | from . import _functions
6 | 
7 | 
8 | def gaussian_kernel(input_data, training_inputs, sigma=1):
9 |     '''
10 |     Transforms the given input data using the gaussian kernel.
11 | 
12 |     Parameters
13 |     ----------
14 |     input_data : numpy.array
15 |         The input data points to transform.
16 |     training_inputs : numpy.array
17 |         The training data.
18 |     sigma : float
19 |         Hyperparameter that determines the 'spread' of the kernel.
20 | 
21 |     '''
22 |     # Calculate squared L2 norm of each data point with
23 |     # every other data point
24 |     distances = _functions.pdist(input_data, training_inputs)
25 |     # Apply gaussian kernel
26 |     transformed_inputs = np.exp((-1/(2*sigma**2))*distances)
27 |     # return
28 |     return transformed_inputs
29 | 
30 | 
31 | class SVM(SingleLayerModel, Classifier):
32 |     '''
33 |     Implements Support Vector Machine with Linear Kernel.
34 | 
35 |     Note
36 |     ----
37 |     The outputs/targets in the training/testing data should have :code:`-1` instead
38 |     of :code:`0` for training. See example for more details.
39 |     '''
40 | 
41 |     @property
42 |     def _activ_func(self):
43 |         return _functions.identity
44 | 
45 |     @property
46 |     def _activ_func_prime(self):
47 |         return _functions.identity_prime
48 | 
49 |     @property
50 |     def _cost_func(self):
51 |         return _functions.hinge_loss
52 | 
53 |     @property
54 |     def _cost_func_prime(self):
55 |         return _functions.hinge_loss_prime
56 | 
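
A sketch of how the kernel and the -1 target convention fit together; the data points are made up, and the SVM constructor is assumed to take (input_size, output_size) like the other single-layer models in this package:

    import numpy as np
    import pykitml as pk

    inputs_train = np.array([[0.0, 0.0], [1.0, 1.0], [0.9, 1.1]])
    outputs_train = np.array([0, 1, 1])

    # SVM targets use -1 instead of 0 (see the note in the class docstring)
    outputs_train = np.where(outputs_train == 0, -1, outputs_train)

    # Kernel-transform the inputs for a non-linear decision boundary
    transformed = pk.gaussian_kernel(inputs_train, inputs_train, sigma=1)

    # One feature per training point after the transform
    svm_classifier = pk.SVM(transformed.shape[1], 1)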
--------------------------------------------------------------------------------
/tests/test_banknote_tree.py:
--------------------------------------------------------------------------------
1 | from pykitml.testing import pktest_graph
2 | 
3 | 
4 | @pktest_graph
5 | def test_banknote_tree():
6 |     import os
7 | 
8 |     import pykitml as pk
9 |     from pykitml.datasets import banknote
10 | 
11 |     # Download the dataset
12 |     if not os.path.exists('banknote.pkl'):
13 |         banknote.get()
14 | 
15 |     # Load banknote data set
16 |     inputs_train, outputs_train, inputs_test, outputs_test = banknote.load()
17 | 
18 |     # Change 0/False to [1, 0]
19 |     # Change 1/True to [0, 1]
20 |     outputs_train = pk.onehot(outputs_train)
21 |     outputs_test = pk.onehot(outputs_test)
22 | 
23 |     # Create model
24 |     ftypes = ['continues']*4
25 |     tree_banknote_classifier = pk.DecisionTree(4, 2, max_depth=7, feature_type=ftypes)
26 | 
27 |     # Train
28 |     tree_banknote_classifier.train(inputs_train, outputs_train)
29 | 
30 |     # Save it
31 |     pk.save(tree_banknote_classifier, 'tree_banknote_classifier.pkl')
32 | 
33 |     # Print accuracy
34 |     accuracy = tree_banknote_classifier.accuracy(inputs_train, outputs_train)
35 |     print('Train accuracy:', accuracy)
36 |     accuracy = tree_banknote_classifier.accuracy(inputs_test, outputs_test)
37 |     print('Test accuracy:', accuracy)
38 | 
39 |     # Plot confusion matrix
40 |     tree_banknote_classifier.confusion_matrix(inputs_test, outputs_test,
41 |                                               gnames=['False', 'True'])
42 | 
43 |     # Plot decision tree
44 |     tree_banknote_classifier.show_tree()
45 | 
46 |     # Assert accuracy
47 |     assert (tree_banknote_classifier.accuracy(inputs_test, outputs_test)) >= 97
48 | 
49 | 
50 | if __name__ == '__main__':
51 |     try:
52 |         test_banknote_tree.__wrapped__()
53 |     except AssertionError:
54 |         pass
55 | 
--------------------------------------------------------------------------------
/tests/test_heart.py:
--------------------------------------------------------------------------------
1 | from pykitml.testing import pktest_graph
2 | 
3 | 
4 | @pktest_graph
5 | def test_heart():
6 |     import os.path
7 | 
8 |     import pykitml as pk
9 |     from pykitml.datasets import heartdisease
10 | 
11 |     # Download the dataset
12 |     if not os.path.exists('heartdisease.pkl'):
13 |         heartdisease.get()
14 | 
15 |     # Load heartdisease data set
16 |     inputs, outputs = heartdisease.load()
17 | 
18 |     # Normalize inputs in the dataset
19 |     inputs_min, inputs_max = pk.get_minmax(inputs)
20 |     inputs = pk.normalize_minmax(inputs, inputs_min, inputs_max, cols=[0, 3, 4, 7, 9])
21 | 
22 |     # Change categorical values to onehot values
23 |     inputs = pk.onehot_cols(inputs, [1, 2, 5, 6, 8, 10, 11, 12])
24 | 
25 |     # Create model
26 |     heart_classifier = pk.LogisticRegression(35, 1)
27 | 
28 |     # Train the model
29 |     heart_classifier.train(
30 |         training_data=inputs,
31 |         targets=outputs,
32 |         batch_size=10,
33 |         epochs=1500,
34 |         optimizer=pk.Adam(learning_rate=0.015, decay_rate=0.99),
35 |         testing_freq=30,
36 |         decay_freq=40
37 |     )
38 | 
39 |     # Save it
40 |     pk.save(heart_classifier, 'heart_classifier.pkl')
41 | 
42 |     # Print accuracy and plot performance
43 |     heart_classifier.plot_performance()
44 |     accuracy = heart_classifier.accuracy(inputs, outputs)
45 | print('Accuracy:', accuracy) 46 | 47 | # Plot confusion matrix 48 | heart_classifier.confusion_matrix(inputs, outputs) 49 | 50 | # Assert if it has enough accuracy 51 | assert heart_classifier.accuracy(inputs, outputs) >= 87 52 | 53 | 54 | if __name__ == '__main__': 55 | try: 56 | test_heart.__wrapped__() 57 | except AssertionError: 58 | pass 59 | -------------------------------------------------------------------------------- /tests/test_adult_tree.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph 2 | 3 | 4 | @pktest_graph 5 | def test_adult_tree(): 6 | import os 7 | 8 | import pykitml as pk 9 | from pykitml.datasets import adult 10 | 11 | # Download the dataset 12 | if not os.path.exists('adult.data.pkl'): 13 | adult.get() 14 | 15 | # Load adult data set 16 | inputs_train, outputs_train, inputs_test, outputs_test = adult.load() 17 | outputs_train = pk.onehot(outputs_train) 18 | outputs_test = pk.onehot(outputs_test) 19 | 20 | # Create model 21 | ftypes = [ 22 | 'continues', 'categorical', 'continues', 'categorical', 23 | 'categorical', 'categorical', 'categorical', 'categorical', 'categorical', 24 | 'continues', 'continues', 'continues', 'categorical' 25 | ] 26 | tree_adult_classifier = pk.DecisionTree(13, 2, max_depth=100, min_split=100, feature_type=ftypes) 27 | 28 | # Train 29 | tree_adult_classifier.train(inputs_train, outputs_train) 30 | 31 | # Save it 32 | pk.save(tree_adult_classifier, 'tree_adult_classifier.pkl') 33 | 34 | # Print accuracy 35 | accuracy = tree_adult_classifier.accuracy(inputs_train, outputs_train) 36 | print('Train accuracy:', accuracy) 37 | accuracy = tree_adult_classifier.accuracy(inputs_test, outputs_test) 38 | print('Test accuracy:', accuracy) 39 | 40 | # Plot confusion matrix 41 | tree_adult_classifier.confusion_matrix(inputs_test, outputs_test, 42 | gnames=['False', 'True']) 43 | 44 | # Assert accuracy 45 | assert (tree_adult_classifier.accuracy(inputs_test, outputs_test)) >= 84 46 | 47 | 48 | if __name__ == '__main__': 49 | try: 50 | test_adult_tree.__wrapped__() 51 | except AssertionError: 52 | pass 53 | -------------------------------------------------------------------------------- /pykitml/smote.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ._functions import pdist 4 | 5 | 6 | def smote(minority_data_points, k=1): 7 | ''' 8 | SMOTE (Synthetic Minority Oversampling TEchnique). 9 | Used to generate more data points for the minority class in imbalanced learning. 10 | 11 | Parameters 12 | ---------- 13 | minority_data_points : numpy.array 14 | Inputs or data points corresponding to the minority class. 15 | k : int 16 | Number of neighbors to consider. 17 | 18 | Returns 19 | ------- 20 | new_points : numpy.array 21 | Newly generated data points (excluding data points passed to the 22 | function). :code:`k*minority_data_points.shape[0]` points will be 23 | generated.
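Example
-------
A minimal illustrative sketch with made-up values; per the
description above, :code:`k=2` on 3 input points generates 6 new
points:

>>> import numpy as np
>>> import pykitml as pk
>>> minority = np.array([[1.0, 2.0], [1.2, 1.9], [0.9, 2.1]])
>>> new_points = pk.smote(minority, k=2)
>>> new_points.shape
(6, 2)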
24 | ''' 25 | npoints = minority_data_points.shape[0] 26 | nfeatures = minority_data_points.shape[1] 27 | 28 | # Calculate distance between each point and every other point 29 | distances = pdist(minority_data_points, minority_data_points) 30 | 31 | # Get indices of closest k neighbours for each point 32 | indices = np.argsort(distances, axis=1)[:, 1:k+1] 33 | 34 | # Get the closest k neighbours for each point 35 | neighbours = minority_data_points[indices].squeeze() 36 | neighbours = neighbours.reshape(k*npoints, nfeatures) 37 | 38 | # Calculate difference between points and k neighbours 39 | minority_data_points_dups = minority_data_points[np.tile(np.arange(npoints).reshape(npoints, 1), k)] 40 | minority_data_points_dups = minority_data_points_dups.reshape(k*npoints, nfeatures) 41 | diff = neighbours - minority_data_points_dups 42 | 43 | # Create new data points 44 | random_floats = np.random.uniform(0, 1, (npoints*k)) 45 | new_points = minority_data_points_dups + (diff.T*random_floats).T 46 | 47 | return new_points 48 | -------------------------------------------------------------------------------- /tests/test_adult_forest.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph 2 | 3 | 4 | @pktest_graph 5 | def test_adult_forest(): 6 | import os 7 | 8 | import pykitml as pk 9 | from pykitml.datasets import adult 10 | 11 | # Download the dataset 12 | if not os.path.exists('adult.data.pkl'): 13 | adult.get() 14 | 15 | # Load adult data set 16 | inputs_train, outputs_train, inputs_test, outputs_test = adult.load() 17 | outputs_train = pk.onehot(outputs_train) 18 | outputs_test = pk.onehot(outputs_test) 19 | 20 | # Create model 21 | ftypes = [ 22 | 'continues', 'categorical', 'continues', 'categorical', 23 | 'categorical', 'categorical', 'categorical', 'categorical', 'categorical', 24 | 'continues', 'continues', 'continues', 'categorical' 25 | ] 26 | forest_adult_classifier = pk.RandomForest(13, 2, max_depth=1000, feature_type=ftypes) 27 | 28 | # Train 29 | forest_adult_classifier.train(inputs_train, outputs_train, num_trees=100) 30 | 31 | # Save it 32 | pk.save(forest_adult_classifier, 'forest_adult_classifier.pkl') 33 | 34 | # Print accuracy 35 | accuracy = forest_adult_classifier.accuracy(inputs_train, outputs_train) 36 | print('Train accuracy:', accuracy) 37 | accuracy = forest_adult_classifier.accuracy(inputs_test, outputs_test) 38 | print('Test accuracy:', accuracy) 39 | 40 | # Plot confusion matrix 41 | forest_adult_classifier.confusion_matrix(inputs_test, outputs_test, 42 | gnames=['False', 'True']) 43 | 44 | # Assert accuracy 45 | assert (forest_adult_classifier.accuracy(inputs_test, outputs_test)) >= 82 46 | 47 | 48 | if __name__ == '__main__': 49 | try: 50 | test_adult_forest.__wrapped__() 51 | except AssertionError: 52 | pass 53 | -------------------------------------------------------------------------------- /pykitml/datasets/s1clustering.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import zlib 3 | 4 | import numpy as np 5 | 6 | from .. import pklhandler 7 | from ._s1_compressed import encoded_data 8 | 9 | ''' 10 | This module contains helper functions to download and load 11 | the S1 clustering dataset. 12 | ''' 13 | 14 | 15 | def get(): 16 | ''' 17 | Downloads the s1 clustering dataset from 18 | http://cs.joensuu.fi/sipu/datasets/ 19 | and saves it as a pkl file `s1.pkl`.
20 | 21 | Raises 22 | ------ 23 | urllib.error.URLError 24 | If internet connection is not available or the URL is not accessible. 25 | OSError 26 | If the file cannot be created due to a system-related error. 27 | KeyError 28 | If invalid/unknown type. 29 | 30 | Note 31 | ---- 32 | You only need to call this method once, i.e., after the dataset has been downloaded 33 | and you have the `s1.pkl` file, you don't need to call this method again. 34 | ''' 35 | 36 | # Decompress the data and save it as a pkl file 37 | decoded_data = base64.decodebytes(encoded_data) 38 | uncompressed_data = zlib.decompress(decoded_data) 39 | data_array = np.frombuffer(uncompressed_data, dtype=np.int64).reshape(5000, 2) 40 | pklhandler.save(data_array, 's1.pkl') 41 | 42 | 43 | def load(): 44 | ''' 45 | Loads x, y points of the s1 clustering dataset from the saved pickle file `s1.pkl` into a 46 | numpy array. The S1 clustering dataset contains 15 clusters. 47 | 48 | Returns 49 | ------- 50 | training_data : numpy.array 51 | 5000x2 numpy array containing x, y points. 52 | 53 | Raises 54 | ------ 55 | FileNotFoundError 56 | If `s1.pkl` file does not exist, i.e., if the dataset was not 57 | downloaded and saved using the :py:func:`~get` method. 58 | ''' 59 | return pklhandler.load('s1.pkl') 60 | -------------------------------------------------------------------------------- /tests/test_iris.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph 2 | 3 | 4 | @pktest_graph 5 | def test_iris(): 6 | import pykitml as pk 7 | from pykitml.datasets import iris 8 | 9 | # Load iris data set 10 | inputs_train, outputs_train, inputs_test, outputs_test = iris.load() 11 | 12 | # Normalize inputs in the dataset 13 | inputs_min, inputs_max = pk.get_minmax(inputs_train) 14 | inputs_train = pk.normalize_minmax(inputs_train, inputs_min, inputs_max) 15 | inputs_test = pk.normalize_minmax(inputs_test, inputs_min, inputs_max) 16 | 17 | # Create model 18 | iris_classifier = pk.LogisticRegression(4, 3) 19 | 20 | # Train the model 21 | iris_classifier.train( 22 | training_data=inputs_train, 23 | targets=outputs_train, 24 | batch_size=10, 25 | epochs=1500, 26 | optimizer=pk.Adam(learning_rate=0.4, decay_rate=0.99), 27 | testing_data=inputs_test, 28 | testing_targets=outputs_test, 29 | testing_freq=30, 30 | decay_freq=20 31 | ) 32 | 33 | # Save it 34 | pk.save(iris_classifier, 'iris_classifier.pkl') 35 | 36 | # Print accuracy 37 | accuracy = iris_classifier.accuracy(inputs_train, outputs_train) 38 | print('Train accuracy:', accuracy) 39 | accuracy = iris_classifier.accuracy(inputs_test, outputs_test) 40 | print('Test accuracy:', accuracy) 41 | 42 | # Plot performance 43 | iris_classifier.plot_performance() 44 | 45 | # Plot confusion matrix 46 | iris_classifier.confusion_matrix(inputs_test, outputs_test, 47 | gnames=['Setosa', 'Versicolor', 'Virginica']) 48 | 49 | # Assert if it has enough accuracy 50 | assert iris_classifier.accuracy(inputs_train, outputs_train) >= 98 51 | 52 | 53 | if __name__ == '__main__': 54 | try: 55 | test_iris.__wrapped__() 56 | except AssertionError: 57 | pass 58 | -------------------------------------------------------------------------------- /docs/RandomForest.rst: -------------------------------------------------------------------------------- 1 | Random Forest 2 | ============= 3 | 4 | Class Reference 5 | --------------- 6 | 7 | .. autoclass:: pykitml.RandomForest 8 | 9 | .. automethod:: __init__ 10 | 11 | .. automethod:: feed 12 | 13 | .. 
automethod:: get_output 14 | 15 | .. automethod:: get_output_onehot 16 | 17 | .. automethod:: train 18 | 19 | .. automethod:: accuracy 20 | 21 | .. automethod:: r2score 22 | 23 | .. automethod:: confusion_matrix 24 | 25 | .. autoattribute:: trees 26 | 27 | Example: Banknote Authentication 28 | -------------------------------- 29 | 30 | **Dataset** 31 | 32 | :ref:`banknote_dataset` 33 | 34 | **Training** 35 | 36 | .. literalinclude:: ../tests/test_banknote_forest.py 37 | :pyobject: test_banknote_forest 38 | :lines: 3- 39 | :end-before: # Assert 40 | :dedent: 4 41 | 42 | **Predict banknote validity with variance, skewness, curtosis, entropy: 43 | -2.3, -9.3, 9.37, -0.86** 44 | 45 | .. literalinclude:: ../tests/test_banknote_forest.py 46 | :pyobject: test_predict_banknote_forest 47 | :lines: 3- 48 | :dedent: 4 49 | 50 | **Confusion Matrix** 51 | 52 | .. image:: ./demo_pics/forest_confusion_matrix.png 53 | 54 | Example: Heart Disease Prediction 55 | --------------------------------- 56 | 57 | **Dataset** 58 | 59 | :ref:`heart_dataset` 60 | 61 | **Training** 62 | 63 | .. literalinclude:: ../tests/test_heart_forest.py 64 | :pyobject: test_heart_forest 65 | :lines: 3- 66 | :end-before: # Assert 67 | :dedent: 4 68 | 69 | **Predict heartdisease for a person with 70 | age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal: 71 | 67, 1, 4, 160, 286, 0, 2, 108, 1, 1.5, 2, 3, 3** 72 | 73 | .. literalinclude:: ../tests/test_heart_forest.py 74 | :pyobject: test_predict_heart_forest 75 | :lines: 3- 76 | :dedent: 4 77 | 78 | **Confusion Matrix** 79 | 80 | .. image:: ./demo_pics/forest_heart_confusion_matrix.png -------------------------------------------------------------------------------- /tests/test_iris_bayes.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph, pktest_nograph 2 | 3 | 4 | @pktest_graph 5 | def test_iris_bayes(): 6 | import pykitml as pk 7 | from pykitml.datasets import iris 8 | 9 | # Load iris data set 10 | inputs_train, outputs_train, inputs_test, outputs_test = iris.load() 11 | 12 | # Create model 13 | bayes_iris_classifier = pk.GaussianNaiveBayes(4, 3) 14 | 15 | # Train 16 | bayes_iris_classifier.train(inputs_train, outputs_train) 17 | 18 | # Save it 19 | pk.save(bayes_iris_classifier, 'bayes_iris_classifier.pkl') 20 | 21 | # Print accuracy 22 | accuracy = bayes_iris_classifier.accuracy(inputs_train, outputs_train) 23 | print('Train accuracy:', accuracy) 24 | accuracy = bayes_iris_classifier.accuracy(inputs_test, outputs_test) 25 | print('Test accuracy:', accuracy) 26 | 27 | # Plot confusion matrix 28 | bayes_iris_classifier.confusion_matrix(inputs_test, outputs_test, 29 | gnames=['Setosa', 'Versicolor', 'Virginica']) 30 | 31 | # Assert accuracy 32 | assert (bayes_iris_classifier.accuracy(inputs_train, outputs_train)) >= 95 33 | 34 | 35 | @pktest_nograph 36 | def test_predict_iris_bayes(): 37 | import numpy as np 38 | import pykitml as pk 39 | 40 | # Predict type of species with 41 | # sepal-length sepal-width petal-length petal-width 42 | # 5.8, 2.7, 3.9, 1.2 43 | input_data = np.array([5.8, 2.7, 3.9, 1.2]) 44 | 45 | # Load the model 46 | bayes_iris_classifier = pk.load('bayes_iris_classifier.pkl') 47 | 48 | # Get output 49 | bayes_iris_classifier.feed(input_data) 50 | model_output = bayes_iris_classifier.get_output_onehot() 51 | 52 | # Print result 53 | print(model_output) 54 | 55 | 56 | if __name__ == '__main__': 57 | try: 58 | test_iris_bayes.__wrapped__() 59 | 
test_predict_iris_bayes.__wrapped__() 60 | except AssertionError: 61 | pass 62 | -------------------------------------------------------------------------------- /pykitml/testing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cProfile 3 | from unittest.mock import patch 4 | from functools import wraps 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | import numpy as np 9 | 10 | 11 | def _profile(test_func): 12 | ''' 13 | Calls the test function and profiles it. 14 | 15 | Parameters 16 | ---------- 17 | test_func : function 18 | The function to test and profile. 19 | ''' 20 | # Reset random seed 21 | np.random.seed(0) 22 | # Call the test function and profile it 23 | profiler = cProfile.Profile() 24 | profiler.runcall(test_func) 25 | profiler.dump_stats(test_func.__name__+'.dat') 26 | 27 | 28 | def pktest_graph(test_func): 29 | ''' 30 | To test and profile a function under pytest. Will prevent 31 | :code:`matplotlib.pyplot.show()` from blocking other tests. 32 | 33 | Parameters 34 | ---------- 35 | test_func : function 36 | The function to test and profile. 37 | ''' 38 | # Create wrapper function for testing and profiling in pytest 39 | @wraps(test_func) 40 | def test_wrapper(): 41 | # Close any open plots 42 | plt.close() 43 | plt.clf() 44 | 45 | with patch('matplotlib.pyplot.show') as show_func, patch('graphviz.Digraph.view') as _: 46 | # Run the test function 47 | #_profile(test_func) 48 | test_func() 49 | 50 | # Test if graph worked 51 | if "PYTEST_CURRENT_TEST" in os.environ: 52 | assert show_func.called 53 | 54 | return test_wrapper 55 | 56 | 57 | def pktest_nograph(test_func): 58 | ''' 59 | To test and profile a function under pytest. 60 | 61 | Parameters 62 | ---------- 63 | test_func : function 64 | The function to test and profile.
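Example
-------
A minimal usage sketch; :code:`test_example` is a hypothetical test
function, decorated the same way as the tests in this repo:

>>> from pykitml.testing import pktest_nograph
>>> @pktest_nograph
... def test_example():
...     assert 1 + 1 == 2
>>> test_example()  # runs the test under cProfile, writes test_example.dat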
65 | ''' 66 | # Create wrapper function for testing and profiling in pytest 67 | @wraps(test_func) 68 | def test_wrapper(): 69 | _profile(test_func) 70 | 71 | return test_wrapper 72 | -------------------------------------------------------------------------------- /tests/test_search.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_nograph 2 | 3 | 4 | @pktest_nograph 5 | def test_search(): 6 | import os 7 | 8 | import pykitml as pk 9 | from pykitml.datasets import mnist 10 | 11 | # If the dataset is not available then download it 12 | if not os.path.exists('mnist.pkl'): 13 | mnist.get(type='fashion') 14 | 15 | # Load dataset 16 | training_data, training_targets, testing_data, testing_targets = mnist.load() 17 | 18 | # Search for hyperparameters 19 | # Learning rate alpha = 10^-4 to 10^-2 20 | # Decay rate = 0.8 to 1 21 | # Decay frequency = 10 to 30 22 | # Batch size = 10 to 100 23 | search = pk.RandomSearch() 24 | for alpha, decay, decay_freq, bsize in search.search( 25 | 10, 3, 5, [-4, -2, 'log'], [0.8, 1, 'float'], [10, 30, 'int'], [10, 100, 'int']): 26 | 27 | # Create a new neural network 28 | fashion_classifier = pk.NeuralNetwork([784, 100, 10]) 29 | 30 | # Train it 31 | fashion_classifier.train( 32 | training_data=training_data, 33 | targets=training_targets, 34 | batch_size=bsize, 35 | epochs=1200, 36 | optimizer=pk.Adam(learning_rate=alpha, decay_rate=decay), 37 | testing_freq=100, 38 | decay_freq=decay_freq 39 | ) 40 | 41 | cost = fashion_classifier.cost(testing_data, testing_targets) 42 | search.set_cost(cost) 43 | 44 | # Save the best model 45 | if search.best: 46 | pk.save(fashion_classifier, 'best.pkl') 47 | 48 | # Load the best model 49 | fashion_classifier = pk.load('best.pkl') 50 | 51 | # Show performance 52 | accuracy = fashion_classifier.accuracy(testing_data, testing_targets) 53 | print('Test Accuracy:', accuracy) 54 | 55 | # Assert accuracy 56 | assert accuracy > 84 57 | 58 | 59 | if __name__ == '__main__': 60 | try: 61 | test_search.__wrapped__() 62 | except AssertionError: 63 | pass 64 | -------------------------------------------------------------------------------- /tests/test_iris_tree.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph, pktest_nograph 2 | 3 | 4 | @pktest_graph 5 | def test_iris_tree(): 6 | import pykitml as pk 7 | from pykitml.datasets import iris 8 | 9 | # Load iris data set 10 | inputs_train, outputs_train, inputs_test, outputs_test = iris.load() 11 | 12 | # Create model 13 | tree_iris_classifier = pk.DecisionTree(4, 3, max_depth=5, feature_type=['continues']*4) 14 | 15 | # Train 16 | tree_iris_classifier.train(inputs_train, outputs_train) 17 | 18 | # Save it 19 | pk.save(tree_iris_classifier, 'tree_iris_classifier.pkl') 20 | 21 | # Print accuracy 22 | accuracy = tree_iris_classifier.accuracy(inputs_train, outputs_train) 23 | print('Train accuracy:', accuracy) 24 | accuracy = tree_iris_classifier.accuracy(inputs_test, outputs_test) 25 | print('Test accuracy:', accuracy) 26 | 27 | # Plot confusion matrix 28 | tree_iris_classifier.confusion_matrix(inputs_test, outputs_test, 29 | gnames=['Setosa', 'Versicolor', 'Virginica']) 30 | 31 | # Plot decision tree 32 | tree_iris_classifier.show_tree() 33 | 34 | # Assert accuracy 35 | assert (tree_iris_classifier.accuracy(inputs_train, outputs_train)) >= 98 36 | 37 | 38 | @pktest_nograph 39 | def test_predict_iris_tree(): 40 | import numpy as np 41 | import 
pykitml as pk 42 | 43 | # Predict type of species with 44 | # sepal-length sepal-width petal-length petal-width 45 | # 5.8, 2.7, 3.9, 1.2 46 | input_data = np.array([5.8, 2.7, 3.9, 1.2]) 47 | 48 | # Load the model 49 | tree_iris_classifier = pk.load('tree_iris_classifier.pkl') 50 | 51 | # Get output 52 | tree_iris_classifier.feed(input_data) 53 | model_output = tree_iris_classifier.get_output_onehot() 54 | 55 | # Print result 56 | print(model_output) 57 | 58 | 59 | if __name__ == '__main__': 60 | try: 61 | test_iris_tree.__wrapped__() 62 | test_predict_iris_tree.__wrapped__() 63 | except AssertionError: 64 | pass 65 | -------------------------------------------------------------------------------- /pykitml/nearest_neighbor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ._classifier import Classifier 4 | from ._regressor import Regressor 5 | from . import _functions 6 | 7 | 8 | class NearestNeighbor(Classifier, Regressor): 9 | ''' 10 | This class implements the nearest neighbor classifier. 11 | ''' 12 | 13 | def __init__(self, inputs_size, output_size, no_neighbors=1): 14 | ''' 15 | Parameters 16 | ---------- 17 | inputs_size : int 18 | Size of input data or number of input features. 19 | output_size : int 20 | Number of categories or groups. 21 | no_neighbors : int 22 | The number of nearest neighbors to consider. 23 | ''' 24 | self._k = no_neighbors 25 | self._output = None 26 | 27 | self._input_size = inputs_size 28 | self._output_size = output_size 29 | 30 | self._inputs = None 31 | self._outputs = None 32 | 33 | @property 34 | def _out_size(self): 35 | return self._output_size 36 | 37 | def train(self, training_data, targets): 38 | ''' 39 | Trains the model on the training data. 40 | 41 | Parameters 42 | ---------- 43 | training_data : numpy.array 44 | numpy array containing training data. 45 | targets : numpy.array 46 | numpy array containing training targets, corresponding to the training data.
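Example
-------
A minimal sketch with toy values (two 2D points, two one-hot
classes; shapes chosen arbitrarily):

>>> import numpy as np
>>> import pykitml as pk
>>> model = pk.NearestNeighbor(2, 2, no_neighbors=1)
>>> model.train(np.array([[0.0, 0.0], [1.0, 1.0]]), np.array([[1, 0], [0, 1]]))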
47 | ''' 48 | self._inputs = training_data 49 | self._outputs = targets 50 | 51 | def feed(self, input_data): 52 | # Make sure array is 2D 53 | if input_data.ndim == 1: 54 | input_data = np.array([input_data]) 55 | 56 | # Get pair wise distances 57 | distances = _functions.pdist(input_data, self._inputs) 58 | 59 | # Sort the distances 60 | indices = np.argsort(distances, axis=1)[:, 0:self._k] 61 | 62 | # Get output 63 | self._output = np.mean(self._outputs[indices], axis=1) 64 | 65 | def get_output(self): 66 | return self._output.squeeze() 67 | -------------------------------------------------------------------------------- /tests/test_iris_neighbor.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph, pktest_nograph 2 | 3 | 4 | @pktest_graph 5 | def test_iris_neighbor(): 6 | import pykitml as pk 7 | from pykitml.datasets import iris 8 | 9 | # Load iris data set 10 | inputs_train, outputs_train, inputs_test, outputs_test = iris.load() 11 | 12 | # Create model 13 | neighbor_iris_classifier = pk.NearestNeighbor(4, 3) 14 | 15 | # Train the model 16 | neighbor_iris_classifier.train( 17 | training_data=inputs_train, 18 | targets=outputs_train, 19 | ) 20 | 21 | # Save it 22 | pk.save(neighbor_iris_classifier, 'neighbor_iris_classifier.pkl') 23 | 24 | # Print accuracy 25 | accuracy = neighbor_iris_classifier.accuracy(inputs_train, outputs_train) 26 | print('Train accuracy:', accuracy) 27 | accuracy = neighbor_iris_classifier.accuracy(inputs_test, outputs_test) 28 | print('Test accuracy:', accuracy) 29 | 30 | # Plot confusion matrix 31 | neighbor_iris_classifier.confusion_matrix(inputs_test, outputs_test, 32 | gnames=['Setosa', 'Versicolor', 'Virginica']) 33 | 34 | # Assert if it has enough accuracy 35 | assert neighbor_iris_classifier.accuracy(inputs_train, outputs_train) >= 100 36 | 37 | 38 | @pktest_nograph 39 | def test_predict_iris_neighbor(): 40 | import numpy as np 41 | import pykitml as pk 42 | 43 | # Predict type of species with 44 | # sepal-length sepal-width petal-length petal-width 45 | # 5.8, 2.7, 3.9, 1.2 46 | input_data = np.array([5.8, 2.7, 3.9, 1.2]) 47 | 48 | # Load the model 49 | neighbor_iris_classifier = pk.load('neighbor_iris_classifier.pkl') 50 | 51 | # Get output 52 | neighbor_iris_classifier.feed(input_data) 53 | model_output = neighbor_iris_classifier.get_output_onehot() 54 | 55 | # Print result 56 | print(model_output) 57 | 58 | 59 | if __name__ == '__main__': 60 | try: 61 | test_iris_neighbor.__wrapped__() 62 | test_predict_iris_neighbor.__wrapped__() 63 | except AssertionError: 64 | pass 65 | -------------------------------------------------------------------------------- /docs/SVM.rst: -------------------------------------------------------------------------------- 1 | Support Vector Machine 2 | ====================== 3 | 4 | Class Reference 5 | --------------- 6 | 7 | .. autoclass:: pykitml.SVM 8 | 9 | .. automethod:: __init__ 10 | 11 | .. automethod:: feed 12 | 13 | .. automethod:: get_output 14 | 15 | .. automethod:: get_output_onehot 16 | 17 | .. automethod:: train 18 | 19 | .. automethod:: plot_performance 20 | 21 | .. automethod:: cost 22 | 23 | .. automethod:: accuracy 24 | 25 | .. automethod:: confusion_matrix 26 | 27 | Gaussian Kernel 28 | --------------- 29 | 30 | .. 
autofunction:: pykitml.gaussian_kernel 31 | 32 | Example: Classifying Iris Using SVM with Linear Kernel 33 | ------------------------------------------------------ 34 | 35 | **Dataset** 36 | 37 | :ref:`iris_dataset` 38 | 39 | **Training** 40 | 41 | .. literalinclude:: ../tests/test_iris_svm.py 42 | :pyobject: test_iris_svm 43 | :lines: 3- 44 | :end-before: # Assert 45 | :dedent: 4 46 | 47 | **Predict type of species with sepal-length, sepal-width, petal-length, petal-width: 48 | 5.8, 2.7, 3.9, 1.2** 49 | 50 | .. literalinclude:: ../tests/test_iris_svm.py 51 | :pyobject: test_predict_iris_svm 52 | :lines: 3- 53 | :dedent: 4 54 | 55 | **Performance Graph** 56 | 57 | .. image:: ./demo_pics/linear_svm_perf_graph.png 58 | 59 | **Confusion Matrix** 60 | 61 | .. image:: ./demo_pics/linear_svm_confusion_matrix.png 62 | 63 | Example: Handwritten Digit Recognition (MNIST) using Gaussian Kernel 64 | -------------------------------------------------------------------- 65 | 66 | **Dataset** 67 | 68 | :ref:`mnist_dataset` 69 | 70 | **Training** 71 | 72 | .. literalinclude:: ../tests/test_mnist_svm.py 73 | :pyobject: test_mnist_svm 74 | :lines: 3- 75 | :end-before: # Assert 76 | :dedent: 4 77 | 78 | **Predicting** 79 | 80 | .. literalinclude:: ../tests/test_mnist_svm.py 81 | :pyobject: test_predict_mnist_svm 82 | :lines: 3- 83 | :dedent: 4 84 | 85 | **Performance Graph** 86 | 87 | .. image:: ./demo_pics/gaussian_svm_perf_graph.png 88 | 89 | **Confusion Matrix** 90 | 91 | .. image:: ./demo_pics/gaussian_svm_confusion_matrix.png -------------------------------------------------------------------------------- /pykitml/_shared_array.py: -------------------------------------------------------------------------------- 1 | import ctypes 2 | import multiprocessing 3 | 4 | import numpy 5 | 6 | ''' 7 | This module contains helper functions to share 8 | numpy arrays between python multiprocessing processes. 9 | 10 | See: https://stackoverflow.com/a/5034106/5516481 11 | ''' 12 | 13 | _ctypes_to_numpy = { 14 | ctypes.c_char: numpy.dtype(numpy.uint8), 15 | ctypes.c_wchar: numpy.dtype(numpy.int16), 16 | ctypes.c_byte: numpy.dtype(numpy.int8), 17 | ctypes.c_ubyte: numpy.dtype(numpy.uint8), 18 | ctypes.c_short: numpy.dtype(numpy.int16), 19 | ctypes.c_ushort: numpy.dtype(numpy.uint16), 20 | ctypes.c_int: numpy.dtype(numpy.int32), 21 | ctypes.c_uint: numpy.dtype(numpy.uint32), 22 | ctypes.c_long: numpy.dtype(numpy.int64), 23 | ctypes.c_ulong: numpy.dtype(numpy.uint64), 24 | ctypes.c_float: numpy.dtype(numpy.float32), 25 | ctypes.c_double: numpy.dtype(numpy.float64) 26 | } 27 | 28 | _numpy_to_ctypes = dict(zip(_ctypes_to_numpy.values(), _ctypes_to_numpy.keys())) 29 | 30 | 31 | def shm_as_ndarray(mp_array, shape=None): 32 | ''' 33 | Given a multiprocessing.Array, returns an ndarray pointing to 34 | the same data. 35 | ''' 36 | 37 | # support SynchronizedArray: 38 | if not hasattr(mp_array, '_type_'): 39 | mp_array = mp_array.get_obj() 40 | 41 | dtype = _ctypes_to_numpy[mp_array._type_] # pylint: disable=protected-access 42 | result = numpy.frombuffer(mp_array, dtype) 43 | 44 | if shape is not None: 45 | result = result.reshape(shape) 46 | 47 | return numpy.asarray(result) 48 | 49 | 50 | def ndarray_to_shm(array, lock=False): 51 | ''' 52 | Generate a 1D multiprocessing.Array containing the data from 53 | the passed ndarray. The data will be *copied* into shared 54 | memory.
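Example
-------
A round-trip sketch using the two helpers in this module:

>>> import numpy
>>> arr = numpy.arange(6, dtype=numpy.float64).reshape(2, 3)
>>> shm = ndarray_to_shm(arr)
>>> shm_as_ndarray(shm, shape=(2, 3))
array([[0., 1., 2.],
       [3., 4., 5.]])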
55 | ''' 56 | 57 | array1d = array.ravel(order='A') 58 | 59 | try: 60 | c_type = _numpy_to_ctypes[array1d.dtype] 61 | except KeyError: 62 | c_type = _numpy_to_ctypes[numpy.dtype(array1d.dtype)] 63 | 64 | result = multiprocessing.Array(c_type, array1d.size, lock=lock) 65 | shm_as_ndarray(result)[:] = array1d 66 | return result 67 | -------------------------------------------------------------------------------- /tests/test_adult.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph 2 | 3 | 4 | @pktest_graph 5 | def test_adult(): 6 | import os.path 7 | 8 | import pykitml as pk 9 | from pykitml.datasets import adult 10 | 11 | # Download the dataset 12 | if not os.path.exists('adult.data.pkl'): 13 | adult.get() 14 | 15 | # Load adult data set 16 | inputs_train, outputs_train, inputs_test, outputs_test = adult.load() 17 | 18 | # Normalize dataset 19 | array_min, array_max = pk.get_minmax(inputs_train) 20 | inputs_train = pk.normalize_minmax(inputs_train, array_min, array_max, cols=[0, 2, 9, 10, 11]) 21 | inputs_test = pk.normalize_minmax(inputs_test, array_min, array_max, cols=[0, 2, 9, 10, 11]) 22 | 23 | # Convert categorical values to one-hot values 24 | inputs_train, inputs_test = pk.onehot_cols_traintest(inputs_train, inputs_test, cols=[1, 3, 4, 5, 6, 7, 8, 9, 12]) 25 | 26 | # Create model 27 | adult_classifier = pk.LogisticRegression(104, 1) 28 | 29 | # Train the model 30 | adult_classifier.train( 31 | training_data=inputs_train, 32 | targets=outputs_train, 33 | batch_size=10, 34 | epochs=1500, 35 | optimizer=pk.Adam(learning_rate=0.015, decay_rate=0.99), 36 | testing_data=inputs_test, 37 | testing_targets=outputs_test, 38 | testing_freq=30, 39 | decay_freq=40 40 | ) 41 | 42 | # Save it 43 | pk.save(adult_classifier, 'adult_classifier.pkl') 44 | 45 | # Plot performance 46 | adult_classifier.plot_performance() 47 | 48 | # Print accuracy 49 | accuracy = adult_classifier.accuracy(inputs_train, outputs_train) 50 | print('Train accuracy:', accuracy) 51 | accuracy = adult_classifier.accuracy(inputs_test, outputs_test) 52 | print('Test accuracy:', accuracy) 53 | 54 | # Plot confusion matrix 55 | adult_classifier.confusion_matrix(inputs_test, outputs_test) 56 | 57 | # Assert if it has enough accuracy 58 | assert adult_classifier.accuracy(inputs_test, outputs_test) >= 82 59 | 60 | 61 | if __name__ == '__main__': 62 | try: 63 | test_adult.__wrapped__() 64 | except AssertionError: 65 | pass 66 | -------------------------------------------------------------------------------- /pykitml/_regressor.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class Regressor(ABC): 5 | ''' 6 | Mix-in class for Regression models. 7 | ''' 8 | 9 | @abstractmethod 10 | def get_output(self): 11 | ''' 12 | Returns the output activations of the model. 13 | 14 | Returns 15 | ------- 16 | numpy.array 17 | The output activations. 18 | ''' 19 | 20 | @abstractmethod 21 | def feed(self, input_data): 22 | ''' 23 | Accepts input array and feeds it to the model. 24 | 25 | Parameters 26 | ---------- 27 | input_data : numpy.array 28 | The input to feed the model. 29 | 30 | Raises 31 | ------ 32 | ValueError 33 | If the input data has invalid dimensions/shape. 
34 | 35 | Note 36 | ---- 37 | This function only feeds the input data; to get the output after calling this 38 | function, use :py:func:`get_output` or :py:func:`get_output_onehot` 39 | ''' 40 | 41 | @property 42 | @abstractmethod 43 | def _out_size(self): 44 | ''' 45 | Returns the number of nodes/neurons in the output layer. 46 | ''' 47 | 48 | def r2score(self, testing_data, testing_targets): 49 | ''' 50 | Returns the R-squared (coefficient of determination) value. 51 | 52 | Parameters 53 | ---------- 54 | testing_data : numpy.array 55 | numpy array containing testing data. 56 | testing_targets : numpy.array 57 | numpy array containing testing targets, corresponding to the testing data. 58 | 59 | Returns 60 | ------- 61 | r2score : float 62 | The R-squared value of the model over the testing data. 63 | 64 | Raises 65 | ------ 66 | ValueError 67 | If :code:`testing_data` or :code:`testing_targets` has invalid dimensions/shape. 68 | ''' 69 | self.feed(testing_data) 70 | output = self.get_output() 71 | 72 | error = ((output-testing_targets)**2).sum() 73 | var = ((testing_targets-testing_targets.mean(axis=0)) ** 2).sum() 74 | 75 | return 1-error/var 76 | -------------------------------------------------------------------------------- /tests/test_fashion.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph 2 | 3 | 4 | def test_download(): 5 | from pykitml.datasets import mnist 6 | # Download the mnist data set 7 | mnist.get(type='fashion') 8 | # Test ran successfully 9 | assert True 10 | 11 | 12 | @pktest_graph 13 | def test_adam_fashion(): 14 | import os 15 | 16 | import pykitml as pk 17 | from pykitml.datasets import mnist 18 | 19 | # If the dataset is not available then download it 20 | if not os.path.exists('mnist.pkl'): 21 | mnist.get(type='fashion') 22 | 23 | # Load dataset 24 | training_data, training_targets, testing_data, testing_targets = mnist.load() 25 | 26 | # Create a new neural network 27 | fashion_classifier = pk.NeuralNetwork([784, 100, 10]) 28 | 29 | # Train it 30 | fashion_classifier.train( 31 | training_data=training_data, 32 | targets=training_targets, 33 | batch_size=50, 34 | epochs=1200, 35 | optimizer=pk.Adam(learning_rate=0.012, decay_rate=0.95), 36 | testing_data=testing_data, 37 | testing_targets=testing_targets, 38 | testing_freq=30, 39 | decay_freq=10 40 | ) 41 | 42 | # Save it 43 | pk.save(fashion_classifier, 'fashion_classifier_network.pkl') 44 | 45 | # Show performance 46 | accuracy = fashion_classifier.accuracy(training_data, training_targets) 47 | print('Train Accuracy:', accuracy) 48 | accuracy = fashion_classifier.accuracy(testing_data, testing_targets) 49 | print('Test Accuracy:', accuracy) 50 | 51 | # Plot performance 52 | fashion_classifier.plot_performance() 53 | 54 | # Show confusion matrix 55 | fashion_classifier.confusion_matrix( 56 | training_data, training_targets, 57 | gnames=['T-shirt/Top', 'Trouser', 'Pullover', 58 | 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 59 | 'Bag', 'Ankle Boot' 60 | ] 61 | ) 62 | 63 | # Assert if it has enough accuracy 64 | assert fashion_classifier.accuracy(training_data, training_targets) > 84 65 | 66 | 67 | if __name__ == '__main__': 68 | try: 69 | test_adam_fashion.__wrapped__() 70 | except AssertionError: 71 | pass 72 | -------------------------------------------------------------------------------- /pykitml/pca.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class PCA: 5 | ''' 6
| This class implements Principal Component Analysis. 7 | ''' 8 | 9 | def __init__(self, data_points, no_components): 10 | ''' 11 | This class implements Principal Component Analysis, used for 12 | dimensionality reduction. 13 | 14 | Parameters 15 | ---------- 16 | data_points : numpy.array 17 | The dataset to perform PCA (dimensionality reduction) on. 18 | no_components : int 19 | Number of principal components to use. 20 | ''' 21 | # Calculate covariance matrix 22 | covariance_matrix = (data_points.T) @ data_points 23 | covariance_matrix = covariance_matrix/data_points.shape[0] 24 | 25 | # Perform Singular Value Decomposition on the covariance matrix 26 | u, s, _ = np.linalg.svd(covariance_matrix, full_matrices=True) 27 | 28 | # Calculate amount of variance retained 29 | self._retention = np.sum(s[0:no_components])/np.sum(s) 30 | 31 | # The transformation matrix for PCA 32 | self._transform = u[:, 0:no_components] 33 | 34 | def transform(self, data_points): 35 | ''' 36 | Transforms the input dataset to lower dimensions. 37 | 38 | Parameters 39 | ---------- 40 | data_points : numpy.array 41 | The input dataset. 42 | 43 | Returns 44 | ------- 45 | transformed_data_points : numpy.array 46 | The transformed input. 47 | ''' 48 | # Transform the datapoints using principal components 49 | return data_points@self._transform 50 | 51 | def inverse_transform(self, pca_points): 52 | ''' 53 | Gets an approximation of the original dataset from transformed points. 54 | 55 | Parameters 56 | ---------- 57 | pca_points : numpy.array 58 | The transformed points. 59 | 60 | ''' 61 | # Transform from principal components back to approximate features 62 | return pca_points @ (self._transform.T) 63 | 64 | @property 65 | def retention(self): 66 | ''' 67 | Returns the amount of variance retained, between 0 and 1.
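Example
-------
Illustrative only; the value depends entirely on the data
(:code:`pca` is assumed to be an already-constructed :code:`PCA`
instance):

>>> pca.retention  # doctest: +SKIP
0.96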
68 | ''' 69 | return round(self._retention, 2) 70 | -------------------------------------------------------------------------------- /tests/test_heart_forest.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph, pktest_nograph 2 | 3 | 4 | @pktest_graph 5 | def test_heart_forest(): 6 | import os.path 7 | 8 | import pykitml as pk 9 | from pykitml.datasets import heartdisease 10 | 11 | # Download the dataset 12 | if not os.path.exists('heartdisease.pkl'): 13 | heartdisease.get() 14 | 15 | # Load heart data set 16 | inputs, outputs = heartdisease.load() 17 | outputs = pk.onehot(outputs) 18 | 19 | # Create model 20 | ftypes = [ 21 | 'continues', 'categorical', 'categorical', 22 | 'continues', 'continues', 'categorical', 'categorical', 23 | 'continues', 'categorical', 'continues', 'categorical', 24 | 'categorical', 'categorical' 25 | ] 26 | forest_heart_classifier = pk.RandomForest(13, 2, max_depth=8, feature_type=ftypes) 27 | 28 | # Train 29 | forest_heart_classifier.train(inputs, outputs) 30 | 31 | # Save it 32 | pk.save(forest_heart_classifier, 'forest_heart_classifier.pkl') 33 | 34 | # Print accuracy 35 | accuracy = forest_heart_classifier.accuracy(inputs, outputs) 36 | print('Accuracy:', accuracy) 37 | 38 | # Plot confusion matrix 39 | forest_heart_classifier.confusion_matrix(inputs, outputs, 40 | gnames=['False', 'True']) 41 | 42 | # Assert accuracy 43 | assert (forest_heart_classifier.accuracy(inputs, outputs)) >= 94 44 | 45 | 46 | @pktest_nograph 47 | def test_predict_heart_forest(): 48 | import numpy as np 49 | import pykitml as pk 50 | 51 | # Predict heartdisease for a person with 52 | # age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal 53 | # 67, 1, 4, 160, 286, 0, 2, 108, 1, 1.5, 2, 3, 3 54 | input_data = np.array([67, 1, 4, 160, 286, 0, 2, 108, 1, 1.5, 2, 3, 3], dtype=float) 55 | 56 | # Load the model 57 | forest_heart_classifier = pk.load('forest_heart_classifier.pkl') 58 | 59 | # Get output 60 | forest_heart_classifier.feed(input_data) 61 | model_output = forest_heart_classifier.get_output() 62 | 63 | # Print result (log of probabilities) 64 | print(model_output) 65 | 66 | 67 | if __name__ == '__main__': 68 | try: 69 | test_heart_forest.__wrapped__() 70 | test_predict_heart_forest.__wrapped__() 71 | except AssertionError: 72 | pass 73 | -------------------------------------------------------------------------------- /tests/test_banknote_forest.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph, pktest_nograph 2 | 3 | 4 | @pktest_graph 5 | def test_banknote_forest(): 6 | import os 7 | 8 | import pykitml as pk 9 | from pykitml.datasets import banknote 10 | 11 | # Download the dataset 12 | if not os.path.exists('banknote.pkl'): 13 | banknote.get() 14 | 15 | # Load banknote data set 16 | inputs_train, outputs_train, inputs_test, outputs_test = banknote.load() 17 | 18 | # Change 0/False to [1, 0] 19 | # Change 1/True to [0, 1] 20 | outputs_train = pk.onehot(outputs_train) 21 | outputs_test = pk.onehot(outputs_test) 22 | 23 | # Create model 24 | ftypes = ['continues']*4 25 | forest_banknote_classifier = pk.RandomForest(4, 2, max_depth=9, feature_type=ftypes) 26 | 27 | # Train 28 | forest_banknote_classifier.train(inputs_train, outputs_train) 29 | 30 | # Save it 31 | pk.save(forest_banknote_classifier, 'forest_banknote_classifier.pkl') 32 | 33 | # Print accuracy 34 | accuracy = 
forest_banknote_classifier.accuracy(inputs_train, outputs_train) 35 | print('Train accuracy:', accuracy) 36 | accuracy = forest_banknote_classifier.accuracy(inputs_test, outputs_test) 37 | print('Test accuracy:', accuracy) 38 | 39 | # Plot confusion matrix 40 | forest_banknote_classifier.confusion_matrix(inputs_test, outputs_test, 41 | gnames=['False', 'True']) 42 | 43 | # Assert accuracy 44 | assert (forest_banknote_classifier.accuracy(inputs_test, outputs_test)) >= 98 45 | 46 | 47 | @pktest_nograph 48 | def test_predict_banknote_forest(): 49 | import numpy as np 50 | import pykitml as pk 51 | 52 | # Predict banknote validity with variance, skewness, curtosis, entropy 53 | # of -2.3, -9.3, 9.37, -0.86 54 | input_data = np.array([-2.3, -9.3, 9.37, -0.86]) 55 | 56 | # Load the model 57 | forest_banknote_classifier = pk.load('forest_banknote_classifier.pkl') 58 | 59 | # Get output 60 | forest_banknote_classifier.feed(input_data) 61 | model_output = forest_banknote_classifier.get_output() 62 | 63 | # Print result 64 | print(model_output) 65 | 66 | 67 | if __name__ == '__main__': 68 | try: 69 | test_banknote_forest.__wrapped__() 70 | test_predict_banknote_forest.__wrapped__() 71 | except AssertionError: 72 | pass 73 | -------------------------------------------------------------------------------- /tests/test_heart_bayes.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph, pktest_nograph 2 | 3 | 4 | @pktest_graph 5 | def test_heart_bayes(): 6 | import os.path 7 | 8 | import pykitml as pk 9 | from pykitml.datasets import heartdisease 10 | 11 | # Download the dataset 12 | if not os.path.exists('heartdisease.pkl'): 13 | heartdisease.get() 14 | 15 | # Load heart data set 16 | inputs, outputs = heartdisease.load() 17 | 18 | # Change 0/False to [1, 0] 19 | # Change 1/True to [0, 1] 20 | outputs = pk.onehot(outputs) 21 | 22 | distributions = [ 23 | 'gaussian', 'binomial', 'multinomial', 24 | 'gaussian', 'gaussian', 'binomial', 'multinomial', 25 | 'gaussian', 'binomial', 'gaussian', 'multinomial', 26 | 'multinomial', 'multinomial' 27 | ] 28 | 29 | # Create model 30 | bayes_heart_classifier = pk.NaiveBayes(13, 2, distributions) 31 | 32 | # Train 33 | bayes_heart_classifier.train(inputs, outputs) 34 | 35 | # Save it 36 | pk.save(bayes_heart_classifier, 'bayes_heart_classifier.pkl') 37 | 38 | # Print accuracy 39 | accuracy = bayes_heart_classifier.accuracy(inputs, outputs) 40 | print('Accuracy:', accuracy) 41 | 42 | # Plot confusion matrix 43 | bayes_heart_classifier.confusion_matrix(inputs, outputs, 44 | gnames=['False', 'True']) 45 | 46 | # Assert accuracy 47 | assert (bayes_heart_classifier.accuracy(inputs, outputs)) > 84 48 | 49 | 50 | @pktest_nograph 51 | def test_predict_heart_bayes(): 52 | import numpy as np 53 | import pykitml as pk 54 | 55 | # Predict heartdisease for a person with 56 | # age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal 57 | # 67, 1, 4, 160, 286, 0, 2, 108, 1, 1.5, 2, 3, 3 58 | input_data = np.array([67, 1, 4, 160, 286, 0, 2, 108, 1, 1.5, 2, 3, 3], dtype=float) 59 | 60 | # Load the model 61 | bayes_heart_classifier = pk.load('bayes_heart_classifier.pkl') 62 | 63 | # Get output 64 | bayes_heart_classifier.feed(input_data) 65 | model_output = bayes_heart_classifier.get_output() 66 | 67 | # Print result (log of probabilities) 68 | print(model_output) 69 | 70 | 71 | if __name__ == '__main__': 72 | # Train 73 | try: 74 | test_heart_bayes.__wrapped__() 75 | 
test_predict_heart_bayes.__wrapped__() 76 | except AssertionError: 77 | pass 78 | -------------------------------------------------------------------------------- /tests/test_iris_svm.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph, pktest_nograph 2 | 3 | 4 | @pktest_graph 5 | def test_iris_svm(): 6 | import numpy as np 7 | import pykitml as pk 8 | from pykitml.datasets import iris 9 | 10 | # Load iris data set 11 | inputs_train, outputs_train, inputs_test, outputs_test = iris.load() 12 | 13 | # Format the outputs for svm training, zeros to -1 14 | svm_outputs_train = np.where(outputs_train == 0, -1, 1) 15 | svm_outputs_test = np.where(outputs_test == 0, -1, 1) 16 | 17 | # Create model 18 | svm_iris_classifier = pk.SVM(4, 3) 19 | 20 | # Train the model 21 | svm_iris_classifier.train( 22 | training_data=inputs_train, 23 | targets=svm_outputs_train, 24 | batch_size=20, 25 | epochs=1000, 26 | optimizer=pk.Adam(learning_rate=3, decay_rate=0.95), 27 | testing_data=inputs_test, 28 | testing_targets=svm_outputs_test, 29 | testing_freq=30, 30 | decay_freq=10 31 | ) 32 | 33 | # Save it 34 | pk.save(svm_iris_classifier, 'svm_iris_classifier.pkl') 35 | 36 | # Print accuracy 37 | accuracy = svm_iris_classifier.accuracy(inputs_train, outputs_train) 38 | print('Train accuracy:', accuracy) 39 | accuracy = svm_iris_classifier.accuracy(inputs_test, outputs_test) 40 | print('Test accuracy:', accuracy) 41 | 42 | # Plot performance 43 | svm_iris_classifier.plot_performance() 44 | 45 | # Plot confusion matrix 46 | svm_iris_classifier.confusion_matrix(inputs_test, outputs_test, 47 | gnames=['Setosa', 'Versicolor', 'Virginica']) 48 | 49 | # Assert if it has enough accuracy 50 | assert svm_iris_classifier.accuracy(inputs_train, outputs_train) >= 97 51 | 52 | 53 | @pktest_nograph 54 | def test_predict_iris_svm(): 55 | import numpy as np 56 | import pykitml as pk 57 | 58 | # Predict type of species with 59 | # sepal-length sepal-width petal-length petal-width 60 | # 5.8, 2.7, 3.9, 1.2 61 | input_data = np.array([5.8, 2.7, 3.9, 1.2]) 62 | 63 | # Load the model 64 | svm_iris_classifier = pk.load('svm_iris_classifier.pkl') 65 | 66 | # Get output 67 | svm_iris_classifier.feed(input_data) 68 | model_output = svm_iris_classifier.get_output_onehot() 69 | 70 | # Print result 71 | print(model_output) 72 | 73 | 74 | if __name__ == '__main__': 75 | try: 76 | test_iris_svm.__wrapped__() 77 | test_predict_iris_svm.__wrapped__() 78 | except AssertionError: 79 | pass 80 | -------------------------------------------------------------------------------- /pykitml/cross_val.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def cross_validate(inputs, outputs, folds=5): 5 | ''' 6 | Python generator function for making K-fold cross validation easier. 7 | 8 | Parameters 9 | ---------- 10 | inputs : numpy.array 11 | Inputs/features of training data. 12 | outputs : numpy.array 13 | Outputs/targets of training data. 14 | 15 | Yields 16 | ------ 17 | train_inputs : numpy.array 18 | Training data containing inputs. 19 | train_outputs : numpy.array 20 | Training data containing outputs. 21 | test_inputs : numpy.array 22 | Testing data containing inputs. 23 | test_outputs : numpy.array 24 | Testing data containing outputs. 25 | 26 | Example 27 | ------- 28 | >>> import numpy as np 29 | >>> import pykitml as pk 30 | >>> 31 | >>> # Mock training data 32 | ... 
x = np.arange(30).reshape((10, 3)) 33 | >>> y = x + 10 34 | >>> 35 | >>> # 5-fold cross validation 36 | ... # Training data is split into 5 blocks, each block takes its turn 37 | ... # to be the test data. 38 | ... for train_x, train_y, test_x, test_y in pk.cross_validate(x, y, 5): 39 | ... print(train_x) 40 | ... print(train_y) 41 | ... print(test_x) 42 | ... print(test_y) 43 | ''' 44 | size = inputs.shape[0] 45 | block_size = size//folds 46 | remainder = size % folds 47 | 48 | # Calculate block sizes 49 | def get_block_size(block): 50 | if block < remainder: 51 | return block_size+1 52 | else: 53 | return block_size 54 | 55 | block_sizes = [get_block_size(block) for block in range(folds)] 56 | 57 | # Calculate block indices 58 | block_indices = [sum(block_sizes[:block]) for block in range(folds)] 59 | 60 | # Generate blocks 61 | def make_block(i, array): 62 | start = block_indices[i] 63 | end = block_indices[i]+block_sizes[i] 64 | return array[start:end] 65 | 66 | for i in range(folds): 67 | # Create testing data 68 | test_inputs, test_outputs = make_block(i, inputs), make_block(i, outputs) 69 | 70 | # Create training data 71 | train_blocks_inputs = [make_block(j, inputs) for j in range(folds) if j != i] 72 | train_inputs = np.concatenate(train_blocks_inputs, axis=0) 73 | train_blocks_outputs = [make_block(j, outputs) for j in range(folds) if j != i] 74 | train_outputs = np.concatenate(train_blocks_outputs, axis=0) 75 | 76 | yield train_inputs, train_outputs, test_inputs, test_outputs 77 | -------------------------------------------------------------------------------- /pykitml/datasets/fishlength.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | ''' 4 | This module contains helper functions to load the fish length dataset. 5 | ''' 6 | 7 | inputs = np.array([ 8 | # Age Temperature 9 | [14, 25], 10 | [28, 25], 11 | [41, 25], 12 | [55, 25], 13 | [69, 25], 14 | [83, 25], 15 | [97, 25], 16 | [111, 25], 17 | [125, 25], 18 | [139, 25], 19 | [153, 25], 20 | [14, 27], 21 | [28, 27], 22 | [41, 27], 23 | [55, 27], 24 | [69, 27], 25 | [83, 27], 26 | [97, 27], 27 | [111, 27], 28 | [125, 27], 29 | [139, 27], 30 | [153, 27], 31 | [14, 29], 32 | [28, 29], 33 | [41, 29], 34 | [55, 29], 35 | [69, 29], 36 | [83, 29], 37 | [97, 29], 38 | [111, 29], 39 | [125, 29], 40 | [139, 29], 41 | [153, 29], 42 | [14, 31], 43 | [28, 31], 44 | [41, 31], 45 | [55, 31], 46 | [69, 31], 47 | [83, 31], 48 | [97, 31], 49 | [111, 31], 50 | [125, 31], 51 | [139, 31], 52 | [153, 31] 53 | ]) 54 | 55 | outputs = np.array([ 56 | # Fish-length 57 | 620, 58 | 1315, 59 | 2120, 60 | 2600, 61 | 3110, 62 | 3535, 63 | 3935, 64 | 4465, 65 | 4530, 66 | 4570, 67 | 4600, 68 | 625, 69 | 1215, 70 | 2110, 71 | 2805, 72 | 3255, 73 | 4015, 74 | 4315, 75 | 4495, 76 | 4535, 77 | 4600, 78 | 4600, 79 | 590, 80 | 1305, 81 | 2140, 82 | 2890, 83 | 3920, 84 | 3920, 85 | 4515, 86 | 4520, 87 | 4525, 88 | 4565, 89 | 4566, 90 | 590, 91 | 1205, 92 | 1915, 93 | 2140, 94 | 2710, 95 | 3020, 96 | 3030, 97 | 3040, 98 | 3180, 99 | 3257, 100 | 3214, 101 | ]) 102 | 103 | 104 | def load(): 105 | ''' 106 | Loads the fish length dataset without any preprocessing. 107 | Source: https://people.sc.fsu.edu/~jburkardt/datasets/regression/x06.txt 108 | 109 | The length of a species of fish is to be represented as a function 110 | of the age and water temperature. The fish are kept in tanks 111 | at 25, 27, 29 and 31 degrees Celsius. 
After birth, a test specimen 112 | is chosen at random every 14 days and its length measured. 113 | 114 | Returns 115 | ------- 116 | inputs : numpy.array 117 | 44x2 numpy array, each row having 2 features, 118 | :code:`age temperature` 119 | outputs : numpy.array 120 | Length of fish, numpy array with 44 elements. 121 | ''' 122 | return inputs, outputs 123 | -------------------------------------------------------------------------------- /tests/test_fishlength.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph, pktest_nograph 2 | 3 | 4 | @pktest_graph 5 | def test_fishlength(): 6 | import pykitml as pk 7 | from pykitml.datasets import fishlength 8 | 9 | # Load the dataset 10 | inputs, outputs = fishlength.load() 11 | 12 | # Normalize inputs 13 | array_min, array_max = pk.get_minmax(inputs) 14 | inputs = pk.normalize_minmax(inputs, array_min, array_max) 15 | 16 | # Create polynomial features 17 | inputs_poly = pk.polynomial(inputs) 18 | 19 | # Normalize outputs 20 | array_min, array_max = pk.get_minmax(outputs) 21 | outputs = pk.normalize_minmax(outputs, array_min, array_max) 22 | 23 | # Create model 24 | fish_classifier = pk.LinearRegression(inputs_poly.shape[1], 1) 25 | 26 | # Train the model 27 | fish_classifier.train( 28 | training_data=inputs_poly, 29 | targets=outputs, 30 | batch_size=22, 31 | epochs=200, 32 | optimizer=pk.Adam(learning_rate=0.02, decay_rate=0.99), 33 | testing_freq=1, 34 | decay_freq=10 35 | ) 36 | 37 | # Save model 38 | pk.save(fish_classifier, 'fish_classifier.pkl') 39 | 40 | # Plot performance 41 | fish_classifier.plot_performance() 42 | 43 | # Print r2 score 44 | print('r2score:', fish_classifier.r2score(inputs_poly, outputs)) 45 | 46 | # Assert if it has enough accuracy 47 | assert fish_classifier.cost(inputs_poly, outputs) <= 0 48 | 49 | 50 | @pktest_nograph 51 | def test_predict_fishlength(): 52 | import numpy as np 53 | import pykitml as pk 54 | from pykitml.datasets import fishlength 55 | 56 | # Predict length of fish that is 28 days old at 25C 57 | 58 | # Load the dataset 59 | inputs, outputs = fishlength.load() 60 | 61 | # Load the model 62 | fish_classifier = pk.load('fish_classifier.pkl') 63 | 64 | # Normalize inputs 65 | array_min, array_max = pk.get_minmax(inputs) 66 | input_data = pk.normalize_minmax(np.array([28, 25]), array_min, array_max) 67 | 68 | # Create polynomial features 69 | input_data_poly = pk.polynomial(input_data) 70 | 71 | # Get output 72 | fish_classifier.feed(input_data_poly) 73 | model_output = fish_classifier.get_output() 74 | 75 | # Denormalize output 76 | array_min, array_max = pk.get_minmax(outputs) 77 | model_output = pk.denormalize_minmax(model_output, array_min, array_max) 78 | 79 | # Print result 80 | print(model_output) 81 | 82 | 83 | if __name__ == '__main__': 84 | try: 85 | test_fishlength.__wrapped__() 86 | 87 | test_predict_fishlength.__wrapped__() 88 | except AssertionError: 89 | pass 90 | -------------------------------------------------------------------------------- /docs/Datasets.rst: -------------------------------------------------------------------------------- 1 | Datasets 2 | ======== 3 | 4 | 5 | .. _mnist_dataset: 6 | 7 | MNIST - pykitml.datasets.mnist module 8 | ------------------------------------- 9 | 10 | .. automodule:: pykitml.datasets.mnist 11 | 12 | .. autofunction:: pykitml.datasets.mnist.get 13 | 14 | .. autofunction:: pykitml.datasets.mnist.load 15 | 16 | 17 | .. 
_iris_dataset: 18 | 19 | Iris - pykitml.datasets.iris module 20 | ----------------------------------- 21 | 22 | .. automodule:: pykitml.datasets.iris 23 | 24 | .. autofunction:: pykitml.datasets.iris.load 25 | 26 | 27 | .. _fishlength_dataset: 28 | 29 | Fish Length - pykitml.datasets.fishlength module 30 | ------------------------------------------------ 31 | 32 | .. automodule:: pykitml.datasets.fishlength 33 | 34 | .. autofunction:: pykitml.datasets.fishlength.load 35 | 36 | 37 | .. _heart_dataset: 38 | 39 | Heart Disease - pykitml.datasets.heartdisease module 40 | ---------------------------------------------------- 41 | 42 | .. automodule:: pykitml.datasets.heartdisease 43 | 44 | .. autofunction:: pykitml.datasets.heartdisease.get 45 | 46 | .. autofunction:: pykitml.datasets.heartdisease.load 47 | 48 | 49 | .. _adult_dataset: 50 | 51 | Adult - pykitml.datasets.adult module 52 | ---------------------------------------------------- 53 | 54 | .. automodule:: pykitml.datasets.adult 55 | 56 | .. autofunction:: pykitml.datasets.adult.get 57 | 58 | .. autofunction:: pykitml.datasets.adult.load 59 | 60 | 61 | .. _banknote_dataset: 62 | 63 | Banknote - pykitml.datasets.banknote module 64 | ---------------------------------------------------- 65 | 66 | .. automodule:: pykitml.datasets.banknote 67 | 68 | .. autofunction:: pykitml.datasets.banknote.get 69 | 70 | .. autofunction:: pykitml.datasets.banknote.load 71 | 72 | 73 | .. _sonar_dataset: 74 | 75 | Sonar Rocks and Mines - pykitml.datasets.sonar module 76 | ------------------------------------------------------- 77 | 78 | .. automodule:: pykitml.datasets.sonar 79 | 80 | .. autofunction:: pykitml.datasets.sonar.get 81 | 82 | .. autofunction:: pykitml.datasets.sonar.load 83 | 84 | 85 | .. _boston_dataset: 86 | 87 | Boston Housing - pykitml.datasets.boston module 88 | ---------------------------------------------------- 89 | 90 | .. automodule:: pykitml.datasets.boston 91 | 92 | .. autofunction:: pykitml.datasets.boston.get 93 | 94 | .. autofunction:: pykitml.datasets.boston.load 95 | 96 | 97 | .. _s1clustering_dataset: 98 | 99 | S1 Clustering - pykitml.datasets.s1clustering module 100 | ---------------------------------------------------- 101 | 102 | .. automodule:: pykitml.datasets.s1clustering 103 | 104 | .. autofunction:: pykitml.datasets.s1clustering.get 105 | 106 | .. autofunction:: pykitml.datasets.s1clustering.load 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![pykitml logo](https://raw.githubusercontent.com/RainingComputers/pykitml/master/pykitml128.png) 2 | 3 | # pykitml (Python Kit for Machine Learning) 4 | Machine Learning library written in Python and NumPy.
5 | 6 | ### Installation 7 | 8 | ``` 9 | python3 -m pip install pykitml 10 | ``` 11 | 12 | ### Documentation 13 | 14 | https://pykitml.readthedocs.io/en/latest/ 15 | 16 | # Demo (MNIST) 17 | ### Training 18 | ``` python 19 | import os.path 20 | 21 | import numpy as np 22 | import pykitml as pk 23 | from pykitml.datasets import mnist 24 | 25 | # Download dataset 26 | if(not os.path.exists('mnist.pkl')): mnist.get() 27 | 28 | # Load dataset 29 | training_data, training_targets, testing_data, testing_targets = mnist.load() 30 | 31 | # Create a new neural network 32 | digit_classifier = pk.NeuralNetwork([784, 100, 10]) 33 | 34 | # Train it 35 | digit_classifier.train( 36 | training_data=training_data, 37 | targets=training_targets, 38 | batch_size=50, 39 | epochs=1200, 40 | optimizer=pk.Adam(learning_rate=0.012, decay_rate=0.95), 41 | testing_data=testing_data, 42 | testing_targets=testing_targets, 43 | testing_freq=30, 44 | decay_freq=15 45 | ) 46 | 47 | # Save it 48 | pk.save(digit_classifier, 'digit_classifier_network.pkl') 49 | 50 | # Show performance 51 | accuracy = digit_classifier.accuracy(training_data, training_targets) 52 | print('Train Accuracy:', accuracy) 53 | accuracy = digit_classifier.accuracy(testing_data, testing_targets) 54 | print('Test Accuracy:', accuracy) 55 | 56 | # Plot performance graph 57 | digit_classifier.plot_performance() 58 | 59 | # Show confusion matrix 60 | digit_classifier.confusion_matrix(training_data, training_targets) 61 | ``` 62 | 63 | ### Trying the model 64 | ```python 65 | import random 66 | 67 | import numpy as np 68 | import matplotlib.pyplot as plt 69 | import pykitml as pk 70 | from pykitml.datasets import mnist 71 | 72 | # Load dataset 73 | training_data, training_targets, testing_data, testing_targets = mnist.load() 74 | 75 | # Load the trained network 76 | digit_classifier = pk.load('digit_classifier_network.pkl') 77 | 78 | # Pick a random example from testing data 79 | index = random.randint(0, 9999) 80 | 81 | # Show the test data and the label 82 | plt.imshow(training_data[index].reshape(28, 28)) 83 | plt.show() 84 | print('Label: ', training_targets[index]) 85 | 86 | # Show prediction 87 | digit_classifier.feed(training_data[index]) 88 | model_output = digit_classifier.get_output_onehot() 89 | print('Predicted: ', model_output) 90 | ``` 91 | 92 | ### Performance Graph 93 | 94 | ![Performance Graph](https://raw.githubusercontent.com/RainingComputers/pykitml/master/docs/demo_pics/neural_network_perf_graph.png) 95 | 96 | ### Confusion Matrix 97 | 98 | ![Confusion Matrix](https://raw.githubusercontent.com/RainingComputers/pykitml/master/docs/demo_pics/neural_network_confusion_matrix.png) 99 | -------------------------------------------------------------------------------- /tests/test_banknote.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph, pktest_nograph 2 | 3 | 4 | @pktest_graph 5 | def test_banknote(): 6 | import os.path 7 | 8 | import pykitml as pk 9 | from pykitml.datasets import banknote 10 | 11 | # Download the dataset 12 | if not os.path.exists('banknote.pkl'): 13 | banknote.get() 14 | 15 | # Load banknote data set 16 | inputs_train, outputs_train, inputs_test, outputs_test = banknote.load() 17 | 18 | # Normalize dataset 19 | array_min, array_max = pk.get_minmax(inputs_train) 20 | inputs_train = pk.normalize_minmax(inputs_train, array_min, array_max) 21 | inputs_test = pk.normalize_minmax(inputs_test, array_min, array_max) 22 | 23 | # Create polynomial 
features 24 | inputs_train_poly = pk.polynomial(inputs_train) 25 | inputs_test_poly = pk.polynomial(inputs_test) 26 | 27 | # Create model 28 | banknote_classifier = pk.LogisticRegression(inputs_train_poly.shape[1], 1) 29 | 30 | # Train the model 31 | banknote_classifier.train( 32 | training_data=inputs_train_poly, 33 | targets=outputs_train, 34 | batch_size=10, 35 | epochs=1500, 36 | optimizer=pk.Adam(learning_rate=0.06, decay_rate=0.99), 37 | testing_data=inputs_test_poly, 38 | testing_targets=outputs_test, 39 | testing_freq=30, 40 | decay_freq=40 41 | ) 42 | 43 | # Save it 44 | pk.save(banknote_classifier, 'banknote_classifier.pkl') 45 | 46 | # Plot performance 47 | banknote_classifier.plot_performance() 48 | 49 | # Print accuracy 50 | accuracy = banknote_classifier.accuracy(inputs_train_poly, outputs_train) 51 | print('Train accuracy:', accuracy) 52 | accuracy = banknote_classifier.accuracy(inputs_test_poly, outputs_test) 53 | print('Test accuracy:', accuracy) 54 | 55 | # Plot confusion matrix 56 | banknote_classifier.confusion_matrix(inputs_test_poly, outputs_test) 57 | 58 | # Assert if it has enough accuracy 59 | assert banknote_classifier.accuracy(inputs_test_poly, outputs_test) >= 99 60 | 61 | 62 | @pktest_nograph 63 | def test_predict_banknote(): 64 | import numpy as np 65 | import pykitml as pk 66 | from pykitml.datasets import banknote 67 | 68 | # Predict banknote validity with variance, skewness, curtosis, entropy 69 | # of -2.3, -9.3, 9.37, -0.86 70 | 71 | # Load banknote data set 72 | inputs_train, _, _, _ = banknote.load() 73 | 74 | # Load the model 75 | banknote_classifier = pk.load('banknote_classifier.pkl') 76 | 77 | # Normalize the inputs 78 | array_min, array_max = pk.get_minmax(inputs_train) 79 | input_data = pk.normalize_minmax(np.array([-2.3, -9.3, 9.37, -0.86]), array_min, array_max) 80 | 81 | # Create polynomial features 82 | input_data_poly = pk.polynomial(input_data) 83 | 84 | # Get output 85 | banknote_classifier.feed(input_data_poly) 86 | model_output = banknote_classifier.get_output() 87 | 88 | # Print result 89 | print(model_output) 90 | 91 | 92 | if __name__ == '__main__': 93 | try: 94 | test_banknote.__wrapped__() 95 | test_predict_banknote.__wrapped__() 96 | except AssertionError: 97 | pass 98 | -------------------------------------------------------------------------------- /pykitml/datasets/boston.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib import request 3 | 4 | from numpy import genfromtxt 5 | 6 | from .. import pklhandler 7 | 8 | ''' 9 | This module contains helper functions to download and load 10 | the boston housing dataset. 11 | ''' 12 | 13 | 14 | def get(): 15 | ''' 16 | Downloads the boston dataset from 17 | https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ 18 | and saves it as a pkl file `boston.pkl`. 19 | 20 | Raises 21 | ------ 22 | urllib.error.URLError 23 | If internet connection is not available or the URL is not accessible. 24 | OSError 25 | If the file cannot be created due to a system-related error. 28 | 29 | Note 30 | ---- 31 | You only need to call this method once, i.e, after the dataset has been downloaded 32 | and you have the `boston.pkl` file, you don't need to call this method again.
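Example
-------
A minimal sketch of the intended download-once workflow, mirroring the
pattern used in the test suite:

>>> import os.path
>>> from pykitml.datasets import boston
>>> if not os.path.exists('boston.pkl'):
...     boston.get()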
33 | ''' 34 | # Url to download the dataset from 35 | url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data' 36 | 37 | # Download the dataset 38 | print('Downloading housing.data...') 39 | request.urlretrieve(url, 'housing.data') 40 | print('Download complete.') 41 | 42 | # Parse the data and save it as a pkl file 43 | pklhandler.save(genfromtxt('housing.data'), 'boston.pkl') 44 | # Delete unnecessary files 45 | os.remove('housing.data') 46 | print('Deleted unnecessary files.') 47 | 48 | 49 | def load(): 50 | ''' 51 | Loads the boston housing dataset from pkl file. 52 | 53 | The inputs have the following columns: 54 | 55 | - CRIM : 56 | per capita crime rate by town 57 | - ZN : 58 | proportion of residential land zoned for lots over 25,000 sq.ft. 59 | - INDUS : 60 | proportion of non-retail business acres per town 61 | - CHAS : 62 | Charles River dummy variable (= 1 if tract bounds river; 0 otherwise) 63 | - NOX : 64 | nitric oxides concentration (parts per 10 million) 65 | - RM : 66 | average number of rooms per dwelling 67 | - AGE : 68 | proportion of owner-occupied units built prior to 1940 69 | - DIS : 70 | weighted distances to five Boston employment centres 71 | - RAD : 72 | index of accessibility to radial highways 73 | - TAX : 74 | full-value property-tax rate per $10,000 75 | - PTRATIO : 76 | pupil-teacher ratio by town 77 | - B : 78 | 1000(Bk - 0.63)^2 where Bk is the proportion of black by town 79 | - LSTAT : 80 | % lower status of the population 81 | 82 | The outputs are 83 | 84 | - MEDV : 85 | Median value of owner-occupied homes in $1000's 86 | 87 | Returns 88 | ------- 89 | inputs_train : numpy.array 90 | outputs_train : numpy.array 91 | inputs_test : numpy.array 92 | outputs_test : numpy.array 93 | 94 | ''' 95 | data_array = pklhandler.load('boston.pkl') 96 | 97 | inputs_train = data_array[0:500, :-1] 98 | outputs_train = data_array[0:500, -1] 99 | inputs_test = data_array[500:, :-1] 100 | outputs_test = data_array[500:, -1] 101 | 102 | return inputs_train, outputs_train, inputs_test, outputs_test 103 | -------------------------------------------------------------------------------- /tests/test_mnist_svm.py: -------------------------------------------------------------------------------- 1 | from pykitml.testing import pktest_graph 2 | 3 | 4 | @pktest_graph 5 | def test_mnist_svm(): 6 | import os.path 7 | 8 | import numpy as np 9 | import pykitml as pk 10 | from pykitml.datasets import mnist 11 | 12 | # Download dataset 13 | if not os.path.exists('mnist.pkl'): 14 | mnist.get() 15 | 16 | # Load mnist data set 17 | inputs_train, outputs_train, inputs_test, outputs_test = mnist.load() 18 | 19 | # Train on only first 10000 20 | inputs_train = inputs_train[:10000] 21 | outputs_train = outputs_train[:10000] 22 | 23 | # Transform inputs using gaussian kernel 24 | sigma = 3.15 25 | gaussian_inputs_train = pk.gaussian_kernel(inputs_train, inputs_train, sigma) 26 | gaussian_inputs_test = pk.gaussian_kernel(inputs_test, inputs_train, sigma) 27 | 28 | # Format the outputs for svm training, zeros to -1 29 | svm_outputs_train = np.where(outputs_train == 0, -1, 1) 30 | svm_outputs_test = np.where(outputs_test == 0, -1, 1) 31 | 32 | # Create model 33 | svm_mnist_classifier = pk.SVM(gaussian_inputs_train.shape[1], 10) 34 | 35 | # Train the model 36 | svm_mnist_classifier.train( 37 | training_data=gaussian_inputs_train, 38 | targets=svm_outputs_train, 39 | batch_size=20, 40 | epochs=1000, 41 | optimizer=pk.Adam(learning_rate=3.5, decay_rate=0.95), 42 |
testing_data=gaussian_inputs_test, 43 | testing_targets=svm_outputs_test, 44 | testing_freq=30, 45 | decay_freq=10 46 | ) 47 | 48 | # Save it 49 | pk.save(svm_mnist_classifier, 'svm_mnist_classifier.pkl') 50 | 51 | # Print accuracy 52 | accuracy = svm_mnist_classifier.accuracy(gaussian_inputs_train, outputs_train) 53 | print('Train accuracy:', accuracy) 54 | accuracy = svm_mnist_classifier.accuracy(gaussian_inputs_test, outputs_test) 55 | print('Test accuracy:', accuracy) 56 | 57 | # Plot performance 58 | svm_mnist_classifier.plot_performance() 59 | 60 | # Plot confusion matrix 61 | svm_mnist_classifier.confusion_matrix(gaussian_inputs_test, outputs_test) 62 | 63 | # Assert if it has enough accuracy 64 | assert svm_mnist_classifier.accuracy(gaussian_inputs_train, outputs_train) >= 90 65 | 66 | 67 | @pktest_graph 68 | def test_predict_mnist_svm(): 69 | import random 70 | 71 | import matplotlib.pyplot as plt 72 | import pykitml as pk 73 | from pykitml.datasets import mnist 74 | 75 | # Load dataset 76 | inputs_train, outputs_train, _, _ = mnist.load() 77 | 78 | # Use only first 10000 79 | inputs_train = inputs_train[:10000] 80 | outputs_train = outputs_train[:10000] 81 | 82 | # Load the trained network 83 | svm_mnist_classifier = pk.load('svm_mnist_classifier.pkl') 84 | 85 | # Pick a random example from the training data 86 | index = random.randint(0, 9000) 87 | 88 | # Show the example and the label 89 | plt.imshow(inputs_train[index].reshape(28, 28)) 90 | plt.show() 91 | print('Label: ', outputs_train[index]) 92 | 93 | # Transform the input with the same sigma used for training 94 | input_data = pk.gaussian_kernel(inputs_train[index], inputs_train, 3.15) 95 | 96 | # Show prediction 97 | svm_mnist_classifier.feed(input_data) 98 | model_output = svm_mnist_classifier.get_output_onehot() 99 | print('Predicted: ', model_output) 100 | 101 | 102 | if __name__ == '__main__': 103 | try: 104 | test_mnist_svm.__wrapped__() 105 | test_predict_mnist_svm.__wrapped__() 106 | except AssertionError: 107 | pass 108 | -------------------------------------------------------------------------------- /docs/Normalization.rst: -------------------------------------------------------------------------------- 1 | Normalization/Feature-scaling 2 | ============================= 3 | 4 | Min-Max Normalization 5 | --------------------- 6 | 7 | .. autofunction:: pykitml.get_minmax 8 | 9 | .. autofunction:: pykitml.normalize_minmax 10 | 11 | .. autofunction:: pykitml.denormalize_minmax 12 | 13 | **Example** 14 | 15 | >>> import numpy as np 16 | >>> import pykitml as pk 17 | >>> a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]]) 18 | >>> min_array, max_array = pk.get_minmax(a) 19 | >>> normalized_a = pk.normalize_minmax(a, min_array, max_array) 20 | >>> normalized_a 21 | array([[0. , 0. , 0. , 0. ], 22 | [0.33333333, 0.33333333, 0.33333333, 0.33333333], 23 | [0.66666667, 0.66666667, 0.66666667, 0.66666667], 24 | [1. , 1. , 1. , 1. ]]) 25 | >>> pk.denormalize_minmax(normalized_a, min_array, max_array) 26 | array([[ 1., 2., 3., 4.], 27 | [ 5., 6., 7., 8.], 28 | [ 9., 10., 11., 12.], 29 | [13., 14., 15., 16.]]) 30 | 31 | You can also normalize/denormalize only specific columns: 32 | 33 | >>> import numpy as np 34 | >>> import pykitml as pk 35 | >>> a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]]) 36 | >>> min_array, max_array = pk.get_minmax(a) 37 | >>> normalized_a = pk.normalize_minmax(a, min_array, max_array, cols=[0, 2]) 38 | >>> normalized_a 39 | array([[ 0. , 2. , 0. , 4. ], 40 | [ 0.33333333, 6. , 0.33333333, 8.
], 41 | [ 0.66666667, 10. , 0.66666667, 12. ], 42 | [ 1. , 14. , 1. , 16. ]]) 43 | >>> pk.denormalize_minmax(normalized_a, min_array, max_array, cols=[0, 2]) 44 | array([[ 1., 2., 3., 4.], 45 | [ 5., 6., 7., 8.], 46 | [ 9., 10., 11., 12.], 47 | [13., 14., 15., 16.]]) 48 | 49 | Mean Normalization 50 | ------------------ 51 | 52 | .. autofunction:: pykitml.get_meanstd 53 | 54 | .. autofunction:: pykitml.normalize_mean 55 | 56 | .. autofunction:: pykitml.denormalize_mean 57 | 58 | **Example** 59 | 60 | >>> import numpy as np 61 | >>> import pykitml as pk 62 | >>> a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]]) 63 | >>> array_mean, array_stddev = pk.get_meanstd(a) 64 | >>> normalized_a = pk.normalize_mean(a, array_mean, array_stddev) 65 | >>> normalized_a 66 | array([[-1.34164079, -1.34164079, -1.34164079, -1.34164079], 67 | [-0.4472136 , -0.4472136 , -0.4472136 , -0.4472136 ], 68 | [ 0.4472136 , 0.4472136 , 0.4472136 , 0.4472136 ], 69 | [ 1.34164079, 1.34164079, 1.34164079, 1.34164079]]) 70 | >>> pk.denormalize_mean(normalized_a, array_mean, array_stddev) 71 | array([[ 1., 2., 3., 4.], 72 | [ 5., 6., 7., 8.], 73 | [ 9., 10., 11., 12.], 74 | [13., 14., 15., 16.]]) 75 | 76 | You can also normalize/denormalize only specific columns: 77 | 78 | >>> import numpy as np 79 | >>> import pykitml as pk 80 | >>> a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]]) 81 | >>> array_mean, array_stddev = pk.get_meanstd(a) 82 | >>> normalized_a = pk.normalize_mean(a, array_mean, array_stddev, cols=[0,2]) 83 | >>> normalized_a 84 | array([[-1.34164079, 2. , -1.34164079, 4. ], 85 | [-0.4472136 , 6. , -0.4472136 , 8. ], 86 | [ 0.4472136 , 10. , 0.4472136 , 12. ], 87 | [ 1.34164079, 14. , 1.34164079, 16. ]]) 88 | >>> pk.denormalize_mean(normalized_a, array_mean, array_stddev, cols=[0,2]) 89 | array([[ 1., 2., 3., 4.], 90 | [ 5., 6., 7., 8.], 91 | [ 9., 10., 11., 12.], 92 | [13., 14., 15., 16.]]) 93 | 94 | -------------------------------------------------------------------------------- /pykitml/_single_layer_model.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | import numpy as np 4 | 5 | from ._minimize_model import MinimizeModel 6 | 7 | 8 | class SingleLayerModel(MinimizeModel, ABC): 9 | ''' 10 | General base class for single layer models. 11 | ''' 12 | 13 | def __init__(self, input_size, output_size, reg_param=0): 14 | ''' 15 | Parameters 16 | ---------- 17 | input_size : int 18 | Size of input data or number of input features. 19 | output_size : int 20 | Number of categories or groups. 21 | reg_param : float 22 | Regularization parameter for the model, also known as 'weight decay'.
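Setting :code:`reg_param` to 0 (the default) disables regularization.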
23 | ''' 24 | # Save sizes 25 | self._input_size = input_size 26 | self._output_size = output_size 27 | 28 | # Initialize regularization parameter 29 | self._reg_param = reg_param 30 | self._reg_param_half = reg_param/2 31 | 32 | # Initialize weights and parameters 33 | epsilon = np.sqrt(6)/(np.sqrt(output_size) + np.sqrt(input_size)) 34 | weights = np.random.rand(output_size, input_size)*2*epsilon - epsilon 35 | biases = np.random.rand(output_size) * 2 * epsilon - epsilon 36 | 37 | # Numpy array to store activations 38 | self._inputa = np.array([]) 39 | self.a = np.array([]) 40 | self.z = np.array([]) 41 | 42 | # Put parameters in numpy dtype=object array 43 | W = 0 # Weights 44 | B = 1 # Biases 45 | self._params = np.array([None, None], dtype=object) 46 | self._params[W] = weights 47 | self._params[B] = biases 48 | 49 | @property 50 | def _mparams(self): 51 | return self._params 52 | 53 | @_mparams.setter 54 | def _mparams(self, mparams): 55 | self._params = mparams 56 | 57 | @property 58 | def _cost_function(self): 59 | return self._cost_func 60 | 61 | @property 62 | def _out_size(self): 63 | return self._output_size 64 | 65 | def feed(self, input_data): 66 | # Constants 67 | W = 0 # Weights 68 | B = 1 # Biases 69 | 70 | # feed 71 | self._inputa = input_data 72 | self.z = (input_data @ self._params[W].T) + self._params[B] 73 | self.a = self._activ_func(self.z) 74 | 75 | def get_output(self): 76 | return self.a.squeeze() 77 | 78 | def _backpropagate(self, index, targets): 79 | # Constants 80 | W = 0 # Weights 81 | B = 1 # Biases 82 | 83 | # Gradients 84 | da_dz = self._activ_func_prime(self.z[index], self.a[index]) 85 | dc_db = self._cost_func_prime(self.a[index], targets) * da_dz 86 | dc_dw = np.multiply.outer(dc_db, self._inputa[index]) 87 | 88 | # Add regularization 89 | dc_dw += self._reg_param*self._params[W] 90 | 91 | # Return gradient 92 | gradient = np.array([None, None], dtype=object) 93 | gradient[W] = dc_dw 94 | gradient[B] = dc_db 95 | return gradient 96 | 97 | @property 98 | def bptt(self): 99 | return False 100 | 101 | def _get_norm_weights(self): 102 | W = 0 103 | return self._reg_param_half*(self._params[W]**2).sum() 104 | 105 | @property 106 | @abstractmethod 107 | def _activ_func(self): 108 | pass 109 | 110 | @property 111 | @abstractmethod 112 | def _activ_func_prime(self): 113 | pass 114 | 115 | @property 116 | @abstractmethod 117 | def _cost_func(self): 118 | pass 119 | 120 | @property 121 | @abstractmethod 122 | def _cost_func_prime(self): 123 | pass 124 | -------------------------------------------------------------------------------- /pykitml/datasets/banknote.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib import request 3 | 4 | import numpy as np 5 | from numpy import genfromtxt 6 | 7 | from .. import pklhandler 8 | 9 | ''' 10 | This module contains helper functions to download and load 11 | the banknote dataset. 12 | ''' 13 | 14 | 15 | def get(): 16 | ''' 17 | Downloads the banknote dataset from 18 | http://archive.ics.uci.edu/ml/datasets/banknote+authentication 19 | and saves it as a pkl file `banknote.pkl`. 20 | 21 | Raises 22 | ------ 23 | urllib.error.URLError 24 | If internet connection is not available or the URL is not accessible. 25 | OSError 26 | If the file cannot be created due to a system-related error.
29 | 30 | Note 31 | ---- 32 | You only need to call this method once, i.e, after the dataset has been downloaded 33 | and you have the `banknote.pkl` file, you don't need to call this method again. 34 | ''' 35 | # Url to download the dataset from 36 | url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00267/data_banknote_authentication.txt' 37 | 38 | # Download the dataset 39 | print('Downloading data_banknote_authentication.txt') 40 | request.urlretrieve(url, 'data_banknote_authentication.txt') 41 | print('Download complete.') 42 | 43 | # Parse the data and save it as a pkl file 44 | pklhandler.save(genfromtxt('data_banknote_authentication.txt', delimiter=','), 'banknote.pkl') 45 | 46 | # Delete unnecessary files 47 | os.remove('data_banknote_authentication.txt') 48 | print('Deleted unnecessary files.') 49 | 50 | 51 | def load(): 52 | ''' 53 | Loads the banknote data from pkl file. 54 | 55 | The inputs have the following columns: 56 | 57 | - Variance of Wavelet Transformed image (continuous) 58 | - Skewness of Wavelet Transformed image (continuous) 59 | - Curtosis of Wavelet Transformed image (continuous) 60 | - Entropy of image (continuous) 61 | 62 | The outputs are: 63 | 64 | - 0 = Real 65 | - 1 = Counterfeit 66 | 67 | Returns 68 | ------- 69 | inputs_train : numpy.array 70 | 1102x4 numpy array containing training inputs. 71 | outputs_train : numpy.array 72 | Numpy array of size 1102. 73 | inputs_test : numpy.array 74 | 270x4 numpy array containing testing inputs. 75 | outputs_test : numpy.array 76 | Numpy array of size 270. 77 | 78 | ''' 79 | data_array = pklhandler.load('banknote.pkl') 80 | 81 | # Separate data, positive and negative examples 82 | negative_examples = data_array[:762] 83 | positive_examples = data_array[762:] 84 | 85 | # Separate into training and testing 86 | negative_examples_test = negative_examples[:150] 87 | negative_examples_train = negative_examples[150:] 88 | positive_examples_test = positive_examples[:120] 89 | positive_examples_train = positive_examples[120:] 90 | 91 | # Join them to form training and testing dataset 92 | train = np.concatenate((negative_examples_train, positive_examples_train), axis=0) 93 | test = np.concatenate((negative_examples_test, positive_examples_test), axis=0) 94 | 95 | # Shuffle the dataset 96 | shuff_indices = np.arange(train.shape[0]) 97 | np.random.shuffle(shuff_indices) 98 | train = train[shuff_indices] 99 | shuff_indices = np.arange(test.shape[0]) 100 | np.random.shuffle(shuff_indices) 101 | test = test[shuff_indices] 102 | 103 | inputs_train = train[:, :-1] 104 | outputs_train = train[:, -1] 105 | inputs_test = test[:, :-1] 106 | outputs_test = test[:, -1] 107 | 108 | return inputs_train, outputs_train, inputs_test, outputs_test 109 | -------------------------------------------------------------------------------- /pykitml/kmeans_clustering.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tqdm 3 | 4 | from . import _functions 5 | 6 | 7 | def kmeans(training_data, nclusters, max_iter=1000, trials=50): 8 | ''' 9 | Identifies cluster centres on training data using k-means. 10 | 11 | Parameters 12 | ---------- 13 | training_data : numpy.array 14 | Numpy array containing training data. 15 | nclusters : int 16 | Number of clusters to find. 17 | max_iter : int 18 | Maximum number of iterations to run per trial. 19 | trials : int 20 | Number of times k-means should run, each with different 21 | random initialization.
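Cluster centres from the trial with the lowest cost are the ones returned.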
22 | 23 | Returns 24 | ------- 25 | clusters : numpy.array 26 | Numpy array containing cluster centres. 27 | cost : float 28 | The cost of the converged cluster centres. 29 | 30 | ''' 31 | 32 | # Keep track of trial with the least cost 33 | min_cost = float('infinity') 34 | distances = None 35 | clusters_min_cost = None 36 | clusters = None 37 | 38 | # Keep log of maximum number of iterations for convergence 39 | max_iter_log = 0 40 | 41 | pbar = tqdm.trange(0, trials, ncols=80, unit='trials') 42 | for _ in pbar: 43 | # Use kmeans++ to initialize cluster centres 44 | clusters = np.zeros((nclusters, training_data.shape[1])) 45 | 46 | # First cluster centre is random 47 | index = np.random.randint(training_data.shape[0], size=1) 48 | clusters[0] = training_data[index] 49 | 50 | # Loop for rest of cluster centres 51 | for i in range(1, nclusters): 52 | # Calculate distance between every data point and previous cluster centre 53 | prev_cluster_dists = _functions.pdist(clusters[i-1], training_data).squeeze() 54 | # Normalize distances 55 | prev_cluster_dists = prev_cluster_dists/prev_cluster_dists.sum() 56 | 57 | # Sample index with probability distribution proportional to distances 58 | index = np.random.choice(training_data.shape[0], 1, p=prev_cluster_dists) 59 | 60 | # Assign next cluster centre 61 | clusters[i] = training_data[index] 62 | 63 | # Start kmeans, keep looping and moving the cluster centres to the mean 64 | for iteration in range(max_iter): 65 | new_clusters = np.zeros((nclusters, training_data.shape[1])) 66 | 67 | # Calculate distances between clusters and every point in training data 68 | distances = _functions.pdist(training_data, clusters) 69 | 70 | # Assign a cluster index to each data point 71 | cluster_assignments = np.argmin(distances, axis=1) 72 | 73 | # Move cluster by taking mean of all the points assigned to that cluster 74 | for i in range(nclusters): 75 | cluster_points = training_data[cluster_assignments == i] 76 | if cluster_points.shape[0] == 0: 77 | continue 78 | new_clusters[i] = np.mean(cluster_points, axis=0) 79 | 80 | # Check for convergence 81 | if(np.abs(new_clusters-clusters) == 0).all(): 82 | break 83 | 84 | # Assign new clusters 85 | clusters = new_clusters 86 | 87 | # Select cluster centres with least cost 88 | cost = np.mean(np.min(distances, axis=1)) 89 | if cost < min_cost: 90 | clusters_min_cost = clusters 91 | min_cost = cost 92 | 93 | # Update maximum iterations for convergence 94 | if iteration > max_iter_log: 95 | max_iter_log = iteration 96 | 97 | # Update progress bar 98 | pbar.set_postfix(cost=min_cost, max_it=max_iter_log) 99 | 100 | return clusters_min_cost, min_cost 101 | -------------------------------------------------------------------------------- /pykitml/random_search.py: -------------------------------------------------------------------------------- 1 | import random 2 | import math 3 | 4 | 5 | class RandomSearch: 6 | ''' 7 | This class is used to search for hyperparameters. 8 | ''' 9 | 10 | def __init__(self): 11 | self._curr_cost = None 12 | self._best = False 13 | 14 | @property 15 | def best(self): 16 | ''' 17 | Whether the last generated set of hyperparameters is the best so far. 18 | 19 | Note 20 | ---- 21 | This property has to be used AFTER calling :py:func:`set_cost`. 22 | ''' 23 | return self._best 24 | 25 | def set_cost(self, cost): 26 | ''' 27 | Set the cost for the current set of hyperparameters. 28 | 29 | Parameters 30 | ---------- 31 | cost : float 32 | The cost corresponding to the current set of hyperparameters.
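Example
-------
A minimal sketch of the intended search loop; :code:`evaluate_model` is a
hypothetical helper that trains a model and returns its validation cost:

>>> search = RandomSearch()
>>> for lr, reg in search.search(10, 3, 2, [-4, -1, 'log'], [0, 1, 'float']):
...     search.set_cost(evaluate_model(lr, reg))
...     if search.best:
...         print('Best so far:', lr, reg)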
33 | ''' 34 | self._curr_cost = cost 35 | 36 | def search(self, nsamples, nzoom, zoomratio, *args): 37 | ''' 38 | Generator function to loop through randomly generated hyperparameters. 39 | Total number of hyperparameters sampled will be :code:`nsamples*nzoom`. 40 | First :code:`nsamples` points will be sampled, then the function will 41 | 'zoom in' around the best sample, and :code:`nsamples` more points will 42 | be sampled. This will be repeated :code:`nzoom` times. 43 | The range for each hyperparameter should be passed as a list to 44 | :code:`*args`. The range should be :code:`[from, to, 'type']`, 45 | e.g. :code:`[0.8, 1, 'float']`. Three range types are available: 46 | :code:`'float'`, :code:`'int'`, :code:`'log'`. 47 | 48 | Parameters 49 | ---------- 50 | nsamples : int 51 | Number of hyperparameters to sample. 52 | nzoom : int 53 | Number of times to zoom in. 54 | zoomratio : float 55 | How much to zoom in. 56 | *args 57 | Range specification for each hyperparameter. 58 | ''' 59 | best_params = None 60 | min_cost = float('inf') 61 | range_types = args 62 | 63 | for z in range(nzoom): 64 | for i in range(nsamples): 65 | params = [] 66 | # Generate hyperparameters 67 | for rtype in range_types: 68 | l = rtype[0] 69 | u = rtype[1] 70 | if rtype[2] == 'int': 71 | params.append(random.randint(int(l), int(u))) 72 | elif rtype[2] == 'float': 73 | params.append(random.uniform(l, u)) 74 | elif rtype[2] == 'log': 75 | params.append(10**random.uniform(l, u)) 76 | 77 | print(f'Testing {i+1}/{nsamples}, zoomlvl {z+1}, params = {params}') 78 | 79 | # Yield 80 | yield params 81 | 82 | # Track best ones 83 | if self._curr_cost < min_cost: 84 | min_cost = self._curr_cost 85 | best_params = params 86 | self._best = True 87 | else: 88 | self._best = False 89 | 90 | # Zoom in around the best set of hyperparams 91 | new_range_types = [] 92 | for best_param, rtype in zip(best_params, range_types): 93 | l = rtype[0] 94 | u = rtype[1] 95 | diff = u-l 96 | if rtype[2] == 'log': 97 | best_param = math.log10(best_param) 98 | new_l = best_param-(diff/zoomratio) 99 | new_u = best_param+(diff/zoomratio) 100 | new_range_types.append([new_l, new_u, rtype[2]]) 101 | range_types = new_range_types 102 | 103 | # Print the best one 104 | print('\nSearch Finished') 105 | print('===============') 106 | print('Best params:', best_params) 107 | print('Best cost:', min_cost) 108 | -------------------------------------------------------------------------------- /pykitml/datasets/heartdisease.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib import request 3 | 4 | import numpy as np 5 | 6 | from .. import pklhandler 7 | 8 | ''' 9 | This module contains helper functions to download and load 10 | the heart disease dataset. 11 | ''' 12 | 13 | 14 | def get(): 15 | ''' 16 | Downloads the heartdisease dataset from 17 | https://archive.ics.uci.edu/ml/datasets/Heart+Disease 18 | and saves it as a pkl file `heartdisease.pkl`. 19 | 20 | Raises 21 | ------ 22 | urllib.error.URLError 23 | If internet connection is not available or the URL is not accessible. 24 | OSError 25 | If the file cannot be created due to a system-related error. 28 | 29 | Note 30 | ---- 31 | You only need to call this method once, i.e, after the dataset has been downloaded 32 | and you have the `heartdisease.pkl` file, you don't need to call this method again.
33 | ''' 34 | # Url to download the dataset from 35 | url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data' 36 | 37 | # Download the dataset 38 | print('Downloading processed.cleveland.data...') 39 | request.urlretrieve(url, 'processed.cleveland.data') 40 | print('Download complete.') 41 | 42 | # Parse data and save it as a pkl file. 43 | data_array = [] 44 | # Open the file and put the values in a list. 45 | with open('processed.cleveland.data', 'r') as datafile: 46 | for line in datafile: 47 | try: 48 | data_array.append(list(map(float, line.split(',')))) 49 | except ValueError: 50 | continue 51 | # Convert the list into a numpy array. 52 | heartdisease_data_array = np.array(data_array) 53 | # Save as a pkl file. 54 | pklhandler.save(heartdisease_data_array, 'heartdisease.pkl') 55 | 56 | # Delete unnecessary files. 57 | os.remove('processed.cleveland.data') 58 | print('Deleted unnecessary files.') 59 | 60 | 61 | def load(): 62 | ''' 63 | Loads heart disease dataset from saved pickle file `heartdisease.pkl` to numpy arrays. 64 | Loads data without any preprocessing. 65 | 66 | Returns 67 | ------- 68 | inputs : numpy.array 69 | 297x13 numpy array. 297 training examples, each example having 13 inputs (columns). 70 | The 13 columns correspond to: 71 | :code:`age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal`. 72 | 73 | - age : Age in years 74 | - sex : 1=male, 0=female 75 | - cp : Chest pain type (1=typical-angina, 2=atypical-angina, 3=non-anginal, 4=asymptomatic) 76 | - trestbps : Resting blood pressure in mmHg 77 | - chol : Serum cholesterol in mg/dl 78 | - fbs : Fasting blood sugar > 120 mg/dl? (1=true, 0=false) 79 | - restecg : Resting electrocardiographic results (0=normal, 1=ST-T-abnormality, 2=left-ventricular-hypertrophy) 80 | - thalach : Maximum heart rate achieved 81 | - exang : Exercise induced angina (1=yes, 0=no) 82 | - oldpeak : ST depression induced by exercise relative to rest 83 | - slope : Slope of the peak exercise ST segment (1=upsloping, 2=flat, 3=downsloping) 84 | - ca : Number of major vessels colored by fluoroscopy (0-3) 85 | - thal : 3=normal, 6=fixed-defect, 7=reversable-defect 86 | 87 | outputs : numpy.array 88 | Numpy array with 297 elements. 89 | 90 | - 0: < 50% diameter narrowing 91 | - 1: > 50% diameter narrowing 92 | 93 | Raises 94 | ------ 95 | FileNotFoundError 96 | If `heartdisease.pkl` file does not exist, i.e, if the dataset was not 97 | downloaded and saved using the :py:func:`~get` method. 98 | ''' 99 | # Load data from pkl file. 100 | heartdisease_data_array = pklhandler.load('heartdisease.pkl') 101 | inputs = heartdisease_data_array[:, :-1] 102 | outputs = (heartdisease_data_array[:, -1] > 0)*1 103 | 104 | # return data 105 | return inputs, outputs 106 | -------------------------------------------------------------------------------- /pykitml/datasets/sonar.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib import request 3 | 4 | import numpy as np 5 | 6 | from .. import pklhandler 7 | 8 | ''' 9 | This module contains helper functions to download the sonar dataset. 10 | ''' 11 | 12 | 13 | def get(): 14 | ''' 15 | Downloads the sonar dataset from 16 | https://archive.ics.uci.edu/ml/datasets/Connectionist+Bench+(Sonar,+Mines+vs.+Rocks) 17 | and saves it as a pkl file `sonar.pkl`. 18 | 19 | Raises 20 | ------ 21 | urllib.error.URLError 22 | If internet connection is not available or the URL is not accessible.
23 | OSError 24 | If the file cannot be created due to a system-related error. 25 | KeyError 26 | If the downloaded data contains an unknown class label. 27 | 28 | Note 29 | ---- 30 | You only need to call this method once, i.e, after the dataset has been downloaded 31 | and you have the `sonar.pkl` file, you don't need to call 32 | this method again. 33 | ''' 34 | # Url to download the dataset from 35 | url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data' # pylint: disable=line-too-long 36 | 37 | # Download the dataset 38 | print('Downloading sonar.all-data...') 39 | request.urlretrieve(url, 'sonar.all-data') 40 | print('Download complete.') 41 | 42 | out_dict = { 43 | 'R\n': 0, 'M\n': 1 44 | } 45 | 46 | # Parse data and save it as pkl file 47 | data_array = [] 48 | # Open the file and put the values in a list. 49 | with open('sonar.all-data', 'r') as datafile: 50 | for line in datafile: 51 | values = line.split(',') 52 | values[-1] = out_dict[values[-1]] 53 | data_array.append(list(map(float, values))) 54 | # Convert the list to numpy array 55 | sonar_data_array = np.array(data_array) 56 | # Save it as a pkl file 57 | pklhandler.save(sonar_data_array, 'sonar.pkl') 58 | 59 | # Delete files 60 | os.remove('sonar.all-data') 61 | 62 | 63 | def load(): 64 | ''' 65 | Loads the sonar dataset from `sonar.pkl` file. 66 | 67 | Each pattern is a set of 60 numbers in the range 0.0 to 1.0. 68 | Each number represents the energy within a particular frequency band, 69 | integrated over a certain period of time. The integration aperture for 70 | higher frequencies occurs later in time, since these frequencies are 71 | transmitted later during the chirp. 72 | 73 | The label associated with each record contains the letter 74 | "R" if the object is a rock and "M" if it is a mine (metal cylinder). 75 | 76 | Returns 77 | ------- 78 | inputs_train : numpy.array 79 | 190x60 numpy array containing training inputs. 80 | outputs_train : numpy.array 81 | Numpy array of size 190. 82 | inputs_test : numpy.array 83 | 18x60 numpy array containing testing inputs. 84 | outputs_test : numpy.array 85 | Numpy array of size 18. 86 | 87 | Raises 88 | ------ 89 | FileNotFoundError 90 | If `sonar.pkl` file does not exist, 91 | i.e, if the dataset was not downloaded and saved using the 92 | :py:func:`~get` method.
93 | 94 | ''' 95 | # Load the data from pkl file 96 | sonar_data_array = pklhandler.load('sonar.pkl') 97 | 98 | # Split into train and test 99 | train_neg = sonar_data_array[0:90] 100 | train_pos = sonar_data_array[97:197] 101 | test_neg = sonar_data_array[90:97] 102 | test_pos = sonar_data_array[197:208] 103 | 104 | # Shuffle the dataset, join neg and pos examples 105 | train = np.concatenate((train_pos, train_neg), axis=0) 106 | np.random.shuffle(train) 107 | test = np.concatenate((test_pos, test_neg), axis=0) 108 | np.random.shuffle(test) 109 | 110 | # Split the dataset into inputs and outputs 111 | inputs_train = train[:, :-1] 112 | outputs_train = train[:, -1] 113 | inputs_test = test[:, :-1] 114 | outputs_test = test[:, -1] 115 | 116 | # return 117 | return inputs_train, outputs_train, inputs_test, outputs_test 118 | -------------------------------------------------------------------------------- /tests/test_functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from pykitml import _functions 4 | 5 | eg_ws = np.array([[0.1, -0.2, 0.3], [-0.4, 0.5, -0.6]]) 6 | 7 | # ============================= 8 | # = Test activation functions = 9 | # ============================= 10 | 11 | 12 | def test_sigmoid(): 13 | expected_output = np.array([[0.52497919, 0.450166, 0.57444252], 14 | [0.40131234, 0.62245933, 0.35434369]]) 15 | 16 | assert np.allclose(_functions.sigmoid(eg_ws), expected_output) 17 | 18 | 19 | def test_tanh(): 20 | expected_output = np.array([[0.09966799, -0.19737532, 0.29131261], 21 | [-0.37994896, 0.46211716, -0.53704957]]) 22 | 23 | assert np.allclose(_functions.tanh(eg_ws), expected_output) 24 | 25 | 26 | def test_leakyrelu(): 27 | expected_output = np.array([[0.1, -0.002, 0.3], 28 | [-0.004, 0.5, -0.006]]) 29 | 30 | assert np.allclose(_functions.leakyrelu(eg_ws), expected_output) 31 | 32 | 33 | def test_relu(): 34 | expected_output = np.array([[0.1, 0, 0.3], [0, 0.5, 0]]) 35 | 36 | assert np.allclose(_functions.relu(eg_ws), expected_output) 37 | 38 | 39 | def test_softmax(): 40 | expected_output = np.array([[0.33758454, 0.25008878, 0.41232669], 41 | [0.23373585, 0.57489742, 0.19136673]]) 42 | 43 | assert np.allclose(_functions.softmax(eg_ws), expected_output) 44 | 45 | # =========================================== 46 | # = Test derivative of activation functions = 47 | # =========================================== 48 | 49 | 50 | def test_sigmoid_prime(): 51 | activ = _functions.sigmoid(eg_ws) 52 | 53 | expected_output = np.array([[0.24937604, 0.24751657, 0.24445831], 54 | [0.24026075, 0.23500371, 0.22878424]]) 55 | 56 | assert np.allclose(_functions.sigmoid_prime(eg_ws, activ), expected_output) 57 | 58 | 59 | def test_tanh_prime(): 60 | activ = _functions.tanh(eg_ws) 61 | 62 | expected_output = np.array([[0.99006629, 0.96104298, 0.91513696], 63 | [0.85563879, 0.78644773, 0.71157776]]) 64 | 65 | assert np.allclose(_functions.tanh_prime(eg_ws, activ), expected_output) 66 | 67 | 68 | def test_leakyrelu_prime(): 69 | activ = _functions.leakyrelu(eg_ws) 70 | 71 | expected_output = np.array([[1., 0.01, 1.], 72 | [0.01, 1., 0.01]]) 73 | 74 | assert np.allclose(_functions.leakyrelu_prime(eg_ws, activ), expected_output) 75 | 76 | 77 | def test_relu_prime(): 78 | activ = _functions.relu(eg_ws) 79 | 80 | expected_output = np.array([[1, 0, 1], [0, 1, 0]]) 81 | 82 | assert np.allclose(_functions.relu_prime(eg_ws, activ), expected_output) 83 | 84 | 85 | def test_softmax_prime(): 86 | activ = _functions.leakyrelu(eg_ws) 
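# Note: softmax_prime only reads its `activations` argument
# (it computes activations * (1 - activations)), so the leaky-ReLU values
# above serve as arbitrary test activations; expected_output below was
# computed from them.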
87 | 88 | expected_output = np.array([[0.09, -0.002004, 0.21], 89 | [-0.004016, 0.25, -0.006036]]) 90 | 91 | assert np.allclose(_functions.softmax_prime(eg_ws, activ), expected_output) 92 | 93 | # ======================= 94 | # = Test cost functions = 95 | # ======================= 96 | 97 | 98 | def test_mse(): 99 | eg_output = np.array([0.1, 0.4, -0.1, 0.3]) 100 | eg_target = np.array([0.2, 0.3, -0.5, 0.2]) 101 | expected_output = np.array([0.005, 0.005, 0.08, 0.005]) 102 | 103 | assert np.allclose(_functions.mse(eg_output, eg_target), expected_output) 104 | 105 | 106 | def test_cross_entropy(): 107 | eg_output = np.array([0.3, 0.1, 0.9, 0.7]) 108 | eg_target = np.array([1, 0, 1, 1]) 109 | expected_output = np.array([1.2039728, 0.10536052, 0.10536052, 0.35667494]) 110 | 111 | assert np.allclose(_functions.cross_entropy(eg_output, eg_target), expected_output) 112 | 113 | # ===================================== 114 | # = Test derivative of cost functions = 115 | # ===================================== 116 | 117 | 118 | def test_mse_prime(): 119 | eg_output = np.array([0.1, 0.4, -0.1, 0.3]) 120 | eg_target = np.array([0.2, 0.3, -0.5, 0.2]) 121 | expected_output = np.array([-0.1, 0.1, 0.4, 0.1]) 122 | 123 | assert np.allclose(_functions.mse_prime(eg_output, eg_target), expected_output) 124 | 125 | 126 | def test_cross_entropy_prime(): 127 | eg_output = np.array([0.3, 0.1, 0.9, 0.7]) 128 | eg_target = np.array([1, 0, 1, 1]) 129 | expected_output = np.array([-3.33333333, 1.11111111, -1.11111111, -1.42857143]) 130 | 131 | assert np.allclose(_functions.cross_entropy_prime(eg_output, eg_target), expected_output) 132 | -------------------------------------------------------------------------------- /pykitml/_heatmap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib 3 | import matplotlib.pyplot as plt 4 | 5 | ''' 6 | This module contains helper functions to draw heatmaps. 7 | REF: https://matplotlib.org/3.1.1/gallery/images_contours_and_fields/image_annotated_heatmap.html 8 | ''' 9 | 10 | 11 | def heatmap(data, row_labels, col_labels, ax=None, cbar_kw={}, cbarlabel='', **kwargs): 12 | ''' 13 | Create a heatmap from a numpy array and two lists of labels. 14 | 15 | Parameters 16 | ---------- 17 | data 18 | A 2D numpy array of shape (N, M). 19 | row_labels 20 | A list or array of length N with the labels for the rows. 21 | col_labels 22 | A list or array of length M with the labels for the columns. 23 | ax 24 | A `matplotlib.axes.Axes` instance to which the heatmap is plotted. If 25 | not provided, use current axes or create a new one. Optional. 26 | cbar_kw 27 | A dictionary with arguments to `matplotlib.Figure.colorbar`. Optional. 28 | cbarlabel 29 | The label for the colorbar. Optional. 30 | **kwargs 31 | All other arguments are forwarded to `imshow`. 32 | ''' 33 | 34 | if not ax: 35 | ax = plt.gca() 36 | 37 | # Plot the heatmap 38 | im = ax.imshow(data, **kwargs) 39 | 40 | # Create colorbar 41 | cbar = ax.figure.colorbar(im, ax=ax, **cbar_kw) 42 | cbar.ax.set_ylabel(cbarlabel, rotation=-90, va='bottom') 43 | 44 | # We want to show all ticks... 45 | ax.set_xticks(np.arange(data.shape[1])) 46 | ax.set_yticks(np.arange(data.shape[0])) 47 | # ... and label them with the respective list entries. 48 | ax.set_xticklabels(col_labels) 49 | ax.set_yticklabels(row_labels) 50 | 51 | # Let the horizontal axes labeling appear on top. 
52 | ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False) 53 | 54 | # Rotate the tick labels and set their alignment. 55 | plt.setp(ax.get_xticklabels(), rotation=-30, ha='right', rotation_mode='anchor') 56 | 57 | # Turn spines off and create white grid. 58 | for _, spine in ax.spines.items(): 59 | spine.set_visible(False) 60 | 61 | ax.set_xticks(np.arange(data.shape[1]+1)-.5, minor=True) 62 | ax.set_yticks(np.arange(data.shape[0]+1)-.5, minor=True) 63 | ax.grid(which='minor', color='w', linestyle='-', linewidth=3) 64 | ax.tick_params(which='minor', bottom=False, left=False) 65 | 66 | return im, cbar 67 | 68 | 69 | def annotate_heatmap(im, data=None, valfmt='{x:.2f}', textcolors=['black', 'white'], 70 | threshold=None, **textkw): 71 | ''' 72 | A function to annotate a heatmap. 73 | 74 | Parameters 75 | ---------- 76 | im 77 | The AxesImage to be labeled. 78 | data 79 | Data used to annotate. If None, the image's data is used. Optional. 80 | valfmt 81 | The format of the annotations inside the heatmap. This should either 82 | use the string format method, e.g. '$ {x:.2f}', or be a 83 | `matplotlib.ticker.Formatter`. Optional. 84 | textcolors 85 | A list or array of two color specifications. The first is used for 86 | values below a threshold, the second for those above. Optional. 87 | threshold 88 | Value in data units according to which the colors from textcolors are 89 | applied. If None (the default) uses the middle of the colormap as 90 | separation. Optional. 91 | **kwargs 92 | All other arguments are forwarded to each call to `text` used to create 93 | the text labels. 94 | ''' 95 | 96 | if not isinstance(data, (list, np.ndarray)): 97 | data = im.get_array() 98 | 99 | # Normalize the threshold to the images color range. 100 | if threshold is not None: 101 | threshold = im.norm(threshold) 102 | else: 103 | threshold = im.norm(data.max())/2. 104 | 105 | # Set default alignment to center, but allow it to be 106 | # overwritten by textkw. 107 | kw = dict(horizontalalignment='center', verticalalignment='center') 108 | kw.update(textkw) 109 | 110 | # Get the formatter in case a string is supplied 111 | if isinstance(valfmt, str): 112 | valfmt = matplotlib.ticker.StrMethodFormatter(valfmt) 113 | 114 | # Loop over the data and create a `Text` for each 'pixel'. 115 | # Change the text's color depending on the data. 116 | texts = [] 117 | for i in range(data.shape[0]): 118 | for j in range(data.shape[1]): 119 | kw.update(color=textcolors[int(im.norm(data[i, j]) > threshold)]) 120 | text = im.axes.text(j, i, valfmt(data[i, j], None), **kw) 121 | texts.append(text) 122 | 123 | return texts 124 | -------------------------------------------------------------------------------- /pykitml/datasets/mnist.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This module contains helper functions to download and load MNIST and MNIST like datasets. 3 | ''' 4 | 5 | # ============================================================ 6 | # = Forked from: https://github.com/hsjeong5/MNIST-for-Numpy = 7 | # = Modified with minor changes = 8 | # ============================================================ 9 | 10 | import gzip 11 | import os 12 | from urllib import request 13 | 14 | import numpy as np 15 | 16 | from .. import pklhandler 17 | 18 | 19 | def get(type='classic'): # pylint: disable=redefined-builtin 20 | ''' 21 | Downloads the MNIST dataset and saves it as a pickle file, `mnist.pkl`. 
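Both dataset types are saved under the same file name, so downloading one type will overwrite the other.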
22 | 23 | Parameters 24 | ---------- 25 | type : str 26 | The type of MNIST dataset to download. 27 | 28 | - 'classic' : Downloads the classic handwritten digits dataset from http://yann.lecun.com/exdb/mnist/ 29 | - 'fashion' : Downloads fashion MNIST from https://github.com/zalandoresearch/fashion-mnist 30 | 31 | 32 | Raises 33 | ------ 34 | urllib.error.URLError 35 | If internet connection is not available or the URL is not accessible. 36 | OSError 37 | If the file cannot be created due to a system-related error. 38 | KeyError 39 | If invalid/unknown type. 40 | 41 | Note 42 | ---- 43 | You only need to call this method once, i.e, after the dataset has been 44 | downloaded and you have the `mnist.pkl` file, you don't need to call this method again. 45 | ''' 46 | # dict of URLs containing MNIST-like datasets 47 | type_URLs = {'classic': 'https://ossci-datasets.s3.amazonaws.com/mnist/', 48 | 'fashion': 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/' 49 | } 50 | 51 | # MNIST files to download 52 | filename = [ 53 | ['training_images', 'train-images-idx3-ubyte.gz'], 54 | ['test_images', 't10k-images-idx3-ubyte.gz'], 55 | ['training_labels', 'train-labels-idx1-ubyte.gz'], 56 | ['test_labels', 't10k-labels-idx1-ubyte.gz'] 57 | ] 58 | 59 | def download_mnist(): 60 | # Download .gz files 61 | base_url = type_URLs[type] 62 | for name in filename: 63 | print('Downloading '+name[1]+'...') 64 | request.urlretrieve(base_url+name[1], name[1]) 65 | print('Download complete.') 66 | 67 | def save_mnist(): 68 | # Read .gz files and put them in a numpy array and save it as a pkl file 69 | mnist = {} 70 | for name in filename[:2]: 71 | with gzip.open(name[1], 'rb') as f: 72 | mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1, 28*28) 73 | for name in filename[-2:]: 74 | with gzip.open(name[1], 'rb') as f: 75 | mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=8) 76 | pklhandler.save(mnist, 'mnist.pkl') 77 | print('Save complete.') 78 | 79 | def clean(): 80 | # Remove unnecessary files 81 | os.remove('train-images-idx3-ubyte.gz') 82 | os.remove('t10k-images-idx3-ubyte.gz') 83 | os.remove('train-labels-idx1-ubyte.gz') 84 | os.remove('t10k-labels-idx1-ubyte.gz') 85 | print('Deleted unnecessary files.') 86 | 87 | download_mnist() 88 | save_mnist() 89 | clean() 90 | 91 | 92 | def load(): 93 | ''' 94 | Loads MNIST dataset from saved pickle file `mnist.pkl` to numpy arrays. 95 | 96 | Returns 97 | ------- 98 | training_data : numpy.array 99 | 60,000x784 numpy array, each row contains a flattened version of a training image. 100 | training_targets : numpy.array 101 | 60,000x10 numpy array that contains the one-hot target array of the corresponding 102 | training images. 103 | testing_data : numpy.array 104 | 10,000x784 numpy array, each row contains a flattened version of a test image. 105 | testing_targets : numpy.array 106 | 10,000x10 numpy array that contains the one-hot target array of the corresponding 107 | test images. 108 | 109 | Raises 110 | ------ 111 | FileNotFoundError 112 | If `mnist.pkl` file does not exist, i.e, if the dataset was not downloaded and 113 | saved using the :py:func:`~get` method.
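Example
-------
A short sketch, assuming `mnist.pkl` has already been downloaded using
:py:func:`~get`:

>>> from pykitml.datasets import mnist
>>> training_data, training_targets, testing_data, testing_targets = mnist.load()
>>> training_data.shape
(60000, 784)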
114 | ''' 115 | mnist = pklhandler.load('mnist.pkl') 116 | # Normalize data 117 | training_data = mnist['training_images']/255 118 | testing_data = mnist['test_images']/255 119 | # Create one-hot target array for training labels 120 | training_targets = np.zeros((60000, 10)) 121 | training_targets[np.arange(60000), mnist['training_labels']] = 1 122 | # Create one-hot target array for testing labels 123 | testing_targets = np.zeros((10000, 10)) 124 | testing_targets[np.arange(10000), mnist['test_labels']] = 1 125 | # return the data 126 | return training_data, training_targets, testing_data, testing_targets 127 | 128 | 129 | if __name__ == '__main__': 130 | get() 131 | -------------------------------------------------------------------------------- /pykitml/_functions.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=unused-argument 2 | 3 | import numpy as np 4 | 5 | ''' 6 | This module contains utility functions 7 | ''' 8 | 9 | # ===================== 10 | # = Utility functions = 11 | # ===================== 12 | 13 | 14 | def pdist(x, y): 15 | ''' 16 | Calculate pairwise square distances between matrix x and y. 17 | See: https://stackoverflow.com/a/56084419/5516481 18 | ''' 19 | if x.ndim == 1: 20 | x = np.array([x]) 21 | 22 | nx, p = x.shape 23 | x_ext = np.empty((nx, 3*p)) 24 | x_ext[:, :p] = 1 25 | x_ext[:, p:2*p] = x 26 | x_ext[:, 2*p:] = np.square(x) 27 | 28 | ny = y.shape[0] 29 | y_ext = np.empty((3*p, ny)) 30 | y_ext[:p] = np.square(y).T 31 | y_ext[p:2*p] = -2*y.T 32 | y_ext[2*p:] = 1 33 | 34 | return x_ext.dot(y_ext) 35 | 36 | # ============================================== 37 | # = Activation functions and their derivatives = 38 | # ============================================== 39 | 40 | 41 | def sigmoid(weighted_sum): 42 | ''' 43 | Returns sigmoid of the weighted sum array of a layer. 44 | ''' 45 | return 1 / (1 + np.exp(-weighted_sum)) 46 | 47 | 48 | def sigmoid_prime(weighted_sum, activations): 49 | ''' 50 | Returns the derivative of sigmoid w.r.t layer's weighted sum. 51 | ''' 52 | return activations * (1 - activations) 53 | 54 | 55 | def tanh(weighted_sum): 56 | ''' 57 | Returns tanh of the weighted sum array of a layer. 58 | ''' 59 | return np.tanh(weighted_sum) 60 | 61 | 62 | def tanh_prime(weighted_sum, activations): 63 | ''' 64 | Returns the derivative of tanh w.r.t layer's weighted sum. 65 | ''' 66 | return 1 - (activations ** 2) 67 | 68 | 69 | def leakyrelu(weighted_sum): 70 | ''' 71 | Returns leaky-ReLU of the weighted sum array of a layer. 72 | ''' 73 | return np.where(weighted_sum > 0, weighted_sum, 0.01 * weighted_sum) 74 | 75 | 76 | def leakyrelu_prime(weighted_sum, activations): 77 | ''' 78 | Returns the derivative of leaky-ReLU w.r.t layer's weighted sum. 79 | ''' 80 | return np.where(weighted_sum > 0, 1, 0.01) 81 | 82 | 83 | def relu(weighted_sum): 84 | ''' 85 | Returns ReLU of the weighted sum array of a layer. 86 | ''' 87 | return np.where(weighted_sum > 0, weighted_sum, 0) 88 | 89 | 90 | def relu_prime(weighted_sum, activations): 91 | ''' 92 | Returns the derivative of ReLU w.r.t layer's weighted sum. 93 | ''' 94 | return np.where(weighted_sum > 0, 1, 0) 95 | 96 | 97 | def softmax(weighted_sum): 98 | ''' 99 | Returns softmax of the weighted sum array of a layer. 100 | If weighted_sum is a 2D array, then it performs softmax over each row. 
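Example (values rounded to numpy's default display precision):

>>> softmax(np.array([1.0, 2.0, 3.0]))
array([0.09003057, 0.24472847, 0.66524096])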
101 | ''' 102 | if weighted_sum.ndim == 1: 103 | exps = np.exp(weighted_sum - np.max(weighted_sum)) 104 | return exps / np.sum(exps) 105 | 106 | normalized = weighted_sum - np.expand_dims(np.max(weighted_sum, axis=1), axis=1) 107 | exps = np.exp(normalized) 108 | return exps / np.expand_dims(np.sum(exps, axis=1), 1) 109 | 110 | 111 | def identity(weighted_sum): 112 | ''' 113 | Returns identity of the weighted sum array of a layer. 114 | ''' 115 | return weighted_sum 116 | 117 | 118 | def identity_prime(weighted_sum, activations): 119 | ''' 120 | Returns the derivative of identity w.r.t layer's weighted sum. 121 | ''' 122 | return 1 123 | 124 | 125 | def softmax_prime(weighted_sum, activations): 126 | ''' 127 | Returns the derivative of softmax w.r.t layer's weighted sum. 128 | ''' 129 | return activations * (1 - activations) 130 | 131 | # ======================================== 132 | # = Cost functions and their derivatives = 133 | # ======================================== 134 | 135 | 136 | def mse(output, target): 137 | ''' 138 | Returns mean squared error cost of the output. 139 | ''' 140 | return 0.5 * ((output - target) ** 2) 141 | 142 | 143 | def mse_prime(output, target): 144 | ''' 145 | Returns the derivative of the mse cost. 146 | ''' 147 | return output-target 148 | 149 | 150 | def cross_entropy(output, target): 151 | ''' 152 | Returns cross entropy cost of the output. 153 | ''' 154 | return -(target * np.log(output)) - ((1-target) * np.log(1-output)) 155 | 156 | 157 | def cross_entropy_prime(output, target): 158 | ''' 159 | Returns the derivative of the cross entropy cost. 160 | ''' 161 | return (output-target) / (output * (1-output)) 162 | 163 | 164 | def hinge_loss(output, target): 165 | ''' 166 | Returns hinge loss of the output for SVMs. 167 | ''' 168 | return np.maximum(0, 1 - target*output) 169 | 170 | 171 | def hinge_loss_prime(output, target): 172 | ''' 173 | Returns derivative of hinge loss. 174 | ''' 175 | return np.where((target*output) > 1, 0, -1*target) 176 | 177 | 178 | def huber(output, target): 179 | ''' 180 | Returns huber loss for dqn 181 | ''' 182 | error = output - target 183 | 184 | is_small_error = np.abs(error) < 1 185 | 186 | squared_loss = np.square(error)/2 187 | linear_loss = np.abs(error) - 0.5 188 | 189 | return np.where(is_small_error, squared_loss, linear_loss) 190 | 191 | 192 | def huber_prime(output, target): 193 | ''' 194 | Returns derivative of huber loss. 195 | ''' 196 | error = output - target 197 | 198 | is_small_error = np.abs(error) < 1 199 | 200 | return np.where(is_small_error, error, np.sign(error)) 201 | -------------------------------------------------------------------------------- /docs/FCEUX.rst: -------------------------------------------------------------------------------- 1 | Creating NES bots using FCEUX emulator 2 | ====================================== 3 | 4 | FCEUX Server 5 | ------------ 6 | 7 | .. autoclass:: pykitml.FCEUXServer 8 | 9 | .. automethod:: __init__ 10 | 11 | .. automethod:: start 12 | 13 | .. automethod:: frame_advance 14 | 15 | .. automethod:: get_joypad 16 | 17 | .. automethod:: set_joypad 18 | 19 | .. automethod:: read_mem 20 | 21 | .. automethod:: reset 22 | 23 | .. automethod:: quit 24 | 25 | .. autoattribute:: info 26 | 27 | Lua client script 28 | ----------------- 29 | 30 | This script has to be loaded into the emulator after 31 | starting the server. (File > Load Lua Script) 32 | 33 | **fceux_client.lua** 34 | 35 | .. 
code-block:: lua 36 | 37 | local socket = require "socket" 38 | 39 | -- Edit to change 40 | ip = 'localhost' 41 | port = '1234' 42 | 43 | -- Table for holding lua code snippets from server 44 | func_table = {} 45 | 46 | -- Start connection with server 47 | s = socket.connect(ip, port) 48 | 49 | -- Helper function to convert table to string 50 | function table_to_string(table) 51 | str = '' 52 | 53 | for key, value in pairs(table) do 54 | str = str .. tostring(key) .. ' ' .. tostring(value) .. ' ' 55 | end 56 | 57 | return str 58 | end 59 | 60 | -- Helper function to split string into tokens 61 | function split(inputstr, sep) 62 | if sep == nil then 63 | sep = "%s" 64 | end 65 | local t={} 66 | for str in string.gmatch(inputstr, "([^"..sep.."]+)") do 67 | table.insert(t, str) 68 | end 69 | return t 70 | end 71 | 72 | -- Helper function to send message to the server 73 | function send(msg) 74 | s:send(msg) 75 | end 76 | 77 | -- Helper function to receive message from the server 78 | function recv() 79 | local resp, err = s:receive('*l') 80 | return resp 81 | end 82 | 83 | -- Helper function that waits for acknowledgement from server 84 | function wait_for_ack() 85 | while (recv() ~= 'ACK') do end 86 | end 87 | 88 | -- Set the speed of the emulator 89 | emu.speedmode('normal') 90 | 91 | -- Server info 92 | send('FCEUX Client '.._VERSION) 93 | wait_for_ack() 94 | 95 | -- Main loop 96 | while true do 97 | local resp = '' 98 | 99 | -- Log frame count 100 | fcount = string.format('%d', emu.framecount()) 101 | send(fcount) 102 | 103 | -- Parse commands from server 104 | while (resp ~= 'CONT') do 105 | resp = recv() 106 | 107 | if(resp == 'JOYPAD') then 108 | local controller = joypad.read(1) 109 | send(table_to_string(controller)) 110 | elseif(resp == 'SETJOYPAD') then 111 | local values = split(recv()) 112 | joypad.set(1, { 113 | up = (values[1]=='True'), down = (values[2]=='True'), 114 | left = (values[3]=='True'), right = (values[4]=='True'), 115 | A = (values[5]=='True'), B = (values[6]=='True'), 116 | start = (values[7]=='True'), select = (values[8]=='True'), 117 | }) 118 | elseif(resp == 'MEM') then 119 | local addr = tonumber(recv()) 120 | send(memory.readbyte(addr)) 121 | elseif(resp == 'RES') then 122 | emu.softreset() 123 | else 124 | break 125 | end 126 | end 127 | 128 | emu.frameadvance() 129 | end 130 | 131 | Example bot to spam the 'A' button 132 | ---------------------------------- 133 | 134 | .. code-block:: python 135 | 136 | import pykitml as pk 137 | 138 | def on_frame(server, frame): 139 | # Spam A and start button 140 | if(frame%10 < 5): server.set_joypad(A=True, start=True) 141 | else: server.set_joypad(A=False, start=False) 142 | 143 | # Print joypad 144 | print(server.get_joypad()) 145 | 146 | # Continue emulation 147 | server.frame_advance() 148 | 149 | # Initialize and start server 150 | server = pk.FCEUXServer(on_frame) 151 | print(server.info) 152 | server.start() 153 | 154 | Start this script, then run the FCEUX emulator. Open any NES ROM 155 | (File > Open ROM) and then load the lua client script (File > Load Lua Script). 156 | The bot will continuously spam the A button. 157 | 158 | Example bot to spam the 'A' button, second way 159 | ---------------------------------------------- 160 | 161 | .. 
code-block:: python 162 | 163 | import pykitml as pk 164 | 165 | # Instantiate server 166 | server = pk.FCEUXServer(None) 167 | 168 | try: 169 | while True: 170 | # Initialize frame, get frame count 171 | frame = server.init_frame() 172 | 173 | # Spam A and start button 174 | if(frame%10 < 5): server.set_joypad(A=True, start=True) 175 | else: server.set_joypad(A=False, start=False) 176 | 177 | # Print joypad 178 | print(server.get_joypad()) 179 | 180 | # Continue emulation 181 | server.frame_advance() 182 | 183 | except BrokenPipeError: 184 | server.quit('Client has quit.') 185 | except KeyboardInterrupt: 186 | server.quit() 187 | -------------------------------------------------------------------------------- /pykitml/fceux.py: -------------------------------------------------------------------------------- 1 | import socket 2 | 3 | 4 | class FCEUXServer: 5 | ''' 6 | Server class for making NES bots. Uses FCEUX emulator. 7 | Visit https://www.fceux.com for info. You will also need to 8 | load the client lua script in the emulator. 9 | ''' 10 | 11 | def __init__(self, frame_func, quit_func=None, ip='localhost', port=1234): 12 | ''' 13 | Parameters 14 | ---------- 15 | frame_func : function 16 | This function will be called every frame. The function should 17 | accept two arguments, :code:`server` (reference to this class) 18 | and :code:`frame` (number of frames executed). 19 | quit_func : function 20 | This function will be executed when the server disconnects from 21 | the emulator. 22 | ip : str 23 | IP address of the computer. 24 | port : int 25 | Port to listen to. 26 | ''' 27 | # Establish connection with client 28 | self._serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 29 | self._serversocket.bind((ip, port)) 30 | self._serversocket.listen(5) 31 | self._clientsocket, self._address = self._serversocket.accept() 32 | 33 | # This function will be called every frame 34 | self._on_frame_func = frame_func 35 | self._on_quit_func = quit_func 36 | 37 | self._server_info = self.recv() + ' ' + str(self._address) 38 | self.send('ACK') 39 | 40 | @property 41 | def info(self): 42 | ''' 43 | Emulator info and lua version. 44 | ''' 45 | return self._server_info 46 | 47 | def send(self, msg): 48 | ''' 49 | Send message to lua code running on the emulator. 50 | 51 | Parameters 52 | ---------- 53 | msg : str 54 | ''' 55 | if not isinstance(msg, str): 56 | self.quit() 57 | raise TypeError('Arguments have to be string') 58 | 59 | self._clientsocket.send(bytes(msg+'\n', 'utf-8')) 60 | 61 | def recv(self): 62 | ''' 63 | Receive message from lua code running on the emulator. 64 | 65 | Returns 66 | ------- 67 | str 68 | Received message from emulator. 69 | ''' 70 | return self._clientsocket.recv(4096).decode('utf-8') 71 | 72 | def init_frame(self): 73 | ''' 74 | Waits for the emulator to start the next frame and returns 75 | the frame count. 76 | 77 | Returns 78 | ------- 79 | int 80 | Frame count. 81 | ''' 82 | # Receive message from client 83 | frame_str = self.recv() 84 | if len(frame_str) == 0: 85 | self.quit('Client has quit') 86 | frame = int(frame_str) 87 | 88 | return frame 89 | 90 | def start(self): 91 | ''' 92 | Starts the server, waits for emulator to connect. 93 | Calls :code:`frame_func` every frame after connection 94 | has been established.
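This call blocks; it returns only through :py:func:`quit`, either when
the client disconnects or on a keyboard interrupt.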
95 |         '''
96 |         try:
97 |             # Keep receiving messages from FCEUX and acknowledge
98 |             while True:
99 |                 frame = self.init_frame()
100 |                 self._on_frame_func(self, frame)
101 | 
102 |         except BrokenPipeError:
103 |             self.quit('Client has quit.')
104 |         except KeyboardInterrupt:
105 |             self.quit()
106 | 
107 |     def frame_advance(self):
108 |         '''
109 |         Moves to the next frame; should be called at the end of
110 |         :code:`frame_func`.
111 |         '''
112 |         # Send back continue message
113 |         self.send('CONT')
114 | 
115 |     def get_joypad(self):
116 |         '''
117 |         Returns
118 |         -------
119 |         str
120 |             Joypad button states.
121 |         '''
122 |         self.send('JOYPAD')
123 |         return self.recv()
124 | 
125 |     def set_joypad(self, up=False, down=False, left=False,
126 |                    right=False, A=False, B=False, start=False, select=False):
127 |         '''
128 |         Set joypad button states.
129 |         '''
130 |         self.send('SETJOYPAD')
131 |         joypad = str(up)+' '+str(down)+' '+str(left)+' '+str(right)\
132 |             + ' '+str(A)+' '+str(B)+' '+str(start)+' '+str(select)
133 |         self.send(joypad)
134 | 
135 |     def read_mem(self, addr, signed=False):
136 |         '''
137 |         Read a memory address.
138 | 
139 |         Parameters
140 |         ----------
141 |         addr : int
142 |             The memory address to read.
143 |         signed : bool
144 |             If :code:`True`, returns a signed integer.
145 | 
146 |         Returns
147 |         -------
148 |         int
149 |             The byte at the address.
150 |         '''
151 |         self.send('MEM')
152 |         self.send(str(addr))
153 |         unsigned = int(self.recv())
154 | 
155 |         if signed:
156 |             return unsigned-256 if unsigned > 127 else unsigned
157 |         else:
158 |             return unsigned
159 | 
160 |     def reset(self):
161 |         '''
162 |         Resets the emulator (soft reset).
163 |         '''
164 |         self.send('RES')
165 | 
166 |     def quit(self, reason=''):
167 |         '''
168 |         Disconnect from emulator.
169 | 
170 |         Parameters
171 |         ----------
172 |         reason : str
173 |             Reason for quitting.
174 |         '''
175 |         if self._on_quit_func is not None:
176 |             self._on_quit_func()
177 |         self._serversocket.close()
178 |         self._clientsocket.close()
179 |         print(reason)
180 |         print('Server has quit.')
181 |         exit()
182 | 
183 | 
184 | if __name__ == '__main__':
185 |     def on_frame(server, frame):
186 |         print(frame)
187 |         print(server.get_joypad())
188 |         server.frame_advance()
189 | 
190 |     fceux_server = FCEUXServer(on_frame)
191 |     print(fceux_server.info)
192 |     fceux_server.start()
193 | 
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Configuration file for the Sphinx documentation builder.
4 | #
5 | # This file does only contain a selection of the most common options. For a
6 | # full list see the documentation:
7 | # http://www.sphinx-doc.org/en/master/config
8 | 
9 | # -- Path setup --------------------------------------------------------------
10 | 
11 | # If extensions (or modules to document with autodoc) are in another directory,
12 | # add these directories to sys.path here. If the directory is relative to the
13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
14 | #
15 | import os
16 | import sys
17 | sys.path.insert(0, os.path.abspath('..'))
18 | 
19 | 
20 | # -- Project information -----------------------------------------------------
21 | 
22 | project = 'pykitml'
23 | copyright = '2019, Vishnu Shankar B'
24 | author = 'Vishnu Shankar B'
25 | 
26 | # The short X.Y version
27 | version = '0.1.1'
28 | # The full version, including alpha/beta/rc tags
29 | release = '0.1.1'
30 | 
31 | 
32 | # -- General configuration ---------------------------------------------------
33 | 
34 | # If your documentation needs a minimal Sphinx version, state it here.
35 | #
36 | # needs_sphinx = '1.0'
37 | 
38 | # Add any Sphinx extension module names here, as strings. They can be
39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
40 | # ones.
41 | extensions = [
42 |     'sphinx.ext.autodoc',
43 |     'sphinx.ext.doctest',
44 |     'sphinx.ext.napoleon'
45 | ]
46 | 
47 | # Add any paths that contain templates here, relative to this directory.
48 | templates_path = ['_templates']
49 | 
50 | # The suffix(es) of source filenames.
51 | # You can specify multiple suffixes as a list of strings:
52 | #
53 | # source_suffix = ['.rst', '.md']
54 | source_suffix = '.rst'
55 | 
56 | # The master toctree document.
57 | master_doc = 'index'
58 | 
59 | # The language for content autogenerated by Sphinx. Refer to documentation
60 | # for a list of supported languages.
61 | #
62 | # This is also used if you do content translation via gettext catalogs.
63 | # Usually you set "language" from the command line for these cases.
64 | language = None
65 | 
66 | # List of patterns, relative to source directory, that match files and
67 | # directories to ignore when looking for source files.
68 | # This pattern also affects html_static_path and html_extra_path.
69 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
70 | 
71 | # The name of the Pygments (syntax highlighting) style to use.
72 | pygments_style = None
73 | 
74 | 
75 | # -- Options for HTML output -------------------------------------------------
76 | 
77 | # The theme to use for HTML and HTML Help pages. See the documentation for
78 | # a list of builtin themes.
79 | #
80 | html_theme = 'sphinx_rtd_theme'
81 | 
82 | # Theme options are theme-specific and customize the look and feel of a theme
83 | # further. For a list of options available for each theme, see the
84 | # documentation.
85 | #
86 | #html_theme_options = {}
87 | 
88 | # Add any paths that contain custom static files (such as style sheets) here,
89 | # relative to this directory. They are copied after the builtin static files,
90 | # so a file named "default.css" will overwrite the builtin "default.css".
91 | html_static_path = ['_static']
92 | 
93 | # Custom sidebar templates, must be a dictionary that maps document names
94 | # to template names.
95 | #
96 | # The default sidebars (for documents that don't match any pattern) are
97 | # defined by theme itself. Builtin themes are using these templates by
98 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
99 | # 'searchbox.html']``.
100 | #
101 | # html_sidebars = {}
102 | 
103 | 
104 | # -- Options for HTMLHelp output ---------------------------------------------
105 | 
106 | # Output file base name for HTML help builder.
107 | htmlhelp_basename = 'pykitmldoc'
108 | 
109 | 
110 | # -- Options for LaTeX output ------------------------------------------------
111 | 
112 | latex_elements = {
113 |     # The paper size ('letterpaper' or 'a4paper').
114 |     #
115 |     # 'papersize': 'letterpaper',
116 | 
117 |     # The font size ('10pt', '11pt' or '12pt').
118 |     #
119 |     # 'pointsize': '10pt',
120 | 
121 |     # Additional stuff for the LaTeX preamble.
122 |     #
123 |     # 'preamble': '',
124 | 
125 |     # Latex figure (float) alignment
126 |     #
127 |     # 'figure_align': 'htbp',
128 | }
129 | 
130 | # Grouping the document tree into LaTeX files. List of tuples
131 | # (source start file, target name, title,
132 | #  author, documentclass [howto, manual, or own class]).
133 | latex_documents = [
134 |     (master_doc, 'pykitml.tex', 'pykitml Documentation',
135 |      'Vishnu Shankar B', 'manual'),
136 | ]
137 | 
138 | 
139 | # -- Options for manual page output ------------------------------------------
140 | 
141 | # One entry per manual page. List of tuples
142 | # (source start file, name, description, authors, manual section).
143 | man_pages = [
144 |     (master_doc, 'pykitml', 'pykitml Documentation',
145 |      [author], 1)
146 | ]
147 | 
148 | 
149 | # -- Options for Texinfo output ----------------------------------------------
150 | 
151 | # Grouping the document tree into Texinfo files. List of tuples
152 | # (source start file, target name, title, author,
153 | #  dir menu entry, description, category)
154 | texinfo_documents = [
155 |     (master_doc, 'pykitml', 'pykitml Documentation',
156 |      author, 'pykitml', 'Machine learning library written in Python and NumPy.',
157 |      'Miscellaneous'),
158 | ]
159 | 
160 | 
161 | # -- Options for Epub output -------------------------------------------------
162 | 
163 | # Bibliographic Dublin Core info.
164 | epub_title = project
165 | 
166 | # The unique identifier of the text. This can be an ISBN number
167 | # or the project homepage.
168 | #
169 | # epub_identifier = ''
170 | 
171 | # A unique identification for the text.
172 | #
173 | # epub_uid = ''
174 | 
175 | # A list of files that should not be packed into the epub file.
176 | epub_exclude_files = ['search.html']
177 | 
178 | 
179 | # -- Extension configuration -------------------------------------------------
--------------------------------------------------------------------------------
/pykitml/normalize.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | # ===============================================
4 | # = Functions for Normalization/Feature-scaling =
5 | # ===============================================
6 | 
7 | 
8 | def get_minmax(array):
9 |     '''
10 |     Returns two row arrays, one array containing minimum values of each column
11 |     and another one with maximum values.
12 | 
13 |     Parameters
14 |     ----------
15 |     array : numpy.array
16 |         The array to get minimum and maximum values for.
17 | 
18 |     Returns
19 |     -------
20 |     array_min : numpy.array
21 |         Array containing minimum values of each column.
22 |     array_max : numpy.array
23 |         Array containing maximum values of each column.
24 |     '''
25 |     return np.amin(array, axis=0), np.amax(array, axis=0)
26 | 
27 | 
28 | def normalize_minmax(array, array_min, array_max, cols=[]):
29 |     '''
30 |     Normalizes columns of the array to between 0 and 1 using min-max
31 |     normalization.
32 | 
33 |     Parameters
34 |     ----------
35 |     array : numpy.array
36 |         The array to normalize.
37 |     array_min : numpy.array
38 |         Array containing minimum values of each column.
39 |     array_max : numpy.array
40 |         Array containing maximum values of each column.
41 |     cols : list
42 |         The columns to normalize. If the list is empty (default),
43 |         all columns will be normalized.
44 | 
45 |     Returns
46 |     -------
47 |     numpy.array
48 |         The normalized array.
49 | 
50 |     Note
51 |     ----
52 |     You can use the :py:func:`~get_minmax` function to get :code:`array_min`
53 |     and :code:`array_max` parameters.
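
    Example
    -------
    A short doctest-style sketch of min-max scaling; this assumes the
    package-level exports (:code:`pk.get_minmax`, :code:`pk.normalize_minmax`)
    used by the other examples in this library:

    >>> import numpy as np
    >>> import pykitml as pk
    >>> a = np.array([[0.0, 10.0], [5.0, 20.0], [10.0, 30.0]])
    >>> a_min, a_max = pk.get_minmax(a)
    >>> pk.normalize_minmax(a, a_min, a_max)
    array([[0. , 0. ],
           [0.5, 0.5],
           [1. , 1. ]])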
54 |     '''
55 |     normalized_array = array.astype(float)
56 |     all_normalized = (array - array_min) / (array_max - array_min)
57 | 
58 |     if len(cols) == 0:
59 |         # Normalize all columns
60 |         normalized_array = all_normalized
61 |     elif array.ndim == 1:
62 |         # Normalize only specified columns, 1D array
63 |         normalized_array[cols] = all_normalized[cols]
64 |     else:
65 |         # Normalize only specified columns, 2D array
66 |         normalized_array[:, cols] = all_normalized[:, cols]
67 | 
68 |     return normalized_array
69 | 
70 | 
71 | def denormalize_minmax(array, array_min, array_max, cols=[]):
72 |     '''
73 |     Denormalizes columns of a min-max normalized array.
74 | 
75 |     Parameters
76 |     ----------
77 |     array : numpy.array
78 |         The array to denormalize.
79 |     array_min : numpy.array
80 |         Array containing minimum values of each column.
81 |     array_max : numpy.array
82 |         Array containing maximum values of each column.
83 |     cols : list
84 |         The columns to denormalize. If the list is empty (default),
85 |         all columns will be denormalized.
86 | 
87 |     Returns
88 |     -------
89 |     numpy.array
90 |         The denormalized array.
91 | 
92 |     Note
93 |     ----
94 |     You can use the :py:func:`~get_minmax` function to get :code:`array_min`
95 |     and :code:`array_max` parameters.
96 |     '''
97 |     denormalized_array = array.astype(float)
98 |     all_denormalized = (array * (array_max - array_min)) + array_min
99 | 
100 |     if len(cols) == 0:
101 |         # Denormalize all columns
102 |         denormalized_array = all_denormalized
103 |     elif array.ndim == 1:
104 |         # Denormalize only specified columns, 1D array
105 |         denormalized_array[cols] = all_denormalized[cols]
106 |     else:
107 |         # Denormalize only specified columns, 2D array
108 |         denormalized_array[:, cols] = all_denormalized[:, cols]
109 | 
110 |     return denormalized_array
111 | 
112 | 
113 | def get_meanstd(array):
114 |     '''
115 |     Returns two row arrays, one array containing mean of each column
116 |     and another one with standard deviation of each column.
117 | 
118 |     Parameters
119 |     ----------
120 |     array : numpy.array
121 |         The array to get mean and standard deviation values for.
122 | 
123 |     Returns
124 |     -------
125 |     array_mean : numpy.array
126 |         Array containing mean values of each column.
127 |     array_stddev : numpy.array
128 |         Array containing standard deviation values of each column.
129 |     '''
130 |     return np.mean(array, axis=0), np.std(array, axis=0)
131 | 
132 | 
133 | def normalize_mean(array, array_mean, array_stddev, cols=[]):
134 |     '''
135 |     Normalizes columns of the array with mean normalization.
136 | 
137 |     Parameters
138 |     ----------
139 |     array : numpy.array
140 |         The array to normalize.
141 |     array_mean : numpy.array
142 |         Array containing mean values of each column.
143 |     array_stddev : numpy.array
144 |         Array containing standard deviation values of each column.
145 |     cols : list
146 |         The columns to normalize. If the list is empty (default),
147 |         all columns will be normalized.
148 | 
149 | 
150 |     Returns
151 |     -------
152 |     numpy.array
153 |         The normalized array.
154 | 
155 |     Note
156 |     ----
157 |     You can use the :py:func:`~get_meanstd` function to get :code:`array_mean`
158 |     and :code:`array_stddev` parameters.
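
    Example
    -------
    A short doctest-style sketch of mean normalization, under the same
    package-level export assumption as the other examples here:

    >>> import numpy as np
    >>> import pykitml as pk
    >>> a = np.array([[1.0, 2.0], [3.0, 4.0]])
    >>> a_mean, a_stddev = pk.get_meanstd(a)
    >>> pk.normalize_mean(a, a_mean, a_stddev)
    array([[-1., -1.],
           [ 1.,  1.]])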
159 |     '''
160 |     normalized_array = array.astype(float)
161 |     all_normalized = (array-array_mean)/array_stddev
162 | 
163 |     if len(cols) == 0:
164 |         # Normalize all columns
165 |         normalized_array = all_normalized
166 |     elif array.ndim == 1:
167 |         # Normalize only specified columns, 1D array
168 |         normalized_array[cols] = all_normalized[cols]
169 |     else:
170 |         # Normalize only specified columns, 2D array
171 |         normalized_array[:, cols] = all_normalized[:, cols]
172 | 
173 |     return normalized_array
174 | 
175 | 
176 | def denormalize_mean(array, array_mean, array_stddev, cols=[]):
177 |     '''
178 |     Denormalizes a mean normalized array.
179 | 
180 |     Parameters
181 |     ----------
182 |     array : numpy.array
183 |         The array to denormalize.
184 |     array_mean : numpy.array
185 |         Array containing mean values of each column.
186 |     array_stddev : numpy.array
187 |         Array containing standard deviation values of each column.
188 |     cols : list
189 |         The columns to denormalize. If the list is empty (default),
190 |         all columns will be denormalized.
191 | 
192 |     Returns
193 |     -------
194 |     numpy.array
195 |         The denormalized array.
196 | 
197 |     Note
198 |     ----
199 |     You can use the :py:func:`~get_meanstd` function to get :code:`array_mean`
200 |     and :code:`array_stddev` parameters.
201 |     '''
202 |     denormalized_array = array.astype(float)
203 |     all_denormalized = (array*array_stddev) + array_mean
204 | 
205 |     if len(cols) == 0:
206 |         # Denormalize all columns
207 |         denormalized_array = all_denormalized
208 |     elif array.ndim == 1:
209 |         # Denormalize only specified columns, 1D array
210 |         denormalized_array[cols] = all_denormalized[cols]
211 |     else:
212 |         # Denormalize only specified columns, 2D array
213 |         denormalized_array[:, cols] = all_denormalized[:, cols]
214 | 
215 |     return denormalized_array
216 | 
--------------------------------------------------------------------------------
/pykitml/preprocessing.py:
--------------------------------------------------------------------------------
1 | from itertools import combinations_with_replacement
2 | 
3 | import numpy as np
4 | 
5 | '''
6 | This module contains helper functions for preprocessing data.
7 | '''
8 | 
9 | 
10 | def onehot(input_array):
11 |     '''
12 |     Converts input array to one-hot array.
13 | 
14 |     Parameters
15 |     ----------
16 |     input_array : numpy.array
17 |         The input numpy array.
18 | 
19 |     Returns
20 |     -------
21 |     one_hot : numpy.array
22 |         The converted onehot array.
23 | 
24 |     Example
25 |     -------
26 |     >>> import numpy as np
27 |     >>> import pykitml as pk
28 |     >>> a = np.array([0, 1, 2])
29 |     >>> pk.onehot(a)
30 |     array([[1., 0., 0.],
31 |            [0., 1., 0.],
32 |            [0., 0., 1.]])
33 |     '''
34 |     array = input_array.astype(int)
35 |     one_hot = np.zeros((array.size, array.max()+1))
36 |     one_hot[np.arange(array.size), array] = 1
37 |     return one_hot
38 | 
39 | 
40 | def onehot_cols(dataset, cols):
41 |     '''
42 |     Converts/replaces columns of dataset to one-hot values.
43 | 
44 |     Parameters
45 |     ----------
46 |     dataset : numpy.array
47 |         The input dataset.
48 |     cols : list
49 |         The columns which have to be replaced/converted
50 |         to one-hot values.
51 | 
52 |     Returns
53 |     -------
54 |     dataset_new : numpy.array
55 |         The new dataset with replaced columns.
56 | 
57 |     Example
58 |     -------
59 | 
60 |     >>> import pykitml as pk
61 |     >>> import numpy as np
62 |     >>> a = np.array([[0, 1, 2.2], [1, 2, 3.4], [0, 0, 1.1]])
63 |     >>> a
64 |     array([[0. , 1. , 2.2],
65 |            [1. , 2. , 3.4],
66 |            [0. , 0. , 1.1]])
67 |     >>> pk.onehot_cols(a, cols=[0, 1])
68 |     array([[1. , 0. , 0. , 1. , 0. , 2.2],
69 |            [0. , 1. , 0. , 0. , 1. , 3.4],
70 |            [1. , 0. , 1. , 0. , 0. , 1.1]])
71 | 
72 |     '''
73 |     offset = 0
74 |     dataset_new = dataset
75 |     for col in cols:
76 |         onehot_column = onehot(dataset_new[:, col+offset])
77 |         dataset_new = np.delete(dataset_new, col+offset, axis=1)
78 |         dataset_new = np.insert(dataset_new, [col+offset], onehot_column, axis=1)
79 |         offset += onehot_column.shape[1]-1
80 | 
81 |     return dataset_new
82 | 
83 | 
84 | def onehot_cols_traintest(dataset_train, dataset_test, cols):
85 |     '''
86 |     Converts/replaces columns of :code:`dataset_train` and
87 |     :code:`dataset_test` to one-hot values.
88 | 
89 |     Parameters
90 |     ----------
91 |     dataset_train : numpy.array
92 |         The training dataset.
93 |     dataset_test : numpy.array
94 |         The testing dataset.
95 |     cols : list
96 |         The columns which have to be replaced/converted
97 |         to one-hot values.
98 | 
99 |     Returns
100 |     -------
101 |     dataset_train_new : numpy.array
102 |         The new training dataset with replaced columns.
103 |     dataset_test_new : numpy.array
104 |         The new testing dataset with replaced columns.
105 | 
106 |     Example
107 |     -------
108 | 
109 |     >>> import pykitml as pk
110 |     >>> import numpy as np
111 |     >>> a_train = np.array([[0, 1, 3.2], [1, 2, 3.5], [0, 0, 3.4]])
112 |     >>> a_test = np.array([[0, 3, 3.2], [1, 2, 4.5], [1, 3, 4.5]])
113 |     >>> a_train_onehot, a_test_onehot = pk.onehot_cols_traintest(a_train, a_test, cols=[0,1])
114 |     >>> a_train_onehot
115 |     array([[1. , 0. , 0. , 1. , 0. , 0. , 3.2],
116 |            [0. , 1. , 0. , 0. , 1. , 0. , 3.5],
117 |            [1. , 0. , 1. , 0. , 0. , 0. , 3.4]])
118 |     >>> a_test_onehot
119 |     array([[1. , 0. , 0. , 0. , 0. , 1. , 3.2],
120 |            [0. , 1. , 0. , 0. , 1. , 0. , 4.5],
121 |            [0. , 1. , 0. , 0. , 0. , 1. , 4.5]])
122 | 
123 |     '''
124 |     # Combine the datasets
125 |     dataset_new = np.concatenate((dataset_train, dataset_test), axis=0)
126 | 
127 |     # Replace columns with one-hot values
128 |     offset = 0
129 |     for col in cols:
130 |         onehot_column = onehot(dataset_new[:, col+offset])
131 |         dataset_new = np.delete(dataset_new, col+offset, axis=1)
132 |         dataset_new = np.insert(dataset_new, [col+offset], onehot_column, axis=1)
133 |         offset += onehot_column.shape[1]-1
134 | 
135 |     split = dataset_train.shape[0]
136 |     return dataset_new[:split, :], dataset_new[split:, :]
137 | 
138 | 
139 | def polynomial(dataset_inputs, degree=3, cols=[]):
140 |     '''
141 |     Generates polynomial features from the input dataset.
142 |     For example, if an input sample is two dimensional and of the form [a, b],
143 |     the degree-2 polynomial features are :code:`[a, b, a^2, ab, b^2]`, and degree-3
144 |     polynomial features are
145 |     :code:`[a, b, a^2, ab, b^2, a^3, (a^2)*b, a*(b^2), b^3]`.
146 | 
147 |     Parameters
148 |     ----------
149 |     dataset_inputs : numpy.array
150 |         The input dataset to generate the polynomials from.
151 |     degree : int
152 |         The degree of the polynomial.
153 |     cols : list
154 |         The columns to use to generate polynomial features; columns
155 |         not in this list will be ignored. If empty (default), all columns will
156 |         be used to generate polynomial features.
157 | 
158 |     Returns
159 |     -------
160 |     numpy.array
161 |         The new dataset with polynomial features.
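
    Note
    ----
    The number of generated terms grows combinatorially; this follows from
    the :code:`combinations_with_replacement` call in the implementation
    below. For :code:`n` selected columns, degree :code:`d` contributes
    "n+d-1 choose d" new terms (the multisets of size d). For example,
    4 columns at :code:`degree=3` add C(5, 2) + C(6, 3) = 10 + 20 = 30
    new columns.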
162 | 
163 |     Example
164 |     -------
165 | 
166 |     >>> import numpy as np
167 |     >>> import pykitml as pk
168 |     >>> pk.polynomial(np.array([[1, 2], [2, 3]]), degree=2)
169 |     array([[1., 2., 1., 2., 4.],
170 |            [2., 3., 4., 6., 9.]])
171 |     >>> pk.polynomial(np.array([[1, 2], [2, 3]]), degree=3)
172 |     array([[ 1.,  2.,  1.,  2.,  4.,  1.,  2.,  4.,  8.],
173 |            [ 2.,  3.,  4.,  6.,  9.,  8., 12., 18., 27.]])
174 |     >>> pk.polynomial(np.array([[1, 4, 5, 2], [2, 5, 6, 3]]), degree=2, cols=[0, 3])
175 |     array([[1., 4., 5., 2., 1., 2., 4.],
176 |            [2., 5., 6., 3., 4., 6., 9.]])
177 | 
178 |     '''
179 |     # Make sure 2D array
180 |     if dataset_inputs.ndim == 1:
181 |         inputs = np.array([dataset_inputs])
182 |     else:
183 |         inputs = dataset_inputs
184 | 
185 |     # Choose the columns to generate polynomial features for
186 |     if len(cols) == 0:
187 |         cols = range(inputs.shape[1])
188 | 
189 |     poly_dataset = inputs
190 | 
191 |     # Generate degree terms
192 |     for d in range(2, degree+1):
193 |         # Generate term indices for degree d
194 |         term_indices = list(combinations_with_replacement(cols, r=d))
195 |         # Multiply them to form the term and concatenate
196 |         for indices in term_indices:
197 |             term = inputs[:, indices].prod(axis=1)
198 |             temp = np.zeros((poly_dataset.shape[0], poly_dataset.shape[1]+1))
199 |             temp[:, :-1] = poly_dataset
200 |             temp[:, -1] = term
201 |             poly_dataset = temp
202 | 
203 |     return poly_dataset.squeeze()
--------------------------------------------------------------------------------
/pykitml/datasets/iris.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | '''
4 | This module contains a helper function to load the iris dataset.
5 | '''
6 | 
7 | 
8 | # sepal-length, sepal-width, petal-length, petal-width
9 | # all in cm
10 | 
11 | 
12 | inputs_train = np.array([
13 |     [5.8, 2.7, 5.1, 1.9], [5.5, 2.3, 4.0, 1.3], [5.8, 2.7, 3.9, 1.2],
14 |     [5.5, 2.5, 4.0, 1.3], [6.4, 2.8, 5.6, 2.2], [6.8, 2.8, 4.8, 1.4],
15 |     [5.5, 2.4, 3.7, 1.0], [7.6, 3.0, 6.6, 2.1], [5.4, 3.0, 4.5, 1.5],
16 |     [5.5, 2.4, 3.8, 1.1], [5.9, 3.2, 4.8, 1.8], [6.3, 3.3, 4.7, 1.6],
17 |     [6.4, 3.2, 5.3, 2.3], [5.5, 3.5, 1.3, 0.2], [6.1, 2.8, 4.0, 1.3],
18 |     [6.0, 3.4, 4.5, 1.6], [5.1, 3.3, 1.7, 0.5], [7.7, 3.8, 6.7, 2.2],
19 |     [5.8, 2.7, 4.1, 1.0], [6.0, 2.9, 4.5, 1.5], [5.8, 2.8, 5.1, 2.4],
20 |     [5.2, 3.4, 1.4, 0.2], [6.7, 2.5, 5.8, 1.8], [7.0, 3.2, 4.7, 1.4],
21 |     [6.3, 3.3, 6.0, 2.5], [7.7, 2.6, 6.9, 2.3], [6.7, 3.3, 5.7, 2.1],
22 |     [7.2, 3.6, 6.1, 2.5], [6.3, 3.4, 5.6, 2.4], [5.1, 3.5, 1.4, 0.3],
23 |     [4.8, 3.4, 1.6, 0.2], [6.3, 2.7, 4.9, 1.8], [6.7, 3.1, 4.4, 1.4],
24 |     [5.0, 3.4, 1.6, 0.4], [6.6, 3.0, 4.4, 1.4], [5.5, 4.2, 1.4, 0.2],
25 |     [5.0, 3.4, 1.5, 0.2], [6.4, 3.1, 5.5, 1.8], [5.1, 3.8, 1.5, 0.3],
26 |     [6.1, 2.8, 4.7, 1.2], [5.0, 2.0, 3.5, 1.0], [4.6, 3.4, 1.4, 0.3],
27 |     [6.0, 2.2, 5.0, 1.5], [6.1, 2.6, 5.6, 1.4], [4.8, 3.4, 1.9, 0.2],
28 |     [6.6, 2.9, 4.6, 1.3], [6.1, 2.9, 4.7, 1.4], [6.4, 2.8, 5.6, 2.1],
29 |     [5.4, 3.7, 1.5, 0.2], [5.0, 3.2, 1.2, 0.2], [6.2, 2.8, 4.8, 1.8],
30 |     [6.5, 3.0, 5.8, 2.2], [5.6, 3.0, 4.5, 1.5], [6.9, 3.1, 5.4, 2.1],
31 |     [7.1, 3.0, 5.9, 2.1], [4.9, 3.1, 1.5, 0.1], [6.9, 3.2, 5.7, 2.3],
32 |     [5.8, 4.0, 1.2, 0.2], [6.3, 2.3, 4.4, 1.3], [6.4, 2.9, 4.3, 1.3],
33 |     [5.2, 2.7, 3.9, 1.4], [4.6, 3.1, 1.5, 0.2], [6.0, 2.7, 5.1, 1.6],
34 |     [5.1, 3.5, 1.4, 0.2], [6.0, 3.0, 4.8, 1.8], [5.4, 3.9, 1.7, 0.4],
35 |     [5.1, 3.4, 1.5, 0.2], [5.4, 3.4, 1.7, 0.2], [5.7, 2.5, 5.0, 2.0],
36 |     [6.7, 3.0, 5.0, 1.7], [5.6, 2.5, 3.9, 1.1], [6.5, 2.8, 4.6, 1.5],
37 |     [5.4, 3.9, 1.3,
0.4], [4.9, 3.0, 1.4, 0.2], [7.4, 2.8, 6.1, 1.9], 38 | [7.2, 3.2, 6.0, 1.8], [5.6, 2.9, 3.6, 1.3], [6.4, 3.2, 4.5, 1.5], 39 | [4.3, 3.0, 1.1, 0.1], [5.1, 3.7, 1.5, 0.4], [5.4, 3.4, 1.5, 0.4], 40 | [4.8, 3.1, 1.6, 0.2], [7.7, 3.0, 6.1, 2.3], [5.6, 3.0, 4.1, 1.3], 41 | [6.3, 2.5, 4.9, 1.5], [5.7, 4.4, 1.5, 0.4], [6.5, 3.2, 5.1, 2.0], 42 | [4.9, 3.1, 1.5, 0.1], [4.8, 3.0, 1.4, 0.1], [5.2, 3.5, 1.5, 0.2], 43 | [7.9, 3.8, 6.4, 2.0], [5.7, 3.8, 1.7, 0.3], [5.6, 2.8, 4.9, 2.0], 44 | [6.8, 3.0, 5.5, 2.1], [5.7, 2.8, 4.5, 1.3], [4.7, 3.2, 1.3, 0.2], 45 | [6.0, 2.2, 4.0, 1.0], [6.1, 3.0, 4.9, 1.8], [4.7, 3.2, 1.6, 0.2], 46 | [6.5, 3.0, 5.5, 1.8], [5.0, 3.6, 1.4, 0.2], [4.9, 2.4, 3.3, 1.0], 47 | [5.7, 2.6, 3.5, 1.0], [6.7, 3.1, 4.7, 1.5], [6.4, 2.7, 5.3, 1.9], 48 | [5.2, 4.1, 1.5, 0.1], [5.0, 3.0, 1.6, 0.2], [7.7, 2.8, 6.7, 2.0], 49 | [4.9, 2.5, 4.5, 1.7], [4.9, 3.1, 1.5, 0.1], [7.2, 3.0, 5.8, 1.6], 50 | [4.4, 2.9, 1.4, 0.2], [6.3, 2.9, 5.6, 1.8], [5.9, 3.0, 4.2, 1.5], 51 | [6.3, 2.8, 5.1, 1.5], [4.4, 3.0, 1.3, 0.2], [7.3, 2.9, 6.3, 1.8], 52 | [4.6, 3.6, 1.0, 0.2], [6.2, 2.2, 4.5, 1.5], [6.9, 3.1, 4.9, 1.5], 53 | ]) 54 | 55 | outputs_train = np.array([ 56 | [0, 0, 1], [0, 1, 0], [0, 1, 0], 57 | [0, 1, 0], [0, 0, 1], [0, 1, 0], 58 | [0, 1, 0], [0, 0, 1], [0, 1, 0], 59 | [0, 1, 0], [0, 1, 0], [0, 1, 0], 60 | [0, 0, 1], [1, 0, 0], [0, 1, 0], 61 | [0, 1, 0], [1, 0, 0], [0, 0, 1], 62 | [0, 1, 0], [0, 1, 0], [0, 0, 1], 63 | [1, 0, 0], [0, 0, 1], [0, 1, 0], 64 | [0, 0, 1], [0, 0, 1], [0, 0, 1], 65 | [0, 0, 1], [0, 0, 1], [1, 0, 0], 66 | [1, 0, 0], [0, 0, 1], [0, 1, 0], 67 | [1, 0, 0], [0, 1, 0], [1, 0, 0], 68 | [1, 0, 0], [0, 0, 1], [1, 0, 0], 69 | [0, 1, 0], [0, 1, 0], [1, 0, 0], 70 | [0, 0, 1], [0, 0, 1], [1, 0, 0], 71 | [0, 1, 0], [0, 1, 0], [0, 0, 1], 72 | [1, 0, 0], [1, 0, 0], [0, 0, 1], 73 | [0, 0, 1], [0, 1, 0], [0, 0, 1], 74 | [0, 0, 1], [1, 0, 0], [0, 0, 1], 75 | [1, 0, 0], [0, 1, 0], [0, 1, 0], 76 | [0, 1, 0], [1, 0, 0], [0, 1, 0], 77 | [1, 0, 0], [0, 0, 1], [1, 0, 0], 78 | [1, 0, 0], [1, 0, 0], [0, 0, 1], 79 | [0, 1, 0], [0, 1, 0], [0, 1, 0], 80 | [1, 0, 0], [1, 0, 0], [0, 0, 1], 81 | [0, 0, 1], [0, 1, 0], [0, 1, 0], 82 | [1, 0, 0], [1, 0, 0], [1, 0, 0], 83 | [1, 0, 0], [0, 0, 1], [0, 1, 0], 84 | [0, 1, 0], [1, 0, 0], [0, 0, 1], 85 | [1, 0, 0], [1, 0, 0], [1, 0, 0], 86 | [0, 0, 1], [1, 0, 0], [0, 0, 1], 87 | [0, 0, 1], [0, 1, 0], [1, 0, 0], 88 | [0, 1, 0], [0, 0, 1], [1, 0, 0], 89 | [0, 0, 1], [1, 0, 0], [0, 1, 0], 90 | [0, 1, 0], [0, 1, 0], [0, 0, 1], 91 | [1, 0, 0], [1, 0, 0], [0, 0, 1], 92 | [0, 0, 1], [1, 0, 0], [0, 0, 1], 93 | [1, 0, 0], [0, 0, 1], [0, 1, 0], 94 | [0, 0, 1], [1, 0, 0], [0, 0, 1], 95 | [1, 0, 0], [0, 1, 0], [0, 1, 0], 96 | ]) 97 | 98 | inputs_test = np.array([ 99 | [5.6, 2.7, 4.2, 1.3], [6.2, 3.4, 5.4, 2.3], [4.8, 3.0, 1.4, 0.3], 100 | [5.8, 2.7, 5.1, 1.9], [6.1, 3.0, 4.6, 1.4], [6.7, 3.3, 5.7, 2.5], 101 | [6.7, 3.0, 5.2, 2.3], [6.8, 3.2, 5.9, 2.3], [5.7, 2.8, 4.1, 1.3], 102 | [5.8, 2.6, 4.0, 1.2], [5.0, 3.5, 1.3, 0.3], [5.1, 3.8, 1.6, 0.2], 103 | [4.6, 3.2, 1.4, 0.2], [6.7, 3.1, 5.6, 2.4], [5.1, 3.8, 1.9, 0.4], 104 | [5.1, 2.5, 3.0, 1.1], [5.7, 2.9, 4.2, 1.3], [5.9, 3.0, 5.1, 1.8], 105 | [5.3, 3.7, 1.5, 0.2], [5.7, 3.0, 4.2, 1.2], [5.0, 2.3, 3.3, 1.0], 106 | [6.9, 3.1, 5.1, 2.3], [5.0, 3.3, 1.4, 0.2], [4.5, 2.3, 1.3, 0.3], 107 | [5.5, 2.6, 4.4, 1.2], [6.5, 3.0, 5.2, 2.0], [5.0, 3.5, 1.6, 0.6], 108 | [6.3, 2.5, 5.0, 1.9], [6.2, 2.9, 4.3, 1.3], [4.4, 3.2, 1.3, 0.2], 109 | ]) 110 | 111 | outputs_test = np.array([ 112 | [0, 1, 0], [0, 0, 1], [1, 0, 0], 113 | [0, 0, 1], [0, 1, 
0], [0, 0, 1],
114 |     [0, 0, 1], [0, 0, 1], [0, 1, 0],
115 |     [0, 1, 0], [1, 0, 0], [1, 0, 0],
116 |     [1, 0, 0], [0, 0, 1], [1, 0, 0],
117 |     [0, 1, 0], [0, 1, 0], [0, 0, 1],
118 |     [1, 0, 0], [0, 1, 0], [0, 1, 0],
119 |     [0, 0, 1], [1, 0, 0], [1, 0, 0],
120 |     [0, 1, 0], [0, 0, 1], [1, 0, 0],
121 |     [0, 0, 1], [0, 1, 0], [1, 0, 0],
122 | ])
123 | 
124 | 
125 | def load():
126 |     '''
127 |     Loads the iris dataset without any preprocessing.
128 |     The data set consists of 50 samples from each of three species of Iris
129 |     (Iris setosa, Iris virginica and Iris versicolor).
130 |     Four features were measured from each sample: the length and the width
131 |     of the sepals and petals.
132 | 
133 |     Inputs have the following features/columns:
134 | 
135 |     :code:`sepal-length sepal-width petal-length petal-width`
136 | 
137 |     Outputs:
138 | 
139 |     :code:`[1, 0, 0]` - Iris-setosa,
140 |     :code:`[0, 1, 0]` - Iris-versicolor,
141 |     :code:`[0, 0, 1]` - Iris-virginica.
142 | 
143 |     Returns
144 |     -------
145 |     inputs_train : numpy.array
146 |         120x4 numpy array, each row having 4 features.
147 |     outputs_train : numpy.array
148 |         120x3 numpy array, contains 120 one-hot vectors, each
149 |         corresponding to a category.
150 |     inputs_test : numpy.array
151 |         30x4 numpy array, each row having 4 features.
152 |     outputs_test : numpy.array
153 |         30x3 numpy array, contains 30 one-hot vectors, each
154 |         corresponding to a category.
155 |     '''
156 |     return inputs_train, outputs_train, inputs_test, outputs_test
--------------------------------------------------------------------------------
/pykitml/random_forest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import multiprocessing as mp
3 | from math import ceil
4 | from contextlib import redirect_stdout
5 | 
6 | import numpy as np
7 | import tqdm
8 | 
9 | from . import _shared_array
10 | from ._regressor import Regressor
11 | from ._classifier import Classifier
12 | from . import decision_tree
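
# Training architecture (describing the code below): trees to be grown are
# placed on an input queue; one worker process per CPU core pulls a tree off
# the queue, grows it on a fresh bootstrap sample of the shared-memory copy
# of the dataset, and pushes the trained tree onto a return queue.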
13 | 
14 | 
15 | def _train_trees(input_q, ret_q, inputs_sh, inputs_shape, outputs_sh, outputs_shape):
16 |     # Retrieve numpy arrays from multiprocessing shared arrays
17 |     inputs = _shared_array.shm_as_ndarray(inputs_sh, inputs_shape)
18 |     outputs = _shared_array.shm_as_ndarray(outputs_sh, outputs_shape)
19 | 
20 |     # Suppress print statements
21 |     with redirect_stdout(open(os.devnull, 'w')):
22 |         while True:
23 |             # Get tree from input queue
24 |             try:
25 |                 tree = input_q.get(block=False)
26 |             except mp.queues.Empty:
27 |                 break
28 | 
29 |             # Create bootstrapped dataset
30 |             indices = np.random.choice(inputs.shape[0], inputs.shape[0])
31 |             bootstrapped_inputs = inputs[indices]
32 |             bootstrapped_outputs = outputs[indices]
33 | 
34 |             # Grow the tree
35 |             tree.train(bootstrapped_inputs, bootstrapped_outputs)
36 | 
37 |             # Put the trained tree in output queue
38 |             ret_q.put(tree)
39 | 
40 | 
41 | class _RandomTree(decision_tree.DecisionTree):
42 |     def __init__(self, input_size, output_size, num_features, feature_type=[],
43 |                  max_depth=6, min_split=2, max_splits_eval=100, regression=False):
44 |         # Initialize parent class
45 |         super(_RandomTree, self).__init__(input_size, output_size, feature_type, max_depth,
46 |                                           min_split, max_splits_eval, regression)
47 | 
48 |         # Select only a few random columns of the dataset for training
49 |         self._cols_train = np.random.choice(input_size, num_features, replace=False)
50 | 
51 |         # Disable progress bar
52 |         self._pbardis = True
53 | 
54 | 
55 | class RandomForest(Classifier, Regressor):
56 |     def __init__(self, input_size, output_size, feature_type=[], max_depth=6, min_split=2,
57 |                  max_splits_eval=100, regression=False):
58 |         '''
59 |         Parameters
60 |         ----------
61 |         input_size : int
62 |             Size of input data or number of input features.
63 |         output_size : int
64 |             Number of categories or groups.
65 |         feature_type : list
66 |             List of strings describing the type of feature for
67 |             each column. Can be :code:`'continues'`,
68 |             :code:`'ranked'`, or :code:`'categorical'`.
69 |         max_depth : int
70 |             The maximum depth the trees can grow to.
71 |         min_split : int
72 |             The minimum number of data points a node should have to get
73 |             split.
74 |         max_splits_eval : int
75 |             The maximum number of split points to evaluate for an
76 |             attribute. If the number of candidate split points exceeds
77 |             this, :code:`max_splits_eval` split candidates will be
78 |             randomly sampled from the candidates and only the sampled
79 |             ones will be evaluated for finding the best split point.
80 |         regression : bool
81 |             If the model is being trained on a regression problem.
82 | 
83 |         Raises
84 |         ------
85 |         InvalidFeatureType
86 |             Invalid/Unknown feature type. Can only be :code:`'continues'`,
87 |             :code:`'ranked'`, or :code:`'categorical'`.
88 |         '''
89 |         # Save values
90 |         self._input_size = input_size
91 |         self._output_size = output_size
92 |         self._ftype = feature_type
93 |         self._max_depth = max_depth
94 |         self._min_split = min_split
95 |         self._regression = regression
96 |         self._max_splits_eval = max_splits_eval
97 | 
98 |         # List to store trees in
99 |         self._trees = []
100 | 
101 |         # Outputs
102 |         self._output = None
103 | 
104 |     @property
105 |     def _out_size(self):
106 |         return self._output_size
107 | 
108 |     @property
109 |     def trees(self):
110 |         '''
111 |         A list of decision trees used in the forest.
112 |         '''
113 |         return self._trees
114 | 
115 |     def train(self, inputs, outputs, num_trees=100, num_feature_bag=None):
116 |         '''
117 |         Trains the model on the training data.
118 | 
119 |         Parameters
120 |         ----------
121 |         inputs : numpy.array
122 |             Numpy array containing training data.
123 |         outputs : numpy.array
124 |             Numpy array containing training targets, corresponding to the training data.
125 |         num_trees : int
126 |             Number of trees to grow.
127 |         num_feature_bag : int or None
128 |             Number of random features to select when growing
129 |             a tree. If :code:`None` (default), :code:`ceil(sqrt(input_size))`
130 |             is chosen for classification and :code:`int(input_size/3)` for regression.
131 | 
132 |         Raises
133 |         ------
134 |         numpy.AxisError
135 |             If output_size is less than two. Use :py:func:`pykitml.onehot` to change
136 |             0/False to [1, 0] and 1/True to [0, 1] for binary classification.
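
        Example
        -------
        A hypothetical sketch (assumes the package-level export used in
        the docs; :code:`inputs_train`/:code:`outputs_train` are
        placeholder arrays, e.g. from one of the bundled datasets):

        >>> import pykitml as pk
        >>> forest = pk.RandomForest(4, 3, feature_type=['continues']*4)  # doctest: +SKIP
        >>> forest.train(inputs_train, outputs_train, num_trees=50)  # doctest: +SKIP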
137 |         '''
138 |         print('Training Model...')
139 | 
140 |         # Number of features to bag/choose for each tree
141 |         if num_feature_bag is None:
142 |             if not self._regression:
143 |                 num_feature_bag = ceil(np.sqrt(self._input_size))
144 |             else:
145 |                 num_feature_bag = int(self._input_size/3)
146 | 
147 |         # Create queues
148 |         input_q = mp.Queue()
149 |         ret_q = mp.Queue()
150 | 
151 |         # Initialize input queue
152 |         for _ in range(num_trees):
153 |             # Create tree
154 |             tree = _RandomTree(self._input_size, self._output_size, num_feature_bag,
155 |                                self._ftype, self._max_depth, self._min_split, self._max_splits_eval,
156 |                                self._regression)
157 |             # Put it in queue
158 |             input_q.put(tree)
159 | 
160 |         # Create shared multiprocess array for inputs and outputs
161 |         inputs_sh = _shared_array.ndarray_to_shm(inputs)
162 |         outputs_sh = _shared_array.ndarray_to_shm(outputs)
163 | 
164 |         # Start worker processes
165 |         for _ in range(os.cpu_count()):
166 |             p = mp.Process(
167 |                 target=_train_trees, args=(input_q, ret_q, inputs_sh, inputs.shape, outputs_sh, outputs.shape)
168 |             )
169 |             p.start()
170 | 
171 |         # Progress bar and append trained trees to list
172 |         pbar = tqdm.tqdm(total=num_trees, ncols=80, unit='trees')
173 | 
174 |         while len(self._trees) != num_trees:
175 |             tree = ret_q.get()
176 |             self._trees.append(tree)
177 |             pbar.update()
178 | 
179 |         # Close the progress bar when done
180 |         pbar.close()
181 | 
182 |     def feed(self, input_data):
183 |         # Loop through all the trees and total their outputs
184 |         total = 0
185 |         for tree in self._trees:
186 |             tree.feed(input_data)
187 |             total += tree.get_output()
188 | 
189 |         # Average
190 |         self._output = total/len(self._trees)
191 | 
192 |     def get_output(self):
193 |         return self._output.squeeze()
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 |     // Use IntelliSense to learn about possible attributes.
3 |     // Hover to view descriptions of existing attributes.
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "PCA", 9 | "type": "python", 10 | "request": "launch", 11 | "cwd": "${workspaceFolder}/tests", 12 | "program": "${workspaceFolder}/tests/test_pca.py", 13 | "console": "integratedTerminal" 14 | }, 15 | { 16 | "name": "Banknote", 17 | "type": "python", 18 | "request": "launch", 19 | "cwd": "${workspaceFolder}/tests", 20 | "program": "${workspaceFolder}/tests/test_banknote.py", 21 | "console": "integratedTerminal" 22 | }, 23 | { 24 | "name": "Banknote Decision Tree", 25 | "type": "python", 26 | "request": "launch", 27 | "cwd": "${workspaceFolder}/tests", 28 | "program": "${workspaceFolder}/tests/test_banknote_tree.py", 29 | "console": "integratedTerminal" 30 | }, 31 | { 32 | "name": "Adult Random Forest", 33 | "type": "python", 34 | "request": "launch", 35 | "cwd": "${workspaceFolder}/tests", 36 | "program": "${workspaceFolder}/tests/test_adult_forest.py", 37 | "console": "integratedTerminal" 38 | }, 39 | { 40 | "name": "Adult Decision Tree", 41 | "type": "python", 42 | "request": "launch", 43 | "cwd": "${workspaceFolder}/tests", 44 | "program": "${workspaceFolder}/tests/test_adult_tree.py", 45 | "console": "integratedTerminal" 46 | }, 47 | { 48 | "name": "Adult", 49 | "type": "python", 50 | "request": "launch", 51 | "cwd": "${workspaceFolder}/tests", 52 | "program": "${workspaceFolder}/tests/test_adult.py", 53 | "console": "integratedTerminal" 54 | }, 55 | { 56 | "name": "Heart Disease", 57 | "type": "python", 58 | "request": "launch", 59 | "cwd": "${workspaceFolder}/tests", 60 | "program": "${workspaceFolder}/tests/test_heart.py", 61 | "console": "integratedTerminal" 62 | }, 63 | { 64 | "name": "Heart Disease Tree", 65 | "type": "python", 66 | "request": "launch", 67 | "cwd": "${workspaceFolder}/tests", 68 | "program": "${workspaceFolder}/tests/test_heart_tree.py", 69 | "console": "integratedTerminal" 70 | }, 71 | { 72 | "name": "Heart Disease Naive Bayes", 73 | "type": "python", 74 | "request": "launch", 75 | "cwd": "${workspaceFolder}/tests", 76 | "program": "${workspaceFolder}/tests/test_heart_bayes.py", 77 | "console": "integratedTerminal" 78 | }, 79 | { 80 | "name": "Fish Length", 81 | "type": "python", 82 | "request": "launch", 83 | "cwd": "${workspaceFolder}/tests", 84 | "program": "${workspaceFolder}/tests/test_fishlength.py", 85 | "console": "integratedTerminal" 86 | }, 87 | { 88 | "name": "Iris Nearest Neighbor", 89 | "type": "python", 90 | "request": "launch", 91 | "cwd": "${workspaceFolder}/tests", 92 | "program": "${workspaceFolder}/tests/test_iris_neighbor.py", 93 | "console": "integratedTerminal" 94 | }, 95 | { 96 | "name": "Iris Decision Tree", 97 | "type": "python", 98 | "request": "launch", 99 | "cwd": "${workspaceFolder}/tests", 100 | "program": "${workspaceFolder}/tests/test_iris_tree.py", 101 | "console": "integratedTerminal" 102 | }, 103 | { 104 | "name": "Boston Regression Tree", 105 | "type": "python", 106 | "request": "launch", 107 | "cwd": "${workspaceFolder}/tests", 108 | "program": "${workspaceFolder}/tests/test_boston_tree.py", 109 | "console": "integratedTerminal" 110 | }, 111 | { 112 | "name": "Boston Regression Forest", 113 | "type": "python", 114 | "request": "launch", 115 | "cwd": "${workspaceFolder}/tests", 116 | "program": "${workspaceFolder}/tests/test_boston_forest.py", 117 | "console": "integratedTerminal" 118 | }, 119 | { 120 | "name": "Iris Naive Bayes", 121 | "type": "python", 122 | "request": "launch", 
123 | "cwd": "${workspaceFolder}/tests", 124 | "program": "${workspaceFolder}/tests/test_iris_bayes.py", 125 | "console": "integratedTerminal" 126 | }, 127 | { 128 | "name": "Iris SVM", 129 | "type": "python", 130 | "request": "launch", 131 | "cwd": "${workspaceFolder}/tests", 132 | "program": "${workspaceFolder}/tests/test_iris_svm.py", 133 | "console": "integratedTerminal" 134 | }, 135 | { 136 | "name": "Iris", 137 | "type": "python", 138 | "request": "launch", 139 | "cwd": "${workspaceFolder}/tests", 140 | "program": "${workspaceFolder}/tests/test_iris.py", 141 | "console": "integratedTerminal" 142 | }, 143 | { 144 | "name": "MNIST", 145 | "type": "python", 146 | "request": "launch", 147 | "cwd": "${workspaceFolder}/tests", 148 | "program": "${workspaceFolder}/tests/test_mnist.py", 149 | "args": ["adam"], 150 | "console": "integratedTerminal" 151 | }, 152 | { 153 | "name": "MNIST SVM", 154 | "type": "python", 155 | "request": "launch", 156 | "cwd": "${workspaceFolder}/tests", 157 | "program": "${workspaceFolder}/tests/test_mnist_svm.py", 158 | "console": "integratedTerminal" 159 | }, 160 | { 161 | "name": "S1 KMEANS", 162 | "type": "python", 163 | "request": "launch", 164 | "cwd": "${workspaceFolder}/tests", 165 | "program": "${workspaceFolder}/tests/test_s1_kmeans.py", 166 | "console": "integratedTerminal" 167 | }, 168 | { 169 | "name": "LSTM ECG eye", 170 | "type": "python", 171 | "request": "launch", 172 | "cwd": "${workspaceFolder}/tests", 173 | "program": "${workspaceFolder}/tests/test_lstm_eye.py", 174 | "console": "integratedTerminal" 175 | }, 176 | { 177 | "name": "LSTM Punchout", 178 | "type": "python", 179 | "request": "launch", 180 | "cwd": "${workspaceFolder}/tests", 181 | "program": "${workspaceFolder}/tests/test_punchout.py", 182 | "console": "integratedTerminal" 183 | }, 184 | { 185 | "name": "Random Search", 186 | "type": "python", 187 | "request": "launch", 188 | "cwd": "${workspaceFolder}/tests", 189 | "program": "${workspaceFolder}/tests/test_search.py", 190 | "console": "integratedTerminal" 191 | }, 192 | { 193 | "name": "DQN Cartpole", 194 | "type": "python", 195 | "request": "launch", 196 | "cwd": "${workspaceFolder}/tests", 197 | "program": "${workspaceFolder}/tests/test_cartpole_dqn.py", 198 | "console": "integratedTerminal" 199 | } 200 | ] 201 | } --------------------------------------------------------------------------------