├── .gitignore ├── LICENSE ├── README.md ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── _static │ └── theoretical_basis_description.pdf │ ├── conf.py │ ├── index.rst │ └── rst │ ├── autodoc.rst │ ├── theoretical_basis.rst │ ├── tutorials.rst │ └── tutorials │ ├── black-box_model_aproximation.ipynb │ ├── classification.ipynb │ ├── data │ ├── GBSG2.arff │ ├── australian_test.csv │ ├── australian_train.csv │ ├── cpu.arff │ ├── titanic_kaggle.csv │ └── titanic_openml.csv │ ├── dataset_transformation.ipynb │ ├── regression.ipynb │ └── survival.ipynb ├── requirements.txt ├── rulexai ├── __init__.py ├── explainer.py ├── importances.py ├── models.py ├── reduct.py └── rule.py ├── setup.cfg ├── setup.py └── tests ├── resources ├── classification │ ├── iris.arff │ ├── results.csv │ └── results_split.csv ├── features_importances.csv ├── regression │ ├── diabetes.arff │ ├── results.csv │ └── results_split.csv └── survival │ ├── pbc.arff │ ├── results.csv │ └── results_split.csv ├── test_conditions_importances.py └── test_functionalities.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | doc/build/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RuleXAI 2 | 3 | RuleXAI is a rule-based approach to explain the output of any machine learning model. It is suitable for classification, regression and survival tasks. 4 | 5 | ## Installation 6 | 7 | RuleXAI can be installed from [PyPI](https://pypi.org/project/rulexai/) 8 | 9 | ```bash 10 | pip install rulexai 11 | ``` 12 | 13 | Or you can clone the repository and run: 14 | ```bash 15 | pip install . 
16 | ``` 17 | 18 | ## Model agnostic example 19 | ```python 20 | from sklearn.datasets import load_iris 21 | from sklearn.model_selection import train_test_split 22 | from sklearn.svm import SVC 23 | import pandas as pd 24 | 25 | from rulexai.explainer import Explainer 26 | 27 | 28 | # load iris dataset 29 | data = load_iris() 30 | df = pd.DataFrame(data['data'], columns=data['feature_names']) 31 | df['class'] = data['target'] 32 | 33 | # train a SVM classifier 34 | X_train,X_test,y_train,y_test = train_test_split(df.drop(columns=["class"]), df["class"], test_size=0.2, random_state=0) 35 | svm = SVC(kernel='rbf', probability=True) 36 | svm.fit(X_train, y_train) 37 | predictions = svm.predict(X_train) 38 | # prepare model predictions to be fed to RuleXAI, remember to change numerical predictions to labels (in this example it is simply converting predictions to a string) 39 | model_predictions = pd.DataFrame(predictions.astype(str), columns=[y_train.name], index = y_train.index) 40 | 41 | # use Explainer to explain model output 42 | explainer = Explainer(X = X_train,model_predictions = model_predictions, type = "classification") 43 | explainer.explain() 44 | 45 | print(explainer.condition_importances_) 46 | ``` 47 | 48 | ## Sample notebooks 49 | 50 | * **[Classification](https://rulexai.readthedocs.io/en/latest/rst/tutorials/classification.html)** - in this notebook, the data from https://www.kaggle.com/c/titanic is analysed to show the advantages and possibilities of using the RuleXAI library for in-depth analysis of the dataset for classification task. The use of RuleXAI to explain rule-based and tree-based models was also compared. 
51 | 52 | * **[Regression](https://rulexai.readthedocs.io/en/latest/rst/tutorials/regression.html)** - notebook showing the use of RuleXAI to explain a rule-based regression model 53 | 54 | * **[Survival](https://rulexai.readthedocs.io/en/latest/rst/tutorials/survival.html)** - notebook showing the use of RuleXAI to explain a rule-based survival model 55 | 56 | * **[Black-box model](https://rulexai.readthedocs.io/en/latest/rst/tutorials/black-box_model_aproximation.html)** explainability - the purpose of this notebook is to demonstrate the possibility of using RuleXAI to explain any black-box model. 57 | 58 | * **[Transformation](https://rulexai.readthedocs.io/en/latest/rst/tutorials/dataset_transformation.html)** - notebook showing the use of RuleXAI to transform a dataset. Often datasets contain missing values and nominal values. Most available algorithms do not support either missing values or nominal values. Many algorithms require the data to be rescaled beforehand. The RuleXAI library is able to convert a dataset with nominal and missing values into a binary dataset containing as attributes the conditions describing the dataset and as values “1” when the condition is satisfied for the example and “0” when the condition is not satisfied. 59 | 60 | 61 | ## Documentation 62 | Full documentation is available [here](https://rulexai.readthedocs.io/en/latest/index.html) 63 | 64 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | set batdir=%~dp0 3 | 4 | pushd %~dp0 5 | 6 | REM Command file for Sphinx documentation 7 | 8 | if "%SPHINXBUILD%" == "" ( 9 | set SPHINXBUILD=python -m sphinx.cmd.build 10 | ) 11 | set SOURCEDIR=source 12 | set BUILDDIR=build 13 | 14 | if "%1" == "help" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M html %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-rtd-theme==0.5.1 2 | nbsphinx==0.8.7 3 | sphinx-copybutton==0.3.1 4 | ipykernel==5.5.0 5 | pandoc==1.0.2 6 | pandas~=1.2.1 7 | numpy~=1.20.3 8 | matplotlib~=3.4.2 9 | rulekit~=1.6.0 -------------------------------------------------------------------------------- /docs/source/_static/theoretical_basis_description.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adaa-polsl/RuleXAI/4075a717c36c2a0589f07a1130343e4bc71809ee/docs/source/_static/theoretical_basis_description.pdf -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('../..')) 16 | 17 | # -- Project information ----------------------------------------------------- 18 | 19 | project = "RuleXAI" 20 | copyright = "2022, Macha Dawid" 21 | author = "Macha Dawid" 22 | 23 | # The full version, including alpha/beta/rc tags 24 | release = "v1.0.0" 25 | 26 | source_suffix = [".rst", ".md"] 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | "sphinx_rtd_theme", 35 | "sphinx.ext.autodoc", 36 | "sphinx.ext.coverage", 37 | "sphinx.ext.napoleon", 38 | "sphinx.ext.intersphinx", 39 | "nbsphinx", 40 | "sphinx_copybutton", 41 | ] 42 | 43 | autoclass_content = "both" 44 | # Add any paths that contain templates here, relative to this directory. 45 | templates_path = ["_templates"] 46 | 47 | # List of patterns, relative to source directory, that match files and 48 | # directories to ignore when looking for source files. 49 | # This pattern also affects html_static_path and html_extra_path. 50 | exclude_patterns = [] 51 | 52 | 53 | # -- Options for HTML output ------------------------------------------------- 54 | 55 | # The theme to use for HTML and HTML Help pages. See the documentation for 56 | # a list of builtin themes. 57 | # 58 | html_theme = "sphinx_rtd_theme" 59 | 60 | # Add any paths that contain custom static files (such as style sheets) here, 61 | # relative to this directory. They are copied after the builtin static files, 62 | # so a file named "default.css" will overwrite the builtin "default.css". 
63 | html_static_path = ["_static"] 64 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | 2 | RuleXAI 3 | ======== 4 | 5 | Welcome to RuleXAI's documentation! 6 | 7 | **RuleXAI** is a rule-based approach to explain the output of any machine learning model. It is suitable for classification, regression and survival tasks. The theoretical basis of the rule analysis methods implemented 8 | in the RuleXAI package can be found in the `Theoretical basis <./_static/theoretical_basis_description.pdf>`_ section. 9 | 10 | Installation 11 | ============ 12 | RuleXAI can be installed from `PyPI <https://pypi.org/project/rulexai/>`_:: 13 | 14 | pip install rulexai 15 | 16 | 17 | .. toctree:: 18 | :maxdepth: 1 19 | :caption: Contents: 20 | 21 | Theoretical basis <./rst/theoretical_basis.rst> 22 | Code documentation <./rst/autodoc.rst> 23 | Tutorials <./rst/tutorials.rst> 24 | 25 | -------------------------------------------------------------------------------- /docs/source/rst/autodoc.rst: -------------------------------------------------------------------------------- 1 | 2 | Code documentation 3 | ======================================== 4 | 5 | .. autoclass:: rulexai.explainer.RuleExplainer 6 | :members: 7 | :inherited-members: 8 | .. 
autoclass:: rulexai.explainer.Explainer 9 | :members: 10 | :inherited-members: 11 | -------------------------------------------------------------------------------- /docs/source/rst/theoretical_basis.rst: -------------------------------------------------------------------------------- 1 | Theoretical basis 2 | ================= 3 | 4 | Click `here <../_static/theoretical_basis_description.pdf>`_ to view document describing theoretical basis of the rule analysis methods implemented in the RuleXAI package -------------------------------------------------------------------------------- /docs/source/rst/tutorials.rst: -------------------------------------------------------------------------------- 1 | Tutorials 2 | ========= 3 | .. toctree:: 4 | :maxdepth: 1 5 | :caption: Table of contents: 6 | 7 | Classification <./tutorials/classification.ipynb> 8 | Regression <./tutorials/regression.ipynb> 9 | Survival <./tutorials/survival.ipynb> 10 | Black-box <./tutorials/black-box_model_aproximation.ipynb> 11 | Transformation <./tutorials/dataset_transformation.ipynb> -------------------------------------------------------------------------------- /docs/source/rst/tutorials/data/GBSG2.arff: -------------------------------------------------------------------------------- 1 | @relation GBSG2 2 | @attribute horTh {'no','yes'} 3 | @attribute age numeric 4 | @attribute menostat {'Post','Pre'} 5 | @attribute tsize numeric 6 | @attribute tgrade {'I','II','III'} 7 | @attribute pnodes numeric 8 | @attribute progrec numeric 9 | @attribute estrec numeric 10 | @attribute survival_time numeric 11 | @attribute survival_status numeric 12 | @data 13 | 'no',70,'Post',21,'II',3,48,66,1814,1 14 | 'yes',56,'Post',12,'II',7,61,77,2018,1 15 | 'yes',58,'Post',35,'II',9,52,271,712,1 16 | 'yes',59,'Post',17,'II',4,60,29,1807,1 17 | 'no',73,'Post',35,'II',1,26,65,772,1 18 | 'no',32,'Pre',57,'III',24,0,13,448,1 19 | 'yes',59,'Post',8,'II',2,181,0,2172,0 20 | 'no',65,'Post',16,'II',1,192,25,2161,0 21 | 
'no',80,'Post',39,'II',30,0,59,471,1 22 | 'no',66,'Post',18,'II',7,0,3,2014,0 23 | 'yes',68,'Post',40,'II',9,16,20,577,1 24 | 'yes',71,'Post',21,'II',9,0,0,184,1 25 | 'yes',59,'Post',58,'II',1,154,101,1840,0 26 | 'no',50,'Post',27,'III',1,16,12,1842,0 27 | 'yes',70,'Post',22,'II',3,113,139,1821,0 28 | 'no',54,'Post',30,'II',1,135,6,1371,1 29 | 'no',39,'Pre',35,'I',4,79,28,707,1 30 | 'yes',66,'Post',23,'II',1,112,225,1743,0 31 | 'yes',69,'Post',25,'I',1,131,196,1781,0 32 | 'no',55,'Post',65,'I',4,312,76,865,1 33 | 'no',56,'Post',22,'II',1,28,23,1684,1 34 | 'no',57,'Post',21,'II',2,184,294,1701,0 35 | 'no',65,'Post',25,'III',1,0,0,1701,0 36 | 'yes',70,'Post',15,'II',3,89,151,1693,0 37 | 'no',65,'Post',70,'III',26,2,64,379,1 38 | 'no',44,'Pre',23,'II',2,299,35,1105,1 39 | 'yes',59,'Post',23,'III',3,8,0,548,1 40 | 'no',43,'Pre',35,'II',4,37,5,1296,1 41 | 'yes',53,'Post',58,'II',1,0,0,1483,0 42 | 'no',32,'Pre',25,'II',2,36,10,1570,0 43 | 'no',45,'Pre',45,'III',2,0,0,1469,0 44 | 'no',36,'Pre',44,'III',2,6,5,1472,0 45 | 'yes',57,'Post',35,'III',1,1490,209,1342,0 46 | 'no',55,'Post',25,'I',2,26,53,1349,0 47 | 'no',34,'Pre',15,'II',5,103,118,1162,1 48 | 'yes',58,'Post',35,'II',2,38,18,1342,0 49 | 'no',62,'Post',22,'II',12,0,8,797,1 50 | 'no',64,'Post',25,'I',9,67,86,1232,0 51 | 'no',53,'Post',23,'II',3,13,7,1230,0 52 | 'no',53,'Post',13,'II',8,423,175,1205,0 53 | 'no',65,'Post',52,'III',7,25,155,1090,0 54 | 'no',45,'Pre',38,'II',38,160,5,1095,0 55 | 'no',58,'Post',42,'III',1,0,0,449,1 56 | 'no',59,'Post',25,'I',2,33,51,2438,0 57 | 'no',65,'Post',20,'II',1,6,6,2233,0 58 | 'yes',34,'Pre',30,'III',12,0,5,286,1 59 | 'yes',65,'Post',18,'II',5,133,175,1861,0 60 | 'no',61,'Post',30,'II',9,41,51,1080,1 61 | 'yes',61,'Post',25,'II',1,21,172,1521,1 62 | 'no',46,'Post',25,'II',1,2,0,1693,0 63 | 'no',63,'Post',25,'II',1,86,366,1528,1 64 | 'yes',45,'Pre',19,'II',7,19,0,169,1 65 | 'no',46,'Pre',35,'II',7,67,44,272,1 66 | 'no',63,'Post',40,'II',3,5,8,731,1 67 | 
'yes',53,'Pre',21,'II',9,29,9,2059,0 68 | 'yes',43,'Post',40,'I',4,233,19,1853,0 69 | 'no',31,'Pre',23,'II',4,20,0,1854,0 70 | 'yes',71,'Post',15,'II',9,85,9,1645,0 71 | 'yes',59,'Post',28,'II',18,0,7,544,1 72 | 'no',62,'Post',15,'II',4,22,70,1666,0 73 | 'no',54,'Post',30,'II',2,31,11,353,1 74 | 'no',46,'Pre',25,'II',13,82,20,1791,0 75 | 'yes',53,'Post',25,'II',2,9,1,1685,0 76 | 'no',45,'Pre',10,'II',1,14,3,191,1 77 | 'no',48,'Pre',30,'II',4,19,4,370,1 78 | 'no',32,'Pre',20,'II',5,55,41,173,1 79 | 'no',30,'Pre',12,'II',11,4,3,242,1 80 | 'no',53,'Post',16,'III',1,1,1,420,1 81 | 'no',42,'Pre',12,'II',6,388,30,438,1 82 | 'no',48,'Pre',35,'II',1,41,61,1624,0 83 | 'yes',54,'Post',30,'II',6,15,81,1036,1 84 | 'no',56,'Post',25,'II',11,0,36,359,1 85 | 'no',51,'Pre',25,'II',16,91,31,171,1 86 | 'no',68,'Post',18,'II',14,0,2,959,1 87 | 'no',46,'Pre',21,'II',3,73,13,1351,0 88 | 'no',41,'Pre',15,'II',4,11,11,486,1 89 | 'no',48,'Pre',16,'III',10,0,0,525,1 90 | 'no',55,'Pre',23,'II',3,295,34,762,1 91 | 'no',52,'Pre',36,'II',6,6,16,175,1 92 | 'no',36,'Pre',8,'III',1,10,0,1195,0 93 | 'no',44,'Pre',25,'III',6,5,2,338,1 94 | 'no',47,'Post',20,'III',6,408,36,1125,0 95 | 'no',42,'Pre',25,'II',7,0,2,249,1 96 | 'no',63,'Post',32,'II',16,7,132,281,1 97 | 'yes',62,'Post',50,'II',11,1,2,377,1 98 | 'no',55,'Post',40,'I',2,64,81,1976,0 99 | 'yes',47,'Pre',45,'II',2,264,59,2539,0 100 | 'no',63,'Post',23,'II',3,22,32,2467,0 101 | 'no',69,'Post',20,'II',2,154,191,876,1 102 | 'no',43,'Pre',21,'II',1,206,87,2132,0 103 | 'no',59,'Post',24,'II',14,2,22,426,1 104 | 'no',75,'Post',50,'II',1,170,317,554,1 105 | 'yes',41,'Pre',40,'II',4,100,100,1246,1 106 | 'no',47,'Pre',36,'III',2,154,99,1926,0 107 | 'no',43,'Pre',80,'II',20,2,25,1207,1 108 | 'no',42,'Pre',30,'III',4,65,81,1852,0 109 | 'no',46,'Pre',35,'I',5,100,0,1174,1 110 | 'no',65,'Post',58,'II',11,390,119,1250,0 111 | 'no',59,'Post',30,'II',3,0,2,530,1 112 | 'no',48,'Pre',70,'II',7,8,0,1502,0 113 | 'no',44,'Pre',27,'II',3,525,61,1364,0 114 | 
'no',53,'Post',25,'II',13,77,131,1170,1 115 | 'no',53,'Post',25,'II',2,54,58,1729,0 116 | 'no',60,'Pre',23,'II',3,136,507,1642,0 117 | 'no',64,'Post',24,'II',2,206,304,1218,1 118 | 'no',56,'Post',8,'II',1,110,0,1358,0 119 | 'no',66,'Post',30,'II',16,0,508,360,1 120 | 'no',50,'Pre',30,'II',1,183,243,550,1 121 | 'no',49,'Pre',55,'II',7,0,0,679,1 122 | 'no',33,'Pre',35,'III',1,26,0,1164,1 123 | 'no',50,'Post',52,'II',1,0,0,350,1 124 | 'no',45,'Pre',29,'II',1,0,0,578,1 125 | 'no',51,'Pre',20,'II',1,0,0,1460,1 126 | 'no',39,'Pre',30,'III',1,0,0,1434,0 127 | 'yes',56,'Post',40,'II',3,0,3,1763,1 128 | 'no',60,'Post',15,'II',2,84,93,889,1 129 | 'yes',47,'Pre',35,'III',17,14,3,357,1 130 | 'no',58,'Post',50,'II',7,77,77,547,1 131 | 'yes',56,'Pre',21,'II',3,111,20,1722,0 132 | 'yes',54,'Post',21,'II',1,7,139,2372,0 133 | 'yes',56,'Post',40,'II',3,0,59,2030,1 134 | 'no',57,'Post',26,'II',1,166,521,1002,1 135 | 'no',53,'Post',10,'II',1,17,61,1280,1 136 | 'no',31,'Pre',60,'II',7,542,77,338,1 137 | 'yes',41,'Pre',80,'II',1,0,0,533,1 138 | 'yes',37,'Pre',25,'II',1,235,38,1169,0 139 | 'no',66,'Post',15,'II',1,252,185,1675,1 140 | 'no',48,'Pre',45,'III',1,0,0,1862,0 141 | 'no',51,'Pre',50,'II',9,0,0,1167,0 142 | 'no',57,'Post',20,'II',3,39,83,495,1 143 | 'yes',40,'Pre',30,'II',2,320,30,1720,0 144 | 'yes',62,'Post',19,'II',1,35,1060,598,1 145 | 'yes',64,'Post',30,'III',12,0,0,392,1 146 | 'no',46,'Pre',12,'II',3,175,80,1502,0 147 | 'no',69,'Post',27,'I',3,140,350,1296,0 148 | 'no',58,'Post',52,'III',5,0,0,1177,0 149 | 'yes',65,'Post',30,'II',5,85,365,1113,0 150 | 'no',40,'Pre',40,'II',5,50,75,288,1 151 | 'no',55,'Post',20,'III',16,0,0,403,1 152 | 'no',62,'Post',25,'III',5,0,0,1225,1 153 | 'no',29,'Pre',12,'II',4,32,150,338,1 154 | 'no',38,'Pre',18,'III',5,141,105,1337,1 155 | 'no',52,'Pre',20,'I',1,78,14,1420,1 156 | 'no',47,'Post',55,'II',18,29,87,2048,0 157 | 'no',53,'Pre',75,'III',19,375,107,600,1 158 | 'no',37,'Pre',15,'I',1,162,22,1765,0 159 | 
'no',63,'Post',60,'II',15,180,12,491,1 160 | 'no',63,'Post',45,'III',7,20,93,305,1 161 | 'no',59,'Post',22,'II',2,23,235,1582,0 162 | 'no',48,'Pre',30,'II',15,250,45,1771,0 163 | 'no',33,'Pre',15,'III',33,66,8,960,1 164 | 'no',38,'Pre',57,'III',9,18,62,571,1 165 | 'no',31,'Pre',28,'II',2,349,189,285,1 166 | 'no',53,'Post',48,'II',7,254,117,1472,0 167 | 'no',47,'Pre',30,'II',1,422,89,1279,1 168 | 'yes',64,'Post',19,'II',1,19,9,1863,0 169 | 'yes',49,'Post',56,'I',3,356,64,1933,0 170 | 'no',53,'Post',52,'II',9,6,29,358,1 171 | 'yes',61,'Post',22,'II',2,6,173,2372,1 172 | 'no',43,'Pre',30,'II',1,22,0,2563,0 173 | 'yes',74,'Post',20,'II',1,462,240,2372,0 174 | 'yes',58,'Post',18,'I',2,74,67,1989,1 175 | 'yes',49,'Pre',20,'II',6,56,98,2015,1 176 | 'yes',61,'Post',35,'III',2,23,9,1956,0 177 | 'no',66,'Post',40,'III',16,21,412,945,1 178 | 'yes',66,'Post',20,'III',3,54,17,2153,0 179 | 'no',59,'Post',23,'II',2,88,38,838,1 180 | 'no',51,'Post',70,'III',6,28,5,113,1 181 | 'yes',71,'Post',18,'II',2,31,9,1833,0 182 | 'no',46,'Pre',50,'III',10,44,4,1722,0 183 | 'no',52,'Pre',40,'III',6,32,5,241,1 184 | 'yes',60,'Post',16,'II',1,184,51,1352,1 185 | 'no',60,'Post',50,'II',7,65,30,1702,0 186 | 'yes',67,'Post',27,'II',4,1118,753,1222,0 187 | 'no',54,'Post',30,'III',3,1,0,1089,0 188 | 'no',55,'Post',12,'II',1,63,19,1243,0 189 | 'no',38,'Pre',20,'II',9,24,34,579,1 190 | 'yes',52,'Post',25,'II',13,31,196,1043,1 191 | 'no',43,'Pre',30,'II',3,45,11,2234,0 192 | 'no',50,'Pre',22,'I',1,135,111,2297,0 193 | 'yes',61,'Post',25,'I',2,32,144,2014,0 194 | 'no',62,'Post',20,'II',2,7,9,518,1 195 | 'no',52,'Post',20,'III',10,7,8,251,1 196 | 'no',45,'Pre',20,'II',2,64,48,1959,0 197 | 'no',52,'Post',10,'II',3,109,12,1897,0 198 | 'no',51,'Post',120,'II',12,3,1,160,1 199 | 'no',61,'Post',20,'II',5,25,75,348,1 200 | 'yes',64,'Post',45,'III',5,1,8,275,1 201 | 'no',64,'Post',17,'I',1,227,0,1329,1 202 | 'no',51,'Post',35,'III',1,6,1,1193,1 203 | 'yes',63,'Post',30,'II',7,0,0,698,1 204 | 
'no',62,'Post',12,'II',7,0,0,436,1 205 | 'yes',65,'Post',18,'III',1,0,0,552,1 206 | 'yes',67,'Post',20,'II',1,0,0,564,1 207 | 'no',62,'Post',30,'II',1,8,371,2239,0 208 | 'yes',48,'Pre',25,'II',1,235,33,2237,0 209 | 'no',67,'Post',25,'II',1,6,19,529,1 210 | 'no',46,'Pre',11,'II',2,0,0,1820,0 211 | 'yes',56,'Post',20,'I',1,2,334,1756,0 212 | 'yes',72,'Post',34,'III',36,2,1091,515,1 213 | 'yes',50,'Post',70,'II',19,10,57,272,1 214 | 'no',58,'Post',21,'III',2,1,1,891,1 215 | 'no',63,'Post',21,'II',1,0,378,1356,0 216 | 'no',45,'Post',15,'II',6,1,162,1352,0 217 | 'yes',58,'Post',18,'II',3,64,418,675,1 218 | 'yes',63,'Post',21,'II',1,26,30,2551,0 219 | 'no',60,'Post',35,'II',12,41,62,754,1 220 | 'no',33,'Pre',25,'II',8,96,13,819,1 221 | 'yes',63,'Post',19,'II',5,18,38,1280,1 222 | 'no',70,'Post',16,'II',2,126,338,2388,0 223 | 'yes',60,'Post',30,'II',2,92,18,2296,0 224 | 'yes',54,'Post',25,'II',1,5,57,1884,0 225 | 'yes',64,'Post',25,'III',3,56,272,1059,1 226 | 'yes',50,'Post',21,'I',1,82,2,1109,0 227 | 'no',53,'Post',20,'II',1,1,1,1192,1 228 | 'no',77,'Post',20,'III',4,94,325,1806,1 229 | 'yes',47,'Pre',60,'II',15,5,38,500,1 230 | 'no',41,'Pre',20,'II',4,8,38,1589,1 231 | 'yes',47,'Pre',30,'II',5,12,11,1463,1 232 | 'yes',63,'Post',25,'II',2,8,195,1826,0 233 | 'no',48,'Pre',22,'II',4,26,29,1231,0 234 | 'no',40,'Pre',15,'II',1,204,138,1117,0 235 | 'yes',57,'Post',30,'II',8,40,40,836,1 236 | 'no',47,'Pre',40,'II',2,33,59,1222,0 237 | 'yes',58,'Post',35,'III',7,0,0,722,1 238 | 'yes',62,'Post',23,'II',2,0,14,1150,1 239 | 'no',50,'Pre',60,'III',4,0,0,446,1 240 | 'yes',65,'Post',30,'II',5,0,36,1855,0 241 | 'yes',59,'Post',30,'II',8,0,0,238,1 242 | 'no',49,'Pre',18,'II',2,0,0,1838,0 243 | 'yes',52,'Post',25,'II',13,0,0,1826,0 244 | 'no',45,'Pre',30,'II',1,0,0,1093,1 245 | 'no',49,'Post',14,'II',1,0,0,2051,0 246 | 'no',58,'Post',45,'III',4,0,0,370,1 247 | 'no',25,'Pre',22,'II',2,250,87,861,1 248 | 'no',50,'Pre',30,'III',6,0,0,1587,1 249 | 'no',43,'Pre',27,'II',1,23,9,552,1 250 | 
'no',46,'Pre',12,'II',1,6,49,2353,0 251 | 'yes',64,'Post',24,'III',5,366,201,2471,0 252 | 'yes',63,'Post',43,'II',5,21,174,893,1 253 | 'no',40,'Pre',35,'II',2,279,99,2093,1 254 | 'yes',57,'Post',22,'II',4,16,5,2612,0 255 | 'yes',58,'Post',56,'I',11,51,50,956,1 256 | 'yes',62,'Post',25,'III',4,12,49,1637,0 257 | 'yes',50,'Pre',42,'I',2,238,26,2456,0 258 | 'no',49,'Post',30,'II',4,40,177,2227,0 259 | 'no',64,'Post',24,'II',2,41,80,1601,1 260 | 'yes',66,'Post',15,'II',2,15,42,1841,0 261 | 'yes',37,'Pre',30,'II',4,104,107,2177,0 262 | 'no',60,'Post',18,'III',2,12,8,2052,0 263 | 'no',51,'Pre',12,'I',2,55,64,2156,0 264 | 'yes',49,'Pre',28,'I',4,364,120,1499,0 265 | 'yes',57,'Post',7,'II',1,1,1,2030,0 266 | 'yes',68,'Post',14,'II',6,40,68,573,1 267 | 'no',47,'Pre',25,'II',1,199,134,1666,0 268 | 'no',51,'Post',13,'II',5,89,134,1979,0 269 | 'yes',49,'Pre',19,'I',5,69,14,1786,0 270 | 'no',63,'Post',28,'II',4,258,46,1847,0 271 | 'yes',64,'Post',15,'II',1,340,71,2009,0 272 | 'no',65,'Post',24,'II',1,328,115,1926,0 273 | 'yes',63,'Post',13,'II',1,124,361,1490,0 274 | 'no',33,'Pre',23,'III',10,2,3,233,1 275 | 'no',44,'Pre',35,'II',6,26,4,1240,0 276 | 'no',47,'Pre',13,'II',3,242,14,1751,0 277 | 'no',46,'Pre',19,'I',11,56,24,1878,0 278 | 'no',52,'Pre',26,'II',1,258,10,1171,0 279 | 'no',62,'Post',55,'III',8,3,2,1751,0 280 | 'yes',61,'Post',24,'II',2,28,50,1756,0 281 | 'no',60,'Post',27,'II',6,401,159,714,1 282 | 'yes',67,'Post',44,'II',10,431,267,1505,0 283 | 'no',47,'Pre',78,'II',14,168,53,776,1 284 | 'no',70,'Post',38,'III',2,24,15,1443,0 285 | 'no',50,'Pre',11,'I',1,10,11,1317,0 286 | 'no',58,'Post',30,'III',13,7,46,859,1 287 | 'no',59,'Post',20,'II',1,2,4,223,1 288 | 'no',45,'Pre',18,'I',1,56,40,1212,0 289 | 'no',45,'Pre',30,'II',3,345,31,1119,0 290 | 'yes',60,'Post',24,'III',7,10,10,632,1 291 | 'yes',51,'Pre',30,'III',2,1152,38,1760,0 292 | 'no',49,'Pre',45,'III',6,0,22,375,1 293 | 'yes',47,'Pre',42,'II',7,164,204,1323,0 294 | 'no',37,'Pre',50,'III',2,170,130,1233,0 295 | 
'no',44,'Pre',29,'II',1,27,23,1866,0 296 | 'yes',38,'Pre',18,'II',4,28,5,491,1 297 | 'yes',51,'Pre',34,'II',3,13,12,1918,1 298 | 'no',59,'Post',8,'II',5,1,30,72,1 299 | 'yes',52,'Post',49,'III',6,8,5,1140,1 300 | 'yes',64,'Post',32,'II',4,402,372,799,1 301 | 'no',55,'Post',37,'II',1,82,234,1105,1 302 | 'no',61,'Post',22,'II',2,179,124,548,1 303 | 'yes',44,'Pre',28,'III',17,2,3,227,1 304 | 'no',38,'Pre',24,'II',3,13,5,1838,0 305 | 'yes',43,'Pre',11,'I',1,126,22,1833,0 306 | 'no',65,'Post',36,'III',2,9,7,550,1 307 | 'yes',59,'Post',48,'III',1,5,17,426,1 308 | 'no',38,'Pre',31,'I',10,365,206,1834,0 309 | 'no',47,'Pre',25,'II',3,18,42,1604,0 310 | 'yes',47,'Post',30,'I',9,114,26,1146,1 311 | 'no',36,'Pre',25,'II',2,70,22,371,1 312 | 'no',47,'Pre',24,'II',20,30,8,883,1 313 | 'no',38,'Pre',23,'III',3,14,6,1735,0 314 | 'yes',50,'Post',23,'II',8,98,30,554,1 315 | 'no',44,'Pre',5,'II',10,11,10,790,1 316 | 'no',54,'Post',22,'II',2,211,129,1340,0 317 | 'no',52,'Pre',30,'II',12,11,20,490,1 318 | 'no',34,'Pre',3,'III',1,14,11,1557,0 319 | 'no',64,'Post',33,'III',3,20,14,594,1 320 | 'no',65,'Post',27,'II',4,148,191,594,1 321 | 'yes',47,'Pre',30,'I',3,195,45,2556,0 322 | 'no',51,'Pre',20,'II',1,77,89,1753,1 323 | 'no',63,'Post',15,'III',5,0,0,417,1 324 | 'no',36,'Pre',30,'III',2,0,0,956,1 325 | 'yes',63,'Post',34,'II',12,223,236,1846,0 326 | 'no',47,'Pre',70,'II',5,796,24,1703,0 327 | 'no',51,'Pre',21,'III',1,0,0,1720,0 328 | 'yes',62,'Post',30,'II',1,88,544,1355,0 329 | 'no',56,'Post',40,'III',3,0,0,1603,0 330 | 'no',62,'Post',33,'I',5,239,76,476,1 331 | 'yes',61,'Post',30,'II',8,472,293,1350,0 332 | 'yes',55,'Post',15,'III',3,97,194,1341,0 333 | 'yes',56,'Post',11,'II',1,270,369,2449,0 334 | 'no',69,'Post',22,'II',8,282,191,2286,1 335 | 'no',57,'Post',25,'II',3,48,65,456,1 336 | 'no',27,'Pre',22,'II',1,56,99,536,1 337 | 'no',38,'Pre',25,'II',1,102,11,612,1 338 | 'no',42,'Pre',25,'III',2,11,10,2034,1 339 | 'no',69,'Post',19,'I',3,73,386,1990,1 340 | 
'no',61,'Post',50,'II',4,10,10,2456,1 341 | 'no',53,'Pre',13,'III',3,10,20,2205,0 342 | 'no',50,'Pre',25,'III',1,24,85,544,1 343 | 'no',52,'Pre',27,'II',5,0,8,336,1 344 | 'no',47,'Pre',38,'II',2,58,10,2057,0 345 | 'no',65,'Post',27,'II',19,23,13,575,1 346 | 'no',48,'Pre',38,'II',3,92,41,2011,0 347 | 'no',61,'Post',38,'II',17,46,52,537,1 348 | 'yes',47,'Pre',12,'II',1,110,14,2217,0 349 | 'no',46,'Post',20,'II',11,680,152,1814,1 350 | 'yes',59,'Post',15,'II',1,30,122,890,1 351 | 'yes',60,'Post',22,'III',1,218,442,1114,0 352 | 'yes',45,'Pre',100,'II',6,178,77,2320,0 353 | 'no',58,'Post',35,'I',6,130,162,795,1 354 | 'no',51,'Post',40,'II',8,132,64,867,1 355 | 'no',49,'Pre',15,'II',1,111,19,1703,0 356 | 'no',43,'Pre',30,'II',2,32,16,670,1 357 | 'no',37,'Pre',35,'II',7,53,19,981,1 358 | 'no',51,'Pre',30,'II',2,505,270,1094,0 359 | 'yes',48,'Pre',35,'II',1,340,32,755,1 360 | 'no',54,'Post',21,'II',7,6,8,1388,1 361 | 'no',64,'Post',21,'III',1,4,3,1387,1 362 | 'no',44,'Pre',55,'III',4,8,8,535,1 363 | 'no',67,'Post',30,'II',2,5,14,1653,0 364 | 'no',63,'Post',24,'II',3,46,25,1904,0 365 | 'yes',42,'Pre',28,'III',4,27,22,1868,0 366 | 'yes',60,'Post',12,'I',2,402,90,1767,0 367 | 'no',39,'Pre',20,'II',1,38,110,855,1 368 | 'no',53,'Post',16,'II',1,16,120,1157,1 369 | 'yes',38,'Pre',61,'II',8,624,569,1869,0 370 | 'no',61,'Post',40,'I',15,185,206,1152,0 371 | 'no',47,'Pre',15,'II',1,38,0,1401,0 372 | 'no',67,'Post',65,'II',8,0,0,745,1 373 | 'yes',61,'Post',25,'II',18,595,419,1283,0 374 | 'yes',57,'Post',15,'II',3,44,78,1481,1 375 | 'yes',42,'Pre',9,'I',8,77,40,1807,0 376 | 'yes',39,'Pre',20,'III',1,2,2,542,1 377 | 'no',34,'Pre',50,'III',7,4,1,1441,0 378 | 'yes',52,'Pre',50,'II',7,45,39,1277,0 379 | 'yes',53,'Pre',45,'II',4,395,44,1486,0 380 | 'yes',46,'Pre',23,'III',8,2,1,177,1 381 | 'no',36,'Pre',36,'II',1,76,14,545,1 382 | 'no',39,'Pre',28,'II',3,5,4,1185,0 383 | 'no',46,'Pre',45,'I',9,239,58,1088,0 384 | 'yes',60,'Post',25,'II',7,116,435,2380,0 385 | 
'yes',64,'Post',36,'II',2,122,198,1679,1 386 | 'yes',54,'Post',40,'III',4,3,2,498,1 387 | 'no',54,'Post',27,'II',5,138,23,2138,0 388 | 'no',46,'Pre',35,'II',6,405,27,2175,0 389 | 'no',49,'Pre',17,'II',2,324,94,2271,0 390 | 'yes',55,'Post',15,'II',3,16,14,964,1 391 | 'yes',45,'Pre',23,'II',4,1,4,540,1 392 | 'no',51,'Post',30,'III',10,15,103,747,1 393 | 'no',43,'Pre',25,'II',11,1,1,650,1 394 | 'yes',59,'Post',30,'II',13,7,81,410,1 395 | 'no',59,'Post',27,'III',20,9,2,624,1 396 | 'no',47,'Pre',28,'III',7,16,92,1560,0 397 | 'no',48,'Pre',35,'III',10,2,222,455,1 398 | 'no',47,'Pre',16,'II',2,128,18,1629,0 399 | 'no',49,'Post',21,'II',5,80,152,1730,0 400 | 'yes',65,'Post',25,'III',2,17,14,1483,0 401 | 'no',60,'Post',21,'II',1,58,701,687,1 402 | 'no',52,'Post',35,'III',1,8,5,308,1 403 | 'no',48,'Post',22,'II',4,14,0,563,1 404 | 'no',46,'Post',20,'II',2,32,29,2144,0 405 | 'no',59,'Post',21,'II',4,0,75,344,1 406 | 'yes',68,'Post',45,'I',3,31,145,1905,0 407 | 'yes',74,'Post',35,'II',11,10,472,855,1 408 | 'no',45,'Pre',50,'I',2,132,200,2370,0 409 | 'yes',44,'Pre',24,'III',5,187,62,475,1 410 | 'yes',72,'Post',17,'II',1,229,533,2195,0 411 | 'yes',49,'Pre',100,'II',35,84,24,648,1 412 | 'no',76,'Post',37,'III',24,11,0,195,1 413 | 'yes',57,'Post',35,'II',4,18,0,473,1 414 | 'yes',62,'Post',22,'II',1,263,34,2659,0 415 | 'yes',46,'Pre',60,'II',19,2,16,1977,1 416 | 'yes',53,'Post',17,'II',1,25,30,2401,0 417 | 'no',43,'Pre',20,'II',3,980,45,1499,0 418 | 'no',51,'Post',32,'III',10,0,0,1856,0 419 | 'no',41,'Pre',30,'III',11,6,5,595,1 420 | 'no',63,'Post',45,'III',2,530,328,2148,0 421 | 'yes',41,'Pre',20,'III',3,13,1,2126,0 422 | 'yes',74,'Post',30,'III',12,432,246,1975,1 423 | 'yes',57,'Post',30,'II',1,17,83,1641,1 424 | 'yes',44,'Pre',20,'II',6,150,67,1717,0 425 | 'yes',48,'Pre',24,'II',1,211,187,1858,0 426 | 'no',47,'Pre',15,'III',1,139,36,2049,0 427 | 'yes',70,'Post',25,'II',4,34,273,1502,1 428 | 'no',49,'Pre',14,'II',1,160,12,1922,0 429 | 'yes',49,'Post',24,'II',2,120,117,1818,0 430 
| 'yes',58,'Post',35,'II',11,2,76,1100,0 431 | 'no',59,'Post',30,'II',1,87,8,1499,0 432 | 'no',60,'Post',35,'II',2,5,4,359,1 433 | 'yes',63,'Post',30,'I',5,144,221,1645,0 434 | 'no',44,'Pre',15,'II',1,175,88,1356,0 435 | 'yes',79,'Post',23,'I',1,60,80,1632,0 436 | 'yes',61,'Post',30,'II',1,24,38,1091,0 437 | 'yes',64,'Post',35,'II',3,47,64,918,1 438 | 'yes',51,'Pre',21,'II',1,3,2,557,1 439 | 'no',44,'Pre',22,'II',2,107,94,1219,1 440 | 'yes',60,'Post',25,'I',3,78,363,2170,0 441 | 'yes',55,'Post',50,'II',1,14,203,729,1 442 | 'no',70,'Post',80,'III',8,0,0,1449,1 443 | 'no',65,'Post',20,'I',2,912,606,991,1 444 | 'no',53,'Pre',20,'II',2,89,36,481,1 445 | 'yes',54,'Post',25,'III',3,1,83,1655,0 446 | 'no',65,'Post',25,'II',2,86,135,857,1 447 | 'yes',62,'Post',30,'II',2,5,104,369,1 448 | 'yes',48,'Pre',30,'I',3,133,129,1627,0 449 | 'yes',48,'Post',35,'I',2,845,105,1578,0 450 | 'no',42,'Pre',40,'II',10,130,51,732,1 451 | 'no',48,'Pre',30,'II',16,29,43,460,1 452 | 'no',66,'Post',25,'I',2,22,121,1208,0 453 | 'yes',63,'Post',25,'II',13,26,348,730,1 454 | 'no',54,'Post',23,'III',10,13,6,307,1 455 | 'no',52,'Post',17,'II',4,558,522,983,1 456 | 'no',43,'Pre',80,'III',11,9,1,120,1 457 | 'no',56,'Post',31,'II',1,45,286,1525,1 458 | 'no',42,'Post',21,'I',4,147,95,1680,0 459 | 'no',56,'Post',16,'II',10,4,2,1730,1 460 | 'no',61,'Post',36,'II',6,107,158,805,1 461 | 'no',67,'Post',17,'II',4,390,386,2388,0 462 | 'yes',63,'Post',21,'I',2,16,241,559,1 463 | 'yes',66,'Post',20,'II',9,1,11,1977,0 464 | 'no',37,'Pre',25,'III',1,13,1,476,1 465 | 'yes',71,'Post',16,'II',1,98,306,1514,0 466 | 'no',43,'Pre',28,'I',1,437,33,1617,0 467 | 'no',64,'Post',22,'III',1,8,11,1094,1 468 | 'yes',64,'Post',27,'II',3,186,139,784,1 469 | 'no',46,'Pre',32,'II',5,9,13,181,1 470 | 'no',45,'Pre',50,'II',7,20,23,415,1 471 | 'yes',67,'Post',24,'II',4,96,90,1120,1 472 | 'no',37,'Pre',25,'III',8,9,0,316,1 473 | 'no',65,'Post',22,'I',6,386,31,637,1 474 | 'no',21,'Pre',15,'II',3,24,25,247,1 475 | 
'no',46,'Pre',45,'II',8,2,4,622,1 476 | 'yes',46,'Post',31,'III',1,6,3,1163,0 477 | 'no',58,'Post',31,'II',2,240,394,1721,0 478 | 'no',41,'Pre',23,'III',2,26,4,372,1 479 | 'no',32,'Pre',17,'III',1,19,8,1331,0 480 | 'yes',66,'Post',42,'III',11,412,339,394,1 481 | 'no',57,'Post',50,'III',13,22,47,98,1 482 | 'yes',47,'Post',23,'III',5,0,0,308,1 483 | 'no',44,'Pre',15,'II',1,0,0,1965,0 484 | 'yes',61,'Post',35,'III',16,10,13,548,1 485 | 'no',48,'Pre',21,'III',8,0,0,293,1 486 | 'yes',51,'Pre',16,'II',5,167,15,2017,0 487 | 'no',66,'Post',22,'II',4,11,22,1093,0 488 | 'no',66,'Post',21,'II',1,9,898,586,1 489 | 'yes',69,'Post',40,'III',1,3,9,1434,0 490 | 'yes',33,'Pre',19,'II',2,0,0,2128,0 491 | 'no',46,'Pre',30,'II',2,26,223,1965,0 492 | 'no',47,'Pre',20,'II',1,48,26,2161,0 493 | 'yes',35,'Pre',35,'II',4,0,0,1183,1 494 | 'no',34,'Pre',40,'III',1,0,37,1108,1 495 | 'no',38,'Pre',24,'I',1,138,82,2065,0 496 | 'no',54,'Post',27,'III',1,27,792,1598,0 497 | 'no',31,'Pre',55,'II',3,28,89,491,1 498 | 'no',41,'Pre',25,'II',5,6,9,1366,1 499 | 'yes',52,'Post',35,'II',21,11,57,859,1 500 | 'yes',65,'Post',25,'III',18,0,0,180,1 501 | 'no',47,'Post',45,'II',2,345,42,1625,0 502 | 'no',65,'Post',10,'I',2,213,209,1938,0 503 | 'yes',53,'Post',37,'II',5,345,47,1343,1 504 | 'no',45,'Pre',15,'II',3,28,27,646,1 505 | 'no',53,'Pre',19,'III',1,74,534,2192,0 506 | 'yes',50,'Post',25,'II',3,0,496,502,1 507 | 'no',54,'Post',50,'III',6,7,0,1675,0 508 | 'yes',64,'Post',40,'II',23,16,22,1363,1 509 | 'no',29,'Pre',15,'III',12,18,40,420,1 510 | 'no',48,'Pre',60,'I',4,312,20,982,1 511 | 'no',40,'Pre',30,'III',3,2,16,1459,0 512 | 'no',65,'Post',35,'II',1,7,74,1192,0 513 | 'no',50,'Post',40,'II',1,80,21,1264,0 514 | 'no',55,'Post',34,'II',6,109,477,1095,0 515 | 'no',35,'Pre',22,'II',13,16,25,465,1 516 | 'no',48,'Pre',52,'II',11,0,0,842,1 517 | 'yes',62,'Post',39,'II',4,73,235,374,1 518 | 'no',47,'Pre',40,'II',1,44,11,1089,0 519 | 'no',51,'Post',19,'II',2,92,245,1527,0 520 | 
'no',42,'Pre',40,'II',10,256,0,285,1 521 | 'no',63,'Post',27,'II',1,0,0,1306,1 522 | 'yes',62,'Post',20,'II',7,0,0,797,1 523 | 'no',57,'Post',15,'II',1,91,125,1441,0 524 | 'no',25,'Pre',29,'II',3,0,0,343,1 525 | 'no',51,'Post',25,'II',2,0,80,503,1 526 | 'yes',47,'Pre',30,'II',10,0,0,827,1 527 | 'yes',34,'Pre',30,'II',2,210,49,1427,0 528 | 'yes',64,'Post',30,'III',12,550,263,177,1 529 | 'no',42,'Pre',55,'III',7,20,20,281,1 530 | 'no',37,'Pre',35,'III',1,242,67,205,1 531 | 'no',62,'Post',27,'II',13,197,79,629,1 532 | 'yes',51,'Post',22,'II',4,250,81,2010,0 533 | 'yes',45,'Pre',13,'III',4,21,27,2009,0 534 | 'no',41,'Pre',10,'I',2,241,214,1984,0 535 | 'no',39,'Pre',32,'II',9,1,8,1981,0 536 | 'no',53,'Post',26,'III',8,1,1,624,1 537 | 'no',59,'Post',35,'II',4,1,1,742,1 538 | 'yes',53,'Post',10,'II',2,217,20,1818,0 539 | 'yes',60,'Post',100,'II',10,102,88,1493,1 540 | 'no',50,'Pre',29,'I',2,323,60,1432,0 541 | 'no',51,'Pre',18,'I',1,94,60,801,1 542 | 'no',51,'Pre',25,'II',2,20,11,1182,0 543 | 'yes',57,'Post',32,'II',2,43,287,1722,0 544 | 'yes',46,'Pre',18,'II',1,120,628,1692,0 545 | 'yes',64,'Post',26,'II',2,1356,1144,1152,0 546 | 'yes',37,'Pre',22,'I',3,23,64,1459,1 547 | 'no',64,'Post',21,'II',3,403,253,2237,0 548 | 'no',48,'Pre',18,'I',1,137,73,2056,0 549 | 'yes',50,'Post',50,'II',6,1,2,1729,0 550 | 'yes',32,'Pre',20,'II',6,8,3,2024,0 551 | 'no',49,'Pre',19,'II',2,388,137,2039,1 552 | 'yes',33,'Pre',28,'III',1,1,1,2027,0 553 | 'yes',58,'Post',35,'II',1,6,11,2007,0 554 | 'no',57,'Post',25,'II',1,26,299,1253,1 555 | 'no',45,'Pre',35,'II',2,26,36,1789,0 556 | 'no',66,'Post',30,'I',5,100,288,1707,0 557 | 'no',52,'Pre',37,'II',3,66,104,1714,0 558 | 'yes',49,'Pre',25,'II',3,152,25,1717,0 559 | 'no',49,'Post',22,'II',1,14,41,329,1 560 | 'no',48,'Post',45,'I',1,312,236,1624,0 561 | 'yes',62,'Post',60,'II',1,56,17,1600,0 562 | 'no',60,'Post',35,'II',3,115,300,385,1 563 | 'no',45,'Pre',10,'II',1,82,8,1475,0 564 | 'no',60,'Post',37,'I',1,296,35,1435,0 565 | 
'yes',57,'Post',36,'III',1,170,192,1329,0 566 | 'yes',53,'Post',27,'III',12,44,42,1357,0 567 | 'no',56,'Post',55,'III',3,46,31,1343,0 568 | 'no',46,'Pre',23,'II',2,120,41,748,1 569 | 'no',49,'Post',30,'II',2,254,353,1090,1 570 | 'yes',56,'Post',32,'II',2,53,174,1219,0 571 | 'yes',56,'Post',42,'I',5,113,700,662,1 572 | 'no',40,'Pre',40,'II',6,227,10,866,1 573 | 'yes',60,'Post',40,'II',6,8,11,504,1 574 | 'no',51,'Pre',25,'III',5,43,0,769,1 575 | 'no',52,'Post',23,'II',3,15,34,727,1 576 | 'no',55,'Post',23,'II',9,116,15,1701,1 577 | -------------------------------------------------------------------------------- /docs/source/rst/tutorials/data/australian_test.csv: -------------------------------------------------------------------------------- 1 | A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,Class 2 | 1.0,2208.0,1146.0,2.0,4.0,4.0,1585.0,0.0,0.0,0.0,1.0,2.0,100.0,1213.0,0 3 | 0.0,2267.0,7.0,2.0,8.0,4.0,165.0,0.0,0.0,0.0,0.0,2.0,160.0,1.0,0 4 | 0.0,1583.0,585.0,2.0,8.0,8.0,15.0,1.0,1.0,2.0,0.0,2.0,100.0,1.0,1 5 | 1.0,20.0,125.0,1.0,4.0,4.0,125.0,0.0,0.0,0.0,0.0,2.0,140.0,5.0,0 6 | 0.0,2242.0,5665.0,2.0,11.0,4.0,2585.0,1.0,1.0,7.0,0.0,2.0,129.0,3258.0,1 7 | 1.0,3275.0,15.0,2.0,13.0,8.0,55.0,1.0,1.0,3.0,1.0,2.0,0.0,1.0,1 8 | 0.0,3067.0,12.0,2.0,8.0,4.0,2.0,1.0,1.0,1.0,0.0,2.0,220.0,20.0,1 9 | 1.0,2758.0,325.0,1.0,11.0,8.0,5085.0,0.0,1.0,2.0,1.0,2.0,369.0,2.0,0 10 | 0.0,2375.0,71.0,2.0,9.0,4.0,25.0,0.0,1.0,1.0,1.0,2.0,240.0,5.0,0 11 | 1.0,3758.0,0.0,2.0,8.0,4.0,0.0,0.0,0.0,0.0,0.0,3.0,184.0,1.0,1 12 | 1.0,3625.0,5.0,2.0,8.0,5.0,25.0,1.0,1.0,6.0,0.0,2.0,0.0,368.0,1 13 | 1.0,2125.0,15.0,2.0,9.0,4.0,15.0,0.0,0.0,0.0,0.0,2.0,150.0,9.0,1 14 | 1.0,2775.0,585.0,1.0,13.0,4.0,25.0,1.0,1.0,2.0,0.0,2.0,260.0,501.0,1 15 | 1.0,19.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,4.0,0.0,2.0,45.0,2.0,0 16 | 1.0,4733.0,65.0,2.0,8.0,4.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,229.0,0 17 | 1.0,22.0,79.0,2.0,9.0,4.0,29.0,0.0,1.0,1.0,0.0,2.0,420.0,284.0,0 18 | 
1.0,4517.0,15.0,2.0,8.0,4.0,25.0,1.0,0.0,0.0,1.0,2.0,140.0,1.0,0 19 | 1.0,2267.0,1585.0,1.0,9.0,4.0,3085.0,1.0,1.0,6.0,0.0,2.0,80.0,1.0,1 20 | 1.0,4783.0,4165.0,2.0,14.0,5.0,85.0,0.0,0.0,0.0,1.0,2.0,520.0,1.0,0 21 | 1.0,2308.0,25.0,2.0,1.0,1.0,85.0,0.0,0.0,0.0,1.0,2.0,100.0,4209.0,0 22 | 0.0,1892.0,925.0,1.0,8.0,4.0,1.0,1.0,1.0,4.0,1.0,2.0,80.0,501.0,1 23 | 1.0,4117.0,125.0,1.0,9.0,4.0,25.0,0.0,0.0,0.0,0.0,2.0,0.0,196.0,0 24 | 1.0,3317.0,104.0,2.0,12.0,8.0,65.0,1.0,0.0,0.0,1.0,2.0,164.0,31286.0,1 25 | 1.0,2075.0,5085.0,1.0,5.0,4.0,29.0,0.0,0.0,0.0,0.0,2.0,140.0,185.0,0 26 | 1.0,5442.0,5.0,1.0,4.0,8.0,396.0,1.0,0.0,0.0,0.0,2.0,180.0,315.0,1 27 | 1.0,3367.0,2165.0,2.0,8.0,4.0,15.0,0.0,0.0,0.0,0.0,3.0,120.0,1.0,0 28 | 1.0,4317.0,225.0,2.0,3.0,5.0,75.0,1.0,0.0,0.0,0.0,2.0,560.0,1.0,0 29 | 1.0,495.0,7585.0,2.0,3.0,5.0,7585.0,1.0,1.0,15.0,1.0,2.0,0.0,5001.0,1 30 | 1.0,6008.0,145.0,2.0,1.0,1.0,18.0,1.0,1.0,15.0,1.0,2.0,0.0,1001.0,1 31 | 1.0,415.0,154.0,2.0,3.0,5.0,35.0,0.0,0.0,0.0,0.0,2.0,216.0,1.0,1 32 | 1.0,3367.0,125.0,2.0,9.0,4.0,1165.0,0.0,0.0,0.0,0.0,2.0,120.0,1.0,0 33 | 1.0,23.0,75.0,2.0,7.0,4.0,5.0,0.0,0.0,0.0,1.0,1.0,320.0,1.0,0 34 | 1.0,35.0,25.0,2.0,3.0,4.0,1.0,0.0,0.0,0.0,1.0,2.0,210.0,1.0,0 35 | 0.0,5708.0,335.0,2.0,3.0,5.0,1.0,1.0,0.0,0.0,1.0,2.0,252.0,2198.0,0 36 | 1.0,20.0,11045.0,2.0,8.0,4.0,2.0,0.0,0.0,0.0,1.0,2.0,136.0,1.0,0 37 | 1.0,1858.0,571.0,2.0,2.0,4.0,54.0,0.0,0.0,0.0,0.0,2.0,120.0,1.0,0 38 | 1.0,2567.0,325.0,2.0,8.0,8.0,229.0,0.0,1.0,1.0,1.0,2.0,416.0,22.0,0 39 | 0.0,1967.0,21.0,2.0,11.0,8.0,29.0,1.0,1.0,11.0,0.0,2.0,80.0,100.0,1 40 | 0.0,3675.0,5125.0,2.0,10.0,4.0,5.0,1.0,0.0,0.0,1.0,2.0,0.0,4001.0,1 41 | 1.0,31.0,2085.0,2.0,8.0,4.0,85.0,0.0,0.0,0.0,0.0,2.0,300.0,1.0,0 42 | 1.0,7342.0,1775.0,2.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,1.0,1 43 | 1.0,2325.0,12625.0,2.0,8.0,4.0,125.0,0.0,1.0,2.0,0.0,2.0,0.0,5553.0,0 44 | 1.0,5483.0,155.0,2.0,10.0,9.0,0.0,1.0,1.0,20.0,0.0,2.0,152.0,131.0,0 45 | 
1.0,5142.0,4.0,2.0,14.0,8.0,4.0,1.0,0.0,0.0,0.0,2.0,0.0,3001.0,1 46 | 1.0,6275.0,7.0,2.0,10.0,9.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,13.0,0 47 | 0.0,23.0,1175.0,2.0,14.0,8.0,5.0,1.0,1.0,2.0,1.0,2.0,300.0,552.0,1 48 | 1.0,5967.0,154.0,2.0,11.0,4.0,125.0,1.0,0.0,0.0,1.0,2.0,260.0,1.0,1 49 | 1.0,4858.0,65.0,2.0,11.0,8.0,6.0,1.0,0.0,0.0,1.0,2.0,350.0,1.0,1 50 | 1.0,2017.0,925.0,2.0,8.0,4.0,1665.0,1.0,1.0,3.0,1.0,2.0,40.0,29.0,1 51 | 0.0,6058.0,165.0,2.0,11.0,4.0,11.0,1.0,0.0,0.0,1.0,2.0,21.0,10562.0,1 52 | 1.0,215.0,975.0,2.0,8.0,4.0,25.0,1.0,0.0,0.0,0.0,2.0,140.0,1.0,0 53 | 1.0,5592.0,115.0,2.0,1.0,1.0,5.0,1.0,1.0,5.0,0.0,2.0,0.0,8852.0,1 54 | 0.0,285.0,1.0,2.0,11.0,4.0,1.0,1.0,1.0,2.0,1.0,2.0,167.0,501.0,0 55 | 1.0,2208.0,83.0,2.0,8.0,8.0,2165.0,0.0,0.0,0.0,1.0,2.0,128.0,1.0,1 56 | 1.0,2667.0,14585.0,2.0,3.0,5.0,0.0,0.0,0.0,0.0,1.0,2.0,178.0,1.0,0 57 | 1.0,23.0,625.0,1.0,6.0,4.0,125.0,1.0,0.0,0.0,0.0,2.0,180.0,2.0,0 58 | 0.0,3317.0,225.0,1.0,13.0,4.0,35.0,0.0,0.0,0.0,1.0,2.0,200.0,142.0,0 59 | 1.0,3492.0,25.0,2.0,9.0,4.0,0.0,1.0,0.0,0.0,1.0,2.0,239.0,201.0,1 60 | 1.0,3917.0,1625.0,2.0,8.0,4.0,15.0,1.0,1.0,10.0,0.0,2.0,186.0,4701.0,1 61 | 1.0,3642.0,75.0,1.0,2.0,4.0,585.0,0.0,0.0,0.0,0.0,2.0,240.0,4.0,0 62 | 1.0,2158.0,79.0,1.0,13.0,4.0,665.0,0.0,0.0,0.0,0.0,2.0,160.0,1.0,0 63 | 1.0,2567.0,221.0,1.0,6.0,4.0,4.0,1.0,0.0,0.0,0.0,2.0,188.0,1.0,0 64 | 1.0,2542.0,54.0,2.0,9.0,4.0,165.0,0.0,1.0,1.0,0.0,2.0,272.0,445.0,0 65 | 1.0,1633.0,4085.0,2.0,3.0,8.0,415.0,0.0,0.0,0.0,1.0,2.0,120.0,1.0,0 66 | 1.0,3517.0,25125.0,2.0,14.0,8.0,1625.0,1.0,1.0,1.0,1.0,2.0,515.0,501.0,1 67 | 1.0,2375.0,12.0,2.0,8.0,4.0,2085.0,0.0,0.0,0.0,0.0,1.0,80.0,1.0,0 68 | 1.0,2867.0,9335.0,2.0,11.0,8.0,5665.0,1.0,1.0,6.0,0.0,2.0,381.0,169.0,1 69 | 0.0,1792.0,1021.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,51.0,0 70 | 1.0,23.0,75.0,2.0,7.0,4.0,5.0,1.0,0.0,0.0,1.0,1.0,320.0,1.0,0 71 | -------------------------------------------------------------------------------- /docs/source/rst/tutorials/data/cpu.arff: 
-------------------------------------------------------------------------------- 1 | @relation cpu 2 | @attribute vendor {'adviser','amdahl','apollo','basf','bti','burroughs','c.r.d','cambex','cdc','dec','dg','formation','four-phase','gould','harris','honeywell','hp','ibm','ipl','magnuson','microdata','nas','ncr','nixdorf','perkin-elmer','prime','siemens','sperry','sratus','wang'} 3 | @attribute MYCT numeric 4 | @attribute MMIN numeric 5 | @attribute MMAX numeric 6 | @attribute CACH numeric 7 | @attribute CHMIN numeric 8 | @attribute CHMAX numeric 9 | @attribute class numeric 10 | @data 11 | 'adviser',125,256,6000,256,16,128,199 12 | 'amdahl',29,8000,32000,32,8,32,253 13 | 'amdahl',29,8000,32000,32,8,32,253 14 | 'amdahl',29,8000,32000,32,8,32,253 15 | 'amdahl',29,8000,16000,32,8,16,132 16 | 'amdahl',26,8000,32000,64,8,32,290 17 | 'amdahl',23,16000,32000,64,16,32,381 18 | 'amdahl',23,16000,32000,64,16,32,381 19 | 'amdahl',23,16000,64000,64,16,32,749 20 | 'amdahl',23,32000,64000,128,32,64,1238 21 | 'apollo',400,1000,3000,0,1,2,23 22 | 'apollo',400,512,3500,4,1,6,24 23 | 'basf',60,2000,8000,65,1,8,70 24 | 'basf',50,4000,16000,65,1,8,117 25 | 'bti',350,64,64,0,1,4,15 26 | 'bti',200,512,16000,0,4,32,64 27 | 'burroughs',167,524,2000,8,4,15,23 28 | 'burroughs',143,512,5000,0,7,32,29 29 | 'burroughs',143,1000,2000,0,5,16,22 30 | 'burroughs',110,5000,5000,142,8,64,124 31 | 'burroughs',143,1500,6300,0,5,32,35 32 | 'burroughs',143,3100,6200,0,5,20,39 33 | 'burroughs',143,2300,6200,0,6,64,40 34 | 'burroughs',110,3100,6200,0,6,64,45 35 | 'c.r.d',320,128,6000,0,1,12,28 36 | 'c.r.d',320,512,2000,4,1,3,21 37 | 'c.r.d',320,256,6000,0,1,6,28 38 | 'c.r.d',320,256,3000,4,1,3,22 39 | 'c.r.d',320,512,5000,4,1,5,28 40 | 'c.r.d',320,256,5000,4,1,6,27 41 | 'cdc',25,1310,2620,131,12,24,102 42 | 'cdc',25,1310,2620,131,12,24,102 43 | 'cdc',50,2620,10480,30,12,24,74 44 | 'cdc',50,2620,10480,30,12,24,74 45 | 'cdc',56,5240,20970,30,12,24,138 46 | 'cdc',64,5240,20970,30,12,24,136 47 | 
'cdc',50,500,2000,8,1,4,23 48 | 'cdc',50,1000,4000,8,1,5,29 49 | 'cdc',50,2000,8000,8,1,5,44 50 | 'cambex',50,1000,4000,8,3,5,30 51 | 'cambex',50,1000,8000,8,3,5,41 52 | 'cambex',50,2000,16000,8,3,5,74 53 | 'cambex',50,2000,16000,8,3,6,74 54 | 'cambex',50,2000,16000,8,3,6,74 55 | 'dec',133,1000,12000,9,3,12,54 56 | 'dec',133,1000,8000,9,3,12,41 57 | 'dec',810,512,512,8,1,1,18 58 | 'dec',810,1000,5000,0,1,1,28 59 | 'dec',320,512,8000,4,1,5,36 60 | 'dec',200,512,8000,8,1,8,38 61 | 'dg',700,384,8000,0,1,1,34 62 | 'dg',700,256,2000,0,1,1,19 63 | 'dg',140,1000,16000,16,1,3,72 64 | 'dg',200,1000,8000,0,1,2,36 65 | 'dg',110,1000,4000,16,1,2,30 66 | 'dg',110,1000,12000,16,1,2,56 67 | 'dg',220,1000,8000,16,1,2,42 68 | 'formation',800,256,8000,0,1,4,34 69 | 'formation',800,256,8000,0,1,4,34 70 | 'formation',800,256,8000,0,1,4,34 71 | 'formation',800,256,8000,0,1,4,34 72 | 'formation',800,256,8000,0,1,4,34 73 | 'four-phase',125,512,1000,0,8,20,19 74 | 'gould',75,2000,8000,64,1,38,75 75 | 'gould',75,2000,16000,64,1,38,113 76 | 'gould',75,2000,16000,128,1,38,157 77 | 'hp',90,256,1000,0,3,10,18 78 | 'hp',105,256,2000,0,3,10,20 79 | 'hp',105,1000,4000,0,3,24,28 80 | 'hp',105,2000,4000,8,3,19,33 81 | 'hp',75,2000,8000,8,3,24,47 82 | 'hp',75,3000,8000,8,3,48,54 83 | 'hp',175,256,2000,0,3,24,20 84 | 'harris',300,768,3000,0,6,24,23 85 | 'harris',300,768,3000,6,6,24,25 86 | 'harris',300,768,12000,6,6,24,52 87 | 'harris',300,768,4500,0,1,24,27 88 | 'harris',300,384,12000,6,1,24,50 89 | 'harris',300,192,768,6,6,24,18 90 | 'harris',180,768,12000,6,1,31,53 91 | 'honeywell',330,1000,3000,0,2,4,23 92 | 'honeywell',300,1000,4000,8,3,64,30 93 | 'honeywell',300,1000,16000,8,2,112,73 94 | 'honeywell',330,1000,2000,0,1,2,20 95 | 'honeywell',330,1000,4000,0,3,6,25 96 | 'honeywell',140,2000,4000,0,3,6,28 97 | 'honeywell',140,2000,4000,0,4,8,29 98 | 'honeywell',140,2000,4000,8,1,20,32 99 | 'honeywell',140,2000,32000,32,1,20,175 100 | 'honeywell',140,2000,8000,32,1,54,57 101 | 
'honeywell',140,2000,32000,32,1,54,181 102 | 'honeywell',140,2000,32000,32,1,54,181 103 | 'honeywell',140,2000,4000,8,1,20,32 104 | 'ibm',57,4000,16000,1,6,12,82 105 | 'ibm',57,4000,24000,64,12,16,171 106 | 'ibm',26,16000,32000,64,16,24,361 107 | 'ibm',26,16000,32000,64,8,24,350 108 | 'ibm',26,8000,32000,0,8,24,220 109 | 'ibm',26,8000,16000,0,8,16,113 110 | 'ibm',480,96,512,0,1,1,15 111 | 'ibm',203,1000,2000,0,1,5,21 112 | 'ibm',115,512,6000,16,1,6,35 113 | 'ibm',1100,512,1500,0,1,1,18 114 | 'ibm',1100,768,2000,0,1,1,20 115 | 'ibm',600,768,2000,0,1,1,20 116 | 'ibm',400,2000,4000,0,1,1,28 117 | 'ibm',400,4000,8000,0,1,1,45 118 | 'ibm',900,1000,1000,0,1,2,18 119 | 'ibm',900,512,1000,0,1,2,17 120 | 'ibm',900,1000,4000,4,1,2,26 121 | 'ibm',900,1000,4000,8,1,2,28 122 | 'ibm',900,2000,4000,0,3,6,28 123 | 'ibm',225,2000,4000,8,3,6,31 124 | 'ibm',225,2000,4000,8,3,6,31 125 | 'ibm',180,2000,8000,8,1,6,42 126 | 'ibm',185,2000,16000,16,1,6,76 127 | 'ibm',180,2000,16000,16,1,6,76 128 | 'ibm',225,1000,4000,2,3,6,26 129 | 'ibm',25,2000,12000,8,1,4,59 130 | 'ibm',25,2000,12000,16,3,5,65 131 | 'ibm',17,4000,16000,8,6,12,101 132 | 'ibm',17,4000,16000,32,6,12,116 133 | 'ibm',1500,768,1000,0,0,0,18 134 | 'ibm',1500,768,2000,0,0,0,20 135 | 'ibm',800,768,2000,0,0,0,20 136 | 'ipl',50,2000,4000,0,3,6,30 137 | 'ipl',50,2000,8000,8,3,6,44 138 | 'ipl',50,2000,8000,8,1,6,44 139 | 'ipl',50,2000,16000,24,1,6,82 140 | 'ipl',50,2000,16000,24,1,6,82 141 | 'ipl',50,8000,16000,48,1,10,128 142 | 'magnuson',100,1000,8000,0,2,6,37 143 | 'magnuson',100,1000,8000,24,2,6,46 144 | 'magnuson',100,1000,8000,24,3,6,46 145 | 'magnuson',50,2000,16000,12,3,16,80 146 | 'magnuson',50,2000,16000,24,6,16,88 147 | 'magnuson',50,2000,16000,24,6,16,88 148 | 'microdata',150,512,4000,0,8,128,33 149 | 'nas',115,2000,8000,16,1,3,46 150 | 'nas',115,2000,4000,2,1,5,29 151 | 'nas',92,2000,8000,32,1,6,53 152 | 'nas',92,2000,8000,32,1,6,53 153 | 'nas',92,2000,8000,4,1,6,41 154 | 'nas',75,4000,16000,16,1,6,86 155 | 
'nas',60,4000,16000,32,1,6,95 156 | 'nas',60,2000,16000,64,5,8,107 157 | 'nas',60,4000,16000,64,5,8,117 158 | 'nas',50,4000,16000,64,5,10,119 159 | 'nas',72,4000,16000,64,8,16,120 160 | 'nas',72,2000,8000,16,6,8,48 161 | 'nas',40,8000,16000,32,8,16,126 162 | 'nas',40,8000,32000,64,8,24,266 163 | 'nas',35,8000,32000,64,8,24,270 164 | 'nas',38,16000,32000,128,16,32,426 165 | 'nas',48,4000,24000,32,8,24,151 166 | 'nas',38,8000,32000,64,8,24,267 167 | 'nas',30,16000,32000,256,16,24,603 168 | 'ncr',112,1000,1000,0,1,4,19 169 | 'ncr',84,1000,2000,0,1,6,21 170 | 'ncr',56,1000,4000,0,1,6,26 171 | 'ncr',56,2000,6000,0,1,8,35 172 | 'ncr',56,2000,8000,0,1,8,41 173 | 'ncr',56,4000,8000,0,1,8,47 174 | 'ncr',56,4000,12000,0,1,8,62 175 | 'ncr',56,4000,16000,0,1,8,78 176 | 'ncr',38,4000,8000,32,16,32,80 177 | 'ncr',38,4000,8000,32,16,32,80 178 | 'ncr',38,8000,16000,64,4,8,142 179 | 'ncr',38,8000,24000,160,4,8,281 180 | 'ncr',38,4000,16000,128,16,32,190 181 | 'nixdorf',200,1000,2000,0,1,2,21 182 | 'nixdorf',200,1000,4000,0,1,4,25 183 | 'nixdorf',200,2000,8000,64,1,5,67 184 | 'perkin-elmer',250,512,4000,0,1,7,24 185 | 'perkin-elmer',250,512,4000,0,4,7,24 186 | 'perkin-elmer',250,1000,16000,1,1,8,64 187 | 'prime',160,512,4000,2,1,5,25 188 | 'prime',160,512,2000,2,3,8,20 189 | 'prime',160,1000,4000,8,1,14,29 190 | 'prime',160,1000,8000,16,1,14,43 191 | 'prime',160,2000,8000,32,1,13,53 192 | 'siemens',240,512,1000,8,1,3,19 193 | 'siemens',240,512,2000,8,1,5,22 194 | 'siemens',105,2000,4000,8,3,8,31 195 | 'siemens',105,2000,6000,16,6,16,41 196 | 'siemens',105,2000,8000,16,4,14,47 197 | 'siemens',52,4000,16000,32,4,12,99 198 | 'siemens',70,4000,12000,8,6,8,67 199 | 'siemens',59,4000,12000,32,6,12,81 200 | 'siemens',59,8000,16000,64,12,24,149 201 | 'siemens',26,8000,24000,32,8,16,183 202 | 'siemens',26,8000,32000,64,12,16,275 203 | 'siemens',26,8000,32000,128,24,32,382 204 | 'sperry',116,2000,8000,32,5,28,56 205 | 'sperry',50,2000,32000,24,6,26,182 206 | 
'sperry',50,2000,32000,48,26,52,227 207 | 'sperry',50,2000,32000,112,52,104,341 208 | 'sperry',50,4000,32000,112,52,104,360 209 | 'sperry',30,8000,64000,96,12,176,919 210 | 'sperry',30,8000,64000,128,12,176,978 211 | 'sperry',180,262,4000,0,1,3,24 212 | 'sperry',180,512,4000,0,1,3,24 213 | 'sperry',180,262,4000,0,1,3,24 214 | 'sperry',180,512,4000,0,1,3,24 215 | 'sperry',124,1000,8000,0,1,8,37 216 | 'sperry',98,1000,8000,32,2,8,50 217 | 'sratus',125,2000,8000,0,2,14,41 218 | 'wang',480,512,8000,32,0,0,47 219 | 'wang',480,1000,4000,0,0,0,25 220 | -------------------------------------------------------------------------------- /docs/source/rst/tutorials/survival.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd \n", 10 | "from scipy.io import arff\n", 11 | "from rulekit import RuleKit\n", 12 | "from rulekit.survival import SurvivalRules\n", 13 | "from rulekit.params import Measures\n", 14 | "\n", 15 | "from rulexai.explainer import RuleExplainer" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "# GBSG2" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Read data" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "dataset_path = \"./data/GBSG2.arff\"\n", 39 | "data = pd.DataFrame(arff.loadarff(dataset_path)[0])\n", 40 | "\n", 41 | "# code to change encoding of the file\n", 42 | "tmp_df = data.select_dtypes([object])\n", 43 | "tmp_df = tmp_df.stack().str.decode(\"utf-8\").unstack()\n", 44 | "for col in tmp_df:\n", 45 | " data[col] = tmp_df[col].replace({\"?\": None})\n", 46 | "\n", 47 | "x = data.drop([\"survival_status\"], axis=1)\n", 48 | "y = data[\"survival_status\"]" 49 | ] 50 | }, 51 | { 52 
| "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "## Train RuleKit model" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "" 67 | ] 68 | }, 69 | "execution_count": 4, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "# RuleKit\n", 76 | "RuleKit.init()\n", 77 | "\n", 78 | "srv = SurvivalRules(survival_time_attr=\"survival_time\")\n", 79 | "srv.fit(values=x, labels=y)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "### Rules" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "IF pnodes = (-inf, 3.50) THEN survival_status = {NaN} (p = 304.0, n = 0.0, P = 564.0, N = 0.0, weight = 0.9999999999999998, pvalue = 2.220446049250313e-16)\n", 99 | "IF pnodes = (-inf, 17.50) AND progrec = (-inf, 9.50) AND age = <41.50, 52.50) AND estrec = <0.50, 29) THEN survival_status = {NaN} (p = 21.0, n = 0.0, P = 564.0, N = 0.0, weight = 0.9999999999909083, pvalue = 9.09172737095787e-12)\n", 100 | "IF pnodes = <4.50, 19) AND progrec = (-inf, 11.50) AND age = <41.50, 64.50) AND estrec = <0.50, 41) THEN survival_status = {NaN} (p = 33.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n", 101 | "IF pnodes = <4.50, inf) AND progrec = (-inf, 25.50) THEN survival_status = {NaN} (p = 113.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n", 102 | "IF pnodes = <4.50, inf) AND progrec = (-inf, 99) THEN survival_status = {NaN} (p = 156.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n", 103 | "IF pnodes = <5.50, inf) AND progrec = (-inf, 135) THEN survival_status = {NaN} (p = 144.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n", 104 | "IF pnodes = <4.50, inf) AND progrec = (-inf, 233) 
THEN survival_status = {NaN} (p = 185.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n", 105 | "IF pnodes = (-inf, 4.50) AND progrec = <9, inf) AND age = <39.50, inf) THEN survival_status = {NaN} (p = 245.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n", 106 | "IF progrec = <107, inf) THEN survival_status = {NaN} (p = 168.0, n = 0.0, P = 564.0, N = 0.0, weight = 0.9999999989621143, pvalue = 1.0378856662995872e-09)\n", 107 | "IF pnodes = <3.50, inf) AND progrec = (-inf, 105.50) THEN survival_status = {NaN} (p = 195.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "for rule in srv.model.rules:\n", 113 | " print(rule, rule.stats)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## RuleXAI" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 6, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "" 132 | ] 133 | }, 134 | "execution_count": 6, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "explainer = RuleExplainer(model=srv, X=x, y=y, type=\"survival\")\n", 141 | "explainer.explain()" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "### Feature importance " 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 7, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/html": [ 159 | "
\n", 160 | "\n", 173 | "\n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | "
attributesimportances
2pnodes460.222804
3progrec251.499862
0age20.523849
1estrec13.347720
\n", 204 | "
" 205 | ], 206 | "text/plain": [ 207 | " attributes importances\n", 208 | "2 pnodes 460.222804\n", 209 | "3 progrec 251.499862\n", 210 | "0 age 20.523849\n", 211 | "1 estrec 13.347720" 212 | ] 213 | }, 214 | "execution_count": 7, 215 | "metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "explainer.feature_importances_" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "### Condition importance" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 8, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "data": { 237 | "text/html": [ 238 | "
\n", 239 | "\n", 252 | "\n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | "
conditionsimportances
0pnodes = <4.5, inf)207.268572
1pnodes = (-inf, 3.5)67.394775
2pnodes = <5.5, inf)64.254026
3pnodes = <3.5, inf)64.104973
4progrec = (-inf, 25.5)48.923100
5progrec = <107.0, inf)37.252374
6progrec = (-inf, 105.5)33.962572
7progrec = (-inf, 99.0)33.423755
8pnodes = (-inf, 4.5)32.835122
9progrec = (-inf, 135.0)25.353218
10progrec = (-inf, 11.5)23.663185
11progrec = (-inf, 9.5)23.506762
12pnodes = <4.5, 19.0)18.150272
13progrec = <9.0, inf)13.146344
14progrec = (-inf, 233.0)12.268552
15estrec = <0.5, 29.0)10.450381
16age = <41.5, 64.5)9.275232
17age = <41.5, 52.5)8.077389
18pnodes = (-inf, 17.5)6.215064
19age = <39.5, inf)3.171229
20estrec = <0.5, 41.0)2.897339
\n", 368 | "
" 369 | ], 370 | "text/plain": [ 371 | " conditions importances\n", 372 | "0 pnodes = <4.5, inf) 207.268572\n", 373 | "1 pnodes = (-inf, 3.5) 67.394775\n", 374 | "2 pnodes = <5.5, inf) 64.254026\n", 375 | "3 pnodes = <3.5, inf) 64.104973\n", 376 | "4 progrec = (-inf, 25.5) 48.923100\n", 377 | "5 progrec = <107.0, inf) 37.252374\n", 378 | "6 progrec = (-inf, 105.5) 33.962572\n", 379 | "7 progrec = (-inf, 99.0) 33.423755\n", 380 | "8 pnodes = (-inf, 4.5) 32.835122\n", 381 | "9 progrec = (-inf, 135.0) 25.353218\n", 382 | "10 progrec = (-inf, 11.5) 23.663185\n", 383 | "11 progrec = (-inf, 9.5) 23.506762\n", 384 | "12 pnodes = <4.5, 19.0) 18.150272\n", 385 | "13 progrec = <9.0, inf) 13.146344\n", 386 | "14 progrec = (-inf, 233.0) 12.268552\n", 387 | "15 estrec = <0.5, 29.0) 10.450381\n", 388 | "16 age = <41.5, 64.5) 9.275232\n", 389 | "17 age = <41.5, 52.5) 8.077389\n", 390 | "18 pnodes = (-inf, 17.5) 6.215064\n", 391 | "19 age = <39.5, inf) 3.171229\n", 392 | "20 estrec = <0.5, 41.0) 2.897339" 393 | ] 394 | }, 395 | "execution_count": 8, 396 | "metadata": {}, 397 | "output_type": "execute_result" 398 | } 399 | ], 400 | "source": [ 401 | "explainer.condition_importances_" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "### Local explainability" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": 9, 414 | "metadata": {}, 415 | "outputs": [ 416 | { 417 | "name": "stdout", 418 | "output_type": "stream", 419 | "text": [ 420 | "Example:\n", 421 | "horTh no\n", 422 | "age 70.0\n", 423 | "menostat Post\n", 424 | "tsize 21.0\n", 425 | "tgrade II\n", 426 | "pnodes 3.0\n", 427 | "progrec 48.0\n", 428 | "estrec 66.0\n", 429 | "survival_time 1814.0\n", 430 | "survival_status 1.0\n", 431 | "Name: 0, dtype: object\n", 432 | "\n", 433 | "Rules that covers this example:\n", 434 | "IF pnodes = (-inf, 3.5) THEN survival_status = {NaN}\n", 435 | "IF pnodes = (-inf, 4.5) AND progrec = <9.0, inf) AND age = 
<39.5, inf) THEN survival_status = {NaN}\n", 436 | "\n", 437 | "Importances of the conditions from rules covering the example\n", 438 | " conditions importances\n", 439 | "0 pnodes = (-inf, 3.5) 67.394775\n", 440 | "1 pnodes = (-inf, 4.5) 32.835122\n", 441 | "2 progrec = <9.0, inf) 13.146344\n", 442 | "3 age = <39.5, inf) 3.171229\n" 443 | ] 444 | }, 445 | { 446 | "data": { 447 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcoAAAEWCAYAAADmYNeIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAgJ0lEQVR4nO3deZgdZZ328e8NURBRBIJKEIyIbCoEAigCDq4j6uA68moEHXWAEZxxYRx1dAIqM+o7bq87IKAjIoqCDM5IfEGQZRQSDPviFgVcAAkCggjhN3+cp/XQdFefTjrp7uT7ua5znVNVTz31q5Nz9Z2nqk6dVBWSJGlka012AZIkTWUGpSRJHQxKSZI6GJSSJHUwKCVJ6mBQSpLUwaCUViNJXpvkvL7pO5Js2dH+iiR7r4rapOnKoJRWgSSvSrKwBdevkvx3kj1X9narav2q+mmr4fgk7x+2/IlVdfZEbzfJ2UneMNH9Lo+R9lsaD4NSWsmSvBX4GPCvwKOALYBPAy+axLLWCEnWnuwaNP0ZlNJKlGQD4L3AIVX1jar6fVXdU1X/WVX/2Nqsk+RjSX7ZHh9Lsk5btneS65O8LcmNbTT6N339b5zktCS3JbkQePyw7VeSrZIcCMwD3t5Gtf/Zli9J8uwVrWOM92Bo3bf3rfviJM9Pcm2SW5K8q6/94UlOTnJSktuTXJxkx77l27UR663t0PG+fcuOT/KZJP+V5PfA60fZ73ck+Unr/8okL+nr47VJzkvy70mWJvlZkn36lm+U5Lj2Hi1NcmrfshcmWdxquyDJDoO8R5raDEpp5dodWBc4paPNPwNPBeYAOwK7Ae/uW/5oYANgM3p/+D+VZMO27FPAH4BNgde1xwNU1VHACcCH2uHYv5rgOsbyaHrvw2bAvwBHA68G5gJ7Ae9J8ri+9i8CvgZsBHwZODXJg5I8CPhPYAHwSOBNwAlJtulb91XAkcDDgC+Ost8/advdADgC+FKSTfv6eApwDTAT+BDw+SRpy/4DWA94YqvhowBJdgKOBQ4CNgY+B5w29J8NTV8GpbRybQzcXFX3drSZB7y3qm6sqpvo/eHev2/5PW35PVX1X8AdwDbtsOLLgH9pI9XLgS+sQK3LVceAfd8DHFlV9wBfoRdAH6+q26vqCuBKeuE8ZFFVndzaf4ReyD61PdYHPlBVf6yqs4DTgVf2rfvNqjq/qu6rqj+MVExVfa2qftnanAT8iN5/DIb8vKqOrqpl9N7TTYFHtTDdBzi4qpa29+Kcts6BwOeq6gdVtayqvgDc3WrWNGZQSivXb4GZSWZ0tJkF/Lxv+udt3p/6GBa0d9ILi02AGcB1w9ZdXstbxyB+20IH4K72/Ju+5XcN6+tP+1RV9wHXt1pmAde1ef11bjbSuqNJckDfIdJbgSfRC+8hv+7b/p3t5frA5sAtVbV0hG4fC7xtqM/W7+bc/z3UNGRQSivX/9AbVby4o80v6f2RHbJFmzeWm4B76f0x7l93NGP9VNDy1rEy/GmfkqwFPKbV8ktg8zZvyBbADX3Tw/fzftNJHkvv0O+hwMZV9QjgciCM7TpgoySPGGXZkVX1iL7HelV14gD9agozKKWVqKp
+R++c3KfaBSzrtXNt+yT5UGt2IvDuJJskmdnaf2mAvpcB3wAOb/1uD7ymY5XfAKN+p3J561hJ5iZ5aRuJv5nefza+D/yA3kj27e193Bv4K3qHc0czfL8fSi88bwJoFyU9aZCiqupXwH8Dn06yYavh6W3x0cDBSZ6SnocmeUGShw20x5qyDEppJauqDwNvpXdhzE30Rh6HAqe2Ju8HFgKXApcBF7d5gziU3iHBXwPHA8d1tP08sH07LHjqCMtXpI6J9k1gP2ApvfOkL23nA/9ILxj3AW6m9zWbA6rq6o6+7rffVXUl8GF6o/3fAE8Gzh9HbfvTO+d6NXAjvSCnqhYCfwt8stX9Y+C14+hXU1T84WZJU0mSw4GtqurVk12LBI4oJUnqZFBKktTBQ6+SJHVwRClJUoeuL0FrGpk5c2bNnj17ssuQpGll0aJFN1fVJl1tDMrVxOzZs1m4cOFklyFJ00qSMe9m5aFXSZI6GJSSJHUwKCVJ6mBQSpLUwaCUJKmDQSlJUgeDUpKkDgalJEkdvNfraiKzUhw02VVI0qpV81csw5Isqqpduto4opQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSepgUEqS1MGglCSpg0EpSVIHg1KSpA4GpSRJHQxKSZI6GJSSJHUwKCVJ6mBQSpLUwaCUJKnDlAjKJEuSzFzF23xIknOSrD3CsoOTHDBAHycmuTTJWzravKi1WZxkYZI9R2l3dpJrWrvFSR7Z5h+a5HXj2TdJ0sSZMdkFTKLXAd+oqmXDF1TVZ8daOcmjgV2raqsxmp4JnFZVlWQH4KvAtqO0nVdVC4fNOxY4vz1LklaxgUeUSWYnuTrJCUmuSnJykvXasiVJjkhycZLLkmzb5m+U5NQ2ovp+CwqSbJxkQZIrkhwDpG87r05yYRtVfS7J2u1xfJLLW/+jjuDGYR7wzVH29fAkh7XXZyf5YKvp2iR7tWYLgM1anXuN1A9AVd1RVdUmHwrUaG1HWf9OYEmS3cazniRpYoz30Os2wKerajvgNuCNfcturqqdgc8Ah7V5RwA/rKodgHcBX2zz5wPnVdUTgVOALQCSbAfsB+xRVXOAZfQCbQ6wWVU9qaqeDBw3vLAk8/oOW/Y/Th6h7YOBLatqyYD7PaOqdgPe3GoH2Bf4SVXNqapzu1ZO8pIkVwPfojeSHc1xreb3JEnf/IXAA8I4yYHtcO5C7hxwTyRJ4zLeoLyuqs5vr78E9J9v+0Z7XgTMbq/3BP4DoKrOAjZO8nDg6W19qupbwNLW/lnAXOCiJIvb9JbAT4Etk3wiyfPohfT9VNUJLbSGP14+wn7MBG4dx36PtG8Dq6pTqmpb4MXA+0ZpNq/9J2Cv9ti/b9mNwKwR+j2qqnapql1Yb7xVSZIGMd5zlMMPG/ZP392ely1Hv0MCfKGq3vmABcmOwF8CBwOvYNjILMk84B9H6PPHI4TlXcC6feseCbwAoI1kh5uIfaOqvpdkyyQzq+rmYctuaM+3J/kysBt/HoGv22qWJK1i4x1RbpFk9/b6VcB5Y7Q/l96hU5LsTe/w7G3A99r6JNkH2LC1PxN4ed8VnxsleWy7Inatqvo68G5g5+EbGs+IsqqWAmsnWbdN//NQ+4HfiWHa1amHjjB/q6HDqEl2BtYBfjuszYyhq36TPAh4IXB5X5Oth01LklaR8Y6OrgEOSXIscCW985FdDgeOTXIpcCfwmjb/CODEJFcAFwC/AKiqK5O8G1iQZC3gHuAQeqOp49o8gAeMOJfDAnqHhv//BPQFvStZzx9h/suAA5LcQ28/9hu6uCfJ4hbO6wBntJBcu9V0dF8fe9B7LyVJq1j+fEHmGA2T2cDpVfWklVrRKtJGd2+pqv3HbDxYf6cDL62qP05Ef3397gS8daw6MyvFQRO5ZUma+mr+uL5I8ABJFlXVLl1tpsQNByZDVV0MfHekGw4sZ38vnOiQbGYC71kJ/UqSBjDwodf2VYrVYjQ5pKq
m/Jf4q+o7k12DJK3J1tgRpSRJgzAoJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSepgUEqS1MGglCSpg0EpSVKH8f5ws6aoubPmsnD+wskuQ5JWO44oJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSeqQqprsGjQBMivFQZNdhdYENd+/GVp9JFlUVbt0tXFEKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSepgUEqS1MGglCSpg0EpSVIHg1KSpA4GpSRJHQxKSZI6GJSSJHUwKCVJ6mBQSpLUwaCUJKmDQSlJUocpEZRJliSZuYq3+ZAk5yRZe4RlByc5YIA+TkxyaZK3DNB21yT3Jnn5KMvPTnJNksXt8cg2/9AkrxtknyRJE2/GZBcwiV4HfKOqlg1fUFWfHWvlJI8Gdq2qrQZouzbwQWDBGE3nVdXCYfOOBc5vz5KkVWzgEWWS2UmuTnJCkquSnJxkvbZsSZIjklyc5LIk27b5GyU5tY26vp9khzZ/4yQLklyR5Bggfdt5dZIL26jqc0nWbo/jk1ze+h9zBDeAecA3R9nXw5Mc1l6fneSDraZrk+zVmi0ANmt17jVSP33eBHwduHG8RVbVncCSJLuNd11J0oob76HXbYBPV9V2wG3AG/uW3VxVOwOfAQ5r844AflhVOwDvAr7Y5s8HzquqJwKnAFsAJNkO2A/Yo6rmAMvoBdocYLOqelJVPRk4bnhhSeb1Hbbsf5w8QtsHA1tW1ZIB93tGVe0GvLnVDrAv8JOqmlNV5462YpLNgJe092Usx7Wa35MkffMXAg8I4yQHJlmYZCF3DrgnkqRxGW9QXldV57fXXwL27Fv2jfa8CJjdXu8J/AdAVZ0FbJzk4cDT2/pU1beApa39s4C5wEVJFrfpLYGfAlsm+USS59EL6fupqhNaaA1/jHROcCZw6zj2e6R9G9THgH+qqvvGaDev/Sdgr/bYv2/ZjcCs4StU1VFVtUtV7cJ646xKkjSQ8Z6jrI7pu9vzsuXod0iAL1TVOx+wINkR+EvgYOAV9M4x9i+fB/zjCH3+eISwvAtYt2/dI4EXALSR7HArsm+7AF9pA8SZwPOT3FtVp/Y3qqob2vPtSb4M7MafR+DrtpolSavYeEeUWyTZvb1+FXDeGO3PpXfolCR70zs8exvwvbY+SfYBNmztzwRe3nfF50ZJHtuuiF2rqr4OvBvYefiGxjOirKqlwNpJ1m3T/zzUfuB3Yph2deqhI2zrcVU1u6pmAycDbxwekklmDF31m+RBwAuBy/uabD1sWpK0iow3KK8BDklyFb1wG+u82+HA3CSXAh8AXtPmHwE8PckVwEuBXwBU1ZX0gnBBW+c7wKbAZsDZ7XDsl4AHjDiXwwLuf+h4RW0L/HY8K7T9AVgHOKPt82LgBuDovqZ70HsvJEmrWKqGH00dpWEyGzi9qp60UitaRZLsDLylqvYfs/Fg/Z0OvLSq/jgR/fX1uxPw1rHqzKwUB03klqWR1fzB/mZI00GSRVW1S1ebKXHDgclQVRcD3x3phgPL2d8LJzokm5nAe1ZCv5KkAQx8YUr7KsVqMZocUlVT/kv8VeUhV0maRGvsiFKSpEEYlJIkdTAoJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSepgUEqS1MGglCSpw8C/HqKpbe6suSycv3Cyy5Ck1Y4jSkmSOhiUkiR1MCglSepgUEqS1MGglCSpg0EpSVIHg1KSpA4GpSRJHQxKSZI6pKomuwZNgMxKcdBkV7Hq1Xw/v5KWX5JFVbVLVxtHlJIkdTAoJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSepgUEqS1MGglCSpg0EpSVIHg1KSpA4GpSRJHQxKSZI6GJSSJHUwKEeQ5INJLm+P/UZps06Sk5L
8OMkPksweoN8LBmizV5IrkixOsnmSby/HLkiSJsiEBGWStVdw/RkTUccK1rBRe34BsDMwB3gKcFiSh4+wyuuBpVW1FfBR4INjbaOqnjZAKfOAf6uqOVV1HfCrJHsMtheSpInWGZRJZie5OskJSa5KcnKS9dqyJW3kdTHw10lemeSyNgr7YF8fr09ybZILkxyd5JNt/vFJPpvkB8CHkjw+ybeTLEpybpJtW7tHJTklySXtMUjYDCTJjCT7JjkNOKXN3h74XlXdW1W/By4FnjfC6i8CvtBenww8K0nG2N4d7XnvJGe393Po/U2SNwCvAN6X5IS22qn0wlOSNAkGGcltA7y+qs5PcizwRuDf27LfVtXOSWYB3wfmAkuBBUleDFwIvIfeCO124Czgkr6+HwM8raqWJTkTOLiqfpTkKcCngWcC/w84p6pe0kau6w8vMMlJrc7hPlJVXxyh/Vb0RoQvBy4APlxV57TFlwDzk3wYWA94BnDlCH1vBlwHUFX3JvkdsDFw8whtR7IT8ETgl8D5wB5VdUySPYHTq+rk1m4h8P6ROkhyIHAgABsMuFVJ0rgMEpTXVdX57fWXgL/nz0F5UnveFTi7qm4CaKOhp7dl51TVLW3+14Ct+/r+WgvJ9YGnAV/rG5St056fCRwAUFXLgN8NL7CqRjyPOJIkL2t1HwnsXFW3D+trQZJd6QXoTcD/AMsG7X8cLqyq61tNi4HZwHkjtLsRmDVSB1V1FHAUQGalVkKNkrTGGyQoh/8B7p/+/Qpuf2j9tYBbq2rO8nQyzhHld4B/AP4G2D3JccApVfWHoQZVdSS9ICXJl4FrR+j7BmBz4Pp2jnUD4LfjKPvuvtfLGP3fYl3grnH0K0maQINczLNFkt3b61cx8qjnQuAvksxsh0dfCZwDXNTmb9jC5GUjbaCqbgN+luSvAdr5uh3b4jOBv2vz107ygIOMVbVfu/hl+OMBh12r6raq+lRV7QL8E7AncFWSD/VtY+P2egdgB2DBCGWfBrymvX45cFZVVZLN2mHkibI1cPkE9idJGodBgvIa4JAkVwEbAp8Z3qCqfgW8A/guvXN8i6rqm1V1A/Cv9IL0fGAJIxw6beYBr09yCXAFvYtloDf6e0aSy4BF9C62mRBV9cOqOgTYDji7zX4QcG6SK+kd1nx1Vd0LkOS9SfZt7T4PbJzkx8Bb6e0/wKbAvRNVI71zpN+awP4kSeOQqtFPbbXvBp5eVU9a7g0k61fVHW1EeQpwbFWdMtZ601WSQ4FfVNVpE9Tf94AXVdXSznazUhw0EVucXmq+p2YlLb8ki9oRxlGtiu8vHp7k2fTOtS2g93WH1VZVfXKi+kqyCb3zrJ0hKUlaeTqDsqqWAMs9mmx9HLYi66/J2lXEp052HZK0JvMWdpIkdTAoJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSepgUEqS1MGglCSpg0EpSVKHVfEzW1oF5s6ay8L5Cye7DEla7TiilCSpg0EpSVIHg1KSpA4GpSRJHQxKSZI6GJSSJHUwKCVJ6mBQSpLUwaCUJKlDqmqya9AEyKwUBy3fujXfz4CkNVOSRVW1S1cbR5SSJHUwKCVJ6mBQSpLUwaCUJKmDQSlJUgeDUpKkDgalJEkdDEpJkjoYlJIkdTAoJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1WGODMsnBSS5LsjjJeUm2b/MfnOS4tuySJHuPsv7hSW5o6y9O8vwBtnnBAG32SnJF63PzJN8e775JkibOGheUSTZsL79cVU+uqjnAh4CPtPl/C1BVTwaeA3w4yWjv00erak57/NdY266qpw1Q4jzg31qf1wG/SrLHAOtJklaClRKUSU5NsqiNjA7sm//6JNcmuTDJ0Uk+2eZvkuTrSS5qjwkNhiSPTHJYksuB/QCq6ra+Jg8Fqr3eHjirtbkRuBXYZYLquKM9753k7CQ
nJ7k6yQnpeQPwCuB9SU5oq51KLzwlSZNgxkrq93VVdUuShwAXJfk6sA7wHmBn4HZ6YXRJa/9xeqOz85JsAZwBbNffYZJtgJNG2d7eVXXrsPZrAc8F3kAv/L4MPK+qru9rcwjwVuDBwDPb7EuAfZOcCGwOzG3PF46w3UOTHAAsBN5WVUs735X72wl4IvBL4Hxgj6o6JsmewOlVdXJrtxB4/0gdtP+E9P4jssE4tixJGtjKCsq/T/KS9npz4AnAo4FzquoWgCRfA7ZubZ4NbJ9kaP2HJ1m/qu4YmlFV1wBzxlHDqfRC+Q3AGVVVwxtU1aeATyV5FfBu4DXAsfRCeiHwc+ACYNkI/X8GeB+9kej7gA8DrxtHfRcOhXaSxcBs4LwR2t0IzBqpg6o6CjgKILPygP2TJK24CQ/KdvHLs4Hdq+rOJGcD646x2lrAU6vqDx39jmtECbyT3vnGTwDfSXJcVV00yvpfoRd8VNW9wFv6tnsBcO3wFarqN31tjgZOH632Udzd93oZo/9brAvcNc6+JUkTZGWco9wAWNpCclvgqW3+RcBfJNkwyQzgZX3rLADeNDSRZM7wTqvqmr4LZ4Y/bh2h/RVV9WZ6hzfPAY5McmmS57ZtPKGv+QuAH7X56yV5aHv9HODeqrpyeP9JNu2bfAlweZu/WZIzu96gcdp6qG9J0qq3Mg69fhs4OMlVwDXA9wGq6oYk/0rvXN8twNXA79o6f0/vEOilrabvAQdPRDFV9Ud6I9GTkjwWmNkWHZrk2cA9wFJ6h10BHgmckeQ+4AZg/6G+khwDfLaqFgIfaoFewBLgoNZsU+Deiai9eQbwrQnsT5I0Dhnh1N3K21g779hGlKcAx1bVKausgFUgyaHAL6rqtAnq73vAi8a6UCizUn+K6nGq+Z7elLRmSrKoqjq/2bCyLuYZzeFtFLcuvcOtp67i7a90VfXJieorySbAR8Z5Na0kaQKt0qCsqsNW5famu6q6idXwPxOSNJ2scXfmkSRpPAxKSZI6GJSSJHUwKCVJ6mBQSpLUwaCUJKmDQSlJUgeDUpKkDgalJEkdDEpJkjoYlJIkdTAoJUnqYFBKktRhVf/MllaSubPmsnD+wskuQ5JWO44oJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSeqQqprsGjQBktwOXDPZdSynmcDNk13ECpjO9Vv75JnO9a9OtT+2qjbpWsFb2K0+rqmqXSa7iOWRZOF0rR2md/3WPnmmc/1rWu0eepUkqYNBKUlSB4Ny9XHUZBewAqZz7TC967f2yTOd61+javdiHkmSOjiilCSpg0EpSVIHg3KaS/K8JNck+XGSd0x2PWNJcmySG5Nc3jdvoyTfSfKj9rzhZNY4miSbJ/lukiuTXJHkH9r8KV9/knWTXJjkklb7EW3+45L8oH1+Tkry4MmudTRJ1k7ywySnt+npVPuSJJclWZxkYZs35T83AEkekeTkJFcnuSrJ7tOo9m3aez70uC3Jm8dbv0E5jSVZG/gUsA+wPfDKJNtPblVjOh543rB57wDOrKonAGe26anoXuBtVbU98FTgkPZ+T4f67waeWVU7AnOA5yV5KvBB4KNVtRWwFHj95JU4pn8Aruqbnk61Azyjqub0fYdvOnxuAD4OfLuqtgV2pPdvMC1qr6pr2ns+B5gL3Amcwnjrryof0/QB7A6c0Tf9TuCdk13XAHXPBi7vm74G2LS93pTezRMmvc4B9uObwHOmW/3AesDFwFPo3aFkxkifp6n0AB7T/qA9EzgdyHSpvdW3BJg5bN6U/9wAGwA/o134OZ1qH2Ffngucvzz1O6Kc3jYDruubvr7Nm24eVVW/aq9/DTxqMosZRJLZwE7AD5gm9bdDl4uBG4HvAD8Bbq2qe1uTqfz5+RjwduC+Nr0x06d2gAIWJFmU5MA2bzp8bh4H3AQc1w57H5PkoUyP2of7P8CJ7fW46jcoNaVU7794U/o7S0n
WB74OvLmqbutfNpXrr6pl1TsE9RhgN2Dbya1oMEleCNxYVYsmu5YVsGdV7UzvNMkhSZ7ev3AKf25mADsDn6mqnYDfM+ww5RSu/U/a+et9ga8NXzZI/Qbl9HYDsHnf9GPavOnmN0k2BWjPN05yPaNK8iB6IXlCVX2jzZ429QNU1a3Ad+kdrnxEkqF7Pk/Vz88ewL5JlgBfoXf49eNMj9oBqKob2vON9M6R7cb0+NxcD1xfVT9o0yfTC87pUHu/fYCLq+o3bXpc9RuU09tFwBPa1X8Ppndo4bRJrml5nAa8pr1+Db1zf1NOkgCfB66qqo/0LZry9SfZJMkj2uuH0Du3ehW9wHx5azYla6+qd1bVY6pqNr3P+FlVNY9pUDtAkocmedjQa3rnyi5nGnxuqurXwHVJtmmzngVcyTSofZhX8ufDrjDO+r0zzzSX5Pn0zt+sDRxbVUdObkXdkpwI7E3vp25+A8wHTgW+CmwB/Bx4RVXdMkkljirJnsC5wGX8+VzZu+idp5zS9SfZAfgCvc/JWsBXq+q9SbakN0rbCPgh8OqqunvyKu2WZG/gsKp64XSpvdV5SpucAXy5qo5MsjFT/HMDkGQOcAzwYOCnwN/QPkNM8drhT/85+QWwZVX9rs0b13tvUEqS1MFDr5IkdTAoJUnqYFBKktTBoJQkqYNBKUlSB4NSWgMluWMVb292kletym1KE8WglLRStbvnzAYMSk1LBqW0Bkuyd5JzknwzyU+TfCDJvPbblZcleXxrd3ySzyZZmOTadv/Vod+5PK61/WGSZ7T5r01yWpKz6P3qxweAvdpvAr6ljTDPTXJxezytr56z+37/8IR2RySS7JrkgvR+U/PCJA9rN3r/v0kuSnJpkoMm5Y3Uam3G2E0kreZ2BLYDbqF355Vjqmq39H6Y+k3Am1u72fTuUfp44LtJtgIOoXdf6Scn2ZbeL2Rs3drvDOxQVbf031EHIMl6wHOq6g9JnkDv9mJDv9O4E/BE4JfA+cAeSS4ETgL2q6qLkjwcuIveb1D+rqp2TbIOcH6SBVX1s4l/m7SmMiglXTT0k0NJfgIsaPMvA57R1+6rVXUf8KMkP6X36yN7Ap8AqKqrk/wcGArK73TcFuxBwCfb7dGW9a0DcGFVXd/qWUwvoH8H/KqqLmrbuq0tfy6wQ5Khe75uADyB3m8oShPCoJTUf3/U+/qm7+P+fyOG3+9yrPtf/r5j2Vvo3et3R3qngP4wSj3L6P47FeBNVXXGGLVIy81zlJIG9ddJ1mrnLbek9yvx5wLzANoh1y3a/OFuBx7WN70BvRHifcD+9G7W3uUaYNMku7ZtPaxdJHQG8Hft589IsnW7CbY0YRxRShrUL4ALgYcDB7fzi58GPpPkMuBe4LVVdXe7/qbfpcCyJJcAxwOfBr6e5ADg23SPPqmqPybZD/hE+5mwu4Bn0/tVi9nAxe2in5uAF0/Avkp/4q+HSBpTkuOB06vq5MmuRVrVPPQqSVIHR5SSJHVwRClJUgeDUpKkDgalJEkdDEpJkjoYlJIkdfhf4CFoQbMfVOgAAAAASUVORK5CYII=", 448 | "text/plain": [ 449 | "
" 450 | ] 451 | }, 452 | "metadata": { 453 | "needs_background": "light" 454 | }, 455 | "output_type": "display_data" 456 | }, 457 | { 458 | "data": { 459 | "text/html": [ 460 | "
\n", 461 | "\n", 474 | "\n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | "
conditionsimportances
0pnodes = (-inf, 3.5)67.394775
1pnodes = (-inf, 4.5)32.835122
2progrec = <9.0, inf)13.146344
3age = <39.5, inf)3.171229
\n", 505 | "
" 506 | ], 507 | "text/plain": [ 508 | " conditions importances\n", 509 | "0 pnodes = (-inf, 3.5) 67.394775\n", 510 | "1 pnodes = (-inf, 4.5) 32.835122\n", 511 | "2 progrec = <9.0, inf) 13.146344\n", 512 | "3 age = <39.5, inf) 3.171229" 513 | ] 514 | }, 515 | "execution_count": 9, 516 | "metadata": {}, 517 | "output_type": "execute_result" 518 | } 519 | ], 520 | "source": [ 521 | "explainer.local_explainability(x.iloc[0, :], pd.DataFrame(y).iloc[0, :], plot = True)" 522 | ] 523 | } 524 | ], 525 | "metadata": { 526 | "interpreter": { 527 | "hash": "52a883ffde2ee8dab628074f134ac5e542adeed8306ab19e0ac3d240604a9b31" 528 | }, 529 | "kernelspec": { 530 | "display_name": "rulexai", 531 | "language": "python", 532 | "name": "rulexai" 533 | }, 534 | "language_info": { 535 | "codemirror_mode": { 536 | "name": "ipython", 537 | "version": 3 538 | }, 539 | "file_extension": ".py", 540 | "mimetype": "text/x-python", 541 | "name": "python", 542 | "nbconvert_exporter": "python", 543 | "pygments_lexer": "ipython3", 544 | "version": "3.8.5" 545 | }, 546 | "orig_nbformat": 4 547 | }, 548 | "nbformat": 4, 549 | "nbformat_minor": 2 550 | } 551 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas >= 1.5.0, < 2.3.0 2 | numpy ~= 1.26.4 3 | matplotlib ~= 3.8.3 4 | rulekit ~= 1.7.6 5 | lifelines ~= 0.28.0 -------------------------------------------------------------------------------- /rulexai/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.1.0' -------------------------------------------------------------------------------- /rulexai/explainer.py: -------------------------------------------------------------------------------- 1 | from .importances import ( 2 | ClassificationConditionImportance, 3 | RegressionConditionImportance, 4 | SurvivalConditionImportance, 5 | ConditionImportance, 
6 | ) 7 | from .models import ClassificationModel, RegressionModel, SurvivalModel, BlackBoxModel 8 | import pandas as pd 9 | import numpy as np 10 | from typing import Union, List 11 | import matplotlib.pyplot as plt 12 | 13 | from .reduct import Reduct 14 | 15 | Labels = Union[pd.DataFrame, pd.Series] 16 | 17 | 18 | 19 | class BaseExplainer: 20 | """:meta private:""" 21 | def __init__( 22 | self, model, X: pd.DataFrame, y: Labels, type: str = "classification" 23 | ) -> None: 24 | 25 | 26 | self.model = model 27 | self.X = X 28 | self.y = y 29 | self.type = type 30 | 31 | self.condition_importances_ = None 32 | self.feature_importances_ = None 33 | self.if_basic_conditions = None 34 | 35 | self.condition_importance_class = None 36 | self._conditions_importances_for_training_set = None 37 | self._basic_conditions_importances_for_training_set = None 38 | 39 | 40 | def explain(self, measure: str = "C2", basic_conditions: bool = False): 41 | """Compute conditions importances. The importances of a conditions are computed base on: \n 42 | Marek Sikora: Redefinition of Decision Rules Based on the Importance of Elementary Conditions Evaluation. Fundam. Informaticae 123(2): 171-197 (2013) \n 43 | https://dblp.org/rec/journals/fuin/Sikora13.html 44 | 45 | Parameters 46 | ---------- 47 | measure: str 48 | Specifies the measure that is used to evaluate the quality of the rules. Possible measures for classification and regression problem are: C2, Lift, Correlation. Default: C2. 
It is not possible to select a measure for the survival problem, the LogRank test is used by default 49 | basic_conditions : bool 50 | Specifies whether to evaluate the conditions contained in the input rules, or to break the conditions in the rules into base conditions so that individual conditions do not overlap 51 | Returns 52 | ------- 53 | self: Explainer 54 | Fitted explainer with calculated conditions 55 | 56 | """ 57 | self.if_basic_conditions = basic_conditions 58 | self.condition_importances_ = self._determine_condition_importances(measure) 59 | self.feature_importances_ = self._determine_feature_importances(self.condition_importances_) 60 | 61 | return self 62 | 63 | def fit_transform( 64 | self, X: pd.DataFrame, selector=None, y=None, POS=None) -> pd.DataFrame: 65 | 66 | """Creates a dataset based on given dataset in which the examples, instead of being described by the original attributes, will be described with the specified conditions - it will be a set with binary attributes determining whether a given example meets a given condition. It can be considered as kind of dummification. 67 | Thanks to this function you can discretize data and get rid of missing values. It can be used as prestep for others algorithms. 68 | 69 | Parameters 70 | ---------- 71 | X: pd.DataFrame 72 | The input samples from which you want to create binary dataset. Should have the same columns and columns order as X specified when creating Explainer 73 | selector : string/float 74 | Specifies on what basis to select the conditions from the rules that will be included as attributes in the transformed set. 75 | If None all conditions will be included in the transformed set. If number 0-1 percent of the most important conditions will be selected based on condition importance ranking. If "reduct" the reduct of the conditions set will be selected. Preferably, the option with the percentage of most important conditions will be selected. 
76 | y: Union[pd.DataFrame, pd.Series] 77 | Only if selector = "reduct".The target values for input sample, used in the determination of the reduct 78 | POS: float 79 | Only if selector = "reduct".Target reduct POS 80 | Returns 81 | ------- 82 | X_transformed: pd.DataFrame 83 | Transformed dataset 84 | 85 | """ 86 | 87 | helper = ConditionImportance(self.model.rules, X, None, self.if_basic_conditions) 88 | if self.if_basic_conditions: 89 | rules = helper.split_conditions_in_rules(self.model.rules) 90 | conditions = helper._get_conditions_with_rules(rules) 91 | else: 92 | conditions = helper._get_conditions_with_rules( 93 | self.model.rules 94 | ) 95 | 96 | binary_dataset = self._prepare_binary_dataset(X, conditions) 97 | 98 | if selector=="reduct": 99 | reduct = Reduct() 100 | binary_dataset = reduct.get_reduct(binary_dataset,y,POS) 101 | elif not selector is None: 102 | binary_dataset = self._get_top_conditions(binary_dataset, selector) 103 | 104 | chosen_conditions_names = binary_dataset.columns 105 | self.conditions_names = chosen_conditions_names 106 | self.conditions = [] 107 | for condition in conditions: 108 | if str(condition) in chosen_conditions_names: 109 | self.conditions.append(condition) 110 | 111 | return binary_dataset 112 | 113 | def transform( 114 | self, X: pd.DataFrame) -> pd.DataFrame: 115 | 116 | """Creates a dataset based on given dataset in which the examples, instead of being described by the original attributes, will be described with the specified conditions - it will be a set with binary attributes determining whether a given example meets a given condition. It can be considered as kind of dummification. 117 | Thanks to this function you can discretize data and get rid of missing values. It can be used as prestep for others algorithms. 118 | 119 | Parameters 120 | ---------- 121 | X: pd.DataFrame 122 | The input samples from which you want to create binary dataset. 
Should have the same columns and columns order as X given in fit_transform 123 | Returns 124 | ------- 125 | X_transformed: pd.DataFrame 126 | Transformed dataset 127 | 128 | """ 129 | transformed_dataset = self._prepare_binary_dataset(X, self.conditions) 130 | 131 | return transformed_dataset[self.conditions_names] 132 | 133 | 134 | def get_rules_covering_example(self, x: pd.DataFrame, y: Labels) -> List[str]: 135 | """Return the rules whose premise covers the given example 136 | 137 | Parameters 138 | ---------- 139 | x : pd.DataFrame 140 | The input sample. 141 | y : Union[pd.DataFrame, pd.Series] 142 | The target values for input sample. Not read when selecting the rules. 143 | Returns 144 | ------- 145 | rules: list 146 | The rule objects from self.model.rules whose premise evaluates truthy for x. NOTE(review): the objects themselves are returned, not their string form, although the signature is annotated List[str] 147 | 148 | """ 149 | rules_covering_example = [] 150 | for rule in self.model.rules: 151 | if rule.premise.evaluate(x): 152 | rules_covering_example.append(rule) 153 | 154 | return rules_covering_example 155 | 156 | 157 | def local_explainability(self, x: pd.DataFrame, y: Labels, plot: bool = False): 158 | """Displays information about the local explanation of the example: the rules that cover the given example and the importance of the conditions contained in these rules 159 | 160 | Parameters 161 | ---------- 162 | x : pd.DataFrame 163 | The input sample. 164 | y : Union[pd.DataFrame, pd.Series] 165 | The target values for input sample. 166 | plot : bool 167 | If True the importance of the conditions will also be shown in the chart. 
Default: False 168 | """ 169 | rules_covering_example = self.get_rules_covering_example(x,y) 170 | 171 | print("Example:") 172 | print(pd.concat([x,y])) 173 | print("") 174 | 175 | print("Rules that covers this example:") 176 | for rule in rules_covering_example: 177 | print(rule) 178 | print("") 179 | 180 | conditions_importances = self.condition_importances_.copy() 181 | 182 | classes_with_conditions = dict() 183 | for rule in rules_covering_example: 184 | 185 | 186 | if rule.consequence.left in classes_with_conditions.keys(): 187 | conditions = classes_with_conditions[rule.consequence.left] 188 | else: 189 | conditions = [] 190 | 191 | conditions.extend(rule.premise.get_subconditions()) 192 | conditions = list(map(str, conditions)) 193 | classes_with_conditions[rule.consequence.left] = conditions 194 | 195 | 196 | 197 | if self.type == "classification": 198 | importances_for_covering_rules = pd.DataFrame() 199 | for j in range(0, conditions_importances.shape[1], 2): 200 | class_in_consequences = False 201 | tmp_df = conditions_importances.iloc[:, j : j + 2] 202 | for cl in classes_with_conditions.keys(): 203 | if cl in tmp_df.columns[0]: 204 | class_in_consequences = True 205 | class_name = cl 206 | 207 | if class_in_consequences: 208 | tmp_df.loc[~tmp_df[tmp_df.columns[0]].isin(classes_with_conditions[class_name]), tmp_df.columns[0]] = np.NaN 209 | tmp_df.dropna(inplace=True) 210 | tmp_df.reset_index(drop = True, inplace = True) 211 | importances_for_covering_rules = pd.concat([importances_for_covering_rules, tmp_df], ignore_index=False, axis=1) 212 | 213 | importances_for_covering_rules = importances_for_covering_rules.replace(np.nan, "-") 214 | else: 215 | conditions_importances.loc[~conditions_importances[conditions_importances.columns[0]].isin(conditions), conditions_importances.columns[0]] = np.NaN 216 | conditions_importances.dropna(inplace=True) 217 | importances_for_covering_rules = conditions_importances.reset_index(drop = True) 218 | 219 | 220 | 221 | 
print("Importances of the conditions from rules covering the example") 222 | print(importances_for_covering_rules) 223 | 224 | if plot: 225 | self.plot_importances(importances_for_covering_rules) 226 | 227 | return importances_for_covering_rules 228 | 229 | def get_rules(self): 230 | """Return rules from model 231 | 232 | Returns 233 | ------- 234 | rules: List[str] 235 | Rules from model 236 | """ 237 | rules = [] 238 | for rule in self.model.rules: 239 | rules.append(rule.__str__()) 240 | return rules 241 | 242 | 243 | def get_rules_with_basic_conditions(self): 244 | """Return rules from model with conditions broken down into base conditions so that individual conditions do not overlap 245 | 246 | Returns 247 | ------- 248 | rules: List[str] 249 | Rules from the model containing the base conditions 250 | """ 251 | rules_with_basic_conditions = [] 252 | helper = ConditionImportance(self.model.rules, self.X, self.y, True) 253 | rules = helper.rules 254 | for rule in rules: 255 | rules_with_basic_conditions.append(rule.__str__()) 256 | return rules_with_basic_conditions 257 | 258 | 259 | def plot_importances(self, importances: pd.DataFrame): 260 | """Plot importances 261 | Parameters 262 | ---------- 263 | importances : pd.DataFrame 264 | Feature/Condition importances to plot. 
265 | """ 266 | 267 | if "attributes" in importances.columns[0]: 268 | title = "Feature Importance" 269 | else: 270 | title = "Condition Importance" 271 | 272 | plots_number = int(importances.shape[1]/2) 273 | 274 | if self.type == "classification" and plots_number > 1: 275 | 276 | fig, axs = plt.subplots(1,plots_number, sharex = True) 277 | i = 0 278 | ticks_number = importances.shape[0] 279 | y_ticks = np.arange(0, ticks_number) 280 | for j in range(0, importances.shape[1], 2): 281 | 282 | tmp_df = importances.iloc[:, j : j + 2] 283 | tmp_df = tmp_df.replace("-", np.nan).dropna() 284 | tmp_df.sort_values(inplace=True, by=tmp_df.columns[1]) 285 | 286 | ticks_values = tmp_df.iloc[:, 1].to_list() 287 | ticks_all = [0 for _ in range(len(ticks_values),ticks_number)] 288 | ticks_all.extend(ticks_values) 289 | labels = tmp_df.iloc[:, 0].to_list() 290 | labels_all = ["" for _ in range(len(labels),ticks_number)] 291 | labels_all.extend(labels) 292 | 293 | colors = ["green" if y >= 0 else "red" for y in ticks_all] 294 | 295 | axs[i].barh(y_ticks, ticks_all, color = colors) 296 | axs[i].set_yticks(y_ticks) 297 | axs[i].set_yticklabels(labels_all) 298 | class_name = tmp_df.columns[0].split(" | ")[0] 299 | axs[i].set_title(f"Importance for class: {class_name}") 300 | axs[i].set_xlabel(f"Importance") 301 | i+=1 302 | 303 | fig.subplots_adjust(wspace = 0.5) 304 | plt.show() 305 | 306 | else: 307 | 308 | tmp_df = importances.sort_values(by=importances.columns[1]) 309 | colors = ["green" if y >= 0 else "red" for y in tmp_df.iloc[:,1].to_list()] 310 | y_ticks = np.arange(0, len(tmp_df)) 311 | fig, ax = plt.subplots() 312 | ax.barh(y_ticks, tmp_df.iloc[:, 1], color = colors) 313 | ax.set_yticks(y_ticks) 314 | ax.set_yticklabels(tmp_df.iloc[:, 0]) 315 | ax.set_title(f"{title}") 316 | ax.set_xlabel(f"Importance") 317 | 318 | plt.show() 319 | 320 | 321 | 322 | def _determine_condition_importances(self, measure: str = "C2", X = None, y = None): 323 | if X is None: 324 | X = self.X 325 
| if y is None: 326 | y = self.y 327 | 328 | if self.type == "regression": 329 | self.condition_importance_class = RegressionConditionImportance( 330 | rules = self.model.rules, data = X, labels = y, if_split = self.if_basic_conditions, measure = measure 331 | ) 332 | elif self.type == "survival": 333 | self.condition_importance_class = SurvivalConditionImportance( 334 | rules = self.model.rules, data = X, labels = y, if_split = self.if_basic_conditions, measure = measure 335 | ) 336 | else: 337 | self.condition_importance_class = ClassificationConditionImportance( 338 | rules = self.model.rules, data = X, labels = y, if_split = self.if_basic_conditions, measure = measure 339 | ) 340 | return self.condition_importance_class.condition_importances() 341 | 342 | def _determine_feature_importances(self, conditions_importances): 343 | feature_importances = pd.DataFrame() 344 | 345 | if self.type == "classification": 346 | 347 | for j in range(0, conditions_importances.shape[1], 2): 348 | class_importances = ( 349 | conditions_importances.iloc[:, j : j + 2] 350 | .replace("-", np.nan) 351 | .dropna() 352 | ) 353 | importances_df_tmp = pd.DataFrame() 354 | class_importances.iloc[:, 0] = class_importances.iloc[:, 0].apply( 355 | lambda x: x.split(" = ")[0] 356 | ) 357 | class_importances = ( 358 | class_importances.groupby(class_importances.columns[0]) 359 | .sum() 360 | .reset_index() 361 | ) 362 | class_importances.sort_values( 363 | class_importances.columns[1], ascending=False, inplace=True 364 | ) 365 | class_importances.reset_index(drop=True, inplace= True) 366 | 367 | class_name, _ = class_importances.columns[0].split(" | ") 368 | importances_df_tmp[class_name + " | attributes"] = pd.Series(class_importances[ 369 | class_importances.columns[0] 370 | ]) 371 | importances_df_tmp[class_name + " | importances"] = pd.Series(class_importances[ 372 | class_importances.columns[1] 373 | ]) 374 | 375 | feature_importances = pd.concat( 376 | [feature_importances, 
importances_df_tmp], 377 | ignore_index=False, 378 | axis=1 379 | ) 380 | 381 | feature_importances = feature_importances.replace(np.nan, "-") 382 | 383 | else: 384 | 385 | importances_df = conditions_importances.copy() 386 | importances_df.iloc[:, 0] = importances_df.iloc[:, 0].apply( 387 | lambda x: x.split(" = ")[0] 388 | ) 389 | importances_df = ( 390 | 391 | importances_df.groupby(importances_df.columns[0]).sum().reset_index() 392 | ) 393 | importances_df.sort_values( 394 | importances_df.columns[1], ascending=False, inplace=True 395 | ) 396 | 397 | importances_df.rename( 398 | columns={ 399 | importances_df.columns[0]: "attributes", 400 | importances_df.columns[1]: "importances", 401 | }, 402 | inplace=True, 403 | ) 404 | 405 | feature_importances = importances_df.replace(np.nan, "-") 406 | 407 | return feature_importances 408 | 409 | def _prepare_binary_dataset( 410 | self, X: pd.DataFrame, conditions) -> pd.DataFrame: 411 | 412 | x = X.to_numpy() 413 | binary_dataset_arr = np.zeros((x.shape[0], len(conditions)), dtype=int) 414 | conditions_names = [] 415 | 416 | for i, condition in enumerate(conditions): 417 | condition.evaluate_mask(binary_dataset_arr, x, column_index=i) 418 | conditions_names.append(str(condition)) 419 | 420 | binary_dataset = pd.DataFrame(binary_dataset_arr, columns=conditions_names) 421 | 422 | return binary_dataset 423 | 424 | def _get_top_conditions(self,binary_dataset, percent): 425 | if self.type == "classification": 426 | importances_TOP = [] 427 | for j in range(0, self.condition_importances_.shape[1] + 0, 2): 428 | class_importances = ( 429 | self.condition_importances_.iloc[:, j] 430 | .replace("-", np.nan) 431 | .dropna() 432 | ) 433 | class_importances_TOP_number = np.round( 434 | (percent) * class_importances.shape[0] 435 | ) 436 | 437 | if class_importances_TOP_number == 0: 438 | class_importances_TOP_number = 1 439 | 440 | class_importances_TOP = class_importances.loc[ 441 | 0 : class_importances_TOP_number - 1 442 | ] 443 
| importances_TOP.extend(list(class_importances_TOP)) 444 | 445 | importances_TOP_list = list(set(importances_TOP)) 446 | else: 447 | 448 | importances_TOP_number = np.round( 449 | (percent) * self.condition_importances_.shape[0] 450 | ) 451 | if importances_TOP_number == 0: 452 | importances_TOP_number = 1 453 | 454 | importances_TOP = self.condition_importances_.loc[ 455 | 0 : importances_TOP_number - 1 456 | ] 457 | importances_TOP_list = importances_TOP["conditions"].to_list() 458 | 459 | return binary_dataset[importances_TOP_list] 460 | 461 | 462 | class RuleExplainer(BaseExplainer): 463 | def __init__( 464 | self, model, X: pd.DataFrame, y: Labels, type: str = "classification" 465 | ) -> None: 466 | """RuleExplainer 467 | 468 | Parameters 469 | ---------- 470 | model : Model = Union[RuleClassifier, RuleRegressor, SurvivalRules, CN2UnorderedClassifier, CN2SDUnorderedClassifier, DecisionTreeClassifier, DecisionTreeRegressor, SurvivalTree, List[str]] 471 | Model to be analyzed. RuleXai supports the following Rule models: 472 | - RuleKit(https://adaa-polsl.github.io/RuleKit-python/): RuleClassifier, RuleRegressor, SurvivalRules 473 | - Orange (https://orangedatamining.com/): CN2UnorderedClassifier, CN2SDUnorderedClassifier 474 | It can also extract rules from decision trees: 475 | - scikit-learn (https://scikit-learn.org/stable/): DecisionTreeClassifier, DecisionTreeRegressor 476 | - scikit-survival (https://scikit-survival.readthedocs.io/en/stable/): SurvivalTree 477 | Or you can provide a list of rules as: 478 | - classification: 479 | IF attribute1 = (-inf, value) AND ... AND attribute2 = None: 521 | """Explainer 522 | 523 | Parameters 524 | ---------- 525 | X : pd.DataFrame 526 | The training dataset used during provided model training 527 | model_predictions : Union[pd.DataFrame, pd.Series] 528 | The training dataset used during provided model training 529 | type : str 530 | The type of problem that the provided model solves. 
def explain(self, measure: str = "C2", basic_conditions: bool = False, X_org = None):
    """Fit the rule-based surrogate model and compute importances.

    The importances of conditions are computed based on:
    Marek Sikora: Redefinition of Decision Rules Based on the Importance of
    Elementary Conditions Evaluation. Fundam. Informaticae 123(2): 171-197 (2013)
    https://dblp.org/rec/journals/fuin/Sikora13.html

    Parameters
    ----------
    measure : str
        Measure used to evaluate the quality of the rules. Possible measures
        for classification and regression problems are: C2, Lift, Correlation.
        Default: C2. A measure cannot be selected for the survival problem;
        the LogRank test is used there.
    basic_conditions : bool
        Whether to evaluate the conditions exactly as they appear in the rules
        (False) or to break them into basic, non-overlapping conditions (True).
    X_org : optional
        Dataset on which the rule-based model should be built. It can be the
        set the black-box model was learned on, or that set before
        preprocessing (imputation of missing values, dummification, scaling).
        When omitted, ``self.X`` is used.

    Returns
    -------
    self : Explainer
        Fitted explainer with ``model``, ``condition_importances_`` and
        ``feature_importances_`` set.
    """
    self.if_basic_conditions = basic_conditions

    use_training_set = X_org is None
    self.model = self._bb_model.get_rules_model(
        self.X if use_training_set else X_org
    )

    # Without X_org the helper is called with the measure only, so whatever
    # default it has for the dataset argument stays in effect.
    dataset_args = () if use_training_set else (X_org,)
    self.condition_importances_ = self._determine_condition_importances(
        measure, *dataset_args
    )
    self.feature_importances_ = self._determine_feature_importances(
        self.condition_importances_
    )

    return self
def _calculateMeasure(self, rule):
    """Return the quality of ``rule`` on ``self.dataset`` for ``self.measure``.

    ``rule.covers(self.dataset)`` must return the 4-tuple ``(p, n, P, N)``
    (presumably covered positives / covered negatives / all positives /
    all negatives -- confirm against ``Rule.covers``).

    ``self.measure`` selects the formula: ``"Correlation"``, ``"Lift"``, and
    any other value (including ``None``) falls back to C2. Every branch
    returns ``0`` instead of dividing by zero when its denominator vanishes.
    """
    p, n, P, N = rule.covers(self.dataset)

    if self.measure == "Correlation":
        # The whole product must be non-zero, not only the last factor:
        # P == 0, N == 0 or an empty coverage (p + n == 0) also zero it.
        denominator_squared = P * N * (p + n) * (P - p + N - n)
        if denominator_squared == 0:
            return 0
        return (p * N - P * n) / math.sqrt(denominator_squared)

    if self.measure == "Lift":
        if (p == 0 and n == 0) or P == 0:
            return 0
        return p * (P + N) / ((p + n) * P)

    # C2 (default for every other measure name)
    if (p == 0 and n == 0) or P == 0 or N == 0:
        return 0
    return (((P + N) * p / (p + n) - P) / N) * ((1 + p / P) / 2)
def _get_basic_conditions_for_attribute(self, conditions: List[ElementaryCondition]):
    """Cut the interval conditions of one attribute into consecutive pieces.

    The endpoints (``left`` / ``right``) of every condition are collected and
    swept in increasing order of value. Each pair of consecutive distinct
    endpoint values becomes a new ``ElementaryCondition`` on the attribute of
    ``conditions[0]``.

    Parameters
    ----------
    conditions : List[ElementaryCondition]
        Interval conditions; the attribute is taken from the first element
        only, so the list must be non-empty.

    Returns
    -------
    list
        The new conditions in increasing order of their left bound. Each one
        is right-open, and left-closed unless its left bound is ``-inf``.
    """
    basic_conditions = []
    points = []
    # `id` numbers the source conditions so both endpoints of one condition
    # can be recognised later (it shadows the builtin inside this method).
    id = 0
    attribute = conditions[0].attribute
    for condition in conditions:
        point = Point(id, condition.left, "Left", condition.leftClosed)
        points.append(point)
        point = Point(id, condition.right, "Right", condition.rightClosed)
        points.append(point)
        id += 1

    # Take the smallest endpoint and drop every endpoint sharing its value;
    # the length difference is how many endpoints had that value.
    min_point_first = min(points, key = attrgetter('value'))
    all_points_len = len(points)
    points = [point for point in points if point.value != min_point_first.value]
    number_of_firts_mins = all_points_len - len(points)

    while(len(points) > 0):
        # Next distinct endpoint value: the right bound of the piece to emit.
        min_point_second = min(points, key = attrgetter('value'))
        points_with_second_min_len = len(points)
        points = [point for point in points if point.value != min_point_second.value]
        number_of_second_mins = points_with_second_min_len - len(points)

        # Only a piece starting at -inf is left-open.
        if(min_point_first.value == float('-inf')):
            leftClosed = False
        else:
            leftClosed = True

        condition = ElementaryCondition(attribute = attribute, left = min_point_first.value, right= min_point_second.value, leftClosed = leftClosed, rightClosed= False, column_index=self.column_indexes[attribute])
        basic_conditions.append(condition)

        if (min_point_first.condition_id == min_point_second.condition_id) and number_of_firts_mins == 1 and number_of_second_mins == 1 and len(points) > 1:
            # Both bounds belong to the same source condition and neither value
            # is shared with another endpoint: restart from the next smallest
            # endpoint, so no piece is emitted between this condition's right
            # bound and that endpoint.
            min_point_first = min(points, key = attrgetter('value'))
            points_with_first_min_len = len(points)
            points = [point for point in points if point.value != min_point_first.value]
            number_of_firts_mins = points_with_first_min_len - len(points)
        else:
            # Otherwise the next piece starts where this one ended.
            min_point_first = min_point_second
            # NOTE(review): both names now refer to the same Point, so this
            # simply flips its is_closed flag; the flag is not read anywhere
            # in this method. Nesting under `else` was reconstructed from a
            # flattened source -- confirm against the repository.
            min_point_first.is_closed = not min_point_second.is_closed

    return basic_conditions
def _split_rules_by_decision_class(self, rules):
    """Group rules by decision class.

    The class of a rule is read from ``rule.consequence.left``. The result is
    a dict mapping each class to the list of its rules; classes appear in
    first-seen order and rules keep their input order.
    """
    grouped = {}
    for current_rule in rules:
        grouped.setdefault(current_rule.consequence.left, []).append(current_rule)
    return grouped
def _calculate_conditions_importances(self, condition_qualities_for_classes):
    """Turn per-class condition evaluations into ranked importance dicts.

    Parameters
    ----------
    condition_qualities_for_classes : dict
        Maps a class name to an iterable of objects exposing ``condition``
        and ``quality`` attributes.

    Returns
    -------
    dict
        Maps every class name to a dict ``condition -> quality`` ordered by
        decreasing quality (ties keep their original relative order).
    """
    conditions_importances = {}

    for evaluated_class, evaluations in condition_qualities_for_classes.items():
        # The importance of a condition within a class is its own quality;
        # no cross-class correction is applied.
        importances_for_class = {
            evaluation.condition: evaluation.quality for evaluation in evaluations
        }
        conditions_importances[evaluated_class] = dict(
            sorted(
                importances_for_class.items(),
                key=lambda item: item[1],
                reverse=True,
            )
        )

    return conditions_importances
def condition_importances(self):
    """Compute the importances of the conditions used by ``self.rules``.

    Conditions are mapped to the rules containing them, each condition is
    evaluated, and the ``condition -> quality`` pairs are ordered by
    decreasing quality before being handed to
    ``_condition_importances_to_DataFrame``, whose result is returned.
    """
    evaluations = self._calculate_conditions_qualities(
        self._get_conditions_with_rules(self.rules)
    )
    quality_by_condition = {
        evaluation.condition: evaluation.quality for evaluation in evaluations
    }
    ranked = sorted(
        quality_by_condition.items(), key=lambda pair: pair[1], reverse=True
    )
    return self._condition_importances_to_DataFrame(dict(ranked))
RegressionRule(rule.premise, rule.consequence) 363 | 364 | rule_conditions = [] 365 | rule_conditions.extend(rule.premise.get_subconditions()) 366 | number_of_conditions = len(rule_conditions) 367 | rule_conditions.remove(condition) 368 | 369 | if self.if_split: 370 | premise_without_evaluated_condition = CompoundConditionWithCombiningOperators() 371 | else: 372 | premise_without_evaluated_condition = CompoundCondition() 373 | premise_without_evaluated_condition.add_subconditions(rule_conditions) 374 | rule_without_evaluated_condition = RegressionRule( 375 | premise_without_evaluated_condition, rule.consequence 376 | ) 377 | 378 | factor = 1.0 / number_of_conditions 379 | 380 | if len(rule_conditions) == 0: 381 | return factor * ( 382 | self._calculateMeasure(rule) 383 | - self._calculateMeasure(rule_without_evaluated_condition) 384 | ) 385 | else: 386 | premise_with_only_evaluated_condition = CompoundCondition() 387 | premise_with_only_evaluated_condition.add_subcondition(condition) 388 | rule_with_only_evaluated_condition = RegressionRule( 389 | premise_with_only_evaluated_condition, rule.consequence 390 | ) 391 | 392 | return factor * ( 393 | self._calculateMeasure(rule) 394 | - self._calculateMeasure(rule_without_evaluated_condition) 395 | + self._calculateMeasure(rule_with_only_evaluated_condition) 396 | ) 397 | 398 | def _get_rules_with_splitted_conditions(self, basic_conditions, rules: List[RegressionRule]): 399 | rules_with_basic_conditions = [] 400 | for rule in rules: 401 | compoundCondition = self._create_compound_condition_for_rule(basic_conditions, rule) 402 | rules_with_basic_conditions.append(RegressionRule(compoundCondition, rule.consequence)) 403 | return rules_with_basic_conditions 404 | 405 | class SurvivalConditionImportance(ConditionImportance): 406 | def __init__(self, rules, data, labels, if_split, measure) -> None: 407 | self.column_indexes: Dict[str, int] = {column_name:i for i, column_name in enumerate(list(data.columns))} 408 | 
def condition_importances(self):
    """Rank the conditions of ``self.rules`` by their computed quality.

    Returns whatever ``_condition_importances_to_DataFrame`` builds from the
    ``condition -> quality`` dict, ordered from highest to lowest quality.
    """
    rules_per_condition = self._get_conditions_with_rules(self.rules)
    scored = self._calculate_conditions_qualities(rules_per_condition)

    importances = {}
    for entry in scored:
        importances[entry.condition] = entry.quality

    by_quality_desc = dict(
        sorted(importances.items(), key=lambda kv: kv[1], reverse=True)
    )
    return self._condition_importances_to_DataFrame(by_quality_desc)
def _calculateMeasure(self, rule):
    """Return the log-rank test statistic separating covered from uncovered rows.

    ``rule.covers(self.dataset, return_positives=True)`` must return a
    5-tuple whose last element lists the covered rows. The
    ``"survival_time"`` column of ``self.data`` and ``self.labels`` are split
    into covered / uncovered groups and passed to ``self.logrank_test``.

    NOTE(review): uncovered rows are enumerated by position
    (``range(len(data))``) but selected with ``index.isin``; this is
    consistent only for a 0..n-1 index -- confirm with callers.
    """
    _p, _n, _P, _N, covered_indices = rule.covers(
        self.dataset, return_positives=True
    )
    # In survival rules all covered examples count as positives, so the
    # complement is taken over every row position.
    uncovered_indices = [
        position
        for position in range(self.data.shape[0])
        if position not in covered_indices
    ]

    survival_time = self.data["survival_time"]
    covered_times = survival_time[survival_time.index.isin(covered_indices)]
    uncovered_times = survival_time[survival_time.index.isin(uncovered_indices)]
    covered_events = self.labels[self.labels.index.isin(covered_indices)]
    uncovered_events = self.labels[self.labels.index.isin(uncovered_indices)]

    outcome = self.logrank_test(
        covered_times,
        uncovered_times,
        event_observed_A=covered_events,
        event_observed_B=uncovered_events,
    )
    return outcome.test_statistic
def __init__(
    self, model, feature_names=None, class_names=None, label_name=None
) -> None:
    """Wrap ``model`` and extract its rules.

    Parameters
    ----------
    model
        The model (or list of rule strings) handed to ``get_rules``.
    feature_names : iterable of str, optional
        Column names, in order; used to build ``self.column_indexes``
        (name -> position). ``None`` yields an empty mapping.
    class_names : optional
        Forwarded unchanged to ``get_rules``.
    label_name : optional
        Forwarded unchanged to ``get_rules``.
    """
    self.model = model
    # The rule parsers invoked by get_rules() look columns up in
    # self.column_indexes, so the mapping has to exist before the rules
    # are extracted.
    self.column_indexes: Dict[str, int] = (
        {}
        if feature_names is None
        else {column_name: i for i, column_name in enumerate(feature_names)}
    )
    self.rules = self.get_rules(self.model, feature_names, class_names, label_name)
def _map_rules_from_list(self, rules):
    """Parse every rule string in ``rules`` with ``_map_string_rule``.

    Returns a new list with the parsed rules in input order.
    """
    return [self._map_string_rule(rule_text) for rule_text in rules]
def _preprocessRule(self, rule):
    """Merge the premise conditions of ``rule`` that share an attribute.

    Conditions are visited in premise order; when an attribute was already
    seen, the stored condition is replaced by its ``get_intersection`` with
    the new one. The rule's premise is then replaced by a fresh
    ``CompoundCondition`` holding one condition per attribute, and the same
    (mutated) rule object is returned.
    """
    merged_by_attribute = {}

    for current in rule.premise.subconditions:
        key = current.attribute
        if key in merged_by_attribute:
            current = merged_by_attribute[key].get_intersection(current)
        merged_by_attribute[key] = current

    merged_premise = CompoundCondition()
    merged_premise.add_subconditions(merged_by_attribute.values())
    rule.premise = merged_premise

    return rule
137 | for i in tree_.feature 138 | ] 139 | paths = [] 140 | path = [] 141 | 142 | def recurse(node, path, paths): 143 | if tree_.feature[node] != self._tree.TREE_UNDEFINED: 144 | name = feature_name[node] 145 | threshold = tree_.threshold[node] 146 | p1, p2 = list(path), list(path) 147 | p1 += [f"{name} <= {threshold}"] 148 | recurse(tree_.children_left[node], p1, paths) 149 | p2 += [f"{name} > {threshold}"] 150 | recurse(tree_.children_right[node], p2, paths) 151 | else: 152 | path += [(tree_.value[node], tree_.n_node_samples[node])] 153 | paths += [path] 154 | 155 | recurse(0, path, paths) 156 | 157 | # sort by samples count 158 | samples_count = [p[-1][1] for p in paths] 159 | ii = list(np.argsort(samples_count)) 160 | paths = [paths[i] for i in reversed(ii)] 161 | 162 | rules = [] 163 | for path in paths: 164 | rule = "IF " 165 | for p in path[:-1]: 166 | if rule != "IF ": 167 | rule += " AND " 168 | rule += str(p) 169 | rule += " THEN " 170 | if class_names is None: 171 | rule += ( 172 | 173 | f"{label_name} = " + "{" + str(path[-1][0][0][0]) + "}" 174 | ) 175 | else: 176 | classes = path[-1][0][0] 177 | l = np.argmax(classes) 178 | rule += f"{label_name} = " + "{" + f"{class_names[l]}" + "}" 179 | rules += [rule] 180 | 181 | return rules 182 | 183 | 184 | class ClassificationModel(Model): 185 | def __init__( 186 | self, model, feature_names=None, class_names=None, label_name=None 187 | ) -> None: 188 | self.column_indexes: Dict[str, int] = {column_name:i for i, column_name in enumerate(list(feature_names))} 189 | super().__init__( 190 | model, 191 | feature_names=feature_names, 192 | class_names=class_names, 193 | label_name=label_name, 194 | ) 195 | 196 | def get_rules(self, model, feature_names=None, class_names=None, label_name=None): 197 | 198 | if isinstance(model, BaseOperator): 199 | return self._map_rules_from_RuleKit(model.model.rules) 200 | if isinstance(model, list): 201 | return self._map_rules_from_list(model) 202 | sklearn_tree_module = 
def _map_rule_from_RuleKit(self, rule):
    """Parse one RuleKit rule string into a ``Rule``.

    The text is expected to look like
    ``IF a = (-inf, 1.5) AND b = {x} THEN label = {cls}``: the first three
    characters are dropped, premise and consequence are separated by
    ``" THEN "``, and premise conditions by ``" AND "``. A value set
    containing a comma is read as a numeric interval (an opening ``(`` /
    closing ``)`` marks an open bound); anything else is a single value
    with its enclosing pair of characters stripped.
    """
    premise_text, consequence_text = rule[3:].split(" THEN ")
    condition_texts = premise_text.split(" AND ")

    premise = CompoundCondition()

    for condition_text in condition_texts:
        attribute, valueset = condition_text.split(" = ")
        if "," not in valueset:
            parsed = ElementaryCondition(
                attribute,
                str(valueset[1:-1]),
                column_index=self.column_indexes[attribute],
            )
        else:
            lower_text, upper_text = valueset.split(",")
            left_closed = lower_text[0] != "("
            right_closed = upper_text[-1:] != ")"
            parsed = ElementaryCondition(
                attribute,
                float(lower_text[1:]),
                float(upper_text[1:-1]),
                left_closed,
                right_closed,
                column_index=self.column_indexes[attribute],
            )
        premise.add_subcondition(parsed)

    consequence_att, raw_decision = consequence_text.split(" = ")
    # NOTE(review): the consequence reuses the column index of the last
    # premise attribute, exactly as the original code does -- confirm intent.
    decision = ElementaryCondition(
        consequence_att,
        raw_decision[1:-1],
        column_index=self.column_indexes[attribute],
    )

    return Rule(premise, decision)
rule.__str__(): 251 | 252 | preprocessed_Rules.append(self._map_rule_from_Orange(rule.__str__())) 253 | return preprocessed_Rules 254 | 255 | def _map_rule_from_Orange(self, rule): 256 | rule = rule[3:] 257 | premise, consequence = rule.split(" THEN ") 258 | conditions = premise.split(" AND ") 259 | compoundCondition = CompoundCondition() 260 | 261 | for condition in conditions: 262 | if "<=" in condition: 263 | attribute, value = condition.split("<=") 264 | elementaryCondition = ElementaryCondition( 265 | attribute, ElementaryCondition.minus_inf, float(value), False, True, column_index=self.column_indexes[attribute] 266 | 267 | ) 268 | elif "<" in condition: 269 | attribute, value = condition.split("<") 270 | elementaryCondition = ElementaryCondition( 271 | attribute, ElementaryCondition.minus_inf, float(value), False, False, column_index=self.column_indexes[attribute] 272 | 273 | ) 274 | elif ">=" in condition: 275 | attribute, value = condition.split(">=") 276 | elementaryCondition = ElementaryCondition( 277 | 278 | attribute, float(value), ElementaryCondition.inf, True, False, column_index=self.column_indexes[attribute] 279 | ) 280 | elif ">" in condition: 281 | attribute, value = condition.split(">") 282 | elementaryCondition = ElementaryCondition( 283 | 284 | attribute, float(value), ElementaryCondition.inf, False, False, column_index=self.column_indexes[attribute] 285 | ) 286 | elif "=" in condition: 287 | attribute, value = condition.split("=") 288 | elementaryCondition = ElementaryCondition(attribute, str(value), column_index=self.column_indexes[attribute]) 289 | 290 | else: 291 | elementaryCondition = ElementaryCondition("all", "TRUE") 292 | 293 | compoundCondition.add_subcondition(elementaryCondition) 294 | 295 | consequence_att, consequence_value = consequence.split("=") 296 | consequence_value = consequence_value[:-1] 297 | consequence = ElementaryCondition(consequence_att, consequence_value, column_index=self.column_indexes[attribute]) 298 | 299 | 
return self._preprocessRule(Rule(compoundCondition, consequence)) 300 | 301 | 302 | class RegressionModel(Model): 303 | def __init__(self, model, feature_names=None, label_name=None) -> None: 304 | self.column_indexes: Dict[str, int] = {column_name:i for i, column_name in enumerate(list(feature_names))} 305 | super().__init__( 306 | model, feature_names=feature_names, class_names=None, label_name=label_name 307 | ) 308 | 309 | def get_rules(self, model, feature_names=None, class_names=None, label_name=None): 310 | if isinstance(model, BaseOperator): 311 | return self._map_rules_from_RuleKit(model.model.rules) 312 | if isinstance(model, list): 313 | return self._map_rules_from_list(model) 314 | sklearn_tree_module = importlib.import_module("sklearn.tree") 315 | BaseDecisionTree = getattr(sklearn_tree_module, "BaseDecisionTree") 316 | if isinstance(model, BaseDecisionTree): 317 | self._tree = getattr(sklearn_tree_module, "_tree") 318 | rules = self._get_rules_from_DT( 319 | self.model, feature_names, class_names, label_name 320 | ) 321 | return self._map_rules_from_sklearn(rules) 322 | 323 | 324 | def _map_rule_from_RuleKit(self, rule): 325 | rule = rule[3:] 326 | premise, consequence = rule.split(" THEN ") 327 | conditions = premise.split(" AND ") 328 | 329 | compoundCondition = CompoundCondition() 330 | 331 | for condition in conditions: 332 | attribute, valueset = condition.split(" = ") 333 | if "," in valueset: 334 | left, right = valueset.split(",") 335 | leftClosed = False if left[0] == "(" else True 336 | rightClosed = False if right[-1:] == ")" else True 337 | left = left[1:] 338 | right = right[1:-1] 339 | elementaryCondition = ElementaryCondition( 340 | 341 | attribute, float(left), float(right), leftClosed, rightClosed, column_index=self.column_indexes[attribute] 342 | ) 343 | else: 344 | value = valueset[1:-1] 345 | elementaryCondition = ElementaryCondition(attribute, str(value), column_index=self.column_indexes[attribute]) 346 | 347 | 348 | 
compoundCondition.add_subcondition(elementaryCondition) 349 | 350 | consequence_att, consequence_value = consequence.split(" = ") 351 | consequence_val, consequence_range = consequence_value.split(" ") 352 | consequence_val = consequence_val[1:-1] 353 | 354 | consequence = ElementaryCondition(consequence_att, float(consequence_val), column_index=self.column_indexes[attribute]) 355 | 356 | return Rule(compoundCondition, consequence) 357 | 358 | def _map_rule_from_sklearn(self, rule): 359 | rule = rule[3:] 360 | premise, consequence = rule.split(" THEN ") 361 | conditions = premise.split(" AND ") 362 | compoundCondition = CompoundCondition() 363 | 364 | for condition in conditions: 365 | if "<=" in condition: 366 | attribute, value = condition.split(" <= ") 367 | elementaryCondition = ElementaryCondition( 368 | attribute, ElementaryCondition.minus_inf, float(value), False, True, column_index=self.column_indexes[attribute] 369 | 370 | ) 371 | elif "<" in condition: 372 | attribute, value = condition.split(" < ") 373 | elementaryCondition = ElementaryCondition( 374 | attribute, ElementaryCondition.minus_inf, float(value), False, False, column_index=self.column_indexes[attribute] 375 | 376 | ) 377 | elif ">=" in condition: 378 | attribute, value = condition.split(" >= ") 379 | elementaryCondition = ElementaryCondition( 380 | 381 | attribute, float(value), ElementaryCondition.inf, True, False, column_index=self.column_indexes[attribute] 382 | ) 383 | elif ">" in condition: 384 | attribute, value = condition.split(" > ") 385 | elementaryCondition = ElementaryCondition( 386 | 387 | attribute, float(value), ElementaryCondition.inf, False, False, column_index=self.column_indexes[attribute] 388 | ) 389 | else: 390 | attribute, value = condition.split(" = ") 391 | elementaryCondition = ElementaryCondition(attribute, str(value), column_index=self.column_indexes[attribute]) 392 | 393 | 394 | compoundCondition.add_subcondition(elementaryCondition) 395 | 396 | consequence_att, 
def _map_string_rule(self, rule):
    """Parse a rule given as plain text into a preprocessed ``Rule``.

    The first three characters of ``rule`` are skipped, the remainder is split
    on ``" THEN "`` and the premise on ``" AND "``.  Every premise part has the
    form ``attribute = valueset``: a value set containing a comma is read as a
    numeric interval whose first/last character decide whether the bound is
    open (``(`` / ``)``) or closed; any other value set is a nominal value
    with its first and last character removed.  The consequence value is
    stripped of its first and last character and converted to ``float``.

    Returns whatever ``self._preprocessRule`` returns for the parsed rule.
    """
    body = rule[3:]
    premise_text, consequence_text = body.split(" THEN ")
    premise = CompoundCondition()

    for part in premise_text.split(" AND "):
        attribute, valueset = part.split(" = ")
        if "," not in valueset:
            # nominal value: strip the surrounding delimiter characters
            nominal = valueset[1:-1]
            parsed = ElementaryCondition(
                attribute, str(nominal),
                column_index=self.column_indexes[attribute],
            )
        else:
            lower, upper = valueset.split(",")
            lower_closed = lower[0] != "("
            upper_closed = not upper.endswith(")")
            lower = lower[1:]
            upper = upper[1:-1]
            parsed = ElementaryCondition(
                attribute, float(lower), float(upper), lower_closed, upper_closed,
                column_index=self.column_indexes[attribute],
            )
        premise.add_subcondition(parsed)

    target, target_value = consequence_text.split(" = ")
    target_value = target_value[1:-1]
    # the consequence reuses the column index of the last premise attribute
    conclusion = ElementaryCondition(
        target, float(target_value),
        column_index=self.column_indexes[attribute],
    )

    return self._preprocessRule(Rule(premise, conclusion))
model, feature_names=None, class_names=None, label_name=None): 447 | if isinstance(model, BaseOperator): 448 | return self._map_rules_from_RuleKit(model.model.rules) 449 | if isinstance(model, list): 450 | return self._map_rules_from_list(model) 451 | sksurv_tree_module = importlib.import_module("sksurv.tree") 452 | SurvivalTree = getattr(sksurv_tree_module, "SurvivalTree") 453 | if isinstance(model, SurvivalTree): 454 | sklearn_tree_module = importlib.import_module("sklearn.tree") 455 | self._tree = getattr(sklearn_tree_module, "_tree") 456 | rules = self._get_rules_from_DT( 457 | self.model, feature_names, class_names, label_name 458 | ) 459 | return self._map_rules_from_sklearn(rules) 460 | 461 | 462 | def _map_rule_from_RuleKit(self, rule): 463 | rule = rule[3:] 464 | premise, consequence = rule.split(" THEN ") 465 | conditions = premise.split(" AND ") 466 | compoundCondition = CompoundCondition() 467 | 468 | for condition in conditions: 469 | attribute, valueset = condition.split(" = ") 470 | if "," in valueset: 471 | left, right = valueset.split(",") 472 | leftClosed = False if left[0] == "(" else True 473 | rightClosed = False if right[-1:] == ")" else True 474 | left = left[1:] 475 | right = right[1:-1] 476 | elementaryCondition = ElementaryCondition( 477 | 478 | attribute, float(left), float(right), leftClosed, rightClosed, column_index=self.column_indexes[attribute] 479 | ) 480 | else: 481 | value = valueset[1:-1] 482 | elementaryCondition = ElementaryCondition(attribute, str(value), column_index=self.column_indexes[attribute]) 483 | 484 | 485 | compoundCondition.add_subcondition(elementaryCondition) 486 | 487 | consequence_att = "survival_curve" 488 | consequence_val = "" 489 | consequence = ElementaryCondition(consequence_att, consequence_val, column_index=0) 490 | 491 | return Rule(compoundCondition, consequence) 492 | 493 | 494 | class BlackBoxModel: 495 | def __init__(self, X_model, model_predictions, type) -> None: 496 | RuleKit.init() 497 | 498 | 
def get_rules_model(self, X_org):
    """Fit the surrogate RuleKit model on ``X_org`` and wrap it for RuleXAI.

    The targets are the black-box predictions stored in ``self.y``.  A
    ``DataFrame`` is flattened to a 1-D array and its first column name is
    used as the label name; any other object is used as-is and must expose a
    ``name`` attribute.

    Parameters
    ----------
    X_org : pandas.DataFrame
        Training features.  NOTE: NaN values in ``object`` columns are
        replaced by ``None`` directly in this frame, i.e. the caller's
        object is modified.

    Returns
    -------
    ``RegressionModel`` when the instance was created with
    ``type == "regression"``, otherwise ``ClassificationModel``.
    """
    if isinstance(self.y, pd.DataFrame):
        label_name = self.y.columns[0]
        y = self.y.to_numpy().reshape(self.y.size)
    else:
        label_name = self.y.name
        y = self.y

    for column in X_org.select_dtypes('object').columns.tolist():
        X_org[column] = X_org[column].replace({np.nan: None})
    self.rulekit_model.fit(X_org, y)

    # Use the task type stored by __init__.  The original compared the
    # builtin ``type`` with "regression", which is always False, so a
    # regression surrogate was wrapped as a classifier.
    if self.type == "regression":
        # Keyword arguments are required here: RegressionModel's second
        # positional parameter is feature_names, not the label name.
        return RegressionModel(
            self.rulekit_model,
            feature_names=X_org.columns,
            label_name=label_name,
        )

    return ClassificationModel(
        self.rulekit_model, X_org.columns, np.unique(self.y), label_name
    )
def eliminate_irrelevant_attributes(self, R: pd.DataFrame, y: pd.DataFrame):
    """Drop every attribute whose removal keeps the positive region size.

    The size of the positive region of ``R`` is computed once as a baseline.
    The attributes are then visited in their original column order; an
    attribute is removed from the running reduct when the positive region of
    the reduct without it is exactly as large as the baseline.

    Parameters
    ----------
    R : pandas.DataFrame
        Candidate reduct; not modified (a new frame is returned).
    y : pandas.DataFrame
        Decision values passed through to ``calculate_POS``.

    Returns
    -------
    pandas.DataFrame
        ``R`` restricted to the attributes that were kept.
    """
    baseline_size = len(self.calculate_POS(R.copy(), y))

    for attribute in list(R.columns):
        # calculate_POS adds helper columns to the frame it receives, so it
        # always gets its own freshly created frame.
        reduced_size = len(self.calculate_POS(R.drop(columns=[attribute]), y))
        if reduced_size == baseline_size:
            R = R.drop(columns=[attribute])

    return R
class ElementaryCondition:
    """A single condition on one attribute.

    Two flavours share this class:

    * numeric interval -- ``left`` and ``right`` are the bounds and
      ``leftClosed`` / ``rightClosed`` tell whether each bound is inclusive;
    * nominal value -- ``right`` is ``None`` and ``left`` holds the required
      value (compared as a string in ``evaluate``).
    """

    inf = float("inf")
    minus_inf = float("-inf")

    def __init__(
        self,
        attribute: str,
        left: float,
        right: float = None,
        leftClosed: bool = None,
        rightClosed: bool = None,
        column_index: int = None,
    ) -> None:
        """Store the condition; ``right is None`` marks a nominal condition.

        ``column_index`` is the position of ``attribute`` in the 2-D arrays
        handed to the ``*_mask`` methods.
        """
        self.attribute = attribute
        self.left = left
        self.right = right
        self.leftClosed = leftClosed
        self.rightClosed = rightClosed
        self._column_index = column_index

    def covered_mask(self, X: np.ndarray) -> np.ndarray:
        """Return a boolean mask of the rows of ``X`` satisfying the condition."""
        column = X[:, self._column_index]
        if self.right is None:
            return column == self.left
        left_part: np.ndarray = (
            (column >= self.left) if self.leftClosed else (column > self.left)
        )
        right_part: np.ndarray = (
            (column <= self.right) if self.rightClosed else (column < self.right)
        )
        return left_part & right_part

    def uncovered_mask(self, X: np.ndarray) -> np.ndarray:
        """Return the element-wise negation of ``covered_mask(X)``."""
        return np.logical_not(self.covered_mask(X))

    def negative_covered_mask(self, X: np.ndarray, y: np.ndarray, decision) -> np.ndarray:
        """Mask of covered rows whose label differs from ``decision``."""
        return self.covered_mask(X) & (y[:] != decision)

    def positive_covered_mask(self, X: np.ndarray, y: np.ndarray, decision) -> np.ndarray:
        """Mask of covered rows whose label equals ``decision``."""
        return self.covered_mask(X) & (y[:] == decision)

    def get_intersection(self, other_condition):
        """Return the interval intersection of this condition with another.

        The result keeps this condition's attribute and column index.  Each
        bound takes the closedness of the condition that supplies it.  When
        both conditions share a bound, it is closed only if it is closed in
        both; previously this condition's flag was used unconditionally, so
        ``<0, 5>`` intersected with ``(0, 5)`` stayed closed.
        """
        if self.left == other_condition.left:
            left_closed = self.leftClosed and other_condition.leftClosed
        elif self.left < other_condition.left:
            left_closed = other_condition.leftClosed
        else:
            left_closed = self.leftClosed

        if self.right == other_condition.right:
            right_closed = self.rightClosed and other_condition.rightClosed
        elif self.right > other_condition.right:
            right_closed = other_condition.rightClosed
        else:
            right_closed = self.rightClosed

        return ElementaryCondition(
            self.attribute,
            np.max([self.left, other_condition.left]),
            np.min([self.right, other_condition.right]),
            left_closed,
            right_closed,
            column_index=self._column_index,
        )

    def evaluate(self, ex):
        """Check the condition against one example indexable by attribute name."""
        value = ex[self.attribute]
        if self.right is None:
            return str(value) == self.left
        return ((value >= self.left and self.leftClosed) or value > self.left) and (
            (value <= self.right and self.rightClosed) or value < self.right
        )

    def evaluate_mask(self, X_t: np.ndarray, X: np.ndarray, column_index: int = None):
        """Write ``covered_mask(X)`` into column ``column_index`` of ``X_t``.

        ``X_t`` is modified in place and also returned.
        """
        X_t[:, column_index] = self.covered_mask(X)
        return X_t

    def __str__(self):
        if self.right is None:
            valueset = f"{{{self.left}}}"
        else:
            opening = "<" if self.leftClosed else "("
            closing = ">" if self.rightClosed else ")"
            lower = "-inf" if self.left == self.minus_inf else str(self.left)
            upper = "inf" if self.right == self.inf else str(self.right)
            valueset = f"{opening}{lower}, {upper}{closing}"
        return f"{self.attribute} = {valueset}"

    def __eq__(self, other):
        # the column index is deliberately not part of the identity
        if not isinstance(other, ElementaryCondition):
            return False
        return (
            (self.attribute == other.attribute)
            and (self.left == other.left)
            and (self.right == other.right)
            and (self.leftClosed == other.leftClosed)
            and (self.rightClosed == other.rightClosed)
        )

    def __hash__(self):
        return hash(
            (self.attribute, self.left, self.right,
             self.leftClosed, self.rightClosed)
        )
fill_value=True) 132 | uncovered_mask = (self.subconditions[0].uncovered_mask(X)) 133 | if self.operator == LogicalOperator.conjuction: 134 | for i in range(1, len(self.subconditions)): 135 | uncovered_mask &= self.subconditions[i].uncovered_mask(X) 136 | elif self.operator == LogicalOperator.alternative: 137 | for i in range(1, len(self.subconditions)): 138 | uncovered_mask |= self.subconditions[i].uncovered_mask(X) 139 | return uncovered_mask 140 | 141 | def positive_covered_mask(self, X: np.ndarray, y: np.ndarray, decision) -> np.ndarray: 142 | if len(self.subconditions) == 0: 143 | return np.full(X.shape[0], fill_value=False) 144 | positive_covered_mask = ( 145 | self.subconditions[0].positive_covered_mask(X, y, decision)) 146 | if self.operator == LogicalOperator.conjuction: 147 | for i in range(1, len(self.subconditions)): 148 | positive_covered_mask &= self.subconditions[i].positive_covered_mask( 149 | X, y, decision) 150 | elif self.operator == LogicalOperator.alternative: 151 | for i in range(1, len(self.subconditions)): 152 | positive_covered_mask |= self.subconditions[i].positive_covered_mask( 153 | X, y, decision) 154 | return positive_covered_mask 155 | 156 | def negative_covered_mask(self, X: np.ndarray, y: np.ndarray, decision) -> np.ndarray: 157 | if len(self.subconditions) == 0: 158 | return np.full(X.shape[0], fill_value=False) 159 | negative_covered_mask = ( 160 | self.subconditions[0].negative_covered_mask(X, y, decision)) 161 | if self.operator == LogicalOperator.conjuction: 162 | for i in range(1, len(self.subconditions)): 163 | negative_covered_mask &= self.subconditions[i].negative_covered_mask( 164 | X, y, decision) 165 | elif self.operator == LogicalOperator.alternative: 166 | for i in range(1, len(self.subconditions)): 167 | negative_covered_mask |= self.subconditions[i].negative_covered_mask( 168 | X, y, decision) 169 | return negative_covered_mask 170 | 171 | def add_subcondition(self, cnd: ElementaryCondition): 172 | 
self.subconditions.append(cnd) 173 | 174 | def add_subconditions(self, cnds: List[ElementaryCondition]): 175 | self.subconditions.extend(cnds) 176 | 177 | def get_subconditions(self): 178 | return self.subconditions 179 | 180 | def set_logical_operator(self, operator: LogicalOperator): 181 | self.operator = operator 182 | 183 | def __str__(self): 184 | s = "" 185 | operator = " AND " if ( 186 | self.operator == LogicalOperator.conjuction) else " OR " 187 | 188 | for i in range(len(self.subconditions)): 189 | s += self.subconditions[i].__str__() 190 | if i != (len(self.subconditions) - 1): 191 | s += operator 192 | return s 193 | 194 | def evaluate(self, ex): 195 | for condition in self.subconditions: 196 | partial = condition.evaluate(ex) 197 | if self.operator == LogicalOperator.conjuction and partial == False: 198 | return False 199 | elif self.operator == LogicalOperator.alternative and partial == True: 200 | return True 201 | return True if (self.operator == LogicalOperator.conjuction) else False 202 | 203 | 204 | class CompoundConditionWithCombiningOperators(CompoundCondition): 205 | 206 | def __init__(self) -> None: 207 | self.operator = LogicalOperator.conjuction 208 | self.subCompoundConditions = dict() 209 | 210 | def covered_mask(self, X: np.ndarray) -> np.ndarray: 211 | subCompoundConditions_keys = list(self.subCompoundConditions.keys()) 212 | if len(self.subCompoundConditions) == 0: 213 | return np.full(X.shape[0], fill_value=False) 214 | covered_mask = (self.subCompoundConditions[subCompoundConditions_keys[0]].covered_mask(X)) 215 | if self.operator == LogicalOperator.conjuction: 216 | for i in range(1, len(subCompoundConditions_keys)): 217 | covered_mask &= self.subCompoundConditions[subCompoundConditions_keys[i]].covered_mask(X) 218 | elif self.operator == LogicalOperator.alternative: 219 | for i in range(1, len(subCompoundConditions_keys)): 220 | covered_mask |= self.subCompoundConditions[subCompoundConditions_keys[i]].covered_mask(X) 221 | return 
covered_mask 222 | 223 | def uncovered_mask(self, X: np.ndarray) -> np.ndarray: 224 | subCompoundConditions_keys = list(self.subCompoundConditions.keys()) 225 | if len(self.subCompoundConditions) == 0: 226 | return np.full(X.shape[0], fill_value=True) 227 | uncovered_mask = (self.subCompoundConditions[subCompoundConditions_keys[0]].uncovered_mask(X)) 228 | if self.operator == LogicalOperator.conjuction: 229 | for i in range(1, len(subCompoundConditions_keys)): 230 | uncovered_mask &= self.subCompoundConditions[subCompoundConditions_keys[i]].uncovered_mask(X) 231 | elif self.operator == LogicalOperator.alternative: 232 | for i in range(1, len(subCompoundConditions_keys)): 233 | uncovered_mask |= self.subCompoundConditions[subCompoundConditions_keys[i]].uncovered_mask(X) 234 | return uncovered_mask 235 | 236 | def positive_covered_mask(self, X: np.ndarray, y: np.ndarray, decision) -> np.ndarray: 237 | subCompoundConditions_keys = list(self.subCompoundConditions.keys()) 238 | if len(self.subCompoundConditions) == 0: 239 | return np.full(X.shape[0], fill_value=False) 240 | positive_covered_mask = ( 241 | self.subCompoundConditions[subCompoundConditions_keys[0]].positive_covered_mask(X, y, decision)) 242 | if self.operator == LogicalOperator.conjuction: 243 | for i in range(1, len(subCompoundConditions_keys)): 244 | positive_covered_mask &= self.subCompoundConditions[subCompoundConditions_keys[i]].positive_covered_mask( 245 | X, y, decision) 246 | elif self.operator == LogicalOperator.alternative: 247 | for i in range(1, len(subCompoundConditions_keys)): 248 | positive_covered_mask |= self.subCompoundConditions[subCompoundConditions_keys[i]].positive_covered_mask( 249 | X, y, decision) 250 | return positive_covered_mask 251 | 252 | def negative_covered_mask(self, X: np.ndarray, y: np.ndarray, decision) -> np.ndarray: 253 | subCompoundConditions_keys = list(self.subCompoundConditions.keys()) 254 | if len(self.subCompoundConditions) == 0: 255 | return np.full(X.shape[0], 
fill_value=False) 256 | negative_covered_mask = ( 257 | self.subCompoundConditions[subCompoundConditions_keys[0]].negative_covered_mask(X, y, decision)) 258 | if self.operator == LogicalOperator.conjuction: 259 | for i in range(1, len(subCompoundConditions_keys)): 260 | negative_covered_mask &= self.subCompoundConditions[subCompoundConditions_keys[i]].negative_covered_mask( 261 | X, y, decision) 262 | elif self.operator == LogicalOperator.alternative: 263 | for i in range(1, len(subCompoundConditions_keys)): 264 | negative_covered_mask |= self.subCompoundConditions[subCompoundConditions_keys[i]].negative_covered_mask( 265 | X, y, decision) 266 | return negative_covered_mask 267 | 268 | def add_subcondition(self, cnd: ElementaryCondition): 269 | attr = cnd.attribute 270 | if attr in self.subCompoundConditions.keys(): 271 | compoundCondition = self.subCompoundConditions[attr] 272 | else: 273 | compoundCondition = CompoundCondition() 274 | compoundCondition.set_logical_operator(LogicalOperator.alternative) 275 | compoundCondition.add_subcondition(cnd) 276 | self.subCompoundConditions[attr] = compoundCondition 277 | 278 | def add_subconditions(self, cnds: List[ElementaryCondition]): 279 | for condition in cnds: 280 | self.add_subcondition(condition) 281 | 282 | def get_subconditions(self): 283 | all_conditions_list = [] 284 | for compundCondition in self.subCompoundConditions.values(): 285 | all_conditions_list.extend(compundCondition.get_subconditions()) 286 | return all_conditions_list 287 | 288 | def set_logical_operator(self, operator: LogicalOperator): 289 | self.operator = operator 290 | 291 | def __str__(self): 292 | s = "" 293 | operator = " AND " if ( 294 | self.operator == LogicalOperator.conjuction) else " OR " 295 | operator_internal = " OR " if ( 296 | self.operator == LogicalOperator.conjuction) else " AND " 297 | 298 | for compound_condition in self.subCompoundConditions.values(): 299 | s += "[" 300 | for condition_base in 
def covers(self, x):
    """Count examples covered by this regression rule.

    The rows are ordered by the label column (``self.consequence.attribute``).
    Among the rows satisfying the premise, the label of the middle one is
    taken as the median and the population standard deviation of their labels
    as the tolerance.  An example is *positive* when its label lies within
    that tolerance of the median, otherwise *negative*.

    Unlike the previous implementation this works on a sorted copy, so the
    caller's DataFrame keeps its row order.  The premise is evaluated once
    per row (it is assumed to be free of side effects).

    Parameters
    ----------
    x : pandas.DataFrame
        Examples including the label column.

    Returns
    -------
    tuple
        ``(p, n, P, N)``: covered positives, covered negatives, all
        positives, all negatives.  When no row is covered the result is
        ``(0, 0, 0, len(x))``.
    """
    label_name = self.consequence.attribute
    ordered = x.sort_values(label_name)  # sorted copy, input left untouched
    labels = ordered[label_name].to_numpy()
    total = ordered.shape[0]

    covered = [bool(self.premise.evaluate(ordered.iloc[i])) for i in range(total)]
    covered_ids = [i for i, is_covered in enumerate(covered) if is_covered]
    if not covered_ids:
        return 0, 0, 0, total

    sum_y = 0.0
    sum_y2 = 0.0
    for i in covered_ids:
        y = labels[i]
        sum_y += y
        sum_y2 += y * y

    covered_count = len(covered_ids)
    mean_y = sum_y / covered_count
    # rounding can push the variance marginally below zero; clamp it so the
    # tolerance never becomes NaN
    variance = max(sum_y2 / covered_count - mean_y * mean_y, 0.0)
    stddev_y = np.sqrt(variance)

    # rows are sorted by label, so the middle covered row holds the median
    median_y = labels[covered_ids[covered_count // 2]]

    P = 0
    p = 0
    for y, is_covered in zip(labels, covered):
        if np.abs(y - median_y) <= stddev_y:  # inside the tolerance band
            P += 1
            if is_covered:
                p += 1

    return p, covered_count - p, P, total - P
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | import os 3 | import io 4 | 5 | current_path = os.path.dirname(os.path.realpath(__file__)) 6 | 7 | with io.open(f"{current_path}/README.md", mode="r", encoding="utf-8") as fh: 8 | long_description = fh.read() 9 | 10 | 11 | setuptools.setup( 12 | name="rulexai", 13 | version="1.1.0", 14 | author="Dawid Macha", 15 | author_email="dawid.m.macha@gmail.com", 16 | description="RuleXAI is a rule-based aproach to explain the output of any machine learning model. It is suitable for classification, regression and survival tasks.", 17 | long_description=long_description, 18 | long_description_content_type="text/markdown", 19 | url="https://github.com/adaa-polsl/RuleXAI", 20 | packages=setuptools.find_packages(), 21 | classifiers=[ 22 | "Development Status :: 5 - Production/Stable", 23 | "License :: OSI Approved :: GNU Affero General Public License v3", 24 | "Programming Language :: Python :: 3", 25 | "Operating System :: Microsoft :: Windows", 26 | "Operating System :: Unix", 27 | "Topic :: Scientific/Engineering", 28 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 29 | "Intended Audience :: Developers", 30 | "Intended Audience :: Science/Research", 31 | ], 32 | include_package_data=True, 33 | python_requires=">=3.9", 34 | install_requires=[ 35 | "pandas >= 1.5.0, < 2.3.0", 36 | "numpy ~= 1.26.4", 37 | "matplotlib ~= 3.8.3", 38 | "rulekit ~= 1.7.6", 39 | "lifelines ~= 0.28.0" 40 | ], 41 | test_suite="tests", 42 | ) 43 | -------------------------------------------------------------------------------- /tests/resources/classification/iris.arff: -------------------------------------------------------------------------------- 1 | % 1. Title: Iris Plants Database 2 | % 3 | % 2. Sources: 4 | % (a) Creator: R.A. 
Fisher 5 | % (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov) 6 | % (c) Date: July, 1988 7 | % 8 | % 3. Past Usage: 9 | % - Publications: too many to mention!!! Here are a few. 10 | % 1. Fisher,R.A. "The use of multiple measurements in taxonomic problems" 11 | % Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions 12 | % to Mathematical Statistics" (John Wiley, NY, 1950). 13 | % 2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis. 14 | % (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218. 15 | % 3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System 16 | % Structure and Classification Rule for Recognition in Partially Exposed 17 | % Environments". IEEE Transactions on Pattern Analysis and Machine 18 | % Intelligence, Vol. PAMI-2, No. 1, 67-71. 19 | % -- Results: 20 | % -- very low misclassification rates (0% for the setosa class) 21 | % 4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE 22 | % Transactions on Information Theory, May 1972, 431-433. 23 | % -- Results: 24 | % -- very low misclassification rates again 25 | % 5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II 26 | % conceptual clustering system finds 3 classes in the data. 27 | % 28 | % 4. Relevant Information: 29 | % --- This is perhaps the best known database to be found in the pattern 30 | % recognition literature. Fisher's paper is a classic in the field 31 | % and is referenced frequently to this day. (See Duda & Hart, for 32 | % example.) The data set contains 3 classes of 50 instances each, 33 | % where each class refers to a type of iris plant. One class is 34 | % linearly separable from the other 2; the latter are NOT linearly 35 | % separable from each other. 36 | % --- Predicted attribute: class of iris plant. 37 | % --- This is an exceedingly simple domain. 38 | % 39 | % 5. Number of Instances: 150 (50 in each of three classes) 40 | % 41 | % 6. 
Number of Attributes: 4 numeric, predictive attributes and the class 42 | % 43 | % 7. Attribute Information: 44 | % 1. sepal length in cm 45 | % 2. sepal width in cm 46 | % 3. petal length in cm 47 | % 4. petal width in cm 48 | % 5. class: 49 | % -- Iris Setosa 50 | % -- Iris Versicolour 51 | % -- Iris Virginica 52 | % 53 | % 8. Missing Attribute Values: None 54 | % 55 | % Summary Statistics: 56 | % Min Max Mean SD Class Correlation 57 | % sepal length: 4.3 7.9 5.84 0.83 0.7826 58 | % sepal width: 2.0 4.4 3.05 0.43 -0.4194 59 | % petal length: 1.0 6.9 3.76 1.76 0.9490 (high!) 60 | % petal width: 0.1 2.5 1.20 0.76 0.9565 (high!) 61 | % 62 | % 9. Class Distribution: 33.3% for each of 3 classes. 63 | 64 | @RELATION iris 65 | 66 | @ATTRIBUTE sepallength REAL 67 | @ATTRIBUTE sepalwidth REAL 68 | @ATTRIBUTE petallength REAL 69 | @ATTRIBUTE petalwidth REAL 70 | @ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica} 71 | 72 | @DATA 73 | 5.1,3.5,1.4,0.2,Iris-setosa 74 | 4.9,3.0,1.4,0.2,Iris-setosa 75 | 4.7,3.2,1.3,0.2,Iris-setosa 76 | 4.6,3.1,1.5,0.2,Iris-setosa 77 | 5.0,3.6,1.4,0.2,Iris-setosa 78 | 5.4,3.9,1.7,0.4,Iris-setosa 79 | 4.6,3.4,1.4,0.3,Iris-setosa 80 | 5.0,3.4,1.5,0.2,Iris-setosa 81 | 4.4,2.9,1.4,0.2,Iris-setosa 82 | 4.9,3.1,1.5,0.1,Iris-setosa 83 | 5.4,3.7,1.5,0.2,Iris-setosa 84 | 4.8,3.4,1.6,0.2,Iris-setosa 85 | 4.8,3.0,1.4,0.1,Iris-setosa 86 | 4.3,3.0,1.1,0.1,Iris-setosa 87 | 5.8,4.0,1.2,0.2,Iris-setosa 88 | 5.7,4.4,1.5,0.4,Iris-setosa 89 | 5.4,3.9,1.3,0.4,Iris-setosa 90 | 5.1,3.5,1.4,0.3,Iris-setosa 91 | 5.7,3.8,1.7,0.3,Iris-setosa 92 | 5.1,3.8,1.5,0.3,Iris-setosa 93 | 5.4,3.4,1.7,0.2,Iris-setosa 94 | 5.1,3.7,1.5,0.4,Iris-setosa 95 | 4.6,3.6,1.0,0.2,Iris-setosa 96 | 5.1,3.3,1.7,0.5,Iris-setosa 97 | 4.8,3.4,1.9,0.2,Iris-setosa 98 | 5.0,3.0,1.6,0.2,Iris-setosa 99 | 5.0,3.4,1.6,0.4,Iris-setosa 100 | 5.2,3.5,1.5,0.2,Iris-setosa 101 | 5.2,3.4,1.4,0.2,Iris-setosa 102 | 4.7,3.2,1.6,0.2,Iris-setosa 103 | 4.8,3.1,1.6,0.2,Iris-setosa 104 | 
5.4,3.4,1.5,0.4,Iris-setosa 105 | 5.2,4.1,1.5,0.1,Iris-setosa 106 | 5.5,4.2,1.4,0.2,Iris-setosa 107 | 4.9,3.1,1.5,0.1,Iris-setosa 108 | 5.0,3.2,1.2,0.2,Iris-setosa 109 | 5.5,3.5,1.3,0.2,Iris-setosa 110 | 4.9,3.1,1.5,0.1,Iris-setosa 111 | 4.4,3.0,1.3,0.2,Iris-setosa 112 | 5.1,3.4,1.5,0.2,Iris-setosa 113 | 5.0,3.5,1.3,0.3,Iris-setosa 114 | 4.5,2.3,1.3,0.3,Iris-setosa 115 | 4.4,3.2,1.3,0.2,Iris-setosa 116 | 5.0,3.5,1.6,0.6,Iris-setosa 117 | 5.1,3.8,1.9,0.4,Iris-setosa 118 | 4.8,3.0,1.4,0.3,Iris-setosa 119 | 5.1,3.8,1.6,0.2,Iris-setosa 120 | 4.6,3.2,1.4,0.2,Iris-setosa 121 | 5.3,3.7,1.5,0.2,Iris-setosa 122 | 5.0,3.3,1.4,0.2,Iris-setosa 123 | 7.0,3.2,4.7,1.4,Iris-versicolor 124 | 6.4,3.2,4.5,1.5,Iris-versicolor 125 | 6.9,3.1,4.9,1.5,Iris-versicolor 126 | 5.5,2.3,4.0,1.3,Iris-versicolor 127 | 6.5,2.8,4.6,1.5,Iris-versicolor 128 | 5.7,2.8,4.5,1.3,Iris-versicolor 129 | 6.3,3.3,4.7,1.6,Iris-versicolor 130 | 4.9,2.4,3.3,1.0,Iris-versicolor 131 | 6.6,2.9,4.6,1.3,Iris-versicolor 132 | 5.2,2.7,3.9,1.4,Iris-versicolor 133 | 5.0,2.0,3.5,1.0,Iris-versicolor 134 | 5.9,3.0,4.2,1.5,Iris-versicolor 135 | 6.0,2.2,4.0,1.0,Iris-versicolor 136 | 6.1,2.9,4.7,1.4,Iris-versicolor 137 | 5.6,2.9,3.6,1.3,Iris-versicolor 138 | 6.7,3.1,4.4,1.4,Iris-versicolor 139 | 5.6,3.0,4.5,1.5,Iris-versicolor 140 | 5.8,2.7,4.1,1.0,Iris-versicolor 141 | 6.2,2.2,4.5,1.5,Iris-versicolor 142 | 5.6,2.5,3.9,1.1,Iris-versicolor 143 | 5.9,3.2,4.8,1.8,Iris-versicolor 144 | 6.1,2.8,4.0,1.3,Iris-versicolor 145 | 6.3,2.5,4.9,1.5,Iris-versicolor 146 | 6.1,2.8,4.7,1.2,Iris-versicolor 147 | 6.4,2.9,4.3,1.3,Iris-versicolor 148 | 6.6,3.0,4.4,1.4,Iris-versicolor 149 | 6.8,2.8,4.8,1.4,Iris-versicolor 150 | 6.7,3.0,5.0,1.7,Iris-versicolor 151 | 6.0,2.9,4.5,1.5,Iris-versicolor 152 | 5.7,2.6,3.5,1.0,Iris-versicolor 153 | 5.5,2.4,3.8,1.1,Iris-versicolor 154 | 5.5,2.4,3.7,1.0,Iris-versicolor 155 | 5.8,2.7,3.9,1.2,Iris-versicolor 156 | 6.0,2.7,5.1,1.6,Iris-versicolor 157 | 5.4,3.0,4.5,1.5,Iris-versicolor 158 | 
6.0,3.4,4.5,1.6,Iris-versicolor 159 | 6.7,3.1,4.7,1.5,Iris-versicolor 160 | 6.3,2.3,4.4,1.3,Iris-versicolor 161 | 5.6,3.0,4.1,1.3,Iris-versicolor 162 | 5.5,2.5,4.0,1.3,Iris-versicolor 163 | 5.5,2.6,4.4,1.2,Iris-versicolor 164 | 6.1,3.0,4.6,1.4,Iris-versicolor 165 | 5.8,2.6,4.0,1.2,Iris-versicolor 166 | 5.0,2.3,3.3,1.0,Iris-versicolor 167 | 5.6,2.7,4.2,1.3,Iris-versicolor 168 | 5.7,3.0,4.2,1.2,Iris-versicolor 169 | 5.7,2.9,4.2,1.3,Iris-versicolor 170 | 6.2,2.9,4.3,1.3,Iris-versicolor 171 | 5.1,2.5,3.0,1.1,Iris-versicolor 172 | 5.7,2.8,4.1,1.3,Iris-versicolor 173 | 6.3,3.3,6.0,2.5,Iris-virginica 174 | 5.8,2.7,5.1,1.9,Iris-virginica 175 | 7.1,3.0,5.9,2.1,Iris-virginica 176 | 6.3,2.9,5.6,1.8,Iris-virginica 177 | 6.5,3.0,5.8,2.2,Iris-virginica 178 | 7.6,3.0,6.6,2.1,Iris-virginica 179 | 4.9,2.5,4.5,1.7,Iris-virginica 180 | 7.3,2.9,6.3,1.8,Iris-virginica 181 | 6.7,2.5,5.8,1.8,Iris-virginica 182 | 7.2,3.6,6.1,2.5,Iris-virginica 183 | 6.5,3.2,5.1,2.0,Iris-virginica 184 | 6.4,2.7,5.3,1.9,Iris-virginica 185 | 6.8,3.0,5.5,2.1,Iris-virginica 186 | 5.7,2.5,5.0,2.0,Iris-virginica 187 | 5.8,2.8,5.1,2.4,Iris-virginica 188 | 6.4,3.2,5.3,2.3,Iris-virginica 189 | 6.5,3.0,5.5,1.8,Iris-virginica 190 | 7.7,3.8,6.7,2.2,Iris-virginica 191 | 7.7,2.6,6.9,2.3,Iris-virginica 192 | 6.0,2.2,5.0,1.5,Iris-virginica 193 | 6.9,3.2,5.7,2.3,Iris-virginica 194 | 5.6,2.8,4.9,2.0,Iris-virginica 195 | 7.7,2.8,6.7,2.0,Iris-virginica 196 | 6.3,2.7,4.9,1.8,Iris-virginica 197 | 6.7,3.3,5.7,2.1,Iris-virginica 198 | 7.2,3.2,6.0,1.8,Iris-virginica 199 | 6.2,2.8,4.8,1.8,Iris-virginica 200 | 6.1,3.0,4.9,1.8,Iris-virginica 201 | 6.4,2.8,5.6,2.1,Iris-virginica 202 | 7.2,3.0,5.8,1.6,Iris-virginica 203 | 7.4,2.8,6.1,1.9,Iris-virginica 204 | 7.9,3.8,6.4,2.0,Iris-virginica 205 | 6.4,2.8,5.6,2.2,Iris-virginica 206 | 6.3,2.8,5.1,1.5,Iris-virginica 207 | 6.1,2.6,5.6,1.4,Iris-virginica 208 | 7.7,3.0,6.1,2.3,Iris-virginica 209 | 6.3,3.4,5.6,2.4,Iris-virginica 210 | 6.4,3.1,5.5,1.8,Iris-virginica 211 | 
6.0,3.0,4.8,1.8,Iris-virginica 212 | 6.9,3.1,5.4,2.1,Iris-virginica 213 | 6.7,3.1,5.6,2.4,Iris-virginica 214 | 6.9,3.1,5.1,2.3,Iris-virginica 215 | 5.8,2.7,5.1,1.9,Iris-virginica 216 | 6.8,3.2,5.9,2.3,Iris-virginica 217 | 6.7,3.3,5.7,2.5,Iris-virginica 218 | 6.7,3.0,5.2,2.3,Iris-virginica 219 | 6.3,2.5,5.0,1.9,Iris-virginica 220 | 6.5,3.0,5.2,2.0,Iris-virginica 221 | 6.2,3.4,5.4,2.3,Iris-virginica 222 | 5.9,3.0,5.1,1.8,Iris-virginica 223 | % 224 | % 225 | % 226 | -------------------------------------------------------------------------------- /tests/resources/classification/results.csv: -------------------------------------------------------------------------------- 1 | Iris-setosa | conditions_names;Iris-setosa | importances;Iris-versicolor | conditions_names;Iris-versicolor | importances;Iris-virginica | conditions_names;Iris-virginica | importances 2 | petallength = (-inf, 2.45);1.0;petallength = <2.45, 4.75);0.8234509803921568;petallength = <4.75, inf);0.5136838148584906 3 | ;;petallength = <2.45, 5.35);0.3740469221835075;petalwidth = <1.75, inf);0.496569693094629 4 | ;;petallength = (-inf, 4.75);0.2981388613042039;petallength = <4.85, inf);0.43343030690537093 5 | ;;sepalwidth = (-inf, 2.95);0.25828887286709923;petalwidth = <1.45, inf);0.32763693985849063 6 | ;;sepalwidth = (-inf, 2.45);0.25589852008456654;; 7 | ;;petalwidth = (-inf, 1.75);0.18359664404223222;; 8 | ;;petalwidth = (-inf, 1.65);0.11654901960784321;; 9 | ;;sepallength = <4.95, inf);0.0870106947586727;; 10 | ;;sepallength = <4.7, inf);0.04013829787234042;; 11 | -------------------------------------------------------------------------------- /tests/resources/classification/results_split.csv: -------------------------------------------------------------------------------- 1 | Iris-setosa | conditions_names;Iris-setosa | importances;Iris-versicolor | conditions_names;Iris-versicolor | importances;Iris-virginica | conditions_names;Iris-virginica | importances 2 | petallength = (-inf, 
2.45);1.0;petallength = <2.45, 4.75);1.4069;petalwidth = <1.75, inf);0.6118 3 | ;;petalwidth = <1.45, 1.65);0.2591;petallength = <5.35, inf);0.5545 4 | ;;sepalwidth = (-inf, 2.45);0.2554;petallength = <4.85, 5.35);0.3008 5 | ;;petalwidth = (-inf, 1.45);0.2154;petallength = <4.75, 4.85);0.0211 6 | ;;sepalwidth = <2.45, 2.95);0.0913;petalwidth = <1.65, 1.75);0.017 7 | ;;sepallength = <4.95, inf);0.0777;petalwidth = <1.45, 1.65);-0.0273 8 | ;;petallength = <4.75, 4.85);0.0201;; 9 | ;;petalwidth = <1.65, 1.75);0.0197;; 10 | ;;petallength = <4.85, 5.35);-0.0192;; 11 | ;;sepallength = <4.7, 4.95);-0.0372;; 12 | ;;petallength = (-inf, 2.45);-0.1;; 13 | -------------------------------------------------------------------------------- /tests/resources/features_importances.csv: -------------------------------------------------------------------------------- 1 | 0 | attributes;0 | importances;1 | attributes;1 | importances;2 | attributes;2 | importances 2 | petal length (cm);1.0;petal length (cm);1.4956367638798682;petal length (cm);0.9471141217638614 3 | ;;sepal width (cm);0.5141873929516658;petal width (cm);0.8242066329531197 4 | ;;petal width (cm);0.30014566365007544;; 5 | ;;sepal length (cm);0.12714899263101312;; 6 | -------------------------------------------------------------------------------- /tests/resources/regression/diabetes.arff: -------------------------------------------------------------------------------- 1 | @relation diabetes 2 | @attribute age numeric 3 | @attribute deficit numeric 4 | @attribute class numeric 5 | @data 6 | 5.2,-8.1,4.8 7 | 8.8,-16.1,4.1 8 | 10.5,-0.9,5.2 9 | 10.6,-7.8,5.5 10 | 10.4,-29,5 11 | 1.8,-19.2,3.4 12 | 12.7,-18.9,3.4 13 | 15.6,-10.6,4.9 14 | 5.8,-2.8,5.6 15 | 1.9,-25,3.7 16 | 2.2,-3.1,3.9 17 | 4.8,-7.8,4.5 18 | 7.9,-13.9,4.8 19 | 5.2,-4.5,4.9 20 | 0.9,-11.6,3 21 | 11.8,-2.1,4.6 22 | 7.9,-2,4.8 23 | 11.5,-9,5.5 24 | 10.6,-11.2,4.5 25 | 8.5,-0.2,5.3 26 | 11.1,-6.1,4.7 27 | 12.8,-1,6.6 28 | 11.3,-3.6,5.1 29 | 1,-8.2,3.9 30 | 
14.5,-0.5,5.7 31 | 11.9,-2,5.1 32 | 8.1,-1.6,5.2 33 | 13.8,-11.9,3.7 34 | 15.5,-0.7,4.9 35 | 9.8,-1.2,4.8 36 | 11,-14.3,4.4 37 | 12.4,-0.8,5.2 38 | 11.1,-16.8,5.1 39 | 5.1,-5.1,4.6 40 | 4.8,-9.5,3.9 41 | 4.2,-17,5.1 42 | 6.9,-3.3,5.1 43 | 13.2,-0.7,6 44 | 9.9,-3.3,4.9 45 | 12.5,-13.6,4.1 46 | 13.2,-1.9,4.6 47 | 8.9,-10,4.9 48 | 10.8,-13.5,5.1 49 | -------------------------------------------------------------------------------- /tests/resources/regression/results.csv: -------------------------------------------------------------------------------- 1 | conditions;importances 2 | age = (-inf, 3.2);0.598015873015873 3 | age = <6.35, 11.4);0.47138047138047134 4 | deficit = <-6.95, inf);0.4384617898711036 5 | age = <3.5, 13.85);0.26523427274457995 6 | age = <6.35, inf);0.2532969417601067 7 | deficit = <-8.15, -0.45);0.22110831984801255 8 | deficit = <-13.55, inf);0.12160660762260922 9 | age = (-inf, 13.0);0.0934009552269355 10 | -------------------------------------------------------------------------------- /tests/resources/regression/results_split.csv: -------------------------------------------------------------------------------- 1 | conditions;importances 2 | age = <6.35, 11.4);0.8521793135064287 3 | age = (-inf, 3.2);0.7091553650445686 4 | deficit = <-0.45, inf);0.2753641116521085 5 | deficit = <-6.95, -0.45);0.1819302480607627 6 | age = <3.5, 6.35);0.06542692694361696 7 | deficit = <-8.15, -6.95);0.02060332289928545 8 | age = <13.85, inf);0.01639772819856604 9 | age = <3.2, 3.5);0.0 10 | deficit = <-13.55, -8.15);-0.007044158555616169 11 | age = <11.4, 13.0);-0.11752390907377938 12 | age = <13.0, 13.85);-0.15006381616152903 13 | -------------------------------------------------------------------------------- /tests/resources/survival/pbc.arff: -------------------------------------------------------------------------------- 1 | @relation pbc 2 | @attribute survival_time numeric 3 | @attribute survival_status numeric 4 | @attribute trt numeric 5 | @attribute age 
numeric 6 | @attribute sex {'f','m'} 7 | @attribute ascites numeric 8 | @attribute hepato numeric 9 | @attribute spiders numeric 10 | @attribute edema numeric 11 | @attribute bili numeric 12 | @attribute chol numeric 13 | @attribute albumin numeric 14 | @attribute copper numeric 15 | @attribute alk.phos numeric 16 | @attribute ast numeric 17 | @attribute trig numeric 18 | @attribute platelet numeric 19 | @attribute protime numeric 20 | @attribute stage numeric 21 | @data 22 | 400,1,1,58.765229,'f',1,1,1,1,14.5,261,2.6,156,1718,137.95,172,190,12.2,4 23 | 4500,0,1,56.44627,'f',0,1,1,0,1.1,302,4.14,54,7394.8,113.52,88,221,10.6,3 24 | 1012,1,1,70.072553,'m',0,0,0,0.5,1.4,176,3.48,210,516,96.1,55,151,12,4 25 | 1925,1,1,54.740589,'f',0,1,1,0.5,1.8,244,2.54,64,6121.8,60.63,92,183,10.3,4 26 | 2503,1,2,66.258727,'f',0,1,0,0,0.8,248,3.98,50,944,93,63,?,11,3 27 | 1832,0,2,55.534565,'f',0,1,0,0,1,322,4.09,52,824,60.45,213,204,9.7,3 28 | 2466,1,2,53.05681,'f',0,0,0,0,0.3,280,4,52,4651.2,28.38,189,373,11,3 29 | 2400,1,1,42.507871,'f',0,0,1,0,3.2,562,3.08,79,2276,144.15,88,251,11,2 30 | 51,1,2,70.55989,'f',1,0,1,1,12.6,200,2.74,140,918,147.25,143,302,11.5,4 31 | 3762,1,2,53.713895,'f',0,1,1,0,1.4,259,4.16,46,1104,79.05,79,258,12,4 32 | 304,1,2,59.137577,'f',0,0,1,0,3.6,236,3.52,94,591,82.15,95,71,13.6,4 33 | 3577,0,2,45.689254,'f',0,0,0,0,0.7,281,3.85,40,1181,88.35,130,244,10.6,3 34 | 1217,1,2,56.221766,'m',1,1,0,1,0.8,?,2.27,43,728,71,?,156,11,4 35 | 3584,1,1,64.646133,'f',0,0,0,0,0.8,231,3.87,173,9009.8,127.71,96,295,11,3 36 | 3672,0,2,40.443532,'f',0,0,0,0,0.7,204,3.66,28,685,72.85,58,198,10.8,3 37 | 769,1,2,52.183436,'f',0,1,0,0,2.7,274,3.15,159,1533,117.8,128,224,10.5,4 38 | 131,1,1,53.930185,'f',0,1,1,1,11.4,178,2.8,588,961,280.55,200,283,12.4,4 39 | 4232,0,1,49.560575,'f',0,1,0,0.5,0.7,235,3.56,39,1881,93,123,209,11,3 40 | 1356,1,2,59.953457,'f',0,1,0,0,5.1,374,3.51,140,1919,122.45,135,322,13,4 41 | 3445,0,2,64.188912,'m',0,1,1,0,0.6,252,3.83,41,843,65.1,83,336,11.4,4 42 | 
673,1,1,56.276523,'f',0,0,1,0,3.4,271,3.63,464,1376,120.9,55,173,11.6,4 43 | 264,1,2,55.967146,'f',1,1,1,1,17.4,395,2.94,558,6064.8,227.04,191,214,11.7,4 44 | 4079,1,1,44.520192,'m',0,1,0,0,2.1,456,4,124,5719,221.88,230,70,9.9,2 45 | 4127,0,2,45.073238,'f',0,0,0,0,0.7,298,4.1,40,661,106.95,66,324,11.3,2 46 | 1444,1,2,52.024641,'f',0,1,1,0,5.2,1128,3.68,53,3228,165.85,166,421,9.9,3 47 | 77,1,2,54.439425,'f',1,1,1,0.5,21.6,175,3.31,221,3697.4,101.91,168,80,12,4 48 | 549,1,2,44.947296,'f',1,1,1,1,17.2,222,3.23,209,1975,189.1,195,144,13,4 49 | 4509,0,2,63.876797,'f',0,0,0,0,0.7,370,3.78,24,5833,73.53,86,390,10.6,2 50 | 321,1,2,41.385352,'f',0,1,1,0,3.6,260,2.54,172,7277,121.26,158,124,11,4 51 | 3839,1,2,41.552361,'f',0,1,0,0,4.7,296,3.44,114,9933.2,206.4,101,195,10.3,2 52 | 4523,0,2,53.995893,'f',0,1,0,0,1.8,262,3.34,101,7277,82.56,158,286,10.6,4 53 | 3170,1,2,51.282683,'f',0,0,0,0,0.8,210,3.19,82,1592,218.55,113,180,12,3 54 | 3933,0,1,52.060233,'f',0,0,0,0,0.8,364,3.7,37,1840,170.5,64,273,10.5,2 55 | 2847,1,2,48.618754,'f',0,0,0,0,1.2,314,3.2,201,12258.8,72.24,151,431,10.6,3 56 | 3611,0,2,56.410678,'f',0,0,0,0,0.3,172,3.39,18,558,71.3,96,311,10.6,2 57 | 223,1,1,61.727584,'f',1,1,0,1,7.1,334,3.01,150,6931.2,180.6,118,102,12,4 58 | 3244,1,2,36.626968,'f',0,1,1,0,3.3,383,3.53,102,1234,137.95,87,234,11,4 59 | 2297,1,1,55.392197,'f',0,1,0,0,0.7,282,3,52,9066.8,72.24,111,563,10.6,4 60 | 4467,0,1,46.669405,'f',0,0,0,0,1.3,?,3.34,105,11046.6,104.49,?,358,11,4 61 | 1350,1,1,33.634497,'f',0,1,0,0,6.8,?,3.26,96,1215,151.9,?,226,11.7,4 62 | 4453,0,2,33.69473,'f',0,1,1,0,2.1,?,3.54,122,8778,56.76,?,344,11,4 63 | 4556,0,1,48.870637,'f',0,0,0,0,1.1,361,3.64,36,5430.2,67.08,89,203,10.6,2 64 | 3428,1,2,37.582478,'f',0,1,1,1,3.3,299,3.55,131,1029,119.35,50,199,11.7,3 65 | 4025,0,2,41.793292,'f',0,0,0,0,0.6,?,3.93,19,1826,71.3,?,474,10.9,2 66 | 2256,1,1,45.798768,'f',0,1,0,0,5.7,482,2.84,161,11552,136.74,165,518,12.7,3 67 | 
2576,0,2,47.427789,'f',0,0,0,0,0.5,316,3.65,68,1716,187.55,71,356,9.8,3 68 | 4427,0,2,49.136208,'m',0,0,0,0,1.9,259,3.7,281,10396.8,188.34,178,214,11,3 69 | 708,1,2,61.152635,'f',0,1,0,0,0.8,?,3.82,58,678,97.65,?,233,11,4 70 | 2598,1,1,53.508556,'f',0,1,0,0,1.1,257,3.36,43,1080,106.95,73,128,10.6,4 71 | 3853,1,2,52.087611,'f',0,0,0,0,0.8,276,3.6,54,4332,99.33,143,273,10.6,2 72 | 2386,1,1,50.540726,'m',0,0,0,0,6,614,3.7,158,5084.4,206.4,93,362,10.6,1 73 | 1000,1,1,67.408624,'f',0,1,0,0,2.6,?,3.1,94,6456.2,56.76,?,214,11,4 74 | 1434,1,1,39.19781,'f',1,1,1,1,1.3,288,3.4,262,5487.2,73.53,125,254,11,4 75 | 1360,1,1,65.763176,'m',0,0,0,0,1.8,416,3.94,121,10165,79.98,219,213,11,3 76 | 1847,1,2,33.61807,'f',0,1,1,0,1.1,498,3.8,88,13862.4,95.46,319,365,10.6,2 77 | 3282,1,1,53.571526,'f',0,1,0,0.5,2.3,260,3.18,231,11320.2,105.78,94,216,12.4,3 78 | 4459,0,1,44.569473,'m',0,0,0,0,0.7,242,4.08,73,5890,56.76,118,?,10.6,1 79 | 2224,1,1,40.394251,'f',0,1,1,0,0.8,329,3.5,49,7622.8,126.42,124,321,10.6,3 80 | 4365,0,1,58.38193,'f',0,0,0,0,0.9,604,3.4,82,876,71.3,58,228,10.3,3 81 | 4256,0,2,43.8987,'m',0,0,0,0,0.6,216,3.94,28,601,60.45,188,211,13,1 82 | 3090,1,2,60.706366,'f',1,0,0,0,1.3,302,2.75,58,1523,43.4,112,329,13.2,4 83 | 859,1,2,46.628337,'f',0,0,1,1,22.5,932,3.12,95,5396,244.9,133,165,11.6,3 84 | 1487,1,2,62.907598,'f',0,1,0,0,2.1,373,3.5,52,1009,150.35,188,178,11,3 85 | 3992,0,1,40.202601,'f',0,0,0,0,1.2,256,3.6,74,724,141.05,108,430,10,1 86 | 4191,1,1,46.453114,'m',0,1,0,0,1.4,427,3.7,105,1909,182.9,171,123,11,3 87 | 2769,1,2,51.288159,'f',0,0,0,0,1.1,466,3.91,84,1787,328.6,185,261,10,3 88 | 4039,0,1,32.613279,'f',0,0,0,0,0.7,174,4.09,58,642,71.3,46,203,10.6,3 89 | 1170,1,1,49.338809,'f',0,1,1,0.5,20,652,3.46,159,3292,215.45,184,227,12.4,3 90 | 3458,0,1,56.399726,'f',0,0,0,0,0.6,?,4.64,20,666,54.25,?,265,10.6,2 91 | 4196,0,2,48.845996,'f',0,1,0,0,1.2,258,3.57,79,2201,120.9,76,410,11.5,4 92 | 4184,0,2,32.492813,'f',0,0,0,0,0.5,320,3.54,51,1243,122.45,80,225,10,3 93 | 
4190,0,2,38.494182,'f',0,0,0,0,0.7,132,3.6,17,423,49.6,56,265,11,1 94 | 1827,1,1,51.920602,'f',0,1,1,0,8.4,558,3.99,280,967,89.9,309,278,11,4 95 | 1191,1,1,43.518138,'f',1,1,1,0.5,17.1,674,2.53,207,2078,182.9,598,268,11.5,4 96 | 71,1,1,51.942505,'f',0,1,1,0.5,12.2,394,3.08,111,2132,155,243,165,11.6,4 97 | 326,1,2,49.826146,'f',0,1,1,0.5,6.6,244,3.41,199,1819,170.5,91,132,12.1,3 98 | 1690,1,1,47.945243,'f',0,1,0,0,6.3,436,3.02,75,2176,170.5,104,236,10.6,4 99 | 3707,0,1,46.516085,'f',0,1,0,0,0.8,315,4.24,13,1637,170.5,70,426,10.9,3 100 | 890,1,2,67.411362,'m',0,1,0,0,7.2,247,3.72,269,1303,176.7,91,360,11.2,4 101 | 2540,1,1,63.263518,'f',0,1,1,0,14.4,448,3.65,34,1218,60.45,318,385,11.7,4 102 | 3574,1,1,67.310062,'f',0,0,0,0,4.5,472,4.09,154,1580,117.8,272,412,11.1,3 103 | 4050,0,1,56.013689,'f',0,1,0,0.5,1.3,250,3.5,48,1138,71.3,100,81,12.9,4 104 | 4032,0,2,55.830253,'f',0,0,0,0,0.4,263,3.76,29,1345,137.95,74,181,11.2,3 105 | 3358,1,2,47.216975,'f',0,1,0,0,2.1,262,3.48,58,2045,89.9,84,225,11.5,4 106 | 1657,1,1,52.758385,'f',0,1,1,0,5,1600,3.21,75,2656,82.15,174,181,10.9,3 107 | 198,1,1,37.278576,'f',0,0,0,0,1.1,345,4.4,75,1860,218.55,72,447,10.7,3 108 | 2452,0,2,41.393566,'f',0,0,0,0.5,0.6,296,4.06,37,1032,80.6,83,442,12,3 109 | 1741,1,1,52.443532,'f',0,1,0,0,2,408,3.65,50,1083,110.05,98,200,11.4,2 110 | 2689,1,1,33.475702,'m',0,0,0,0,1.6,660,4.22,94,1857,151.9,155,337,11,2 111 | 460,1,2,45.607118,'f',0,1,1,0.5,5,325,3.47,110,2460,246.45,56,430,11.9,4 112 | 388,1,1,76.709103,'f',1,0,0,1,1.4,206,3.13,36,1626,86.8,70,145,12.2,4 113 | 3913,0,1,36.533881,'f',0,0,0,0,1.3,353,3.67,73,2039,232.5,68,380,11.1,2 114 | 750,1,1,53.916496,'f',0,1,1,0,3.2,201,3.11,178,1212,159.65,69,188,11.8,4 115 | 130,1,2,46.390144,'f',1,1,1,1,17.4,?,2.64,182,559,119.35,?,401,11.7,2 116 | 3850,0,1,48.845996,'f',0,0,0,0,1,?,3.7,33,1258,99.2,?,338,10.4,3 117 | 611,1,2,71.893224,'m',0,1,0,0.5,2,420,3.26,62,3196,77.5,91,344,11.4,3 118 | 
3823,0,1,28.884326,'f',0,0,0,0,1,239,3.77,77,1877,97.65,101,312,10.2,1 119 | 3820,0,2,48.468172,'m',0,0,0,0,1.8,460,3.35,148,1472,108.5,118,172,10.2,2 120 | 552,1,2,51.468857,'m',0,1,0,0,2.3,178,3,145,746,178.25,122,119,12,4 121 | 3581,0,2,44.950034,'f',0,0,0,0,0.9,400,3.6,31,1689,164.3,166,327,10.4,3 122 | 3099,0,1,56.569473,'f',0,0,0,0,0.9,248,3.97,172,646,62,84,128,10.1,1 123 | 110,1,2,48.963723,'f',1,1,1,1,2.5,188,3.67,57,1273,119.35,102,110,11.1,4 124 | 3086,1,1,43.017112,'f',0,0,0,0,1.1,303,3.64,20,2108,128.65,53,349,11.1,2 125 | 3092,0,2,34.039699,'f',0,1,0,0,1.1,464,4.2,38,1644,151.9,102,348,10.3,3 126 | 3222,1,1,68.50924,'f',1,1,0,0,2.1,?,3.9,50,1087,103.85,?,137,10.6,2 127 | 3388,0,2,62.521561,'f',0,0,0,0,0.6,212,4.03,10,648,71.3,77,316,17.1,1 128 | 2583,1,1,50.35729,'f',0,0,0,0,0.4,127,3.5,14,1062,49.6,84,334,10.3,2 129 | 2504,0,2,44.062971,'f',0,0,0,0,0.5,120,3.61,53,804,110.05,52,271,10.6,3 130 | 2105,1,1,38.910335,'f',0,1,1,0,1.9,486,3.54,74,1052,108.5,109,141,10.9,3 131 | 2350,0,1,41.152635,'f',0,0,0,0,5.5,528,4.18,77,2404,172.05,78,467,10.7,3 132 | 3445,1,2,55.457906,'f',0,1,1,0,2,267,3.67,89,754,196.85,90,136,11.8,4 133 | 980,1,1,51.233402,'f',0,1,1,0,6.7,374,3.74,103,979,128.65,100,266,11.1,4 134 | 3395,1,2,52.826831,'m',0,0,0,0,3.2,259,4.3,208,1040,110.05,78,268,11.7,3 135 | 3422,0,2,42.639288,'f',0,0,1,0,0.7,303,4.19,81,1584,111.6,156,307,10.3,3 136 | 3336,0,1,61.0705,'f',0,0,1,0.5,3,458,3.63,74,1588,106.95,382,438,9.9,3 137 | 1083,1,1,49.6564,'f',0,1,1,0,6.5,950,3.11,111,2374,170.5,149,354,11,4 138 | 2288,1,1,48.854209,'f',0,1,0,0,3.5,390,3.3,67,878,137.95,93,207,10.2,3 139 | 515,1,1,54.255989,'f',0,0,1,0,0.6,636,3.83,129,944,97.65,114,306,9.5,3 140 | 2033,0,1,35.151266,'m',0,0,0,0,3.5,325,3.98,444,766,130.2,210,344,10.6,3 141 | 191,1,2,67.906913,'m',1,1,0,1,1.3,151,3.08,73,1112,46.5,49,213,13.2,4 142 | 3297,0,1,55.436003,'f',0,0,0,0,0.6,298,4.13,29,758,65.1,85,256,10.7,3 143 | 971,1,1,45.820671,'f',0,1,1,1,5.1,?,3.23,18,790,179.8,?,104,13,4 
144 | 3069,0,1,52.889802,'m',0,1,0,0,0.6,251,3.9,25,681,57.35,107,182,10.8,4 145 | 2468,0,2,47.181383,'f',0,1,0,0,1.3,316,3.51,75,1162,147.25,137,238,10,4 146 | 824,1,1,53.598905,'f',1,1,1,0,1.2,269,3.12,?,1441,165.85,68,166,11.1,4 147 | 3255,0,2,44.104038,'f',0,0,0,0,0.5,268,4.08,9,1174,86.8,95,453,10,2 148 | 1037,1,1,41.94935,'f',0,1,1,0,16.2,?,2.89,42,1828,299.15,?,123,12.6,4 149 | 3239,0,1,63.613963,'f',0,1,0,0,0.9,420,3.87,30,1009,57.35,232,?,9.7,3 150 | 1413,1,2,44.227242,'f',0,1,1,0,17.4,1775,3.43,205,2065,165.85,97,418,11.5,3 151 | 850,1,2,62.001369,'f',0,1,1,0,2.8,242,3.8,74,614,136.4,104,121,13.2,4 152 | 2944,0,1,40.553046,'f',0,0,0,0,1.9,448,3.83,60,1052,127.1,175,181,9.8,3 153 | 2796,1,2,62.644764,'m',0,0,0,0,1.5,331,3.95,13,577,128.65,99,165,10.1,4 154 | 3149,0,2,42.335387,'f',0,0,0,0,0.7,578,3.67,35,1353,127.1,105,427,10.7,2 155 | 3150,0,1,42.96783,'f',0,0,0,0,0.4,263,3.57,123,836,74.4,121,445,11,2 156 | 3098,0,1,55.96167,'f',0,0,0,0,0.8,263,3.35,27,1636,116.25,69,206,9.8,2 157 | 2990,0,1,62.861054,'f',0,0,0,0,1.1,399,3.6,79,3472,155,152,344,10.1,2 158 | 1297,1,1,51.249829,'m',0,1,0,0,7.3,426,3.93,262,2424,145.7,218,252,10.5,3 159 | 2106,0,2,46.762491,'f',0,1,0,0,1.1,328,3.31,159,1260,94.55,134,142,11.6,4 160 | 3059,0,1,54.075291,'f',0,1,0,0,1.1,290,4.09,38,2120,186,146,318,10,3 161 | 3050,0,1,47.036277,'f',0,0,0,0,0.9,346,3.77,59,794,125.55,56,336,10.6,2 162 | 2419,1,2,55.726215,'f',0,1,0,0,1,364,3.48,20,720,134.85,88,283,9.9,2 163 | 786,1,2,46.102669,'f',0,1,0,0,2.9,332,3.6,86,1492,134.85,103,277,11,4 164 | 943,1,2,52.287474,'f',0,1,0,0.5,28,556,3.26,152,3896,198.4,171,335,10,3 165 | 2976,0,2,51.200548,'f',0,0,1,0,0.7,309,3.84,96,858,41.85,106,253,11.4,3 166 | 2615,0,2,33.864476,'f',0,0,0,0.5,1.2,?,3.89,58,1284,173.6,?,239,9.4,3 167 | 2995,0,1,75.011636,'f',0,0,0,0.5,1.2,288,3.37,32,791,57.35,114,213,10.7,2 168 | 1427,1,2,30.863792,'f',0,1,0,0,7.2,1015,3.26,247,3836,198.4,280,330,9.8,3 169 | 
762,1,1,61.804244,'m',0,1,1,0.5,3,257,3.79,290,1664,102.3,112,140,9.9,4 170 | 2891,0,2,34.986995,'f',0,0,1,0,1,?,3.63,57,1536,134.85,?,233,10,1 171 | 2870,0,1,55.041752,'f',0,0,0,0,0.9,460,3.03,57,721,85.25,174,301,9.4,2 172 | 1152,1,1,69.941136,'m',0,1,0,0,2.3,586,3.01,243,2276,114.7,126,339,10.9,3 173 | 2863,0,1,49.604381,'f',0,0,0,0,0.5,217,3.85,68,453,54.25,68,270,11.1,1 174 | 140,1,1,69.377139,'m',0,0,1,1,2.4,168,2.56,225,1056,120.9,75,108,14.1,3 175 | 2666,0,2,43.556468,'f',0,1,1,0.5,0.6,220,3.35,57,1620,153.45,80,311,11.2,4 176 | 853,1,2,59.408624,'f',0,1,0,0,25.5,358,3.52,219,2468,201.5,205,151,11.5,2 177 | 2835,0,2,48.758385,'f',0,0,0,0,0.6,286,3.42,34,1868,77.5,206,487,10,2 178 | 2475,0,1,36.492813,'f',0,0,0,0,3.4,450,3.37,32,1408,116.25,118,313,11.2,2 179 | 1536,1,2,45.760438,'m',0,0,0,0,2.5,317,3.46,217,714,130.2,140,207,10.1,3 180 | 2772,0,2,57.371663,'f',0,0,0,0,0.6,217,3.62,13,414,75.95,119,224,10.5,3 181 | 2797,0,2,42.743326,'f',0,0,0,0,2.3,502,3.56,4,964,120.9,180,269,9.6,2 182 | 186,1,2,58.817248,'f',0,1,1,0,3.2,260,3.19,91,815,127.1,101,160,12,4 183 | 2055,1,1,53.497604,'f',0,0,0,0,0.3,233,4.08,20,622,66.65,68,358,9.9,3 184 | 264,1,2,43.4141,'f',0,1,1,0.5,8.5,?,3.34,161,1428,181.35,?,88,13.3,4 185 | 1077,1,1,53.305955,'m',0,1,0,0,4,196,3.45,80,2496,133.3,142,212,11.3,4 186 | 2721,0,2,41.355236,'f',0,1,0,0,5.7,1480,3.26,84,1960,457.25,108,213,9.5,2 187 | 1682,1,1,60.958248,'m',0,1,0,0,0.9,376,3.86,200,1015,83.7,154,238,10.3,4 188 | 2713,0,2,47.753593,'f',0,1,0,0,0.4,257,3.8,44,842,97.65,110,?,9.2,2 189 | 1212,1,2,35.49076,'f',0,0,0,0,1.3,408,4.22,67,1387,142.6,137,295,10.1,3 190 | 2692,0,1,48.66256,'f',0,0,0,0,1.2,390,3.61,32,1509,88.35,52,263,9,3 191 | 2574,0,1,52.668036,'f',0,0,0,0,0.5,?,4.52,31,784,74.4,?,361,10.1,3 192 | 2301,0,2,49.869952,'f',0,0,1,0,1.3,205,3.34,65,1031,91.45,126,217,9.8,3 193 | 2657,0,1,30.275154,'f',0,1,1,0,3,236,3.42,76,1403,89.9,86,493,9.8,2 194 | 2644,0,1,55.56742,'f',0,0,0,0,0.5,?,3.85,63,663,79.05,?,311,9.7,1 195 | 
2624,0,2,52.15332,'f',0,0,0,0,0.8,283,3.8,152,718,108.5,168,340,10.1,3 196 | 1492,1,1,41.609856,'f',0,1,1,0,3.2,?,3.56,77,1790,139.5,?,149,10.1,4 197 | 2609,0,2,55.45243,'f',0,0,0,0,0.9,258,4.01,49,559,43.4,133,277,10.4,2 198 | 2580,0,1,70.004107,'f',0,0,0,0,0.6,?,4.08,51,665,74.4,?,325,10.2,4 199 | 2573,0,2,43.942505,'f',0,1,0,0,1.8,396,3.83,39,2148,102.3,133,278,9.9,4 200 | 2563,0,2,42.568104,'f',0,0,0,0,4.7,478,4.38,44,1629,237.15,76,175,10.4,3 201 | 2556,0,1,44.569473,'f',0,1,1,0,1.4,248,3.58,63,554,75.95,106,79,10.3,4 202 | 2555,0,1,56.944559,'f',0,1,0,0,0.6,?,3.69,161,674,26.35,?,539,9.9,2 203 | 2241,0,2,40.260096,'f',0,0,0,0,0.5,201,3.73,44,1345,54.25,145,445,10.1,2 204 | 974,1,2,37.607118,'f',0,1,0,0,11,674,3.55,358,2412,167.4,140,471,9.8,3 205 | 2527,0,1,48.361396,'f',0,0,0,0,0.8,256,3.54,42,1132,74.4,94,192,10.5,3 206 | 1576,1,1,70.836413,'f',0,0,1,0.5,2,225,3.53,51,933,69.75,62,200,12.7,3 207 | 733,1,2,35.791923,'f',0,1,0,0,14,808,3.43,251,2870,153.45,137,268,11.5,3 208 | 2332,0,1,62.622861,'f',0,1,0,0,0.7,187,3.48,41,654,120.9,98,164,11,4 209 | 2456,0,2,50.647502,'f',0,1,0,0,1.3,360,3.63,52,1812,97.65,164,256,9.9,3 210 | 2504,0,1,54.527036,'f',0,0,1,0,2.3,?,3.93,24,1828,133.3,?,327,10.2,2 211 | 216,1,2,52.692676,'f',1,1,1,0,24.5,1092,3.35,233,3740,147.25,432,399,15.2,4 212 | 2443,0,1,52.720055,'f',0,1,0,0,0.9,308,3.69,67,696,51.15,101,344,9.8,4 213 | 797,1,2,56.772074,'f',0,0,0,0,10.8,932,3.19,267,2184,161.2,157,382,10.4,4 214 | 2449,0,1,44.396988,'f',0,0,0,0,1.5,293,4.3,50,975,125.55,56,336,9.1,2 215 | 2330,0,1,29.555099,'f',0,1,0,0,3.7,347,3.9,76,2544,221.65,90,129,11.5,4 216 | 2363,0,1,57.040383,'f',0,1,1,0,1.4,226,3.36,13,810,72.85,62,117,11.6,4 217 | 2365,0,1,44.626968,'f',0,0,0,0,0.6,266,3.97,25,1164,102.3,102,201,10.1,2 218 | 2357,0,2,35.797399,'f',0,0,1,0,0.7,286,2.9,38,1692,141.05,90,381,9.6,2 219 | 2318,0,2,32.232717,'f',0,0,1,0,4.7,236,3.55,112,1391,137.95,114,332,9.9,3 220 | 
2294,0,2,41.092402,'f',0,1,0,0,0.6,235,3.2,26,1758,106.95,67,228,10.8,4 221 | 2272,0,1,61.639973,'f',0,0,0,0,0.5,223,3.8,15,1044,80.6,89,514,10,2 222 | 2221,0,2,37.05681,'f',0,1,0,0,0.5,149,4.04,227,598,52.7,57,166,9.9,2 223 | 2090,1,2,62.579055,'f',0,0,0,0,0.7,255,3.74,23,1024,77.5,58,281,10.2,3 224 | 2081,1,1,48.977413,'f',1,0,0,0,2.5,382,3.55,108,1516,238.7,?,126,10.3,3 225 | 2255,0,1,61.990418,'f',0,0,0,0,0.6,213,4.07,12,5300,57.35,68,240,11,1 226 | 2171,0,1,72.772074,'f',0,0,0,0.5,0.6,?,3.33,14,733,85.25,?,259,10.1,4 227 | 904,1,1,61.295003,'f',0,1,0,0,3.9,396,3.2,58,1440,153.45,131,156,10,4 228 | 2216,0,2,52.62423,'f',0,1,1,0,0.7,252,4.01,11,1210,72.85,58,309,9.5,2 229 | 2224,0,2,49.763176,'m',0,1,0,0,0.9,346,3.37,81,1098,122.45,90,298,10,2 230 | 2195,0,2,52.914442,'f',0,0,0,0,1.3,?,3.76,27,1282,100.75,?,114,10.3,3 231 | 2176,0,2,47.263518,'f',0,0,0,0,1.2,232,3.98,11,1074,100.75,99,223,9.9,3 232 | 2178,0,1,50.20397,'f',0,0,1,0,0.5,400,3.4,9,1134,96.1,55,356,10.2,3 233 | 1786,1,2,69.347023,'f',0,1,0,0,0.9,404,3.43,34,1866,79.05,224,236,9.9,3 234 | 1080,1,2,41.169062,'f',0,0,0,0,5.9,1276,3.85,141,1204,203.05,157,216,10.7,3 235 | 2168,0,1,59.164956,'f',0,0,0,0,0.5,?,3.68,20,856,55.8,?,146,10.4,3 236 | 790,1,2,36.079398,'f',0,1,0,0,11.4,608,3.31,65,1790,151.9,210,298,10.8,4 237 | 2170,0,1,34.595483,'f',0,0,0,0,0.5,?,3.89,29,897,66.65,?,423,10.1,1 238 | 2157,0,2,42.71321,'f',0,0,0,0,1.6,215,4.17,67,936,134.85,85,176,9.6,3 239 | 1235,1,1,63.63039,'f',0,0,1,0,3.8,426,3.22,96,2716,210.8,113,228,10.6,2 240 | 2050,0,2,56.629706,'f',0,1,0,0,0.9,360,3.65,72,3186,94.55,154,269,9.7,4 241 | 597,1,2,46.264203,'f',0,1,0,0,4.5,372,3.38,227,2310,167.4,135,240,12.4,3 242 | 334,1,1,61.242984,'f',1,1,0,1,14.1,448,2.43,123,1833,134,155,210,11,4 243 | 1945,0,1,38.620123,'f',0,0,0,0,1,309,3.66,67,1214,158.1,101,309,9.7,3 244 | 2022,0,1,38.770705,'f',0,0,0,0,0.7,274,3.66,108,1065,88.35,135,251,10.1,2 245 | 1978,0,2,56.695414,'f',0,1,0,0,0.5,223,3.7,39,884,75.95,104,231,9.6,3 246 | 
999,1,1,58.951403,'m',0,0,0,0,2.3,316,3.35,172,1601,179.8,63,394,9.7,2 247 | 1967,0,2,36.922656,'f',0,0,0,0,0.7,215,3.35,41,645,93,74,165,9.6,3 248 | 348,1,1,62.414784,'f',1,1,0,0.5,4.5,191,3.05,200,1020,175.15,118,139,11.4,4 249 | 1979,0,2,34.609172,'f',0,1,1,0,3.3,302,3.41,51,310,83.7,44,95,11.5,4 250 | 1165,1,2,58.335387,'f',0,1,1,0,3.4,518,1.96,115,2250,203.05,90,190,10.7,4 251 | 1951,0,1,50.182067,'f',0,1,0,0,0.4,267,3.02,47,1001,133.3,87,265,10.6,3 252 | 1932,0,1,42.685832,'f',0,1,1,0,0.9,514,3.06,412,2622,105.4,87,284,9.8,4 253 | 1776,0,2,34.379192,'f',0,0,0,0,0.9,578,3.35,78,976,116.25,177,322,11.2,2 254 | 1882,0,2,33.182752,'f',0,1,0,0,13,1336,4.16,71,3510,209.25,111,338,11.9,3 255 | 1908,0,1,38.38193,'f',0,1,1,0,1.5,253,3.79,67,1006,139.5,106,341,9.7,3 256 | 1882,0,1,59.761807,'f',0,1,0,0,1.6,442,2.95,105,820,85.25,108,181,10.1,3 257 | 1874,0,2,66.412047,'f',0,0,0,0.5,0.6,280,3.35,?,1093,128.65,81,295,9.8,2 258 | 694,1,1,46.78987,'f',0,1,1,0,0.8,300,2.94,231,1794,130.2,99,319,11.2,4 259 | 1831,0,1,56.079398,'f',0,0,0,0,0.4,232,3.72,24,369,51.15,139,326,10.1,3 260 | 1810,0,1,64.572211,'f',0,1,0,0,1.9,354,2.97,86,1553,196.85,152,277,9.9,3 261 | 930,1,2,67.488022,'f',0,1,0,0,8,468,2.81,139,2009,198.4,139,233,10,4 262 | 1690,1,1,44.829569,'f',0,0,1,0,3.9,350,3.22,121,1268,272.8,231,270,9.6,3 263 | 1790,0,2,45.771389,'f',0,1,0,0,0.6,273,3.65,48,794,52.7,214,305,9.6,3 264 | 1785,0,2,55.416838,'f',0,1,0,0,0.8,324,3.51,39,1237,66.65,146,371,10,3 265 | 1783,0,1,47.980835,'f',0,0,1,0,1.3,242,3.2,35,1556,175.15,71,195,10.6,4 266 | 1769,0,2,40.791239,'f',0,1,0,0,0.6,299,3.36,23,2769,220.1,85,303,10.9,4 267 | 1770,0,1,68.462697,'f',0,1,1,0,1.1,246,3.35,116,924,113.15,90,317,10,4 268 | 1765,0,1,78.439425,'m',1,1,1,0,7.1,243,3.03,380,983,158.1,154,97,11.2,4 269 | 1735,0,2,35.310062,'f',0,1,1,0,0.7,193,3.85,35,466,53,118,156,10.3,3 270 | 179,1,1,70.907598,'f',1,1,1,1,6.6,222,2.33,138,620,106,91,195,12.1,4 271 | 
1191,1,1,55.394935,'f',1,1,0,0.5,6.4,344,2.75,16,834,82,179,149,11,4 272 | 41,1,1,65.883641,'f',1,0,0,1,17.9,175,2.1,220,705,338,229,62,12.9,4 273 | 799,1,1,67.572895,'m',0,1,0,0.5,4,416,3.99,177,960,86,242,269,9.8,2 274 | 207,1,2,58.171116,'f',0,1,0,0,5.2,?,2.23,234,601,135,?,206,12.3,4 275 | 4062,0,?,60,'f',?,?,?,0,0.7,?,3.65,?,?,?,?,378,11,? 276 | 3561,1,?,64.999316,'f',?,?,?,0.5,1.4,?,3.04,?,?,?,?,331,12.1,4 277 | 2844,0,?,54.001369,'f',?,?,?,0,0.7,?,4.03,?,?,?,?,226,9.8,4 278 | 2071,1,?,75.000684,'f',?,?,?,0.5,0.7,?,3.96,?,?,?,?,?,11.3,4 279 | 3030,0,?,62.001369,'f',?,?,?,0,0.8,?,2.48,?,?,?,?,273,10,? 280 | 41,1,?,46.001369,'f',?,?,?,0,5,?,2.93,?,?,?,?,260,10.4,? 281 | 2403,0,?,44,'f',?,?,?,0.5,0.4,?,3.81,?,?,?,?,226,10.5,3 282 | 2011,1,?,64,'f',?,?,?,0,1.1,?,3.69,?,?,?,?,139,10.5,? 283 | 3523,0,?,40,'f',?,?,?,0,0.6,?,4.04,?,?,?,?,130,11.2,2 284 | 3468,0,?,63.000684,'f',?,?,?,0,0.6,?,3.94,?,?,?,?,234,11.5,2 285 | 4795,0,?,34.001369,'f',?,?,?,0,1.8,?,3.24,?,?,?,?,?,18,2 286 | 4214,0,?,48.999316,'f',?,?,?,0,1.2,?,3.99,?,?,?,?,?,11.2,2 287 | 2111,1,?,54.001369,'f',?,?,?,0,1,?,3.6,?,?,?,?,?,12.1,2 288 | 1462,1,?,63.000684,'f',?,?,?,0,0.7,?,3.4,?,?,?,?,371,10.1,4 289 | 1746,1,?,54.001369,'m',?,?,?,0,3.5,?,3.63,?,?,?,?,325,10.3,2 290 | 94,1,?,46.001369,'f',?,?,?,0.5,3.1,?,3.56,?,?,?,?,142,13.6,4 291 | 785,1,?,52.999316,'f',?,?,?,0,12.6,?,2.87,?,?,?,?,114,11.8,4 292 | 1518,1,?,56,'f',?,?,?,0,2.8,?,3.92,?,?,?,?,?,10.6,4 293 | 466,1,?,56,'f',?,?,?,0,7.1,?,3.51,?,?,?,?,721,11.8,? 294 | 3527,0,?,55.000684,'f',?,?,?,0,0.6,?,4.15,?,?,?,?,280,10.1,2 295 | 2635,0,?,64.999316,'f',?,?,?,0,2.1,?,3.34,?,?,?,?,155,10.1,4 296 | 2286,1,?,56,'f',?,?,?,0,1.8,?,3.64,?,?,?,?,141,10,? 
297 | 791,1,?,47.000684,'f',?,?,?,0,16,?,3.42,?,?,?,?,475,13.8,2 298 | 3492,0,?,60,'f',?,?,?,0,0.6,?,4.38,?,?,?,?,269,10.6,2 299 | 3495,0,?,52.999316,'f',?,?,?,0,5.4,?,4.19,?,?,?,?,141,11.2,2 300 | 111,1,?,54.001369,'f',?,?,?,0,9,?,3.29,?,?,?,?,286,13.1,4 301 | 3231,0,?,50.001369,'f',?,?,?,0,0.9,?,4.01,?,?,?,?,244,10.5,3 302 | 625,1,?,48,'f',?,?,?,0,11.1,?,2.84,?,?,?,?,?,12.2,2 303 | 3157,0,?,36,'f',?,?,?,0,8.9,?,3.76,?,?,?,?,209,10.6,3 304 | 3021,0,?,48,'f',?,?,?,0,0.5,?,3.76,?,?,?,?,388,10.1,2 305 | 559,1,?,70.001369,'f',?,?,?,0.5,0.6,?,3.81,?,?,?,?,160,11,4 306 | 2812,1,?,51.000684,'f',?,?,?,0,3.4,?,3.92,?,?,?,?,?,9.3,2 307 | 2834,0,?,52,'m',?,?,?,0,0.9,?,3.14,?,?,?,?,191,12.3,2 308 | 2855,0,?,54.001369,'f',?,?,?,0,1.4,?,3.82,?,?,?,?,249,10.3,2 309 | 662,1,?,48,'f',?,?,?,0,2.1,?,4.1,?,?,?,?,200,9,3 310 | 727,1,?,66.001369,'f',?,?,?,0,15,?,3.4,?,?,?,?,150,11.1,4 311 | 2716,0,?,52.999316,'f',?,?,?,0,0.6,?,4.19,?,?,?,?,330,9.9,1 312 | 2698,0,?,62.001369,'f',?,?,?,0,1.3,?,3.4,?,?,?,?,167,10.6,4 313 | 990,1,?,59.000684,'f',?,?,?,0,1.3,?,3.12,?,?,?,?,125,9.6,2 314 | 2338,0,?,39.000684,'f',?,?,?,0,1.6,?,3.75,?,?,?,?,145,10.4,3 315 | 1616,1,?,67.000684,'f',?,?,?,0.5,2.2,?,3.26,?,?,?,?,171,11.1,4 316 | 2563,0,?,58.001369,'f',?,?,?,0,3,?,3.46,?,?,?,?,109,10.4,4 317 | 2537,0,?,64,'f',?,?,?,0,0.8,?,3.49,?,?,?,?,314,10.3,3 318 | 2534,0,?,46.001369,'f',?,?,?,0,0.8,?,2.89,?,?,?,?,419,?,1 319 | 778,1,?,64,'f',?,?,?,0,1.8,?,3.15,?,?,?,?,183,10.4,4 320 | 2267,0,?,48.999316,'f',?,?,?,0,18,?,3.04,?,?,?,?,432,9.7,2 321 | 2249,0,?,44,'f',?,?,?,0,0.6,?,3.5,?,?,?,?,150,9.9,3 322 | 359,1,?,59.000684,'f',?,?,?,0,2.7,?,3.35,?,?,?,?,142,11.5,4 323 | 1925,0,?,63.000684,'f',?,?,?,0,0.9,?,3.58,?,?,?,?,224,10,3 324 | 249,1,?,60.999316,'f',?,?,?,0,1.3,?,3.01,?,?,?,?,223,10.7,3 325 | 2202,0,?,64,'f',?,?,?,0,1.1,?,3.49,?,?,?,?,166,9.8,3 326 | 43,1,?,48.999316,'f',?,?,?,0,13.8,?,2.77,?,?,?,?,388,?,4 327 | 1197,1,?,42.001369,'f',?,?,?,0,4.4,?,4.52,?,?,?,?,102,10.8,4 328 | 
1095,1,?,50.001369,'f',?,?,?,0,16,?,3.36,?,?,?,?,384,10,3 329 | 489,1,?,51.000684,'f',?,?,?,0.5,7.3,?,3.52,?,?,?,?,265,11.1,1 330 | 2149,0,?,36.999316,'f',?,?,?,0,0.6,?,3.55,?,?,?,?,248,10.3,2 331 | 2103,0,?,62.001369,'f',?,?,?,0,0.7,?,3.29,?,?,?,?,190,9.8,2 332 | 1980,0,?,51.000684,'f',?,?,?,0,0.7,?,3.1,?,?,?,?,274,10.6,3 333 | 1478,1,?,44,'m',?,?,?,0,9.5,?,3.63,?,?,?,?,292,10.2,3 334 | 1987,0,?,32.999316,'f',?,?,?,0,2.2,?,3.76,?,?,?,?,253,9.9,3 335 | 1168,1,?,60,'f',?,?,?,0.5,1.8,?,3.62,?,?,?,?,225,9.9,2 336 | 597,1,?,63.000684,'f',?,?,?,0.5,3.3,?,2.73,?,?,?,?,224,11.1,4 337 | 1899,0,?,40.999316,'m',?,?,?,0,1.7,?,3.66,?,?,?,?,92,11,4 338 | 221,1,?,51.000684,'f',?,?,?,0,14,?,2.58,?,?,?,?,190,11.6,4 339 | 193,1,?,52,'f',?,?,?,0.5,0.7,?,2.96,?,?,?,?,319,9.9,4 340 | 935,1,?,68.999316,'f',?,?,?,0,4.2,?,3.19,?,?,?,?,120,11.1,4 341 | 703,1,?,46.001369,'f',?,?,?,0,4.5,?,2.68,?,?,?,?,219,11.5,4 342 | 681,1,?,67.000684,'f',?,?,?,0,1.2,?,2.96,?,?,?,?,174,10.9,3 343 | -------------------------------------------------------------------------------- /tests/resources/survival/results.csv: -------------------------------------------------------------------------------- 1 | conditions;importances 2 | bili = <2.4, inf);156.62083444387682 3 | bili = <2.15, inf);153.35066738474507 4 | bili = <1.95, inf);146.35072783977444 5 | bili = (-inf, 1.95);146.35072783977438 6 | -------------------------------------------------------------------------------- /tests/resources/survival/results_split.csv: -------------------------------------------------------------------------------- 1 | conditions;importances 2 | bili = <2.4, inf);411.736 3 | bili = (-inf, 1.95);146.3507 4 | bili = <2.15, 2.4);-1.3703 5 | bili = <1.95, 2.15);-2.0269 6 | -------------------------------------------------------------------------------- /tests/test_conditions_importances.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import rulekit 3 | 4 | from 
rulekit.main import RuleKit 5 | import pandas as pd 6 | from rulekit import RuleKit 7 | from rulekit.classification import RuleClassifier 8 | from rulekit.regression import RuleRegressor 9 | from rulekit.survival import SurvivalRules 10 | from rulekit.params import Measures 11 | from scipy.io import arff 12 | import numpy as np 13 | import os 14 | from pathlib import Path 15 | from rulexai.explainer import RuleExplainer 16 | 17 | class TestConditionsImportances(unittest.TestCase): 18 | 19 | @classmethod 20 | def setUpClass(cls): 21 | RuleKit.init() 22 | 23 | 24 | def test_classification(self): 25 | classification_resources = "resources/classification/" 26 | dataset_path = classification_resources + "iris.arff" 27 | results_path = classification_resources + "results.csv" 28 | results_split_path = classification_resources + "results_split.csv" 29 | dataset_path = os.path.join(Path(__file__).resolve().parent, dataset_path) 30 | results_path = os.path.join(Path(__file__).resolve().parent, results_path) 31 | results_split_path = os.path.join(Path(__file__).resolve().parent, results_split_path) 32 | 33 | train_df = pd.DataFrame(arff.loadarff(dataset_path)[0]) 34 | 35 | # code to change encoding of the file 36 | tmp_df = train_df.select_dtypes([object]) 37 | tmp_df = tmp_df.stack().str.decode("utf-8").unstack() 38 | for col in tmp_df: 39 | train_df[col] = tmp_df[col].replace({"?": None}) 40 | 41 | x = train_df.drop(["class"], axis=1) 42 | y = train_df["class"] 43 | 44 | # RuleKit 45 | clf = RuleClassifier( 46 | induction_measure=Measures.C2, 47 | pruning_measure=Measures.C2, 48 | voting_measure=Measures.C2, 49 | ) 50 | clf.fit(x, y) 51 | 52 | 53 | # RuleXai 54 | explainer = RuleExplainer(clf, x, y, type="classification") 55 | explainer.explain() 56 | conditions_importances = explainer.condition_importances_ 57 | conditions_importances.replace("-", np.nan, inplace = True) 58 | conditions_importances = conditions_importances.round(4) 59 | 60 | importances_ground_truth = 
pd.read_csv(results_path, sep = ";") 61 | importances_ground_truth = importances_ground_truth.round(4) 62 | 63 | # with splitting conditions to basic 64 | explainer.explain(basic_conditions=True) 65 | basic_conditions_importances= explainer.condition_importances_ 66 | basic_conditions_importances.replace("-", np.nan, inplace = True) 67 | basic_conditions_importances = basic_conditions_importances.round(4) 68 | 69 | basic_importances_ground_truth = pd.read_csv(results_split_path, sep = ";") 70 | basic_importances_ground_truth = basic_importances_ground_truth.round(4) 71 | 72 | assert conditions_importances.equals(importances_ground_truth) 73 | assert basic_conditions_importances.equals(basic_importances_ground_truth) 74 | 75 | 76 | def test_regression(self): 77 | 78 | regression_resources = "resources/regression/" 79 | dataset_path = regression_resources + "diabetes.arff" 80 | results_path = regression_resources + "results.csv" 81 | results_split_path = regression_resources + "results_split.csv" 82 | dataset_path = os.path.join(Path(__file__).resolve().parent, dataset_path) 83 | results_path = os.path.join(Path(__file__).resolve().parent, results_path) 84 | results_split_path = os.path.join(Path(__file__).resolve().parent, results_split_path) 85 | 86 | train_df = pd.DataFrame(arff.loadarff(dataset_path)[0]) 87 | 88 | # code to change encoding of the file 89 | tmp_df = train_df.select_dtypes([object]) 90 | if (tmp_df.size != 0): 91 | tmp_df = tmp_df.stack().str.decode("utf-8").unstack() 92 | for col in tmp_df: 93 | train_df[col] = tmp_df[col].replace({"?": None}) 94 | 95 | x = train_df.drop(["class"], axis=1) 96 | y = train_df["class"] 97 | 98 | df = train_df 99 | 100 | # RuleKit 101 | reg = RuleRegressor( 102 | induction_measure=Measures.C2, 103 | pruning_measure=Measures.C2, 104 | voting_measure=Measures.C2, 105 | mean_based_regression = False 106 | ) 107 | reg.fit(x, y) 108 | 109 | 110 | # RuleXai 111 | explainer = RuleExplainer(reg, x, y, type="regression") 112 | 
explainer.explain() 113 | conditions_importances = explainer.condition_importances_ 114 | conditions_importances = conditions_importances.round(4) 115 | 116 | importances_ground_truth = pd.read_csv(results_path, sep = ";") 117 | importances_ground_truth = importances_ground_truth.round(4) 118 | 119 | # with splitting conditions to basic 120 | explainer.explain(basic_conditions=True) 121 | basic_conditions_importances= explainer.condition_importances_ 122 | basic_conditions_importances = basic_conditions_importances.round(4) 123 | 124 | basic_importances_ground_truth = pd.read_csv(results_split_path, sep = ";") 125 | basic_importances_ground_truth = basic_importances_ground_truth.round(4) 126 | 127 | assert conditions_importances.equals(importances_ground_truth) 128 | assert basic_conditions_importances.equals(basic_importances_ground_truth) 129 | 130 | 131 | 132 | 133 | if __name__ == '__main__': 134 | unittest.main() -------------------------------------------------------------------------------- /tests/test_functionalities.py: -------------------------------------------------------------------------------- 1 | from rulexai import rule 2 | import unittest 3 | import rulekit 4 | from rulekit.main import RuleKit 5 | from rulexai.explainer import RuleExplainer 6 | from sklearn.datasets import load_iris 7 | import pandas as pd 8 | from rulekit.classification import RuleClassifier 9 | from rulekit.params import Measures 10 | from sklearn.tree import DecisionTreeClassifier 11 | from Orange.data.table import Table 12 | from Orange.data.pandas_compat import table_to_frame 13 | from Orange.classification import CN2UnorderedLearner 14 | 15 | import os 16 | from pathlib import Path 17 | import numpy as np 18 | 19 | class TestFunctionalities(unittest.TestCase): 20 | 21 | @classmethod 22 | def setUpClass(cls): 23 | RuleKit.init() 24 | 25 | def test_creating_model_with_rules_RuleKit(self): 26 | data = load_iris() 27 | x = pd.DataFrame(data.data, columns=data.feature_names) 28 | 
class TestFunctionalities(unittest.TestCase):
    """Functional tests for RuleExplainer on RuleKit, sklearn and Orange models."""

    @classmethod
    def setUpClass(cls):
        """Initialise RuleKit once for the whole test case."""
        RuleKit.init()

    @staticmethod
    def _iris_frames(label_column="label"):
        """Return ``(bunch, x, y)`` for the sklearn iris dataset.

        ``x`` is a DataFrame of the features and ``y`` a one-column
        DataFrame (named ``label_column``) of the targets cast to ``str``.
        """
        data = load_iris()
        x = pd.DataFrame(data.data, columns=data.feature_names)
        y = pd.DataFrame(data.target.astype(str), columns=[label_column])
        return data, x, y

    @staticmethod
    def _fit_rulekit_classifier(x, labels):
        """Fit a RuleClassifier that uses the C2 measure everywhere."""
        clf = RuleClassifier(
            induction_measure=Measures.C2,
            pruning_measure=Measures.C2,
            voting_measure=Measures.C2,
        )
        clf.fit(x, labels)
        return clf

    def _rulekit_iris_explainer(self):
        """Return ``(clf, x, y, explainer)`` for a RuleKit model on iris."""
        data, x, y = self._iris_frames()
        clf = self._fit_rulekit_classifier(x, data.target.astype(str))
        explainer = RuleExplainer(clf, x, y, type="classification")
        return clf, x, y, explainer

    def test_creating_model_with_rules_RuleKit(self):
        """The explainer model holds as many rules as the RuleKit model."""
        clf, _, _, explainer = self._rulekit_iris_explainer()

        self.assertEqual(len(clf.model.rules), len(explainer.model.rules))

    def test_creating_model_with_rules_Sklearn(self):
        """A fitted sklearn decision tree yields at least one rule."""
        clf = DecisionTreeClassifier(random_state=0)
        data, x, y = self._iris_frames(label_column="class")
        clf.fit(data.data, data.target)

        explainer = RuleExplainer(clf, x, y, type="classification")

        self.assertGreater(len(explainer.model.rules), 0)

    def test_creating_model_with_rules_Orange(self):
        """An Orange CN2 unordered classifier yields at least one rule."""
        data = Table("iris")
        learner = CN2UnorderedLearner()
        clf = learner(data)
        df = table_to_frame(data)
        x = df.drop(['iris'], axis=1)
        y = df['iris']

        explainer = RuleExplainer(clf, x, y, type="classification")

        self.assertGreater(len(explainer.model.rules), 0)

    def test_features_importances(self):
        """Feature importances match the stored CSV fixture (4 decimals)."""
        results_path = "resources/features_importances.csv"
        results_path = os.path.join(Path(__file__).resolve().parent, results_path)

        _, _, _, explainer = self._rulekit_iris_explainer()
        explainer.explain()

        features_importances = explainer.feature_importances_
        features_importances.replace("-", np.nan, inplace=True)
        features_importances = features_importances.round(4)

        features_importances_ground_truth = pd.read_csv(results_path, sep=";")
        features_importances_ground_truth.replace("-", np.nan, inplace=True)
        features_importances_ground_truth = features_importances_ground_truth.round(4)

        # assertTrue instead of a bare assert so the check survives
        # ``python -O`` and reports which fixture mismatched.
        self.assertTrue(
            features_importances.equals(features_importances_ground_truth),
            f"feature importances differ from {results_path}",
        )

    def test_prepare_binary_dataset(self):
        """Shapes of the datasets returned by fit_transform / transform."""
        _, x, _, explainer = self._rulekit_iris_explainer()
        explainer.explain()

        # Call order is kept: fit_transform, then transform on a slice,
        # then fit_transform again with a selector.
        binary_dataset = explainer.fit_transform(x)
        binary_dataset_10 = explainer.transform(x.iloc[0:10])
        binary_dataset_TOP_50 = explainer.fit_transform(x, selector=0.5)

        self.assertEqual(binary_dataset.shape[0], x.shape[0])
        self.assertEqual(binary_dataset_10.shape[0], 10)
        self.assertEqual(binary_dataset.shape[1], 14)
        self.assertEqual(binary_dataset_TOP_50.shape[0], x.shape[0])
        # With selector=0.5 the fixture expects 7 of the 14 columns.
        self.assertEqual(binary_dataset_TOP_50.shape[1], 7)

    def test_get_rules_covering_example_and_get_rules(self):
        """Rule look-up for single examples and rule-list lengths."""
        clf, x, y, explainer = self._rulekit_iris_explainer()

        rules_covering_example_0 = explainer.get_rules_covering_example(
            x.iloc[0, :], y.iloc[0, :]
        )
        rules_covering_example_100 = explainer.get_rules_covering_example(
            x.iloc[100, :], y.iloc[100, :]
        )

        self.assertEqual(len(rules_covering_example_0), 1)
        self.assertEqual(len(rules_covering_example_100), 2)
        self.assertEqual(
            len(explainer.get_rules()),
            len(explainer.get_rules_with_basic_conditions()),
        )
        self.assertEqual(len(explainer.get_rules()), len(clf.model.rules))


if __name__ == '__main__':
    unittest.main()