├── .gitignore
├── LICENSE
├── README.md
├── docs
    ├── Makefile
    ├── make.bat
    ├── requirements.txt
    └── source
    │   ├── _static
    │       └── theoretical_basis_description.pdf
    │   ├── conf.py
    │   ├── index.rst
    │   └── rst
    │       ├── autodoc.rst
    │       ├── theoretical_basis.rst
    │       ├── tutorials.rst
    │       └── tutorials
    │           ├── black-box_model_aproximation.ipynb
    │           ├── classification.ipynb
    │           ├── data
    │               ├── GBSG2.arff
    │               ├── australian_test.csv
    │               ├── australian_train.csv
    │               ├── cpu.arff
    │               ├── titanic_kaggle.csv
    │               └── titanic_openml.csv
    │           ├── dataset_transformation.ipynb
    │           ├── regression.ipynb
    │           └── survival.ipynb
├── requirements.txt
├── rulexai
    ├── __init__.py
    ├── explainer.py
    ├── importances.py
    ├── models.py
    ├── reduct.py
    └── rule.py
├── setup.cfg
├── setup.py
└── tests
    ├── resources
        ├── classification
        │   ├── iris.arff
        │   ├── results.csv
        │   └── results_split.csv
        ├── features_importances.csv
        ├── regression
        │   ├── diabetes.arff
        │   ├── results.csv
        │   └── results_split.csv
        └── survival
        │   ├── pbc.arff
        │   ├── results.csv
        │   └── results_split.csv
    ├── test_conditions_importances.py
    └── test_functionalities.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # Sphinx build output (docs/Makefile and docs/make.bat set BUILDDIR to "build")
132 | docs/build/


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # RuleXAI
 2 | 
 3 | RuleXAI is a rule-based approach to explain the output of any machine learning model. It is suitable for classification, regression and survival tasks.
 4 | 
 5 | ## Installation
 6 | 
 7 | RuleXAI can be installed from [PyPI](https://pypi.org/project/rulexai/)
 8 | 
 9 | ```bash
10 | pip install rulexai
11 | ```
12 | 
13 | Or you can clone the repository and run:
14 | ```bash
15 | pip install .
16 | ```
17 | 
18 | ## Model agnostic example
19 | ```python
20 | from sklearn.datasets import load_iris
21 | from sklearn.model_selection import train_test_split
22 | from sklearn.svm import SVC
23 | import pandas as pd
24 | 
25 | from rulexai.explainer import Explainer
26 | 
27 | 
28 | # load iris dataset
29 | data = load_iris()
30 | df = pd.DataFrame(data['data'], columns=data['feature_names'])
31 | df['class'] = data['target']
32 | 
33 | # train a SVM classifier
34 | X_train,X_test,y_train,y_test = train_test_split(df.drop(columns=["class"]), df["class"], test_size=0.2, random_state=0)
35 | svm = SVC(kernel='rbf', probability=True)
36 | svm.fit(X_train, y_train)
37 | predictions = svm.predict(X_train)
38 | # prepare model predictions to be fed to RuleXAI, remember to change numerical predictions to labels (in this example it is simply converting predictions to a string)
39 | model_predictions = pd.DataFrame(predictions.astype(str), columns=[y_train.name], index = y_train.index)
40 | 
41 | # use Explainer to explain model output
42 | explainer =  Explainer(X = X_train,model_predictions = model_predictions, type = "classification")
43 | explainer.explain()
44 | 
45 | print(explainer.condition_importances_)
46 | ```
47 | 
48 | ## Sample notebooks
49 | 
50 | * **[Classification](https://rulexai.readthedocs.io/en/latest/rst/tutorials/classification.html)**  - in this notebook, the data from https://www.kaggle.com/c/titanic is analysed to show the advantages and possibilities of using the RuleXAI library for in-depth analysis of the dataset for classification task. The use of RuleXAI to explain rule-based and tree-based models was also compared. 
51 |    
52 | * **[Regression](https://rulexai.readthedocs.io/en/latest/rst/tutorials/regression.html)** - notebook showing the use of RuleXAI to explain rule-based regression model
53 |    
54 | * **[Survival](https://rulexai.readthedocs.io/en/latest/rst/tutorials/survival.html)** - notebook showing the use of RuleXAI to explain rule-based survival model
55 |     
56 | * **[Black-box model](https://rulexai.readthedocs.io/en/latest/rst/tutorials/black-box_model_aproximation.html)** explainability - the purpose of this notebook is to demonstrate the possibility of using RuleXAI to explain any black box models.
57 |      
58 | * **[Transformation](https://rulexai.readthedocs.io/en/latest/rst/tutorials/dataset_transformation.html)** - notebook showing the use of RuleXAI to transform a dataset. Often datasets contain missing values and nominal values. Most available algorithms do not support either missing values or nominal values. Many algorithms require the data to be rescaled beforehand. The RuleXAI library is able to convert a dataset with nominal and missing values into a binary dataset containing as attributes the conditions describing the dataset and as values “1” when the condition is satisfied for the example and “0” when the condition is not satisfied.
59 |    
60 | 
61 | ## Documentation
62 | Full documentation is available [here](https://rulexai.readthedocs.io/en/latest/index.html)
63 | 
64 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | set batdir=%~dp0
 3 | 
 4 | pushd %~dp0
 5 | 
 6 | REM Command file for Sphinx documentation
 7 | 
 8 | if "%SPHINXBUILD%" == "" (
 9 | 	set SPHINXBUILD=python -m sphinx.cmd.build
10 | )
11 | set SOURCEDIR=source
12 | set BUILDDIR=build
13 | 
14 | if "%1" == "help" goto help
15 | 
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | 	echo.
19 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | 	echo.installed, then set the SPHINXBUILD environment variable to point
21 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | 	echo.may add the Sphinx directory to PATH.
23 | 	echo.
24 | 	echo.If you don't have Sphinx installed, grab it from
25 | 	echo.http://sphinx-doc.org/
26 | 	exit /b 1
27 | )
28 | REM Forward the requested target to Sphinx make mode (as docs/Makefile does); build html when no argument is given.
29 | if "%1" == "" (%SPHINXBUILD% -M html %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%) else (%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%)
30 | goto end
31 | 
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
34 | 
35 | :end
36 | popd
37 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx-rtd-theme==0.5.1
2 | nbsphinx==0.8.7
3 | sphinx-copybutton==0.3.1
4 | ipykernel==5.5.0
5 | pandoc==1.0.2
6 | pandas~=1.2.1
7 | numpy~=1.20.3
8 | matplotlib~=3.4.2
9 | rulekit~=1.6.0


--------------------------------------------------------------------------------
/docs/source/_static/theoretical_basis_description.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/adaa-polsl/RuleXAI/4075a717c36c2a0589f07a1130343e4bc71809ee/docs/source/_static/theoretical_basis_description.pdf


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath('../..'))
16 | 
17 | # -- Project information -----------------------------------------------------
18 | 
19 | project = "RuleXAI"
20 | copyright = "2022, Macha Dawid"
21 | author = "Macha Dawid"
22 | 
23 | # The full version, including alpha/beta/rc tags
24 | release = "v1.0.0"
25 | 
26 | source_suffix = [".rst", ".md"]
27 | 
28 | # -- General configuration ---------------------------------------------------
29 | 
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = [
34 |     "sphinx_rtd_theme",
35 |     "sphinx.ext.autodoc",
36 |     "sphinx.ext.coverage",
37 |     "sphinx.ext.napoleon",
38 |     "sphinx.ext.intersphinx",
39 |     "nbsphinx",
40 |     "sphinx_copybutton",
41 | ]
42 | 
43 | autoclass_content = "both"
44 | # Add any paths that contain templates here, relative to this directory.
45 | templates_path = ["_templates"]
46 | 
47 | # List of patterns, relative to source directory, that match files and
48 | # directories to ignore when looking for source files.
49 | # This pattern also affects html_static_path and html_extra_path.
50 | exclude_patterns = []
51 | 
52 | 
53 | # -- Options for HTML output -------------------------------------------------
54 | 
55 | # The theme to use for HTML and HTML Help pages.  See the documentation for
56 | # a list of builtin themes.
57 | #
58 | html_theme = "sphinx_rtd_theme"
59 | 
60 | # Add any paths that contain custom static files (such as style sheets) here,
61 | # relative to this directory. They are copied after the builtin static files,
62 | # so a file named "default.css" will overwrite the builtin "default.css".
63 | html_static_path = ["_static"]
64 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | RuleXAI
 3 | ========
 4 | 
 5 | Welcome to RuleXAI's documentation!
 6 | 
 7 | **RuleXAI** is a rule-based approach to explain the output of any machine learning model. It is suitable for classification, regression and survival tasks. The theoretical basis of the rule analysis methods implemented
 8 | in the RuleXAI package can be found in the `Theoretical basis <./_static/theoretical_basis_description.pdf>`_ section.
 9 | 
10 | Installation
11 | ============
12 | RuleXAI can be installed from `PyPI <https://pypi.org/project/rulexai/>`_::
13 | 
14 |    pip install rulexai
15 | 
16 | 
17 | .. toctree::
18 |    :maxdepth: 1
19 |    :caption: Contents:
20 | 
21 |    Theoretical basis <./rst/theoretical_basis.rst>
22 |    Code documentation <./rst/autodoc.rst>
23 |    Tutorials <./rst/tutorials.rst>
24 |    
25 | 


--------------------------------------------------------------------------------
/docs/source/rst/autodoc.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Code documentation
 3 | ========================================
 4 | 
 5 | .. autoclass:: rulexai.explainer.RuleExplainer
 6 |    :members:
 7 |    :inherited-members:
 8 | .. autoclass:: rulexai.explainer.Explainer
 9 |    :members:
10 |    :inherited-members:
11 | 


--------------------------------------------------------------------------------
/docs/source/rst/theoretical_basis.rst:
--------------------------------------------------------------------------------
1 | Theoretical basis
2 | =================
3 | 
4 | Click `here <../_static/theoretical_basis_description.pdf>`_ to view the document describing the theoretical basis of the rule analysis methods implemented in the RuleXAI package.


--------------------------------------------------------------------------------
/docs/source/rst/tutorials.rst:
--------------------------------------------------------------------------------
 1 | Tutorials
 2 | =========
 3 | .. toctree::
 4 |     :maxdepth: 1
 5 |     :caption: Table of contents:
 6 | 
 7 |     Classification <./tutorials/classification.ipynb>
 8 |     Regression <./tutorials/regression.ipynb>
 9 |     Survival <./tutorials/survival.ipynb>
10 |     Black-box <./tutorials/black-box_model_aproximation.ipynb>
11 |     Transformation <./tutorials/dataset_transformation.ipynb>


--------------------------------------------------------------------------------
/docs/source/rst/tutorials/data/GBSG2.arff:
--------------------------------------------------------------------------------
  1 | @relation GBSG2
  2 | @attribute horTh {'no','yes'}
  3 | @attribute age numeric
  4 | @attribute menostat {'Post','Pre'}
  5 | @attribute tsize numeric
  6 | @attribute tgrade {'I','II','III'}
  7 | @attribute pnodes numeric
  8 | @attribute progrec numeric
  9 | @attribute estrec numeric
 10 | @attribute survival_time numeric
 11 | @attribute survival_status numeric
 12 | @data
 13 | 'no',70,'Post',21,'II',3,48,66,1814,1
 14 | 'yes',56,'Post',12,'II',7,61,77,2018,1
 15 | 'yes',58,'Post',35,'II',9,52,271,712,1
 16 | 'yes',59,'Post',17,'II',4,60,29,1807,1
 17 | 'no',73,'Post',35,'II',1,26,65,772,1
 18 | 'no',32,'Pre',57,'III',24,0,13,448,1
 19 | 'yes',59,'Post',8,'II',2,181,0,2172,0
 20 | 'no',65,'Post',16,'II',1,192,25,2161,0
 21 | 'no',80,'Post',39,'II',30,0,59,471,1
 22 | 'no',66,'Post',18,'II',7,0,3,2014,0
 23 | 'yes',68,'Post',40,'II',9,16,20,577,1
 24 | 'yes',71,'Post',21,'II',9,0,0,184,1
 25 | 'yes',59,'Post',58,'II',1,154,101,1840,0
 26 | 'no',50,'Post',27,'III',1,16,12,1842,0
 27 | 'yes',70,'Post',22,'II',3,113,139,1821,0
 28 | 'no',54,'Post',30,'II',1,135,6,1371,1
 29 | 'no',39,'Pre',35,'I',4,79,28,707,1
 30 | 'yes',66,'Post',23,'II',1,112,225,1743,0
 31 | 'yes',69,'Post',25,'I',1,131,196,1781,0
 32 | 'no',55,'Post',65,'I',4,312,76,865,1
 33 | 'no',56,'Post',22,'II',1,28,23,1684,1
 34 | 'no',57,'Post',21,'II',2,184,294,1701,0
 35 | 'no',65,'Post',25,'III',1,0,0,1701,0
 36 | 'yes',70,'Post',15,'II',3,89,151,1693,0
 37 | 'no',65,'Post',70,'III',26,2,64,379,1
 38 | 'no',44,'Pre',23,'II',2,299,35,1105,1
 39 | 'yes',59,'Post',23,'III',3,8,0,548,1
 40 | 'no',43,'Pre',35,'II',4,37,5,1296,1
 41 | 'yes',53,'Post',58,'II',1,0,0,1483,0
 42 | 'no',32,'Pre',25,'II',2,36,10,1570,0
 43 | 'no',45,'Pre',45,'III',2,0,0,1469,0
 44 | 'no',36,'Pre',44,'III',2,6,5,1472,0
 45 | 'yes',57,'Post',35,'III',1,1490,209,1342,0
 46 | 'no',55,'Post',25,'I',2,26,53,1349,0
 47 | 'no',34,'Pre',15,'II',5,103,118,1162,1
 48 | 'yes',58,'Post',35,'II',2,38,18,1342,0
 49 | 'no',62,'Post',22,'II',12,0,8,797,1
 50 | 'no',64,'Post',25,'I',9,67,86,1232,0
 51 | 'no',53,'Post',23,'II',3,13,7,1230,0
 52 | 'no',53,'Post',13,'II',8,423,175,1205,0
 53 | 'no',65,'Post',52,'III',7,25,155,1090,0
 54 | 'no',45,'Pre',38,'II',38,160,5,1095,0
 55 | 'no',58,'Post',42,'III',1,0,0,449,1
 56 | 'no',59,'Post',25,'I',2,33,51,2438,0
 57 | 'no',65,'Post',20,'II',1,6,6,2233,0
 58 | 'yes',34,'Pre',30,'III',12,0,5,286,1
 59 | 'yes',65,'Post',18,'II',5,133,175,1861,0
 60 | 'no',61,'Post',30,'II',9,41,51,1080,1
 61 | 'yes',61,'Post',25,'II',1,21,172,1521,1
 62 | 'no',46,'Post',25,'II',1,2,0,1693,0
 63 | 'no',63,'Post',25,'II',1,86,366,1528,1
 64 | 'yes',45,'Pre',19,'II',7,19,0,169,1
 65 | 'no',46,'Pre',35,'II',7,67,44,272,1
 66 | 'no',63,'Post',40,'II',3,5,8,731,1
 67 | 'yes',53,'Pre',21,'II',9,29,9,2059,0
 68 | 'yes',43,'Post',40,'I',4,233,19,1853,0
 69 | 'no',31,'Pre',23,'II',4,20,0,1854,0
 70 | 'yes',71,'Post',15,'II',9,85,9,1645,0
 71 | 'yes',59,'Post',28,'II',18,0,7,544,1
 72 | 'no',62,'Post',15,'II',4,22,70,1666,0
 73 | 'no',54,'Post',30,'II',2,31,11,353,1
 74 | 'no',46,'Pre',25,'II',13,82,20,1791,0
 75 | 'yes',53,'Post',25,'II',2,9,1,1685,0
 76 | 'no',45,'Pre',10,'II',1,14,3,191,1
 77 | 'no',48,'Pre',30,'II',4,19,4,370,1
 78 | 'no',32,'Pre',20,'II',5,55,41,173,1
 79 | 'no',30,'Pre',12,'II',11,4,3,242,1
 80 | 'no',53,'Post',16,'III',1,1,1,420,1
 81 | 'no',42,'Pre',12,'II',6,388,30,438,1
 82 | 'no',48,'Pre',35,'II',1,41,61,1624,0
 83 | 'yes',54,'Post',30,'II',6,15,81,1036,1
 84 | 'no',56,'Post',25,'II',11,0,36,359,1
 85 | 'no',51,'Pre',25,'II',16,91,31,171,1
 86 | 'no',68,'Post',18,'II',14,0,2,959,1
 87 | 'no',46,'Pre',21,'II',3,73,13,1351,0
 88 | 'no',41,'Pre',15,'II',4,11,11,486,1
 89 | 'no',48,'Pre',16,'III',10,0,0,525,1
 90 | 'no',55,'Pre',23,'II',3,295,34,762,1
 91 | 'no',52,'Pre',36,'II',6,6,16,175,1
 92 | 'no',36,'Pre',8,'III',1,10,0,1195,0
 93 | 'no',44,'Pre',25,'III',6,5,2,338,1
 94 | 'no',47,'Post',20,'III',6,408,36,1125,0
 95 | 'no',42,'Pre',25,'II',7,0,2,249,1
 96 | 'no',63,'Post',32,'II',16,7,132,281,1
 97 | 'yes',62,'Post',50,'II',11,1,2,377,1
 98 | 'no',55,'Post',40,'I',2,64,81,1976,0
 99 | 'yes',47,'Pre',45,'II',2,264,59,2539,0
100 | 'no',63,'Post',23,'II',3,22,32,2467,0
101 | 'no',69,'Post',20,'II',2,154,191,876,1
102 | 'no',43,'Pre',21,'II',1,206,87,2132,0
103 | 'no',59,'Post',24,'II',14,2,22,426,1
104 | 'no',75,'Post',50,'II',1,170,317,554,1
105 | 'yes',41,'Pre',40,'II',4,100,100,1246,1
106 | 'no',47,'Pre',36,'III',2,154,99,1926,0
107 | 'no',43,'Pre',80,'II',20,2,25,1207,1
108 | 'no',42,'Pre',30,'III',4,65,81,1852,0
109 | 'no',46,'Pre',35,'I',5,100,0,1174,1
110 | 'no',65,'Post',58,'II',11,390,119,1250,0
111 | 'no',59,'Post',30,'II',3,0,2,530,1
112 | 'no',48,'Pre',70,'II',7,8,0,1502,0
113 | 'no',44,'Pre',27,'II',3,525,61,1364,0
114 | 'no',53,'Post',25,'II',13,77,131,1170,1
115 | 'no',53,'Post',25,'II',2,54,58,1729,0
116 | 'no',60,'Pre',23,'II',3,136,507,1642,0
117 | 'no',64,'Post',24,'II',2,206,304,1218,1
118 | 'no',56,'Post',8,'II',1,110,0,1358,0
119 | 'no',66,'Post',30,'II',16,0,508,360,1
120 | 'no',50,'Pre',30,'II',1,183,243,550,1
121 | 'no',49,'Pre',55,'II',7,0,0,679,1
122 | 'no',33,'Pre',35,'III',1,26,0,1164,1
123 | 'no',50,'Post',52,'II',1,0,0,350,1
124 | 'no',45,'Pre',29,'II',1,0,0,578,1
125 | 'no',51,'Pre',20,'II',1,0,0,1460,1
126 | 'no',39,'Pre',30,'III',1,0,0,1434,0
127 | 'yes',56,'Post',40,'II',3,0,3,1763,1
128 | 'no',60,'Post',15,'II',2,84,93,889,1
129 | 'yes',47,'Pre',35,'III',17,14,3,357,1
130 | 'no',58,'Post',50,'II',7,77,77,547,1
131 | 'yes',56,'Pre',21,'II',3,111,20,1722,0
132 | 'yes',54,'Post',21,'II',1,7,139,2372,0
133 | 'yes',56,'Post',40,'II',3,0,59,2030,1
134 | 'no',57,'Post',26,'II',1,166,521,1002,1
135 | 'no',53,'Post',10,'II',1,17,61,1280,1
136 | 'no',31,'Pre',60,'II',7,542,77,338,1
137 | 'yes',41,'Pre',80,'II',1,0,0,533,1
138 | 'yes',37,'Pre',25,'II',1,235,38,1169,0
139 | 'no',66,'Post',15,'II',1,252,185,1675,1
140 | 'no',48,'Pre',45,'III',1,0,0,1862,0
141 | 'no',51,'Pre',50,'II',9,0,0,1167,0
142 | 'no',57,'Post',20,'II',3,39,83,495,1
143 | 'yes',40,'Pre',30,'II',2,320,30,1720,0
144 | 'yes',62,'Post',19,'II',1,35,1060,598,1
145 | 'yes',64,'Post',30,'III',12,0,0,392,1
146 | 'no',46,'Pre',12,'II',3,175,80,1502,0
147 | 'no',69,'Post',27,'I',3,140,350,1296,0
148 | 'no',58,'Post',52,'III',5,0,0,1177,0
149 | 'yes',65,'Post',30,'II',5,85,365,1113,0
150 | 'no',40,'Pre',40,'II',5,50,75,288,1
151 | 'no',55,'Post',20,'III',16,0,0,403,1
152 | 'no',62,'Post',25,'III',5,0,0,1225,1
153 | 'no',29,'Pre',12,'II',4,32,150,338,1
154 | 'no',38,'Pre',18,'III',5,141,105,1337,1
155 | 'no',52,'Pre',20,'I',1,78,14,1420,1
156 | 'no',47,'Post',55,'II',18,29,87,2048,0
157 | 'no',53,'Pre',75,'III',19,375,107,600,1
158 | 'no',37,'Pre',15,'I',1,162,22,1765,0
159 | 'no',63,'Post',60,'II',15,180,12,491,1
160 | 'no',63,'Post',45,'III',7,20,93,305,1
161 | 'no',59,'Post',22,'II',2,23,235,1582,0
162 | 'no',48,'Pre',30,'II',15,250,45,1771,0
163 | 'no',33,'Pre',15,'III',33,66,8,960,1
164 | 'no',38,'Pre',57,'III',9,18,62,571,1
165 | 'no',31,'Pre',28,'II',2,349,189,285,1
166 | 'no',53,'Post',48,'II',7,254,117,1472,0
167 | 'no',47,'Pre',30,'II',1,422,89,1279,1
168 | 'yes',64,'Post',19,'II',1,19,9,1863,0
169 | 'yes',49,'Post',56,'I',3,356,64,1933,0
170 | 'no',53,'Post',52,'II',9,6,29,358,1
171 | 'yes',61,'Post',22,'II',2,6,173,2372,1
172 | 'no',43,'Pre',30,'II',1,22,0,2563,0
173 | 'yes',74,'Post',20,'II',1,462,240,2372,0
174 | 'yes',58,'Post',18,'I',2,74,67,1989,1
175 | 'yes',49,'Pre',20,'II',6,56,98,2015,1
176 | 'yes',61,'Post',35,'III',2,23,9,1956,0
177 | 'no',66,'Post',40,'III',16,21,412,945,1
178 | 'yes',66,'Post',20,'III',3,54,17,2153,0
179 | 'no',59,'Post',23,'II',2,88,38,838,1
180 | 'no',51,'Post',70,'III',6,28,5,113,1
181 | 'yes',71,'Post',18,'II',2,31,9,1833,0
182 | 'no',46,'Pre',50,'III',10,44,4,1722,0
183 | 'no',52,'Pre',40,'III',6,32,5,241,1
184 | 'yes',60,'Post',16,'II',1,184,51,1352,1
185 | 'no',60,'Post',50,'II',7,65,30,1702,0
186 | 'yes',67,'Post',27,'II',4,1118,753,1222,0
187 | 'no',54,'Post',30,'III',3,1,0,1089,0
188 | 'no',55,'Post',12,'II',1,63,19,1243,0
189 | 'no',38,'Pre',20,'II',9,24,34,579,1
190 | 'yes',52,'Post',25,'II',13,31,196,1043,1
191 | 'no',43,'Pre',30,'II',3,45,11,2234,0
192 | 'no',50,'Pre',22,'I',1,135,111,2297,0
193 | 'yes',61,'Post',25,'I',2,32,144,2014,0
194 | 'no',62,'Post',20,'II',2,7,9,518,1
195 | 'no',52,'Post',20,'III',10,7,8,251,1
196 | 'no',45,'Pre',20,'II',2,64,48,1959,0
197 | 'no',52,'Post',10,'II',3,109,12,1897,0
198 | 'no',51,'Post',120,'II',12,3,1,160,1
199 | 'no',61,'Post',20,'II',5,25,75,348,1
200 | 'yes',64,'Post',45,'III',5,1,8,275,1
201 | 'no',64,'Post',17,'I',1,227,0,1329,1
202 | 'no',51,'Post',35,'III',1,6,1,1193,1
203 | 'yes',63,'Post',30,'II',7,0,0,698,1
204 | 'no',62,'Post',12,'II',7,0,0,436,1
205 | 'yes',65,'Post',18,'III',1,0,0,552,1
206 | 'yes',67,'Post',20,'II',1,0,0,564,1
207 | 'no',62,'Post',30,'II',1,8,371,2239,0
208 | 'yes',48,'Pre',25,'II',1,235,33,2237,0
209 | 'no',67,'Post',25,'II',1,6,19,529,1
210 | 'no',46,'Pre',11,'II',2,0,0,1820,0
211 | 'yes',56,'Post',20,'I',1,2,334,1756,0
212 | 'yes',72,'Post',34,'III',36,2,1091,515,1
213 | 'yes',50,'Post',70,'II',19,10,57,272,1
214 | 'no',58,'Post',21,'III',2,1,1,891,1
215 | 'no',63,'Post',21,'II',1,0,378,1356,0
216 | 'no',45,'Post',15,'II',6,1,162,1352,0
217 | 'yes',58,'Post',18,'II',3,64,418,675,1
218 | 'yes',63,'Post',21,'II',1,26,30,2551,0
219 | 'no',60,'Post',35,'II',12,41,62,754,1
220 | 'no',33,'Pre',25,'II',8,96,13,819,1
221 | 'yes',63,'Post',19,'II',5,18,38,1280,1
222 | 'no',70,'Post',16,'II',2,126,338,2388,0
223 | 'yes',60,'Post',30,'II',2,92,18,2296,0
224 | 'yes',54,'Post',25,'II',1,5,57,1884,0
225 | 'yes',64,'Post',25,'III',3,56,272,1059,1
226 | 'yes',50,'Post',21,'I',1,82,2,1109,0
227 | 'no',53,'Post',20,'II',1,1,1,1192,1
228 | 'no',77,'Post',20,'III',4,94,325,1806,1
229 | 'yes',47,'Pre',60,'II',15,5,38,500,1
230 | 'no',41,'Pre',20,'II',4,8,38,1589,1
231 | 'yes',47,'Pre',30,'II',5,12,11,1463,1
232 | 'yes',63,'Post',25,'II',2,8,195,1826,0
233 | 'no',48,'Pre',22,'II',4,26,29,1231,0
234 | 'no',40,'Pre',15,'II',1,204,138,1117,0
235 | 'yes',57,'Post',30,'II',8,40,40,836,1
236 | 'no',47,'Pre',40,'II',2,33,59,1222,0
237 | 'yes',58,'Post',35,'III',7,0,0,722,1
238 | 'yes',62,'Post',23,'II',2,0,14,1150,1
239 | 'no',50,'Pre',60,'III',4,0,0,446,1
240 | 'yes',65,'Post',30,'II',5,0,36,1855,0
241 | 'yes',59,'Post',30,'II',8,0,0,238,1
242 | 'no',49,'Pre',18,'II',2,0,0,1838,0
243 | 'yes',52,'Post',25,'II',13,0,0,1826,0
244 | 'no',45,'Pre',30,'II',1,0,0,1093,1
245 | 'no',49,'Post',14,'II',1,0,0,2051,0
246 | 'no',58,'Post',45,'III',4,0,0,370,1
247 | 'no',25,'Pre',22,'II',2,250,87,861,1
248 | 'no',50,'Pre',30,'III',6,0,0,1587,1
249 | 'no',43,'Pre',27,'II',1,23,9,552,1
250 | 'no',46,'Pre',12,'II',1,6,49,2353,0
251 | 'yes',64,'Post',24,'III',5,366,201,2471,0
252 | 'yes',63,'Post',43,'II',5,21,174,893,1
253 | 'no',40,'Pre',35,'II',2,279,99,2093,1
254 | 'yes',57,'Post',22,'II',4,16,5,2612,0
255 | 'yes',58,'Post',56,'I',11,51,50,956,1
256 | 'yes',62,'Post',25,'III',4,12,49,1637,0
257 | 'yes',50,'Pre',42,'I',2,238,26,2456,0
258 | 'no',49,'Post',30,'II',4,40,177,2227,0
259 | 'no',64,'Post',24,'II',2,41,80,1601,1
260 | 'yes',66,'Post',15,'II',2,15,42,1841,0
261 | 'yes',37,'Pre',30,'II',4,104,107,2177,0
262 | 'no',60,'Post',18,'III',2,12,8,2052,0
263 | 'no',51,'Pre',12,'I',2,55,64,2156,0
264 | 'yes',49,'Pre',28,'I',4,364,120,1499,0
265 | 'yes',57,'Post',7,'II',1,1,1,2030,0
266 | 'yes',68,'Post',14,'II',6,40,68,573,1
267 | 'no',47,'Pre',25,'II',1,199,134,1666,0
268 | 'no',51,'Post',13,'II',5,89,134,1979,0
269 | 'yes',49,'Pre',19,'I',5,69,14,1786,0
270 | 'no',63,'Post',28,'II',4,258,46,1847,0
271 | 'yes',64,'Post',15,'II',1,340,71,2009,0
272 | 'no',65,'Post',24,'II',1,328,115,1926,0
273 | 'yes',63,'Post',13,'II',1,124,361,1490,0
274 | 'no',33,'Pre',23,'III',10,2,3,233,1
275 | 'no',44,'Pre',35,'II',6,26,4,1240,0
276 | 'no',47,'Pre',13,'II',3,242,14,1751,0
277 | 'no',46,'Pre',19,'I',11,56,24,1878,0
278 | 'no',52,'Pre',26,'II',1,258,10,1171,0
279 | 'no',62,'Post',55,'III',8,3,2,1751,0
280 | 'yes',61,'Post',24,'II',2,28,50,1756,0
281 | 'no',60,'Post',27,'II',6,401,159,714,1
282 | 'yes',67,'Post',44,'II',10,431,267,1505,0
283 | 'no',47,'Pre',78,'II',14,168,53,776,1
284 | 'no',70,'Post',38,'III',2,24,15,1443,0
285 | 'no',50,'Pre',11,'I',1,10,11,1317,0
286 | 'no',58,'Post',30,'III',13,7,46,859,1
287 | 'no',59,'Post',20,'II',1,2,4,223,1
288 | 'no',45,'Pre',18,'I',1,56,40,1212,0
289 | 'no',45,'Pre',30,'II',3,345,31,1119,0
290 | 'yes',60,'Post',24,'III',7,10,10,632,1
291 | 'yes',51,'Pre',30,'III',2,1152,38,1760,0
292 | 'no',49,'Pre',45,'III',6,0,22,375,1
293 | 'yes',47,'Pre',42,'II',7,164,204,1323,0
294 | 'no',37,'Pre',50,'III',2,170,130,1233,0
295 | 'no',44,'Pre',29,'II',1,27,23,1866,0
296 | 'yes',38,'Pre',18,'II',4,28,5,491,1
297 | 'yes',51,'Pre',34,'II',3,13,12,1918,1
298 | 'no',59,'Post',8,'II',5,1,30,72,1
299 | 'yes',52,'Post',49,'III',6,8,5,1140,1
300 | 'yes',64,'Post',32,'II',4,402,372,799,1
301 | 'no',55,'Post',37,'II',1,82,234,1105,1
302 | 'no',61,'Post',22,'II',2,179,124,548,1
303 | 'yes',44,'Pre',28,'III',17,2,3,227,1
304 | 'no',38,'Pre',24,'II',3,13,5,1838,0
305 | 'yes',43,'Pre',11,'I',1,126,22,1833,0
306 | 'no',65,'Post',36,'III',2,9,7,550,1
307 | 'yes',59,'Post',48,'III',1,5,17,426,1
308 | 'no',38,'Pre',31,'I',10,365,206,1834,0
309 | 'no',47,'Pre',25,'II',3,18,42,1604,0
310 | 'yes',47,'Post',30,'I',9,114,26,1146,1
311 | 'no',36,'Pre',25,'II',2,70,22,371,1
312 | 'no',47,'Pre',24,'II',20,30,8,883,1
313 | 'no',38,'Pre',23,'III',3,14,6,1735,0
314 | 'yes',50,'Post',23,'II',8,98,30,554,1
315 | 'no',44,'Pre',5,'II',10,11,10,790,1
316 | 'no',54,'Post',22,'II',2,211,129,1340,0
317 | 'no',52,'Pre',30,'II',12,11,20,490,1
318 | 'no',34,'Pre',3,'III',1,14,11,1557,0
319 | 'no',64,'Post',33,'III',3,20,14,594,1
320 | 'no',65,'Post',27,'II',4,148,191,594,1
321 | 'yes',47,'Pre',30,'I',3,195,45,2556,0
322 | 'no',51,'Pre',20,'II',1,77,89,1753,1
323 | 'no',63,'Post',15,'III',5,0,0,417,1
324 | 'no',36,'Pre',30,'III',2,0,0,956,1
325 | 'yes',63,'Post',34,'II',12,223,236,1846,0
326 | 'no',47,'Pre',70,'II',5,796,24,1703,0
327 | 'no',51,'Pre',21,'III',1,0,0,1720,0
328 | 'yes',62,'Post',30,'II',1,88,544,1355,0
329 | 'no',56,'Post',40,'III',3,0,0,1603,0
330 | 'no',62,'Post',33,'I',5,239,76,476,1
331 | 'yes',61,'Post',30,'II',8,472,293,1350,0
332 | 'yes',55,'Post',15,'III',3,97,194,1341,0
333 | 'yes',56,'Post',11,'II',1,270,369,2449,0
334 | 'no',69,'Post',22,'II',8,282,191,2286,1
335 | 'no',57,'Post',25,'II',3,48,65,456,1
336 | 'no',27,'Pre',22,'II',1,56,99,536,1
337 | 'no',38,'Pre',25,'II',1,102,11,612,1
338 | 'no',42,'Pre',25,'III',2,11,10,2034,1
339 | 'no',69,'Post',19,'I',3,73,386,1990,1
340 | 'no',61,'Post',50,'II',4,10,10,2456,1
341 | 'no',53,'Pre',13,'III',3,10,20,2205,0
342 | 'no',50,'Pre',25,'III',1,24,85,544,1
343 | 'no',52,'Pre',27,'II',5,0,8,336,1
344 | 'no',47,'Pre',38,'II',2,58,10,2057,0
345 | 'no',65,'Post',27,'II',19,23,13,575,1
346 | 'no',48,'Pre',38,'II',3,92,41,2011,0
347 | 'no',61,'Post',38,'II',17,46,52,537,1
348 | 'yes',47,'Pre',12,'II',1,110,14,2217,0
349 | 'no',46,'Post',20,'II',11,680,152,1814,1
350 | 'yes',59,'Post',15,'II',1,30,122,890,1
351 | 'yes',60,'Post',22,'III',1,218,442,1114,0
352 | 'yes',45,'Pre',100,'II',6,178,77,2320,0
353 | 'no',58,'Post',35,'I',6,130,162,795,1
354 | 'no',51,'Post',40,'II',8,132,64,867,1
355 | 'no',49,'Pre',15,'II',1,111,19,1703,0
356 | 'no',43,'Pre',30,'II',2,32,16,670,1
357 | 'no',37,'Pre',35,'II',7,53,19,981,1
358 | 'no',51,'Pre',30,'II',2,505,270,1094,0
359 | 'yes',48,'Pre',35,'II',1,340,32,755,1
360 | 'no',54,'Post',21,'II',7,6,8,1388,1
361 | 'no',64,'Post',21,'III',1,4,3,1387,1
362 | 'no',44,'Pre',55,'III',4,8,8,535,1
363 | 'no',67,'Post',30,'II',2,5,14,1653,0
364 | 'no',63,'Post',24,'II',3,46,25,1904,0
365 | 'yes',42,'Pre',28,'III',4,27,22,1868,0
366 | 'yes',60,'Post',12,'I',2,402,90,1767,0
367 | 'no',39,'Pre',20,'II',1,38,110,855,1
368 | 'no',53,'Post',16,'II',1,16,120,1157,1
369 | 'yes',38,'Pre',61,'II',8,624,569,1869,0
370 | 'no',61,'Post',40,'I',15,185,206,1152,0
371 | 'no',47,'Pre',15,'II',1,38,0,1401,0
372 | 'no',67,'Post',65,'II',8,0,0,745,1
373 | 'yes',61,'Post',25,'II',18,595,419,1283,0
374 | 'yes',57,'Post',15,'II',3,44,78,1481,1
375 | 'yes',42,'Pre',9,'I',8,77,40,1807,0
376 | 'yes',39,'Pre',20,'III',1,2,2,542,1
377 | 'no',34,'Pre',50,'III',7,4,1,1441,0
378 | 'yes',52,'Pre',50,'II',7,45,39,1277,0
379 | 'yes',53,'Pre',45,'II',4,395,44,1486,0
380 | 'yes',46,'Pre',23,'III',8,2,1,177,1
381 | 'no',36,'Pre',36,'II',1,76,14,545,1
382 | 'no',39,'Pre',28,'II',3,5,4,1185,0
383 | 'no',46,'Pre',45,'I',9,239,58,1088,0
384 | 'yes',60,'Post',25,'II',7,116,435,2380,0
385 | 'yes',64,'Post',36,'II',2,122,198,1679,1
386 | 'yes',54,'Post',40,'III',4,3,2,498,1
387 | 'no',54,'Post',27,'II',5,138,23,2138,0
388 | 'no',46,'Pre',35,'II',6,405,27,2175,0
389 | 'no',49,'Pre',17,'II',2,324,94,2271,0
390 | 'yes',55,'Post',15,'II',3,16,14,964,1
391 | 'yes',45,'Pre',23,'II',4,1,4,540,1
392 | 'no',51,'Post',30,'III',10,15,103,747,1
393 | 'no',43,'Pre',25,'II',11,1,1,650,1
394 | 'yes',59,'Post',30,'II',13,7,81,410,1
395 | 'no',59,'Post',27,'III',20,9,2,624,1
396 | 'no',47,'Pre',28,'III',7,16,92,1560,0
397 | 'no',48,'Pre',35,'III',10,2,222,455,1
398 | 'no',47,'Pre',16,'II',2,128,18,1629,0
399 | 'no',49,'Post',21,'II',5,80,152,1730,0
400 | 'yes',65,'Post',25,'III',2,17,14,1483,0
401 | 'no',60,'Post',21,'II',1,58,701,687,1
402 | 'no',52,'Post',35,'III',1,8,5,308,1
403 | 'no',48,'Post',22,'II',4,14,0,563,1
404 | 'no',46,'Post',20,'II',2,32,29,2144,0
405 | 'no',59,'Post',21,'II',4,0,75,344,1
406 | 'yes',68,'Post',45,'I',3,31,145,1905,0
407 | 'yes',74,'Post',35,'II',11,10,472,855,1
408 | 'no',45,'Pre',50,'I',2,132,200,2370,0
409 | 'yes',44,'Pre',24,'III',5,187,62,475,1
410 | 'yes',72,'Post',17,'II',1,229,533,2195,0
411 | 'yes',49,'Pre',100,'II',35,84,24,648,1
412 | 'no',76,'Post',37,'III',24,11,0,195,1
413 | 'yes',57,'Post',35,'II',4,18,0,473,1
414 | 'yes',62,'Post',22,'II',1,263,34,2659,0
415 | 'yes',46,'Pre',60,'II',19,2,16,1977,1
416 | 'yes',53,'Post',17,'II',1,25,30,2401,0
417 | 'no',43,'Pre',20,'II',3,980,45,1499,0
418 | 'no',51,'Post',32,'III',10,0,0,1856,0
419 | 'no',41,'Pre',30,'III',11,6,5,595,1
420 | 'no',63,'Post',45,'III',2,530,328,2148,0
421 | 'yes',41,'Pre',20,'III',3,13,1,2126,0
422 | 'yes',74,'Post',30,'III',12,432,246,1975,1
423 | 'yes',57,'Post',30,'II',1,17,83,1641,1
424 | 'yes',44,'Pre',20,'II',6,150,67,1717,0
425 | 'yes',48,'Pre',24,'II',1,211,187,1858,0
426 | 'no',47,'Pre',15,'III',1,139,36,2049,0
427 | 'yes',70,'Post',25,'II',4,34,273,1502,1
428 | 'no',49,'Pre',14,'II',1,160,12,1922,0
429 | 'yes',49,'Post',24,'II',2,120,117,1818,0
430 | 'yes',58,'Post',35,'II',11,2,76,1100,0
431 | 'no',59,'Post',30,'II',1,87,8,1499,0
432 | 'no',60,'Post',35,'II',2,5,4,359,1
433 | 'yes',63,'Post',30,'I',5,144,221,1645,0
434 | 'no',44,'Pre',15,'II',1,175,88,1356,0
435 | 'yes',79,'Post',23,'I',1,60,80,1632,0
436 | 'yes',61,'Post',30,'II',1,24,38,1091,0
437 | 'yes',64,'Post',35,'II',3,47,64,918,1
438 | 'yes',51,'Pre',21,'II',1,3,2,557,1
439 | 'no',44,'Pre',22,'II',2,107,94,1219,1
440 | 'yes',60,'Post',25,'I',3,78,363,2170,0
441 | 'yes',55,'Post',50,'II',1,14,203,729,1
442 | 'no',70,'Post',80,'III',8,0,0,1449,1
443 | 'no',65,'Post',20,'I',2,912,606,991,1
444 | 'no',53,'Pre',20,'II',2,89,36,481,1
445 | 'yes',54,'Post',25,'III',3,1,83,1655,0
446 | 'no',65,'Post',25,'II',2,86,135,857,1
447 | 'yes',62,'Post',30,'II',2,5,104,369,1
448 | 'yes',48,'Pre',30,'I',3,133,129,1627,0
449 | 'yes',48,'Post',35,'I',2,845,105,1578,0
450 | 'no',42,'Pre',40,'II',10,130,51,732,1
451 | 'no',48,'Pre',30,'II',16,29,43,460,1
452 | 'no',66,'Post',25,'I',2,22,121,1208,0
453 | 'yes',63,'Post',25,'II',13,26,348,730,1
454 | 'no',54,'Post',23,'III',10,13,6,307,1
455 | 'no',52,'Post',17,'II',4,558,522,983,1
456 | 'no',43,'Pre',80,'III',11,9,1,120,1
457 | 'no',56,'Post',31,'II',1,45,286,1525,1
458 | 'no',42,'Post',21,'I',4,147,95,1680,0
459 | 'no',56,'Post',16,'II',10,4,2,1730,1
460 | 'no',61,'Post',36,'II',6,107,158,805,1
461 | 'no',67,'Post',17,'II',4,390,386,2388,0
462 | 'yes',63,'Post',21,'I',2,16,241,559,1
463 | 'yes',66,'Post',20,'II',9,1,11,1977,0
464 | 'no',37,'Pre',25,'III',1,13,1,476,1
465 | 'yes',71,'Post',16,'II',1,98,306,1514,0
466 | 'no',43,'Pre',28,'I',1,437,33,1617,0
467 | 'no',64,'Post',22,'III',1,8,11,1094,1
468 | 'yes',64,'Post',27,'II',3,186,139,784,1
469 | 'no',46,'Pre',32,'II',5,9,13,181,1
470 | 'no',45,'Pre',50,'II',7,20,23,415,1
471 | 'yes',67,'Post',24,'II',4,96,90,1120,1
472 | 'no',37,'Pre',25,'III',8,9,0,316,1
473 | 'no',65,'Post',22,'I',6,386,31,637,1
474 | 'no',21,'Pre',15,'II',3,24,25,247,1
475 | 'no',46,'Pre',45,'II',8,2,4,622,1
476 | 'yes',46,'Post',31,'III',1,6,3,1163,0
477 | 'no',58,'Post',31,'II',2,240,394,1721,0
478 | 'no',41,'Pre',23,'III',2,26,4,372,1
479 | 'no',32,'Pre',17,'III',1,19,8,1331,0
480 | 'yes',66,'Post',42,'III',11,412,339,394,1
481 | 'no',57,'Post',50,'III',13,22,47,98,1
482 | 'yes',47,'Post',23,'III',5,0,0,308,1
483 | 'no',44,'Pre',15,'II',1,0,0,1965,0
484 | 'yes',61,'Post',35,'III',16,10,13,548,1
485 | 'no',48,'Pre',21,'III',8,0,0,293,1
486 | 'yes',51,'Pre',16,'II',5,167,15,2017,0
487 | 'no',66,'Post',22,'II',4,11,22,1093,0
488 | 'no',66,'Post',21,'II',1,9,898,586,1
489 | 'yes',69,'Post',40,'III',1,3,9,1434,0
490 | 'yes',33,'Pre',19,'II',2,0,0,2128,0
491 | 'no',46,'Pre',30,'II',2,26,223,1965,0
492 | 'no',47,'Pre',20,'II',1,48,26,2161,0
493 | 'yes',35,'Pre',35,'II',4,0,0,1183,1
494 | 'no',34,'Pre',40,'III',1,0,37,1108,1
495 | 'no',38,'Pre',24,'I',1,138,82,2065,0
496 | 'no',54,'Post',27,'III',1,27,792,1598,0
497 | 'no',31,'Pre',55,'II',3,28,89,491,1
498 | 'no',41,'Pre',25,'II',5,6,9,1366,1
499 | 'yes',52,'Post',35,'II',21,11,57,859,1
500 | 'yes',65,'Post',25,'III',18,0,0,180,1
501 | 'no',47,'Post',45,'II',2,345,42,1625,0
502 | 'no',65,'Post',10,'I',2,213,209,1938,0
503 | 'yes',53,'Post',37,'II',5,345,47,1343,1
504 | 'no',45,'Pre',15,'II',3,28,27,646,1
505 | 'no',53,'Pre',19,'III',1,74,534,2192,0
506 | 'yes',50,'Post',25,'II',3,0,496,502,1
507 | 'no',54,'Post',50,'III',6,7,0,1675,0
508 | 'yes',64,'Post',40,'II',23,16,22,1363,1
509 | 'no',29,'Pre',15,'III',12,18,40,420,1
510 | 'no',48,'Pre',60,'I',4,312,20,982,1
511 | 'no',40,'Pre',30,'III',3,2,16,1459,0
512 | 'no',65,'Post',35,'II',1,7,74,1192,0
513 | 'no',50,'Post',40,'II',1,80,21,1264,0
514 | 'no',55,'Post',34,'II',6,109,477,1095,0
515 | 'no',35,'Pre',22,'II',13,16,25,465,1
516 | 'no',48,'Pre',52,'II',11,0,0,842,1
517 | 'yes',62,'Post',39,'II',4,73,235,374,1
518 | 'no',47,'Pre',40,'II',1,44,11,1089,0
519 | 'no',51,'Post',19,'II',2,92,245,1527,0
520 | 'no',42,'Pre',40,'II',10,256,0,285,1
521 | 'no',63,'Post',27,'II',1,0,0,1306,1
522 | 'yes',62,'Post',20,'II',7,0,0,797,1
523 | 'no',57,'Post',15,'II',1,91,125,1441,0
524 | 'no',25,'Pre',29,'II',3,0,0,343,1
525 | 'no',51,'Post',25,'II',2,0,80,503,1
526 | 'yes',47,'Pre',30,'II',10,0,0,827,1
527 | 'yes',34,'Pre',30,'II',2,210,49,1427,0
528 | 'yes',64,'Post',30,'III',12,550,263,177,1
529 | 'no',42,'Pre',55,'III',7,20,20,281,1
530 | 'no',37,'Pre',35,'III',1,242,67,205,1
531 | 'no',62,'Post',27,'II',13,197,79,629,1
532 | 'yes',51,'Post',22,'II',4,250,81,2010,0
533 | 'yes',45,'Pre',13,'III',4,21,27,2009,0
534 | 'no',41,'Pre',10,'I',2,241,214,1984,0
535 | 'no',39,'Pre',32,'II',9,1,8,1981,0
536 | 'no',53,'Post',26,'III',8,1,1,624,1
537 | 'no',59,'Post',35,'II',4,1,1,742,1
538 | 'yes',53,'Post',10,'II',2,217,20,1818,0
539 | 'yes',60,'Post',100,'II',10,102,88,1493,1
540 | 'no',50,'Pre',29,'I',2,323,60,1432,0
541 | 'no',51,'Pre',18,'I',1,94,60,801,1
542 | 'no',51,'Pre',25,'II',2,20,11,1182,0
543 | 'yes',57,'Post',32,'II',2,43,287,1722,0
544 | 'yes',46,'Pre',18,'II',1,120,628,1692,0
545 | 'yes',64,'Post',26,'II',2,1356,1144,1152,0
546 | 'yes',37,'Pre',22,'I',3,23,64,1459,1
547 | 'no',64,'Post',21,'II',3,403,253,2237,0
548 | 'no',48,'Pre',18,'I',1,137,73,2056,0
549 | 'yes',50,'Post',50,'II',6,1,2,1729,0
550 | 'yes',32,'Pre',20,'II',6,8,3,2024,0
551 | 'no',49,'Pre',19,'II',2,388,137,2039,1
552 | 'yes',33,'Pre',28,'III',1,1,1,2027,0
553 | 'yes',58,'Post',35,'II',1,6,11,2007,0
554 | 'no',57,'Post',25,'II',1,26,299,1253,1
555 | 'no',45,'Pre',35,'II',2,26,36,1789,0
556 | 'no',66,'Post',30,'I',5,100,288,1707,0
557 | 'no',52,'Pre',37,'II',3,66,104,1714,0
558 | 'yes',49,'Pre',25,'II',3,152,25,1717,0
559 | 'no',49,'Post',22,'II',1,14,41,329,1
560 | 'no',48,'Post',45,'I',1,312,236,1624,0
561 | 'yes',62,'Post',60,'II',1,56,17,1600,0
562 | 'no',60,'Post',35,'II',3,115,300,385,1
563 | 'no',45,'Pre',10,'II',1,82,8,1475,0
564 | 'no',60,'Post',37,'I',1,296,35,1435,0
565 | 'yes',57,'Post',36,'III',1,170,192,1329,0
566 | 'yes',53,'Post',27,'III',12,44,42,1357,0
567 | 'no',56,'Post',55,'III',3,46,31,1343,0
568 | 'no',46,'Pre',23,'II',2,120,41,748,1
569 | 'no',49,'Post',30,'II',2,254,353,1090,1
570 | 'yes',56,'Post',32,'II',2,53,174,1219,0
571 | 'yes',56,'Post',42,'I',5,113,700,662,1
572 | 'no',40,'Pre',40,'II',6,227,10,866,1
573 | 'yes',60,'Post',40,'II',6,8,11,504,1
574 | 'no',51,'Pre',25,'III',5,43,0,769,1
575 | 'no',52,'Post',23,'II',3,15,34,727,1
576 | 'no',55,'Post',23,'II',9,116,15,1701,1
577 | 


--------------------------------------------------------------------------------
/docs/source/rst/tutorials/data/australian_test.csv:
--------------------------------------------------------------------------------
 1 | A1,A2,A3,A4,A5,A6,A7,A8,A9,A10,A11,A12,A13,A14,Class
 2 | 1.0,2208.0,1146.0,2.0,4.0,4.0,1585.0,0.0,0.0,0.0,1.0,2.0,100.0,1213.0,0
 3 | 0.0,2267.0,7.0,2.0,8.0,4.0,165.0,0.0,0.0,0.0,0.0,2.0,160.0,1.0,0
 4 | 0.0,1583.0,585.0,2.0,8.0,8.0,15.0,1.0,1.0,2.0,0.0,2.0,100.0,1.0,1
 5 | 1.0,20.0,125.0,1.0,4.0,4.0,125.0,0.0,0.0,0.0,0.0,2.0,140.0,5.0,0
 6 | 0.0,2242.0,5665.0,2.0,11.0,4.0,2585.0,1.0,1.0,7.0,0.0,2.0,129.0,3258.0,1
 7 | 1.0,3275.0,15.0,2.0,13.0,8.0,55.0,1.0,1.0,3.0,1.0,2.0,0.0,1.0,1
 8 | 0.0,3067.0,12.0,2.0,8.0,4.0,2.0,1.0,1.0,1.0,0.0,2.0,220.0,20.0,1
 9 | 1.0,2758.0,325.0,1.0,11.0,8.0,5085.0,0.0,1.0,2.0,1.0,2.0,369.0,2.0,0
10 | 0.0,2375.0,71.0,2.0,9.0,4.0,25.0,0.0,1.0,1.0,1.0,2.0,240.0,5.0,0
11 | 1.0,3758.0,0.0,2.0,8.0,4.0,0.0,0.0,0.0,0.0,0.0,3.0,184.0,1.0,1
12 | 1.0,3625.0,5.0,2.0,8.0,5.0,25.0,1.0,1.0,6.0,0.0,2.0,0.0,368.0,1
13 | 1.0,2125.0,15.0,2.0,9.0,4.0,15.0,0.0,0.0,0.0,0.0,2.0,150.0,9.0,1
14 | 1.0,2775.0,585.0,1.0,13.0,4.0,25.0,1.0,1.0,2.0,0.0,2.0,260.0,501.0,1
15 | 1.0,19.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,4.0,0.0,2.0,45.0,2.0,0
16 | 1.0,4733.0,65.0,2.0,8.0,4.0,1.0,0.0,0.0,0.0,1.0,2.0,0.0,229.0,0
17 | 1.0,22.0,79.0,2.0,9.0,4.0,29.0,0.0,1.0,1.0,0.0,2.0,420.0,284.0,0
18 | 1.0,4517.0,15.0,2.0,8.0,4.0,25.0,1.0,0.0,0.0,1.0,2.0,140.0,1.0,0
19 | 1.0,2267.0,1585.0,1.0,9.0,4.0,3085.0,1.0,1.0,6.0,0.0,2.0,80.0,1.0,1
20 | 1.0,4783.0,4165.0,2.0,14.0,5.0,85.0,0.0,0.0,0.0,1.0,2.0,520.0,1.0,0
21 | 1.0,2308.0,25.0,2.0,1.0,1.0,85.0,0.0,0.0,0.0,1.0,2.0,100.0,4209.0,0
22 | 0.0,1892.0,925.0,1.0,8.0,4.0,1.0,1.0,1.0,4.0,1.0,2.0,80.0,501.0,1
23 | 1.0,4117.0,125.0,1.0,9.0,4.0,25.0,0.0,0.0,0.0,0.0,2.0,0.0,196.0,0
24 | 1.0,3317.0,104.0,2.0,12.0,8.0,65.0,1.0,0.0,0.0,1.0,2.0,164.0,31286.0,1
25 | 1.0,2075.0,5085.0,1.0,5.0,4.0,29.0,0.0,0.0,0.0,0.0,2.0,140.0,185.0,0
26 | 1.0,5442.0,5.0,1.0,4.0,8.0,396.0,1.0,0.0,0.0,0.0,2.0,180.0,315.0,1
27 | 1.0,3367.0,2165.0,2.0,8.0,4.0,15.0,0.0,0.0,0.0,0.0,3.0,120.0,1.0,0
28 | 1.0,4317.0,225.0,2.0,3.0,5.0,75.0,1.0,0.0,0.0,0.0,2.0,560.0,1.0,0
29 | 1.0,495.0,7585.0,2.0,3.0,5.0,7585.0,1.0,1.0,15.0,1.0,2.0,0.0,5001.0,1
30 | 1.0,6008.0,145.0,2.0,1.0,1.0,18.0,1.0,1.0,15.0,1.0,2.0,0.0,1001.0,1
31 | 1.0,415.0,154.0,2.0,3.0,5.0,35.0,0.0,0.0,0.0,0.0,2.0,216.0,1.0,1
32 | 1.0,3367.0,125.0,2.0,9.0,4.0,1165.0,0.0,0.0,0.0,0.0,2.0,120.0,1.0,0
33 | 1.0,23.0,75.0,2.0,7.0,4.0,5.0,0.0,0.0,0.0,1.0,1.0,320.0,1.0,0
34 | 1.0,35.0,25.0,2.0,3.0,4.0,1.0,0.0,0.0,0.0,1.0,2.0,210.0,1.0,0
35 | 0.0,5708.0,335.0,2.0,3.0,5.0,1.0,1.0,0.0,0.0,1.0,2.0,252.0,2198.0,0
36 | 1.0,20.0,11045.0,2.0,8.0,4.0,2.0,0.0,0.0,0.0,1.0,2.0,136.0,1.0,0
37 | 1.0,1858.0,571.0,2.0,2.0,4.0,54.0,0.0,0.0,0.0,0.0,2.0,120.0,1.0,0
38 | 1.0,2567.0,325.0,2.0,8.0,8.0,229.0,0.0,1.0,1.0,1.0,2.0,416.0,22.0,0
39 | 0.0,1967.0,21.0,2.0,11.0,8.0,29.0,1.0,1.0,11.0,0.0,2.0,80.0,100.0,1
40 | 0.0,3675.0,5125.0,2.0,10.0,4.0,5.0,1.0,0.0,0.0,1.0,2.0,0.0,4001.0,1
41 | 1.0,31.0,2085.0,2.0,8.0,4.0,85.0,0.0,0.0,0.0,0.0,2.0,300.0,1.0,0
42 | 1.0,7342.0,1775.0,2.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,1.0,1
43 | 1.0,2325.0,12625.0,2.0,8.0,4.0,125.0,0.0,1.0,2.0,0.0,2.0,0.0,5553.0,0
44 | 1.0,5483.0,155.0,2.0,10.0,9.0,0.0,1.0,1.0,20.0,0.0,2.0,152.0,131.0,0
45 | 1.0,5142.0,4.0,2.0,14.0,8.0,4.0,1.0,0.0,0.0,0.0,2.0,0.0,3001.0,1
46 | 1.0,6275.0,7.0,2.0,10.0,9.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,13.0,0
47 | 0.0,23.0,1175.0,2.0,14.0,8.0,5.0,1.0,1.0,2.0,1.0,2.0,300.0,552.0,1
48 | 1.0,5967.0,154.0,2.0,11.0,4.0,125.0,1.0,0.0,0.0,1.0,2.0,260.0,1.0,1
49 | 1.0,4858.0,65.0,2.0,11.0,8.0,6.0,1.0,0.0,0.0,1.0,2.0,350.0,1.0,1
50 | 1.0,2017.0,925.0,2.0,8.0,4.0,1665.0,1.0,1.0,3.0,1.0,2.0,40.0,29.0,1
51 | 0.0,6058.0,165.0,2.0,11.0,4.0,11.0,1.0,0.0,0.0,1.0,2.0,21.0,10562.0,1
52 | 1.0,215.0,975.0,2.0,8.0,4.0,25.0,1.0,0.0,0.0,0.0,2.0,140.0,1.0,0
53 | 1.0,5592.0,115.0,2.0,1.0,1.0,5.0,1.0,1.0,5.0,0.0,2.0,0.0,8852.0,1
54 | 0.0,285.0,1.0,2.0,11.0,4.0,1.0,1.0,1.0,2.0,1.0,2.0,167.0,501.0,0
55 | 1.0,2208.0,83.0,2.0,8.0,8.0,2165.0,0.0,0.0,0.0,1.0,2.0,128.0,1.0,1
56 | 1.0,2667.0,14585.0,2.0,3.0,5.0,0.0,0.0,0.0,0.0,1.0,2.0,178.0,1.0,0
57 | 1.0,23.0,625.0,1.0,6.0,4.0,125.0,1.0,0.0,0.0,0.0,2.0,180.0,2.0,0
58 | 0.0,3317.0,225.0,1.0,13.0,4.0,35.0,0.0,0.0,0.0,1.0,2.0,200.0,142.0,0
59 | 1.0,3492.0,25.0,2.0,9.0,4.0,0.0,1.0,0.0,0.0,1.0,2.0,239.0,201.0,1
60 | 1.0,3917.0,1625.0,2.0,8.0,4.0,15.0,1.0,1.0,10.0,0.0,2.0,186.0,4701.0,1
61 | 1.0,3642.0,75.0,1.0,2.0,4.0,585.0,0.0,0.0,0.0,0.0,2.0,240.0,4.0,0
62 | 1.0,2158.0,79.0,1.0,13.0,4.0,665.0,0.0,0.0,0.0,0.0,2.0,160.0,1.0,0
63 | 1.0,2567.0,221.0,1.0,6.0,4.0,4.0,1.0,0.0,0.0,0.0,2.0,188.0,1.0,0
64 | 1.0,2542.0,54.0,2.0,9.0,4.0,165.0,0.0,1.0,1.0,0.0,2.0,272.0,445.0,0
65 | 1.0,1633.0,4085.0,2.0,3.0,8.0,415.0,0.0,0.0,0.0,1.0,2.0,120.0,1.0,0
66 | 1.0,3517.0,25125.0,2.0,14.0,8.0,1625.0,1.0,1.0,1.0,1.0,2.0,515.0,501.0,1
67 | 1.0,2375.0,12.0,2.0,8.0,4.0,2085.0,0.0,0.0,0.0,0.0,1.0,80.0,1.0,0
68 | 1.0,2867.0,9335.0,2.0,11.0,8.0,5665.0,1.0,1.0,6.0,0.0,2.0,381.0,169.0,1
69 | 0.0,1792.0,1021.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,51.0,0
70 | 1.0,23.0,75.0,2.0,7.0,4.0,5.0,1.0,0.0,0.0,1.0,1.0,320.0,1.0,0
71 | 


--------------------------------------------------------------------------------
/docs/source/rst/tutorials/data/cpu.arff:
--------------------------------------------------------------------------------
  1 | @relation cpu
  2 | @attribute vendor {'adviser','amdahl','apollo','basf','bti','burroughs','c.r.d','cambex','cdc','dec','dg','formation','four-phase','gould','harris','honeywell','hp','ibm','ipl','magnuson','microdata','nas','ncr','nixdorf','perkin-elmer','prime','siemens','sperry','sratus','wang'}
  3 | @attribute MYCT numeric
  4 | @attribute MMIN numeric
  5 | @attribute MMAX numeric
  6 | @attribute CACH numeric
  7 | @attribute CHMIN numeric
  8 | @attribute CHMAX numeric
  9 | @attribute class numeric
 10 | @data
 11 | 'adviser',125,256,6000,256,16,128,199
 12 | 'amdahl',29,8000,32000,32,8,32,253
 13 | 'amdahl',29,8000,32000,32,8,32,253
 14 | 'amdahl',29,8000,32000,32,8,32,253
 15 | 'amdahl',29,8000,16000,32,8,16,132
 16 | 'amdahl',26,8000,32000,64,8,32,290
 17 | 'amdahl',23,16000,32000,64,16,32,381
 18 | 'amdahl',23,16000,32000,64,16,32,381
 19 | 'amdahl',23,16000,64000,64,16,32,749
 20 | 'amdahl',23,32000,64000,128,32,64,1238
 21 | 'apollo',400,1000,3000,0,1,2,23
 22 | 'apollo',400,512,3500,4,1,6,24
 23 | 'basf',60,2000,8000,65,1,8,70
 24 | 'basf',50,4000,16000,65,1,8,117
 25 | 'bti',350,64,64,0,1,4,15
 26 | 'bti',200,512,16000,0,4,32,64
 27 | 'burroughs',167,524,2000,8,4,15,23
 28 | 'burroughs',143,512,5000,0,7,32,29
 29 | 'burroughs',143,1000,2000,0,5,16,22
 30 | 'burroughs',110,5000,5000,142,8,64,124
 31 | 'burroughs',143,1500,6300,0,5,32,35
 32 | 'burroughs',143,3100,6200,0,5,20,39
 33 | 'burroughs',143,2300,6200,0,6,64,40
 34 | 'burroughs',110,3100,6200,0,6,64,45
 35 | 'c.r.d',320,128,6000,0,1,12,28
 36 | 'c.r.d',320,512,2000,4,1,3,21
 37 | 'c.r.d',320,256,6000,0,1,6,28
 38 | 'c.r.d',320,256,3000,4,1,3,22
 39 | 'c.r.d',320,512,5000,4,1,5,28
 40 | 'c.r.d',320,256,5000,4,1,6,27
 41 | 'cdc',25,1310,2620,131,12,24,102
 42 | 'cdc',25,1310,2620,131,12,24,102
 43 | 'cdc',50,2620,10480,30,12,24,74
 44 | 'cdc',50,2620,10480,30,12,24,74
 45 | 'cdc',56,5240,20970,30,12,24,138
 46 | 'cdc',64,5240,20970,30,12,24,136
 47 | 'cdc',50,500,2000,8,1,4,23
 48 | 'cdc',50,1000,4000,8,1,5,29
 49 | 'cdc',50,2000,8000,8,1,5,44
 50 | 'cambex',50,1000,4000,8,3,5,30
 51 | 'cambex',50,1000,8000,8,3,5,41
 52 | 'cambex',50,2000,16000,8,3,5,74
 53 | 'cambex',50,2000,16000,8,3,6,74
 54 | 'cambex',50,2000,16000,8,3,6,74
 55 | 'dec',133,1000,12000,9,3,12,54
 56 | 'dec',133,1000,8000,9,3,12,41
 57 | 'dec',810,512,512,8,1,1,18
 58 | 'dec',810,1000,5000,0,1,1,28
 59 | 'dec',320,512,8000,4,1,5,36
 60 | 'dec',200,512,8000,8,1,8,38
 61 | 'dg',700,384,8000,0,1,1,34
 62 | 'dg',700,256,2000,0,1,1,19
 63 | 'dg',140,1000,16000,16,1,3,72
 64 | 'dg',200,1000,8000,0,1,2,36
 65 | 'dg',110,1000,4000,16,1,2,30
 66 | 'dg',110,1000,12000,16,1,2,56
 67 | 'dg',220,1000,8000,16,1,2,42
 68 | 'formation',800,256,8000,0,1,4,34
 69 | 'formation',800,256,8000,0,1,4,34
 70 | 'formation',800,256,8000,0,1,4,34
 71 | 'formation',800,256,8000,0,1,4,34
 72 | 'formation',800,256,8000,0,1,4,34
 73 | 'four-phase',125,512,1000,0,8,20,19
 74 | 'gould',75,2000,8000,64,1,38,75
 75 | 'gould',75,2000,16000,64,1,38,113
 76 | 'gould',75,2000,16000,128,1,38,157
 77 | 'hp',90,256,1000,0,3,10,18
 78 | 'hp',105,256,2000,0,3,10,20
 79 | 'hp',105,1000,4000,0,3,24,28
 80 | 'hp',105,2000,4000,8,3,19,33
 81 | 'hp',75,2000,8000,8,3,24,47
 82 | 'hp',75,3000,8000,8,3,48,54
 83 | 'hp',175,256,2000,0,3,24,20
 84 | 'harris',300,768,3000,0,6,24,23
 85 | 'harris',300,768,3000,6,6,24,25
 86 | 'harris',300,768,12000,6,6,24,52
 87 | 'harris',300,768,4500,0,1,24,27
 88 | 'harris',300,384,12000,6,1,24,50
 89 | 'harris',300,192,768,6,6,24,18
 90 | 'harris',180,768,12000,6,1,31,53
 91 | 'honeywell',330,1000,3000,0,2,4,23
 92 | 'honeywell',300,1000,4000,8,3,64,30
 93 | 'honeywell',300,1000,16000,8,2,112,73
 94 | 'honeywell',330,1000,2000,0,1,2,20
 95 | 'honeywell',330,1000,4000,0,3,6,25
 96 | 'honeywell',140,2000,4000,0,3,6,28
 97 | 'honeywell',140,2000,4000,0,4,8,29
 98 | 'honeywell',140,2000,4000,8,1,20,32
 99 | 'honeywell',140,2000,32000,32,1,20,175
100 | 'honeywell',140,2000,8000,32,1,54,57
101 | 'honeywell',140,2000,32000,32,1,54,181
102 | 'honeywell',140,2000,32000,32,1,54,181
103 | 'honeywell',140,2000,4000,8,1,20,32
104 | 'ibm',57,4000,16000,1,6,12,82
105 | 'ibm',57,4000,24000,64,12,16,171
106 | 'ibm',26,16000,32000,64,16,24,361
107 | 'ibm',26,16000,32000,64,8,24,350
108 | 'ibm',26,8000,32000,0,8,24,220
109 | 'ibm',26,8000,16000,0,8,16,113
110 | 'ibm',480,96,512,0,1,1,15
111 | 'ibm',203,1000,2000,0,1,5,21
112 | 'ibm',115,512,6000,16,1,6,35
113 | 'ibm',1100,512,1500,0,1,1,18
114 | 'ibm',1100,768,2000,0,1,1,20
115 | 'ibm',600,768,2000,0,1,1,20
116 | 'ibm',400,2000,4000,0,1,1,28
117 | 'ibm',400,4000,8000,0,1,1,45
118 | 'ibm',900,1000,1000,0,1,2,18
119 | 'ibm',900,512,1000,0,1,2,17
120 | 'ibm',900,1000,4000,4,1,2,26
121 | 'ibm',900,1000,4000,8,1,2,28
122 | 'ibm',900,2000,4000,0,3,6,28
123 | 'ibm',225,2000,4000,8,3,6,31
124 | 'ibm',225,2000,4000,8,3,6,31
125 | 'ibm',180,2000,8000,8,1,6,42
126 | 'ibm',185,2000,16000,16,1,6,76
127 | 'ibm',180,2000,16000,16,1,6,76
128 | 'ibm',225,1000,4000,2,3,6,26
129 | 'ibm',25,2000,12000,8,1,4,59
130 | 'ibm',25,2000,12000,16,3,5,65
131 | 'ibm',17,4000,16000,8,6,12,101
132 | 'ibm',17,4000,16000,32,6,12,116
133 | 'ibm',1500,768,1000,0,0,0,18
134 | 'ibm',1500,768,2000,0,0,0,20
135 | 'ibm',800,768,2000,0,0,0,20
136 | 'ipl',50,2000,4000,0,3,6,30
137 | 'ipl',50,2000,8000,8,3,6,44
138 | 'ipl',50,2000,8000,8,1,6,44
139 | 'ipl',50,2000,16000,24,1,6,82
140 | 'ipl',50,2000,16000,24,1,6,82
141 | 'ipl',50,8000,16000,48,1,10,128
142 | 'magnuson',100,1000,8000,0,2,6,37
143 | 'magnuson',100,1000,8000,24,2,6,46
144 | 'magnuson',100,1000,8000,24,3,6,46
145 | 'magnuson',50,2000,16000,12,3,16,80
146 | 'magnuson',50,2000,16000,24,6,16,88
147 | 'magnuson',50,2000,16000,24,6,16,88
148 | 'microdata',150,512,4000,0,8,128,33
149 | 'nas',115,2000,8000,16,1,3,46
150 | 'nas',115,2000,4000,2,1,5,29
151 | 'nas',92,2000,8000,32,1,6,53
152 | 'nas',92,2000,8000,32,1,6,53
153 | 'nas',92,2000,8000,4,1,6,41
154 | 'nas',75,4000,16000,16,1,6,86
155 | 'nas',60,4000,16000,32,1,6,95
156 | 'nas',60,2000,16000,64,5,8,107
157 | 'nas',60,4000,16000,64,5,8,117
158 | 'nas',50,4000,16000,64,5,10,119
159 | 'nas',72,4000,16000,64,8,16,120
160 | 'nas',72,2000,8000,16,6,8,48
161 | 'nas',40,8000,16000,32,8,16,126
162 | 'nas',40,8000,32000,64,8,24,266
163 | 'nas',35,8000,32000,64,8,24,270
164 | 'nas',38,16000,32000,128,16,32,426
165 | 'nas',48,4000,24000,32,8,24,151
166 | 'nas',38,8000,32000,64,8,24,267
167 | 'nas',30,16000,32000,256,16,24,603
168 | 'ncr',112,1000,1000,0,1,4,19
169 | 'ncr',84,1000,2000,0,1,6,21
170 | 'ncr',56,1000,4000,0,1,6,26
171 | 'ncr',56,2000,6000,0,1,8,35
172 | 'ncr',56,2000,8000,0,1,8,41
173 | 'ncr',56,4000,8000,0,1,8,47
174 | 'ncr',56,4000,12000,0,1,8,62
175 | 'ncr',56,4000,16000,0,1,8,78
176 | 'ncr',38,4000,8000,32,16,32,80
177 | 'ncr',38,4000,8000,32,16,32,80
178 | 'ncr',38,8000,16000,64,4,8,142
179 | 'ncr',38,8000,24000,160,4,8,281
180 | 'ncr',38,4000,16000,128,16,32,190
181 | 'nixdorf',200,1000,2000,0,1,2,21
182 | 'nixdorf',200,1000,4000,0,1,4,25
183 | 'nixdorf',200,2000,8000,64,1,5,67
184 | 'perkin-elmer',250,512,4000,0,1,7,24
185 | 'perkin-elmer',250,512,4000,0,4,7,24
186 | 'perkin-elmer',250,1000,16000,1,1,8,64
187 | 'prime',160,512,4000,2,1,5,25
188 | 'prime',160,512,2000,2,3,8,20
189 | 'prime',160,1000,4000,8,1,14,29
190 | 'prime',160,1000,8000,16,1,14,43
191 | 'prime',160,2000,8000,32,1,13,53
192 | 'siemens',240,512,1000,8,1,3,19
193 | 'siemens',240,512,2000,8,1,5,22
194 | 'siemens',105,2000,4000,8,3,8,31
195 | 'siemens',105,2000,6000,16,6,16,41
196 | 'siemens',105,2000,8000,16,4,14,47
197 | 'siemens',52,4000,16000,32,4,12,99
198 | 'siemens',70,4000,12000,8,6,8,67
199 | 'siemens',59,4000,12000,32,6,12,81
200 | 'siemens',59,8000,16000,64,12,24,149
201 | 'siemens',26,8000,24000,32,8,16,183
202 | 'siemens',26,8000,32000,64,12,16,275
203 | 'siemens',26,8000,32000,128,24,32,382
204 | 'sperry',116,2000,8000,32,5,28,56
205 | 'sperry',50,2000,32000,24,6,26,182
206 | 'sperry',50,2000,32000,48,26,52,227
207 | 'sperry',50,2000,32000,112,52,104,341
208 | 'sperry',50,4000,32000,112,52,104,360
209 | 'sperry',30,8000,64000,96,12,176,919
210 | 'sperry',30,8000,64000,128,12,176,978
211 | 'sperry',180,262,4000,0,1,3,24
212 | 'sperry',180,512,4000,0,1,3,24
213 | 'sperry',180,262,4000,0,1,3,24
214 | 'sperry',180,512,4000,0,1,3,24
215 | 'sperry',124,1000,8000,0,1,8,37
216 | 'sperry',98,1000,8000,32,2,8,50
217 | 'sratus',125,2000,8000,0,2,14,41
218 | 'wang',480,512,8000,32,0,0,47
219 | 'wang',480,1000,4000,0,0,0,25
220 | 


--------------------------------------------------------------------------------
/docs/source/rst/tutorials/survival.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd \n",
 10 |     "from scipy.io import arff\n",
 11 |     "from rulekit import RuleKit\n",
 12 |     "from rulekit.survival import SurvivalRules\n",
 13 |     "from rulekit.params import Measures\n",
 14 |     "\n",
 15 |     "from rulexai.explainer import RuleExplainer"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "markdown",
 20 |    "metadata": {},
 21 |    "source": [
 22 |     "# GBSG2"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "## Read data"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 3,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "dataset_path = \"./data/GBSG2.arff\"\n",
 39 |     "data = pd.DataFrame(arff.loadarff(dataset_path)[0])\n",
 40 |     "\n",
 41 |     "# decode the object-dtype (byte-string) columns to UTF-8 str and replace '?' placeholders with None\n",
 42 |     "tmp_df = data.select_dtypes([object])\n",
 43 |     "tmp_df = tmp_df.stack().str.decode(\"utf-8\").unstack()\n",
 44 |     "for col in tmp_df:\n",
 45 |     "    data[col] = tmp_df[col].replace({\"?\": None})\n",
 46 |     "\n",
 47 |     "x = data.drop([\"survival_status\"], axis=1)\n",
 48 |     "y = data[\"survival_status\"]"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "metadata": {},
 54 |    "source": [
 55 |     "## Train RuleKit model"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 4,
 61 |    "metadata": {},
 62 |    "outputs": [
 63 |     {
 64 |      "data": {
 65 |       "text/plain": [
 66 |        "<rulekit.survival.SurvivalRules at 0x176db91a880>"
 67 |       ]
 68 |      },
 69 |      "execution_count": 4,
 70 |      "metadata": {},
 71 |      "output_type": "execute_result"
 72 |     }
 73 |    ],
 74 |    "source": [
 75 |     "# RuleKit\n",
 76 |     "RuleKit.init()\n",
 77 |     "\n",
 78 |     "srv = SurvivalRules(survival_time_attr=\"survival_time\")\n",
 79 |     "srv.fit(values=x, labels=y)"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "markdown",
 84 |    "metadata": {},
 85 |    "source": [
 86 |     "### Rules"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 5,
 92 |    "metadata": {},
 93 |    "outputs": [
 94 |     {
 95 |      "name": "stdout",
 96 |      "output_type": "stream",
 97 |      "text": [
 98 |       "IF pnodes = (-inf, 3.50) THEN survival_status = {NaN} (p = 304.0, n = 0.0, P = 564.0, N = 0.0, weight = 0.9999999999999998, pvalue = 2.220446049250313e-16)\n",
 99 |       "IF pnodes = (-inf, 17.50) AND progrec = (-inf, 9.50) AND age = <41.50, 52.50) AND estrec = <0.50, 29) THEN survival_status = {NaN} (p = 21.0, n = 0.0, P = 564.0, N = 0.0, weight = 0.9999999999909083, pvalue = 9.09172737095787e-12)\n",
100 |       "IF pnodes = <4.50, 19) AND progrec = (-inf, 11.50) AND age = <41.50, 64.50) AND estrec = <0.50, 41) THEN survival_status = {NaN} (p = 33.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n",
101 |       "IF pnodes = <4.50, inf) AND progrec = (-inf, 25.50) THEN survival_status = {NaN} (p = 113.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n",
102 |       "IF pnodes = <4.50, inf) AND progrec = (-inf, 99) THEN survival_status = {NaN} (p = 156.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n",
103 |       "IF pnodes = <5.50, inf) AND progrec = (-inf, 135) THEN survival_status = {NaN} (p = 144.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n",
104 |       "IF pnodes = <4.50, inf) AND progrec = (-inf, 233) THEN survival_status = {NaN} (p = 185.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n",
105 |       "IF pnodes = (-inf, 4.50) AND progrec = <9, inf) AND age = <39.50, inf) THEN survival_status = {NaN} (p = 245.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n",
106 |       "IF progrec = <107, inf) THEN survival_status = {NaN} (p = 168.0, n = 0.0, P = 564.0, N = 0.0, weight = 0.9999999989621143, pvalue = 1.0378856662995872e-09)\n",
107 |       "IF pnodes = <3.50, inf) AND progrec = (-inf, 105.50) THEN survival_status = {NaN} (p = 195.0, n = 0.0, P = 564.0, N = 0.0, weight = 1.0, pvalue = 0.0)\n"
108 |      ]
109 |     }
110 |    ],
111 |    "source": [
112 |     "for rule in srv.model.rules:\n",
113 |     "    print(rule, rule.stats)"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "markdown",
118 |    "metadata": {},
119 |    "source": [
120 |     "## RuleXAI"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 6,
126 |    "metadata": {},
127 |    "outputs": [
128 |     {
129 |      "data": {
130 |       "text/plain": [
131 |        "<rulexai.explainer.RuleExplainer at 0x176db937700>"
132 |       ]
133 |      },
134 |      "execution_count": 6,
135 |      "metadata": {},
136 |      "output_type": "execute_result"
137 |     }
138 |    ],
139 |    "source": [
140 |     "explainer = RuleExplainer(model=srv, X=x, y=y, type=\"survival\")\n",
141 |     "explainer.explain()"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "markdown",
146 |    "metadata": {},
147 |    "source": [
148 |     "### Feature importance"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": 7,
154 |    "metadata": {},
155 |    "outputs": [
156 |     {
157 |      "data": {
158 |       "text/html": [
159 |        "<div>\n",
160 |        "<style scoped>\n",
161 |        "    .dataframe tbody tr th:only-of-type {\n",
162 |        "        vertical-align: middle;\n",
163 |        "    }\n",
164 |        "\n",
165 |        "    .dataframe tbody tr th {\n",
166 |        "        vertical-align: top;\n",
167 |        "    }\n",
168 |        "\n",
169 |        "    .dataframe thead th {\n",
170 |        "        text-align: right;\n",
171 |        "    }\n",
172 |        "</style>\n",
173 |        "<table border=\"1\" class=\"dataframe\">\n",
174 |        "  <thead>\n",
175 |        "    <tr style=\"text-align: right;\">\n",
176 |        "      <th></th>\n",
177 |        "      <th>attributes</th>\n",
178 |        "      <th>importances</th>\n",
179 |        "    </tr>\n",
180 |        "  </thead>\n",
181 |        "  <tbody>\n",
182 |        "    <tr>\n",
183 |        "      <th>2</th>\n",
184 |        "      <td>pnodes</td>\n",
185 |        "      <td>460.222804</td>\n",
186 |        "    </tr>\n",
187 |        "    <tr>\n",
188 |        "      <th>3</th>\n",
189 |        "      <td>progrec</td>\n",
190 |        "      <td>251.499862</td>\n",
191 |        "    </tr>\n",
192 |        "    <tr>\n",
193 |        "      <th>0</th>\n",
194 |        "      <td>age</td>\n",
195 |        "      <td>20.523849</td>\n",
196 |        "    </tr>\n",
197 |        "    <tr>\n",
198 |        "      <th>1</th>\n",
199 |        "      <td>estrec</td>\n",
200 |        "      <td>13.347720</td>\n",
201 |        "    </tr>\n",
202 |        "  </tbody>\n",
203 |        "</table>\n",
204 |        "</div>"
205 |       ],
206 |       "text/plain": [
207 |        "  attributes  importances\n",
208 |        "2     pnodes   460.222804\n",
209 |        "3    progrec   251.499862\n",
210 |        "0        age    20.523849\n",
211 |        "1     estrec    13.347720"
212 |       ]
213 |      },
214 |      "execution_count": 7,
215 |      "metadata": {},
216 |      "output_type": "execute_result"
217 |     }
218 |    ],
219 |    "source": [
220 |     "explainer.feature_importances_"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "markdown",
225 |    "metadata": {},
226 |    "source": [
227 |     "### Condition importance"
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": 8,
233 |    "metadata": {},
234 |    "outputs": [
235 |     {
236 |      "data": {
237 |       "text/html": [
238 |        "<div>\n",
239 |        "<style scoped>\n",
240 |        "    .dataframe tbody tr th:only-of-type {\n",
241 |        "        vertical-align: middle;\n",
242 |        "    }\n",
243 |        "\n",
244 |        "    .dataframe tbody tr th {\n",
245 |        "        vertical-align: top;\n",
246 |        "    }\n",
247 |        "\n",
248 |        "    .dataframe thead th {\n",
249 |        "        text-align: right;\n",
250 |        "    }\n",
251 |        "</style>\n",
252 |        "<table border=\"1\" class=\"dataframe\">\n",
253 |        "  <thead>\n",
254 |        "    <tr style=\"text-align: right;\">\n",
255 |        "      <th></th>\n",
256 |        "      <th>conditions</th>\n",
257 |        "      <th>importances</th>\n",
258 |        "    </tr>\n",
259 |        "  </thead>\n",
260 |        "  <tbody>\n",
261 |        "    <tr>\n",
262 |        "      <th>0</th>\n",
263 |        "      <td>pnodes = &lt;4.5, inf)</td>\n",
264 |        "      <td>207.268572</td>\n",
265 |        "    </tr>\n",
266 |        "    <tr>\n",
267 |        "      <th>1</th>\n",
268 |        "      <td>pnodes = (-inf, 3.5)</td>\n",
269 |        "      <td>67.394775</td>\n",
270 |        "    </tr>\n",
271 |        "    <tr>\n",
272 |        "      <th>2</th>\n",
273 |        "      <td>pnodes = &lt;5.5, inf)</td>\n",
274 |        "      <td>64.254026</td>\n",
275 |        "    </tr>\n",
276 |        "    <tr>\n",
277 |        "      <th>3</th>\n",
278 |        "      <td>pnodes = &lt;3.5, inf)</td>\n",
279 |        "      <td>64.104973</td>\n",
280 |        "    </tr>\n",
281 |        "    <tr>\n",
282 |        "      <th>4</th>\n",
283 |        "      <td>progrec = (-inf, 25.5)</td>\n",
284 |        "      <td>48.923100</td>\n",
285 |        "    </tr>\n",
286 |        "    <tr>\n",
287 |        "      <th>5</th>\n",
288 |        "      <td>progrec = &lt;107.0, inf)</td>\n",
289 |        "      <td>37.252374</td>\n",
290 |        "    </tr>\n",
291 |        "    <tr>\n",
292 |        "      <th>6</th>\n",
293 |        "      <td>progrec = (-inf, 105.5)</td>\n",
294 |        "      <td>33.962572</td>\n",
295 |        "    </tr>\n",
296 |        "    <tr>\n",
297 |        "      <th>7</th>\n",
298 |        "      <td>progrec = (-inf, 99.0)</td>\n",
299 |        "      <td>33.423755</td>\n",
300 |        "    </tr>\n",
301 |        "    <tr>\n",
302 |        "      <th>8</th>\n",
303 |        "      <td>pnodes = (-inf, 4.5)</td>\n",
304 |        "      <td>32.835122</td>\n",
305 |        "    </tr>\n",
306 |        "    <tr>\n",
307 |        "      <th>9</th>\n",
308 |        "      <td>progrec = (-inf, 135.0)</td>\n",
309 |        "      <td>25.353218</td>\n",
310 |        "    </tr>\n",
311 |        "    <tr>\n",
312 |        "      <th>10</th>\n",
313 |        "      <td>progrec = (-inf, 11.5)</td>\n",
314 |        "      <td>23.663185</td>\n",
315 |        "    </tr>\n",
316 |        "    <tr>\n",
317 |        "      <th>11</th>\n",
318 |        "      <td>progrec = (-inf, 9.5)</td>\n",
319 |        "      <td>23.506762</td>\n",
320 |        "    </tr>\n",
321 |        "    <tr>\n",
322 |        "      <th>12</th>\n",
323 |        "      <td>pnodes = &lt;4.5, 19.0)</td>\n",
324 |        "      <td>18.150272</td>\n",
325 |        "    </tr>\n",
326 |        "    <tr>\n",
327 |        "      <th>13</th>\n",
328 |        "      <td>progrec = &lt;9.0, inf)</td>\n",
329 |        "      <td>13.146344</td>\n",
330 |        "    </tr>\n",
331 |        "    <tr>\n",
332 |        "      <th>14</th>\n",
333 |        "      <td>progrec = (-inf, 233.0)</td>\n",
334 |        "      <td>12.268552</td>\n",
335 |        "    </tr>\n",
336 |        "    <tr>\n",
337 |        "      <th>15</th>\n",
338 |        "      <td>estrec = &lt;0.5, 29.0)</td>\n",
339 |        "      <td>10.450381</td>\n",
340 |        "    </tr>\n",
341 |        "    <tr>\n",
342 |        "      <th>16</th>\n",
343 |        "      <td>age = &lt;41.5, 64.5)</td>\n",
344 |        "      <td>9.275232</td>\n",
345 |        "    </tr>\n",
346 |        "    <tr>\n",
347 |        "      <th>17</th>\n",
348 |        "      <td>age = &lt;41.5, 52.5)</td>\n",
349 |        "      <td>8.077389</td>\n",
350 |        "    </tr>\n",
351 |        "    <tr>\n",
352 |        "      <th>18</th>\n",
353 |        "      <td>pnodes = (-inf, 17.5)</td>\n",
354 |        "      <td>6.215064</td>\n",
355 |        "    </tr>\n",
356 |        "    <tr>\n",
357 |        "      <th>19</th>\n",
358 |        "      <td>age = &lt;39.5, inf)</td>\n",
359 |        "      <td>3.171229</td>\n",
360 |        "    </tr>\n",
361 |        "    <tr>\n",
362 |        "      <th>20</th>\n",
363 |        "      <td>estrec = &lt;0.5, 41.0)</td>\n",
364 |        "      <td>2.897339</td>\n",
365 |        "    </tr>\n",
366 |        "  </tbody>\n",
367 |        "</table>\n",
368 |        "</div>"
369 |       ],
370 |       "text/plain": [
371 |        "                 conditions  importances\n",
372 |        "0       pnodes = <4.5, inf)   207.268572\n",
373 |        "1      pnodes = (-inf, 3.5)    67.394775\n",
374 |        "2       pnodes = <5.5, inf)    64.254026\n",
375 |        "3       pnodes = <3.5, inf)    64.104973\n",
376 |        "4    progrec = (-inf, 25.5)    48.923100\n",
377 |        "5    progrec = <107.0, inf)    37.252374\n",
378 |        "6   progrec = (-inf, 105.5)    33.962572\n",
379 |        "7    progrec = (-inf, 99.0)    33.423755\n",
380 |        "8      pnodes = (-inf, 4.5)    32.835122\n",
381 |        "9   progrec = (-inf, 135.0)    25.353218\n",
382 |        "10   progrec = (-inf, 11.5)    23.663185\n",
383 |        "11    progrec = (-inf, 9.5)    23.506762\n",
384 |        "12     pnodes = <4.5, 19.0)    18.150272\n",
385 |        "13     progrec = <9.0, inf)    13.146344\n",
386 |        "14  progrec = (-inf, 233.0)    12.268552\n",
387 |        "15     estrec = <0.5, 29.0)    10.450381\n",
388 |        "16       age = <41.5, 64.5)     9.275232\n",
389 |        "17       age = <41.5, 52.5)     8.077389\n",
390 |        "18    pnodes = (-inf, 17.5)     6.215064\n",
391 |        "19        age = <39.5, inf)     3.171229\n",
392 |        "20     estrec = <0.5, 41.0)     2.897339"
393 |       ]
394 |      },
395 |      "execution_count": 8,
396 |      "metadata": {},
397 |      "output_type": "execute_result"
398 |     }
399 |    ],
400 |    "source": [
401 |     "explainer.condition_importances_"
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "markdown",
406 |    "metadata": {},
407 |    "source": [
408 |     "### Local explainability"
409 |    ]
410 |   },
411 |   {
412 |    "cell_type": "code",
413 |    "execution_count": 9,
414 |    "metadata": {},
415 |    "outputs": [
416 |     {
417 |      "name": "stdout",
418 |      "output_type": "stream",
419 |      "text": [
420 |       "Example:\n",
421 |       "horTh                  no\n",
422 |       "age                  70.0\n",
423 |       "menostat             Post\n",
424 |       "tsize                21.0\n",
425 |       "tgrade                 II\n",
426 |       "pnodes                3.0\n",
427 |       "progrec              48.0\n",
428 |       "estrec               66.0\n",
429 |       "survival_time      1814.0\n",
430 |       "survival_status       1.0\n",
431 |       "Name: 0, dtype: object\n",
432 |       "\n",
433 |       "Rules that covers this example:\n",
434 |       "IF pnodes = (-inf, 3.5) THEN survival_status = {NaN}\n",
435 |       "IF pnodes = (-inf, 4.5) AND progrec = <9.0, inf) AND age = <39.5, inf) THEN survival_status = {NaN}\n",
436 |       "\n",
437 |       "Importances of the conditions from rules covering the example\n",
438 |       "             conditions  importances\n",
439 |       "0  pnodes = (-inf, 3.5)    67.394775\n",
440 |       "1  pnodes = (-inf, 4.5)    32.835122\n",
441 |       "2  progrec = <9.0, inf)    13.146344\n",
442 |       "3     age = <39.5, inf)     3.171229\n"
443 |      ]
444 |     },
445 |     {
446 |      "data": {
447 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcoAAAEWCAYAAADmYNeIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAgJ0lEQVR4nO3deZgdZZ328e8NURBRBIJKEIyIbCoEAigCDq4j6uA68moEHXWAEZxxYRx1dAIqM+o7bq87IKAjIoqCDM5IfEGQZRQSDPviFgVcAAkCggjhN3+cp/XQdFefTjrp7uT7ua5znVNVTz31q5Nz9Z2nqk6dVBWSJGlka012AZIkTWUGpSRJHQxKSZI6GJSSJHUwKCVJ6mBQSpLUwaCUViNJXpvkvL7pO5Js2dH+iiR7r4rapOnKoJRWgSSvSrKwBdevkvx3kj1X9narav2q+mmr4fgk7x+2/IlVdfZEbzfJ2UneMNH9Lo+R9lsaD4NSWsmSvBX4GPCvwKOALYBPAy+axLLWCEnWnuwaNP0ZlNJKlGQD4L3AIVX1jar6fVXdU1X/WVX/2Nqsk+RjSX7ZHh9Lsk5btneS65O8LcmNbTT6N339b5zktCS3JbkQePyw7VeSrZIcCMwD3t5Gtf/Zli9J8uwVrWOM92Bo3bf3rfviJM9Pcm2SW5K8q6/94UlOTnJSktuTXJxkx77l27UR663t0PG+fcuOT/KZJP+V5PfA60fZ73ck+Unr/8okL+nr47VJzkvy70mWJvlZkn36lm+U5Lj2Hi1NcmrfshcmWdxquyDJDoO8R5raDEpp5dodWBc4paPNPwNPBeYAOwK7Ae/uW/5oYANgM3p/+D+VZMO27FPAH4BNgde1xwNU1VHACcCH2uHYv5rgOsbyaHrvw2bAvwBHA68G5gJ7Ae9J8ri+9i8CvgZsBHwZODXJg5I8CPhPYAHwSOBNwAlJtulb91XAkcDDgC+Ost8/advdADgC+FKSTfv6eApwDTAT+BDw+SRpy/4DWA94YqvhowBJdgKOBQ4CNgY+B5w29J8NTV8GpbRybQzcXFX3drSZB7y3qm6sqpvo/eHev2/5PW35PVX1X8AdwDbtsOLLgH9pI9XLgS+sQK3LVceAfd8DHFlV9wBfoRdAH6+q26vqCuBKeuE8ZFFVndzaf4ReyD61PdYHPlBVf6yqs4DTgVf2rfvNqjq/qu6rqj+MVExVfa2qftnanAT8iN5/DIb8vKqOrqpl9N7TTYFHtTDdBzi4qpa29+Kcts6BwOeq6gdVtayqvgDc3WrWNGZQSivXb4GZSWZ0tJkF/Lxv+udt3p/6GBa0d9ILi02AGcB1w9ZdXstbxyB+20IH4K72/Ju+5XcN6+tP+1RV9wHXt1pmAde1ef11bjbSuqNJckDfIdJbgSfRC+8hv+7b/p3t5frA5sAtVbV0hG4fC7xtqM/W7+bc/z3UNGRQSivX/9AbVby4o80v6f2RHbJFmzeWm4B76f0x7l93NGP9VNDy1rEy/GmfkqwFPKbV8ktg8zZvyBbADX3Tw/fzftNJHkvv0O+hwMZV9QjgciCM7TpgoySPGGXZkVX1iL7HelV14gD9agozKKWVqKp+R++c3KfaBSzrtXNt+yT5UGt2IvDuJJskmdnaf2mAvpcB3wAOb/1uD7ymY5XfAKN+p3J561hJ5iZ5aRuJv5nefza+D/yA3kj27e193Bv4K3qHc0czfL8fSi88bwJoFyU9aZCiqupXwH8Dn06yYavh6W3x0cDBSZ6SnocmeUGShw20x5qyDEppJauqDwNvpXdhzE30Rh6HAqe2Ju8HFgKXApcBF7d5gziU3iHBXwPHA8d1tP08sH07LHjqCMtXpI6J9k1gP2ApvfOkL23nA/9ILxj3AW6m9zWbA6rq6o6+7rffVXUl8GF6o/3fAE8Gzh9HbfvTO+d6NXAjvSCnqhYCfwt8stX9Y+C14+hXU1T84WZJU0mSw4
GtqurVk12LBI4oJUnqZFBKktTBQ6+SJHVwRClJUoeuL0FrGpk5c2bNnj17ssuQpGll0aJFN1fVJl1tDMrVxOzZs1m4cOFklyFJ00qSMe9m5aFXSZI6GJSSJHUwKCVJ6mBQSpLUwaCUJKmDQSlJUgeDUpKkDgalJEkdvNfraiKzUhw02VVI0qpV81csw5Isqqpduto4opQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSepgUEqS1MGglCSpg0EpSVIHg1KSpA4GpSRJHQxKSZI6GJSSJHUwKCVJ6mBQSpLUwaCUJKnDlAjKJEuSzFzF23xIknOSrD3CsoOTHDBAHycmuTTJWzravKi1WZxkYZI9R2l3dpJrWrvFSR7Z5h+a5HXj2TdJ0sSZMdkFTKLXAd+oqmXDF1TVZ8daOcmjgV2raqsxmp4JnFZVlWQH4KvAtqO0nVdVC4fNOxY4vz1LklaxgUeUSWYnuTrJCUmuSnJykvXasiVJjkhycZLLkmzb5m+U5NQ2ovp+CwqSbJxkQZIrkhwDpG87r05yYRtVfS7J2u1xfJLLW/+jjuDGYR7wzVH29fAkh7XXZyf5YKvp2iR7tWYLgM1anXuN1A9AVd1RVdUmHwrUaG1HWf9OYEmS3cazniRpYoz30Os2wKerajvgNuCNfcturqqdgc8Ah7V5RwA/rKodgHcBX2zz5wPnVdUTgVOALQCSbAfsB+xRVXOAZfQCbQ6wWVU9qaqeDBw3vLAk8/oOW/Y/Th6h7YOBLatqyYD7PaOqdgPe3GoH2Bf4SVXNqapzu1ZO8pIkVwPfojeSHc1xreb3JEnf/IXAA8I4yYHtcO5C7hxwTyRJ4zLeoLyuqs5vr78E9J9v+0Z7XgTMbq/3BP4DoKrOAjZO8nDg6W19qupbwNLW/lnAXOCiJIvb9JbAT4Etk3wiyfPohfT9VNUJLbSGP14+wn7MBG4dx36PtG8Dq6pTqmpb4MXA+0ZpNq/9J2Cv9ti/b9mNwKwR+j2qqnapql1Yb7xVSZIGMd5zlMMPG/ZP392ely1Hv0MCfKGq3vmABcmOwF8CBwOvYNjILMk84B9H6PPHI4TlXcC6feseCbwAoI1kh5uIfaOqvpdkyyQzq+rmYctuaM+3J/kysBt/HoGv22qWJK1i4x1RbpFk9/b6VcB5Y7Q/l96hU5LsTe/w7G3A99r6JNkH2LC1PxN4ed8VnxsleWy7Inatqvo68G5g5+EbGs+IsqqWAmsnWbdN//NQ+4HfiWHa1amHjjB/q6HDqEl2BtYBfjuszYyhq36TPAh4IXB5X5Oth01LklaR8Y6OrgEOSXIscCW985FdDgeOTXIpcCfwmjb/CODEJFcAFwC/AKiqK5O8G1iQZC3gHuAQeqOp49o8gAeMOJfDAnqHhv//BPQFvStZzx9h/suAA5LcQ28/9hu6uCfJ4hbO6wBntJBcu9V0dF8fe9B7LyVJq1j+fEHmGA2T2cDpVfWklVrRKtJGd2+pqv3HbDxYf6cDL62qP05Ef3397gS8daw6MyvFQRO5ZUma+mr+uL5I8ABJFlXVLl1tpsQNByZDVV0MfHekGw4sZ38vnOiQbGYC71kJ/UqSBjDwodf2VYrVYjQ5pKqm/Jf4q+o7k12DJK3J1tgRpSRJgzAoJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSepgUEqS1MGglCSpg0EpSVKH8f5ws6aoubPmsnD+wskuQ5JWO44oJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSeqQqprsGjQBMivFQZNdhdYENd+/GVp9JFlUVbt0tXFEKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSepgUEqS1MGglCSpg0EpSVIHg1KSpA4GpSRJHQxKSZI6GJSSJHUwKCVJ6mBQSpLUwaCUJKmDQSlJUocpEZRJliSZuYq3+ZAk5y
RZe4RlByc5YIA+TkxyaZK3DNB21yT3Jnn5KMvPTnJNksXt8cg2/9AkrxtknyRJE2/GZBcwiV4HfKOqlg1fUFWfHWvlJI8Gdq2qrQZouzbwQWDBGE3nVdXCYfOOBc5vz5KkVWzgEWWS2UmuTnJCkquSnJxkvbZsSZIjklyc5LIk27b5GyU5tY26vp9khzZ/4yQLklyR5Bggfdt5dZIL26jqc0nWbo/jk1ze+h9zBDeAecA3R9nXw5Mc1l6fneSDraZrk+zVmi0ANmt17jVSP33eBHwduHG8RVbVncCSJLuNd11J0oob76HXbYBPV9V2wG3AG/uW3VxVOwOfAQ5r844AflhVOwDvAr7Y5s8HzquqJwKnAFsAJNkO2A/Yo6rmAMvoBdocYLOqelJVPRk4bnhhSeb1Hbbsf5w8QtsHA1tW1ZIB93tGVe0GvLnVDrAv8JOqmlNV5462YpLNgJe092Usx7Wa35MkffMXAg8I4yQHJlmYZCF3DrgnkqRxGW9QXldV57fXXwL27Fv2jfa8CJjdXu8J/AdAVZ0FbJzk4cDT2/pU1beApa39s4C5wEVJFrfpLYGfAlsm+USS59EL6fupqhNaaA1/jHROcCZw6zj2e6R9G9THgH+qqvvGaDev/Sdgr/bYv2/ZjcCs4StU1VFVtUtV7cJ646xKkjSQ8Z6jrI7pu9vzsuXod0iAL1TVOx+wINkR+EvgYOAV9M4x9i+fB/zjCH3+eISwvAtYt2/dI4EXALSR7HArsm+7AF9pA8SZwPOT3FtVp/Y3qqob2vPtSb4M7MafR+DrtpolSavYeEeUWyTZvb1+FXDeGO3PpXfolCR70zs8exvwvbY+SfYBNmztzwRe3nfF50ZJHtuuiF2rqr4OvBvYefiGxjOirKqlwNpJ1m3T/zzUfuB3Yph2deqhI2zrcVU1u6pmAycDbxwekklmDF31m+RBwAuBy/uabD1sWpK0iow3KK8BDklyFb1wG+u82+HA3CSXAh8AXtPmHwE8PckVwEuBXwBU1ZX0gnBBW+c7wKbAZsDZ7XDsl4AHjDiXwwLuf+h4RW0L/HY8K7T9AVgHOKPt82LgBuDovqZ70HsvJEmrWKqGH00dpWEyGzi9qp60UitaRZLsDLylqvYfs/Fg/Z0OvLSq/jgR/fX1uxPw1rHqzKwUB03klqWR1fzB/mZI00GSRVW1S1ebKXHDgclQVRcD3x3phgPL2d8LJzokm5nAe1ZCv5KkAQx8YUr7KsVqMZocUlVT/kv8VeUhV0maRGvsiFKSpEEYlJIkdTAoJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSepgUEqS1MGglCSpw8C/HqKpbe6suSycv3Cyy5Ck1Y4jSkmSOhiUkiR1MCglSepgUEqS1MGglCSpg0EpSVIHg1KSpA4GpSRJHQxKSZI6pKomuwZNgMxKcdBkV7Hq1Xw/v5KWX5JFVbVLVxtHlJIkdTAoJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSepgUEqS1MGglCSpg0EpSVIHg1KSpA4GpSRJHQxKSZI6GJSSJHUwKEeQ5INJLm+P/UZps06Sk5L8OMkPksweoN8LBmizV5IrkixOsnmSby/HLkiSJsiEBGWStVdw/RkTUccK1rBRe34BsDMwB3gKcFiSh4+wyuuBpVW1FfBR4INjbaOqnjZAKfOAf6uqOVV1HfCrJHsMtheSpInWGZRJZie5OskJSa5KcnKS9dqyJW3kdTHw10lemeSyNgr7YF8fr09ybZILkxyd5JNt/vFJPpvkB8CHkjw+ybeTLEpybpJtW7tHJTklySXtMUjYDCTJjCT7JjkNOKXN3h74XlXdW1W/By4FnjfC6i8CvtBenww8K0nG2N4d7XnvJGe393Po/U2SNwCvAN6X5IS22qn0wlOSNAkGGcltA7y+qs5PcizwRuDf27LfVtXOSWYB3w
fmAkuBBUleDFwIvIfeCO124Czgkr6+HwM8raqWJTkTOLiqfpTkKcCngWcC/w84p6pe0kau6w8vMMlJrc7hPlJVXxyh/Vb0RoQvBy4APlxV57TFlwDzk3wYWA94BnDlCH1vBlwHUFX3JvkdsDFw8whtR7IT8ETgl8D5wB5VdUySPYHTq+rk1m4h8P6ROkhyIHAgABsMuFVJ0rgMEpTXVdX57fWXgL/nz0F5UnveFTi7qm4CaKOhp7dl51TVLW3+14Ct+/r+WgvJ9YGnAV/rG5St056fCRwAUFXLgN8NL7CqRjyPOJIkL2t1HwnsXFW3D+trQZJd6QXoTcD/AMsG7X8cLqyq61tNi4HZwHkjtLsRmDVSB1V1FHAUQGalVkKNkrTGGyQoh/8B7p/+/Qpuf2j9tYBbq2rO8nQyzhHld4B/AP4G2D3JccApVfWHoQZVdSS9ICXJl4FrR+j7BmBz4Pp2jnUD4LfjKPvuvtfLGP3fYl3grnH0K0maQINczLNFkt3b61cx8qjnQuAvksxsh0dfCZwDXNTmb9jC5GUjbaCqbgN+luSvAdr5uh3b4jOBv2vz107ygIOMVbVfu/hl+OMBh12r6raq+lRV7QL8E7AncFWSD/VtY+P2egdgB2DBCGWfBrymvX45cFZVVZLN2mHkibI1cPkE9idJGodBgvIa4JAkVwEbAp8Z3qCqfgW8A/guvXN8i6rqm1V1A/Cv9IL0fGAJIxw6beYBr09yCXAFvYtloDf6e0aSy4BF9C62mRBV9cOqOgTYDji7zX4QcG6SK+kd1nx1Vd0LkOS9SfZt7T4PbJzkx8Bb6e0/wKbAvRNVI71zpN+awP4kSeOQqtFPbbXvBp5eVU9a7g0k61fVHW1EeQpwbFWdMtZ601WSQ4FfVNVpE9Tf94AXVdXSznazUhw0EVucXmq+p2YlLb8ki9oRxlGtiu8vHp7k2fTOtS2g93WH1VZVfXKi+kqyCb3zrJ0hKUlaeTqDsqqWAMs9mmx9HLYi66/J2lXEp052HZK0JvMWdpIkdTAoJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSepgUEqS1MGglCSpg0EpSVKHVfEzW1oF5s6ay8L5Cye7DEla7TiilCSpg0EpSVIHg1KSpA4GpSRJHQxKSZI6GJSSJHUwKCVJ6mBQSpLUwaCUJKlDqmqya9AEyKwUBy3fujXfz4CkNVOSRVW1S1cbR5SSJHUwKCVJ6mBQSpLUwaCUJKmDQSlJUgeDUpKkDgalJEkdDEpJkjoYlJIkdTAoJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1WGODMsnBSS5LsjjJeUm2b/MfnOS4tuySJHuPsv7hSW5o6y9O8vwBtnnBAG32SnJF63PzJN8e775JkibOGheUSTZsL79cVU+uqjnAh4CPtPl/C1BVTwaeA3w4yWjv00erak57/NdY266qpw1Q4jzg31qf1wG/SrLHAOtJklaClRKUSU5NsqiNjA7sm//6JNcmuTDJ0Uk+2eZvkuTrSS5qjwkNhiSPTHJYksuB/QCq6ra+Jg8Fqr3eHjirtbkRuBXYZYLquKM9753k7CQnJ7k6yQnpeQPwCuB9SU5oq51KLzwlSZNgxkrq93VVdUuShwAXJfk6sA7wHmBn4HZ6YXRJa/9xeqOz85JsAZwBbNffYZJtgJNG2d7eVXXrsPZrAc8F3kAv/L4MPK+qru9rcwjwVuDBwDPb7EuAfZOcCGwOzG3PF46w3UOTHAAsBN5WVUs735X72wl4IvBL4Hxgj6o6JsmewOlVdXJrtxB4/0gdtP+E9P4jssE4tixJGtjKCsq/T/KS9npz4AnAo4FzquoWgCRfA7ZubZ4NbJ9kaP2HJ1m/qu4YmlFV1wBzxlHDqfRC+Q3AGVVVwxtU1aeATyV5FfBu4DXAsfRCeiHwc+ACYNkI/X8GeB+9kej7gA8DrxtHfR
cOhXaSxcBs4LwR2t0IzBqpg6o6CjgKILPygP2TJK24CQ/KdvHLs4Hdq+rOJGcD646x2lrAU6vqDx39jmtECbyT3vnGTwDfSXJcVV00yvpfoRd8VNW9wFv6tnsBcO3wFarqN31tjgZOH632Udzd93oZo/9brAvcNc6+JUkTZGWco9wAWNpCclvgqW3+RcBfJNkwyQzgZX3rLADeNDSRZM7wTqvqmr4LZ4Y/bh2h/RVV9WZ6hzfPAY5McmmS57ZtPKGv+QuAH7X56yV5aHv9HODeqrpyeP9JNu2bfAlweZu/WZIzu96gcdp6qG9J0qq3Mg69fhs4OMlVwDXA9wGq6oYk/0rvXN8twNXA79o6f0/vEOilrabvAQdPRDFV9Ud6I9GTkjwWmNkWHZrk2cA9wFJ6h10BHgmckeQ+4AZg/6G+khwDfLaqFgIfaoFewBLgoNZsU+Deiai9eQbwrQnsT5I0Dhnh1N3K21g779hGlKcAx1bVKausgFUgyaHAL6rqtAnq73vAi8a6UCizUn+K6nGq+Z7elLRmSrKoqjq/2bCyLuYZzeFtFLcuvcOtp67i7a90VfXJieorySbAR8Z5Na0kaQKt0qCsqsNW5famu6q6idXwPxOSNJ2scXfmkSRpPAxKSZI6GJSSJHUwKCVJ6mBQSpLUwaCUJKmDQSlJUgeDUpKkDgalJEkdDEpJkjoYlJIkdTAoJUnqYFBKktRhVf/MllaSubPmsnD+wskuQ5JWO44oJUnqYFBKktTBoJQkqYNBKUlSB4NSkqQOBqUkSR0MSkmSOhiUkiR1MCglSeqQqprsGjQBktwOXDPZdSynmcDNk13ECpjO9Vv75JnO9a9OtT+2qjbpWsFb2K0+rqmqXSa7iOWRZOF0rR2md/3WPnmmc/1rWu0eepUkqYNBKUlSB4Ny9XHUZBewAqZz7TC967f2yTOd61+javdiHkmSOjiilCSpg0EpSVIHg3KaS/K8JNck+XGSd0x2PWNJcmySG5Nc3jdvoyTfSfKj9rzhZNY4miSbJ/lukiuTXJHkH9r8KV9/knWTXJjkklb7EW3+45L8oH1+Tkry4MmudTRJ1k7ywySnt+npVPuSJJclWZxkYZs35T83AEkekeTkJFcnuSrJ7tOo9m3aez70uC3Jm8dbv0E5jSVZG/gUsA+wPfDKJNtPblVjOh543rB57wDOrKonAGe26anoXuBtVbU98FTgkPZ+T4f67waeWVU7AnOA5yV5KvBB4KNVtRWwFHj95JU4pn8Aruqbnk61Azyjqub0fYdvOnxuAD4OfLuqtgV2pPdvMC1qr6pr2ns+B5gL3Amcwnjrryof0/QB7A6c0Tf9TuCdk13XAHXPBi7vm74G2LS93pTezRMmvc4B9uObwHOmW/3AesDFwFPo3aFkxkifp6n0AB7T/qA9EzgdyHSpvdW3BJg5bN6U/9wAGwA/o134OZ1qH2Ffngucvzz1O6Kc3jYDruubvr7Nm24eVVW/aq9/DTxqMosZRJLZwE7AD5gm9bdDl4uBG4HvAD8Bbq2qe1uTqfz5+RjwduC+Nr0x06d2gAIWJFmU5MA2bzp8bh4H3AQc1w57H5PkoUyP2of7P8CJ7fW46jcoNaVU7794U/o7S0nWB74OvLmqbutfNpXrr6pl1TsE9RhgN2Dbya1oMEleCNxYVYsmu5YVsGdV7UzvNMkhSZ7ev3AKf25mADsDn6mqnYDfM+ww5RSu/U/a+et9ga8NXzZI/Qbl9HYDsHnf9GPavOnmN0k2BWjPN05yPaNK8iB6IXlCVX2jzZ429QNU1a3Ad+kdrnxEkqF7Pk/Vz88ewL5JlgBfoXf49eNMj9oBqKob2vON9M6R7cb0+NxcD1xfVT9o0yfTC87pUHu/fYCLq+o3bXpc9RuU09tFwBPa1X8Ppndo4bRJrml5nAa8pr1+Db1zf1NOkgCfB66qqo/0LZry9SfZJMkj2uuH0Du3ehW9wHx5azYla6+qd1bVY6pqNr3P+F
lVNY9pUDtAkocmedjQa3rnyi5nGnxuqurXwHVJtmmzngVcyTSofZhX8ufDrjDO+r0zzzSX5Pn0zt+sDRxbVUdObkXdkpwI7E3vp25+A8wHTgW+CmwB/Bx4RVXdMkkljirJnsC5wGX8+VzZu+idp5zS9SfZAfgCvc/JWsBXq+q9SbakN0rbCPgh8OqqunvyKu2WZG/gsKp64XSpvdV5SpucAXy5qo5MsjFT/HMDkGQOcAzwYOCnwN/QPkNM8drhT/85+QWwZVX9rs0b13tvUEqS1MFDr5IkdTAoJUnqYFBKktTBoJQkqYNBKUlSB4NSWgMluWMVb292kletym1KE8WglLRStbvnzAYMSk1LBqW0Bkuyd5JzknwzyU+TfCDJvPbblZcleXxrd3ySzyZZmOTadv/Vod+5PK61/WGSZ7T5r01yWpKz6P3qxweAvdpvAr6ljTDPTXJxezytr56z+37/8IR2RySS7JrkgvR+U/PCJA9rN3r/v0kuSnJpkoMm5Y3Uam3G2E0kreZ2BLYDbqF355Vjqmq39H6Y+k3Am1u72fTuUfp44LtJtgIOoXdf6Scn2ZbeL2Rs3drvDOxQVbf031EHIMl6wHOq6g9JnkDv9mJDv9O4E/BE4JfA+cAeSS4ETgL2q6qLkjwcuIveb1D+rqp2TbIOcH6SBVX1s4l/m7SmMiglXTT0k0NJfgIsaPMvA57R1+6rVXUf8KMkP6X36yN7Ap8AqKqrk/wcGArK73TcFuxBwCfb7dGW9a0DcGFVXd/qWUwvoH8H/KqqLmrbuq0tfy6wQ5Khe75uADyB3m8oShPCoJTUf3/U+/qm7+P+fyOG3+9yrPtf/r5j2Vvo3et3R3qngP4wSj3L6P47FeBNVXXGGLVIy81zlJIG9ddJ1mrnLbek9yvx5wLzANoh1y3a/OFuBx7WN70BvRHifcD+9G7W3uUaYNMku7ZtPaxdJHQG8Hft589IsnW7CbY0YRxRShrUL4ALgYcDB7fzi58GPpPkMuBe4LVVdXe7/qbfpcCyJJcAxwOfBr6e5ADg23SPPqmqPybZD/hE+5mwu4Bn0/tVi9nAxe2in5uAF0/Avkp/4q+HSBpTkuOB06vq5MmuRVrVPPQqSVIHR5SSJHVwRClJUgeDUpKkDgalJEkdDEpJkjoYlJIkdfhf4CFoQbMfVOgAAAAASUVORK5CYII=",
448 |       "text/plain": [
449 |        "<Figure size 432x288 with 1 Axes>"
450 |       ]
451 |      },
452 |      "metadata": {
453 |       "needs_background": "light"
454 |      },
455 |      "output_type": "display_data"
456 |     },
457 |     {
458 |      "data": {
459 |       "text/html": [
460 |        "<div>\n",
461 |        "<style scoped>\n",
462 |        "    .dataframe tbody tr th:only-of-type {\n",
463 |        "        vertical-align: middle;\n",
464 |        "    }\n",
465 |        "\n",
466 |        "    .dataframe tbody tr th {\n",
467 |        "        vertical-align: top;\n",
468 |        "    }\n",
469 |        "\n",
470 |        "    .dataframe thead th {\n",
471 |        "        text-align: right;\n",
472 |        "    }\n",
473 |        "</style>\n",
474 |        "<table border=\"1\" class=\"dataframe\">\n",
475 |        "  <thead>\n",
476 |        "    <tr style=\"text-align: right;\">\n",
477 |        "      <th></th>\n",
478 |        "      <th>conditions</th>\n",
479 |        "      <th>importances</th>\n",
480 |        "    </tr>\n",
481 |        "  </thead>\n",
482 |        "  <tbody>\n",
483 |        "    <tr>\n",
484 |        "      <th>0</th>\n",
485 |        "      <td>pnodes = (-inf, 3.5)</td>\n",
486 |        "      <td>67.394775</td>\n",
487 |        "    </tr>\n",
488 |        "    <tr>\n",
489 |        "      <th>1</th>\n",
490 |        "      <td>pnodes = (-inf, 4.5)</td>\n",
491 |        "      <td>32.835122</td>\n",
492 |        "    </tr>\n",
493 |        "    <tr>\n",
494 |        "      <th>2</th>\n",
495 |        "      <td>progrec = &lt;9.0, inf)</td>\n",
496 |        "      <td>13.146344</td>\n",
497 |        "    </tr>\n",
498 |        "    <tr>\n",
499 |        "      <th>3</th>\n",
500 |        "      <td>age = &lt;39.5, inf)</td>\n",
501 |        "      <td>3.171229</td>\n",
502 |        "    </tr>\n",
503 |        "  </tbody>\n",
504 |        "</table>\n",
505 |        "</div>"
506 |       ],
507 |       "text/plain": [
508 |        "             conditions  importances\n",
509 |        "0  pnodes = (-inf, 3.5)    67.394775\n",
510 |        "1  pnodes = (-inf, 4.5)    32.835122\n",
511 |        "2  progrec = <9.0, inf)    13.146344\n",
512 |        "3     age = <39.5, inf)     3.171229"
513 |       ]
514 |      },
515 |      "execution_count": 9,
516 |      "metadata": {},
517 |      "output_type": "execute_result"
518 |     }
519 |    ],
520 |    "source": [
521 |     "explainer.local_explainability(x.iloc[0, :], pd.DataFrame(y).iloc[0, :], plot = True)"
522 |    ]
523 |   }
524 |  ],
525 |  "metadata": {
526 |   "interpreter": {
527 |    "hash": "52a883ffde2ee8dab628074f134ac5e542adeed8306ab19e0ac3d240604a9b31"
528 |   },
529 |   "kernelspec": {
530 |    "display_name": "rulexai",
531 |    "language": "python",
532 |    "name": "rulexai"
533 |   },
534 |   "language_info": {
535 |    "codemirror_mode": {
536 |     "name": "ipython",
537 |     "version": 3
538 |    },
539 |    "file_extension": ".py",
540 |    "mimetype": "text/x-python",
541 |    "name": "python",
542 |    "nbconvert_exporter": "python",
543 |    "pygments_lexer": "ipython3",
544 |    "version": "3.8.5"
545 |   },
546 |   "orig_nbformat": 4
547 |  },
548 |  "nbformat": 4,
549 |  "nbformat_minor": 2
550 | }
551 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas >= 1.5.0, < 2.3.0
2 | numpy ~= 1.26.4
3 | matplotlib ~= 3.8.3
4 | rulekit ~= 1.7.6
5 | lifelines ~= 0.28.0


--------------------------------------------------------------------------------
/rulexai/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = '1.1.0'


--------------------------------------------------------------------------------
/rulexai/explainer.py:
--------------------------------------------------------------------------------
  1 | from .importances import (
  2 |     ClassificationConditionImportance,
  3 |     RegressionConditionImportance,
  4 |     SurvivalConditionImportance,
  5 |     ConditionImportance,
  6 | )
  7 | from .models import ClassificationModel, RegressionModel, SurvivalModel, BlackBoxModel
  8 | import pandas as pd
  9 | import numpy as np
 10 | from typing import Union, List
 11 | import matplotlib.pyplot as plt
 12 | 
 13 | from .reduct import Reduct
 14 | 
 15 | Labels = Union[pd.DataFrame, pd.Series]
 16 | 
 17 | 
 18 | 
 19 | class BaseExplainer:
 20 |     """:meta private:"""
 21 |     def __init__(
 22 |         self, model, X: pd.DataFrame, y: Labels, type: str = "classification"
 23 |     ) -> None:
 24 | 
 25 | 
 26 |         self.model = model
 27 |         self.X = X
 28 |         self.y = y
 29 |         self.type = type
 30 | 
 31 |         self.condition_importances_ = None
 32 |         self.feature_importances_ = None
 33 |         self.if_basic_conditions = None
 34 | 
 35 |         self.condition_importance_class = None                            
 36 |         self._conditions_importances_for_training_set = None 
 37 |         self._basic_conditions_importances_for_training_set = None
 38 | 
 39 | 
 40 |     def explain(self, measure: str = "C2", basic_conditions: bool = False):
 41 |         """Compute conditions importances. The importances of a conditions are computed base on: \n
 42 |         Marek Sikora: Redefinition of Decision Rules Based on the Importance of Elementary Conditions Evaluation. Fundam. Informaticae 123(2): 171-197 (2013) \n
 43 |         https://dblp.org/rec/journals/fuin/Sikora13.html
 44 | 
 45 |         Parameters
 46 |         ----------
 47 |         measure: str
 48 |             Specifies the measure that is used to evaluate the quality of the rules. Possible measures for classification and regression problem are: C2, Lift, Correlation. Default: C2. It is not possible to select a measure for the survival problem, the LogRank test is used by default 
 49 |         basic_conditions : bool
 50 |             Specifies whether to evaluate the conditions contained in the input rules, or to break the conditions in the rules into base conditions so that individual conditions do not overlap
 51 |         Returns
 52 |         -------
 53 |         self: Explainer
 54 |             Fitted explainer with calculated conditions
 55 | 
 56 |         """
 57 |         self.if_basic_conditions = basic_conditions
 58 |         self.condition_importances_ = self._determine_condition_importances(measure)
 59 |         self.feature_importances_ = self._determine_feature_importances(self.condition_importances_)
 60 | 
 61 |         return self
 62 | 
 63 |     def fit_transform(
 64 |         self, X: pd.DataFrame, selector=None, y=None, POS=None) -> pd.DataFrame:
 65 | 
 66 |         """Creates a dataset based on given dataset in which the examples, instead of being described by the original attributes, will be described with the specified conditions - it will be a set with binary attributes determining whether a given example meets a given condition. It can be considered as kind of dummification.
 67 |         Thanks to this function you can discretize data and get rid of missing values. It can be used as prestep for others algorithms.
 68 | 
 69 |         Parameters
 70 |         ----------
 71 |         X: pd.DataFrame
 72 |             The input samples from which you want to create binary dataset. Should have the same columns and columns order as X specified when creating Explainer
 73 |         selector : string/float
 74 |             Specifies on what basis to select the conditions from the rules that will be included as attributes in the transformed set.   
 75 |             If None all conditions will be included in the transformed set. If number 0-1 percent of the most important conditions will be selected based on condition importance ranking. If "reduct" the reduct of the conditions set will be selected. Preferably, the option with the percentage of most important conditions will be selected.
 76 |         y: Union[pd.DataFrame, pd.Series]
 77 |             Only if selector = "reduct".The target values for input sample, used in the determination of the reduct
 78 |         POS: float
 79 |             Only if selector = "reduct".Target reduct POS
 80 |         Returns
 81 |         -------
 82 |         X_transformed: pd.DataFrame
 83 |             Transformed dataset
 84 | 
 85 |         """
 86 |         
 87 |         helper = ConditionImportance(self.model.rules, X, None, self.if_basic_conditions)
 88 |         if self.if_basic_conditions:
 89 |             rules = helper.split_conditions_in_rules(self.model.rules)
 90 |             conditions = helper._get_conditions_with_rules(rules)
 91 |         else:
 92 |             conditions = helper._get_conditions_with_rules(
 93 |                 self.model.rules
 94 |             )
 95 | 
 96 |         binary_dataset = self._prepare_binary_dataset(X, conditions)
 97 | 
 98 |         if selector=="reduct":
 99 |             reduct = Reduct()
100 |             binary_dataset = reduct.get_reduct(binary_dataset,y,POS)
101 |         elif not selector is None:
102 |             binary_dataset = self._get_top_conditions(binary_dataset, selector)
103 |         
104 |         chosen_conditions_names = binary_dataset.columns
105 |         self.conditions_names = chosen_conditions_names
106 |         self.conditions = []
107 |         for condition in conditions:
108 |             if str(condition) in chosen_conditions_names:
109 |                 self.conditions.append(condition)
110 | 
111 |         return binary_dataset
112 | 
113 |     def transform(
114 |         self, X: pd.DataFrame) -> pd.DataFrame:
115 | 
116 |         """Creates a dataset based on given dataset in which the examples, instead of being described by the original attributes, will be described with the specified conditions - it will be a set with binary attributes determining whether a given example meets a given condition. It can be considered as kind of dummification.
117 |         Thanks to this function you can discretize data and get rid of missing values. It can be used as prestep for others algorithms.
118 | 
119 |         Parameters
120 |         ----------
121 |         X: pd.DataFrame
122 |             The input samples from which you want to create binary dataset. Should have the same columns and columns order as X given in fit_transform
123 |         Returns
124 |         -------
125 |         X_transformed: pd.DataFrame
126 |             Transformed dataset
127 | 
128 |         """
129 |         transformed_dataset = self._prepare_binary_dataset(X, self.conditions)
130 |             
131 |         return transformed_dataset[self.conditions_names]
132 | 
133 |    
134 |     def get_rules_covering_example(self, x: pd.DataFrame, y: Labels) -> List[str]:
135 |         """Return rules that covers the given example
136 | 
137 |         Parameters
138 |         ----------
139 |         x : pd.DataFrame
140 |             The input sample.
141 |         y : Union[pd.DataFrame, pd.Series]
142 |             The target values for input sample.
143 |         Returns
144 |         -------
145 |         rules: List[str]
146 |             Rules that covers the given example
147 | 
148 |         """
149 |         rules_covering_example = []
150 |         for rule in self.model.rules:
151 |             if rule.premise.evaluate(x):
152 |                 rules_covering_example.append(rule)
153 | 
154 |         return rules_covering_example
155 | 
156 | 
157 |     def local_explainability(self, x: pd.DataFrame, y: Labels, plot: bool = False):
158 |         """Displays information about the local explanation of the example: the rules that cover the given example and the importance of the conditions contained in these rules
159 |         
160 |         Parameters
161 |         ----------
162 |         x : pd.DataFrame
163 |             The input sample.
164 |         y : Union[pd.DataFrame, pd.Series]
165 |             The target values for input sample.
166 |         plot : bool
167 |             If True the importance of the conditions will also be shown in the chart. Default: False
168 |         """
169 |         rules_covering_example = self.get_rules_covering_example(x,y)
170 | 
171 |         print("Example:")
172 |         print(pd.concat([x,y]))
173 |         print("")
174 | 
175 |         print("Rules that covers this example:")
176 |         for rule in rules_covering_example:
177 |             print(rule)
178 |         print("")
179 |             
180 |         conditions_importances = self.condition_importances_.copy()   
181 |         
182 |         classes_with_conditions = dict()
183 |         for rule in rules_covering_example:
184 |             
185 | 
186 |             if rule.consequence.left in classes_with_conditions.keys():
187 |                 conditions = classes_with_conditions[rule.consequence.left]
188 |             else:
189 |                 conditions = []
190 | 
191 |             conditions.extend(rule.premise.get_subconditions())
192 |             conditions = list(map(str, conditions))
193 |             classes_with_conditions[rule.consequence.left] = conditions
194 | 
195 | 
196 | 
197 |         if self.type == "classification":
198 |             importances_for_covering_rules = pd.DataFrame()
199 |             for j in range(0, conditions_importances.shape[1], 2):
200 |                 class_in_consequences = False
201 |                 tmp_df = conditions_importances.iloc[:, j : j + 2]
202 |                 for cl in classes_with_conditions.keys():
203 |                     if cl in tmp_df.columns[0]:
204 |                         class_in_consequences = True
205 |                         class_name = cl
206 | 
207 |                 if class_in_consequences:
208 |                     tmp_df.loc[~tmp_df[tmp_df.columns[0]].isin(classes_with_conditions[class_name]), tmp_df.columns[0]] = np.NaN
209 |                     tmp_df.dropna(inplace=True)
210 |                     tmp_df.reset_index(drop = True, inplace = True)
211 |                     importances_for_covering_rules = pd.concat([importances_for_covering_rules, tmp_df], ignore_index=False, axis=1)
212 | 
213 |             importances_for_covering_rules = importances_for_covering_rules.replace(np.nan, "-")
214 |         else:
215 |             conditions_importances.loc[~conditions_importances[conditions_importances.columns[0]].isin(conditions), conditions_importances.columns[0]] = np.NaN
216 |             conditions_importances.dropna(inplace=True)
217 |             importances_for_covering_rules = conditions_importances.reset_index(drop = True)
218 | 
219 | 
220 | 
221 |         print("Importances of the conditions from rules covering the example")
222 |         print(importances_for_covering_rules)
223 | 
224 |         if plot:
225 |             self.plot_importances(importances_for_covering_rules)
226 | 
227 |         return importances_for_covering_rules
228 | 
229 |     def get_rules(self):
230 |         """Return rules from model
231 | 
232 |         Returns
233 |         -------
234 |         rules: List[str]
235 |             Rules from model
236 |         """
237 |         rules = []
238 |         for rule in self.model.rules:
239 |             rules.append(rule.__str__())
240 |         return rules
241 | 
242 | 
243 |     def get_rules_with_basic_conditions(self):
244 |         """Return rules from model with conditions broken down into base conditions so that individual conditions do not overlap
245 | 
246 |         Returns
247 |         -------
248 |         rules: List[str]
249 |             Rules from the model containing the base conditions 
250 |         """
251 |         rules_with_basic_conditions = []
252 |         helper = ConditionImportance(self.model.rules, self.X, self.y, True)
253 |         rules = helper.rules
254 |         for rule in rules:
255 |             rules_with_basic_conditions.append(rule.__str__())
256 |         return rules_with_basic_conditions
257 | 
258 | 
259 |     def plot_importances(self, importances: pd.DataFrame):
260 |         """Plot importances
261 |         Parameters
262 |         ----------
263 |         importances : pd.DataFrame
264 |             Feature/Condition importances to plot.
265 |         """
266 |         
267 |         if "attributes" in importances.columns[0]:
268 |             title = "Feature Importance"
269 |         else:
270 |             title = "Condition Importance"
271 | 
272 |         plots_number = int(importances.shape[1]/2)
273 | 
274 |         if self.type == "classification" and plots_number > 1:
275 | 
276 |             fig, axs = plt.subplots(1,plots_number, sharex = True)
277 |             i = 0
278 |             ticks_number = importances.shape[0]
279 |             y_ticks = np.arange(0, ticks_number)
280 |             for j in range(0, importances.shape[1], 2):
281 | 
282 |                 tmp_df = importances.iloc[:, j : j + 2]
283 |                 tmp_df = tmp_df.replace("-", np.nan).dropna()
284 |                 tmp_df.sort_values(inplace=True, by=tmp_df.columns[1])
285 |                     
286 |                 ticks_values = tmp_df.iloc[:, 1].to_list()
287 |                 ticks_all = [0 for _ in range(len(ticks_values),ticks_number)]
288 |                 ticks_all.extend(ticks_values)  
289 |                 labels = tmp_df.iloc[:, 0].to_list()
290 |                 labels_all = ["" for _ in range(len(labels),ticks_number)]
291 |                 labels_all.extend(labels)
292 | 
293 |                 colors = ["green" if y >= 0 else "red" for y in ticks_all]
294 | 
295 |                 axs[i].barh(y_ticks, ticks_all, color = colors)
296 |                 axs[i].set_yticks(y_ticks)
297 |                 axs[i].set_yticklabels(labels_all)
298 |                 class_name = tmp_df.columns[0].split(" | ")[0]
299 |                 axs[i].set_title(f"Importance for class: {class_name}")
300 |                 axs[i].set_xlabel(f"Importance")
301 |                 i+=1
302 | 
303 |             fig.subplots_adjust(wspace = 0.5)
304 |             plt.show()
305 | 
306 |         else:
307 | 
308 |             tmp_df = importances.sort_values(by=importances.columns[1])     
309 |             colors = ["green" if y >= 0 else "red" for y in tmp_df.iloc[:,1].to_list()] 
310 |             y_ticks = np.arange(0, len(tmp_df))
311 |             fig, ax = plt.subplots()
312 |             ax.barh(y_ticks, tmp_df.iloc[:, 1], color = colors)
313 |             ax.set_yticks(y_ticks)
314 |             ax.set_yticklabels(tmp_df.iloc[:, 0])
315 |             ax.set_title(f"{title}")
316 |             ax.set_xlabel(f"Importance")
317 | 
318 |             plt.show()
319 | 
320 | 
321 |     
322 |     def _determine_condition_importances(self, measure: str = "C2", X = None, y = None):
323 |         if X is None:
324 |             X = self.X
325 |         if y is None:
326 |             y = self.y
327 | 
328 |         if self.type == "regression":
329 |             self.condition_importance_class = RegressionConditionImportance(
330 |                 rules = self.model.rules, data = X, labels = y, if_split = self.if_basic_conditions, measure = measure
331 |             )
332 |         elif self.type == "survival":
333 |             self.condition_importance_class = SurvivalConditionImportance(
334 |                 rules = self.model.rules, data = X, labels = y, if_split = self.if_basic_conditions, measure = measure
335 |             )
336 |         else:
337 |             self.condition_importance_class = ClassificationConditionImportance(
338 |                 rules = self.model.rules, data = X, labels = y, if_split = self.if_basic_conditions, measure = measure
339 |             )
340 |         return self.condition_importance_class.condition_importances()
341 | 
342 |     def _determine_feature_importances(self, conditions_importances):
343 |         feature_importances = pd.DataFrame()
344 | 
345 |         if self.type == "classification":
346 |             
347 |             for j in range(0, conditions_importances.shape[1], 2):
348 |                 class_importances = (
349 |                     conditions_importances.iloc[:, j : j + 2]
350 |                     .replace("-", np.nan)
351 |                     .dropna()
352 |                 )
353 |                 importances_df_tmp = pd.DataFrame()
354 |                 class_importances.iloc[:, 0] = class_importances.iloc[:, 0].apply(
355 |                     lambda x: x.split(" = ")[0]
356 |                 )
357 |                 class_importances = (
358 |                     class_importances.groupby(class_importances.columns[0])
359 |                     .sum()
360 |                     .reset_index()
361 |                 )
362 |                 class_importances.sort_values(
363 |                     class_importances.columns[1], ascending=False, inplace=True
364 |                 )
365 |                 class_importances.reset_index(drop=True, inplace= True)
366 | 
367 |                 class_name, _ = class_importances.columns[0].split(" | ")
368 |                 importances_df_tmp[class_name + " | attributes"] = pd.Series(class_importances[
369 |                     class_importances.columns[0]
370 |                 ])
371 |                 importances_df_tmp[class_name + " | importances"] = pd.Series(class_importances[
372 |                     class_importances.columns[1]
373 |                 ])
374 |            
375 |                 feature_importances = pd.concat(
376 |                     [feature_importances, importances_df_tmp],
377 |                     ignore_index=False,
378 |                     axis=1
379 |                 )
380 |              
381 |             feature_importances = feature_importances.replace(np.nan, "-")
382 |                             
383 |         else:
384 | 
385 |             importances_df = conditions_importances.copy()
386 |             importances_df.iloc[:, 0] = importances_df.iloc[:, 0].apply(
387 |                 lambda x: x.split(" = ")[0]
388 |             )
389 |             importances_df = (
390 |                                        
391 |                 importances_df.groupby(importances_df.columns[0]).sum().reset_index()
392 |             )
393 |             importances_df.sort_values(
394 |                 importances_df.columns[1], ascending=False, inplace=True
395 |             )
396 | 
397 |             importances_df.rename(
398 |                 columns={
399 |                     importances_df.columns[0]: "attributes",
400 |                     importances_df.columns[1]: "importances",
401 |                 },
402 |                 inplace=True,
403 |             )
404 | 
405 |             feature_importances = importances_df.replace(np.nan, "-")
406 | 
407 |         return feature_importances
408 | 
409 |     def _prepare_binary_dataset(
410 |         self, X: pd.DataFrame, conditions) -> pd.DataFrame:
411 | 
412 |         x = X.to_numpy()
413 |         binary_dataset_arr = np.zeros((x.shape[0], len(conditions)), dtype=int)
414 |         conditions_names = []
415 | 
416 |         for i, condition in enumerate(conditions):
417 |             condition.evaluate_mask(binary_dataset_arr, x, column_index=i)
418 |             conditions_names.append(str(condition))
419 | 
420 |         binary_dataset = pd.DataFrame(binary_dataset_arr, columns=conditions_names)
421 |             
422 |         return binary_dataset
423 | 
424 |     def _get_top_conditions(self,binary_dataset, percent):
425 |         if self.type == "classification":
426 |             importances_TOP = []
427 |             for j in range(0, self.condition_importances_.shape[1] + 0, 2):
428 |                 class_importances = (
429 |                     self.condition_importances_.iloc[:, j]
430 |                     .replace("-", np.nan)
431 |                     .dropna()
432 |                 )
433 |                 class_importances_TOP_number = np.round(
434 |                     (percent) * class_importances.shape[0]
435 |                 )
436 | 
437 |                 if class_importances_TOP_number == 0:
438 |                     class_importances_TOP_number = 1
439 | 
440 |                 class_importances_TOP = class_importances.loc[
441 |                     0 : class_importances_TOP_number - 1
442 |                 ]
443 |                 importances_TOP.extend(list(class_importances_TOP))
444 | 
445 |             importances_TOP_list = list(set(importances_TOP))
446 |         else:
447 | 
448 |             importances_TOP_number = np.round(
449 |                 (percent) * self.condition_importances_.shape[0]
450 |             )
451 |             if importances_TOP_number == 0:
452 |                 importances_TOP_number = 1
453 | 
454 |             importances_TOP = self.condition_importances_.loc[
455 |                 0 : importances_TOP_number - 1
456 |             ]
457 |             importances_TOP_list = importances_TOP["conditions"].to_list()
458 |             
459 |         return binary_dataset[importances_TOP_list]
460 | 
461 | 
class RuleExplainer(BaseExplainer):
    def __init__(
        self, model, X: pd.DataFrame, y: Labels, type: str = "classification"
    ) -> None:
        """RuleExplainer

        Parameters
        ----------
        model : Model = Union[RuleClassifier, RuleRegressor, SurvivalRules, CN2UnorderedClassifier, CN2SDUnorderedClassifier, DecisionTreeClassifier, DecisionTreeRegressor, SurvivalTree, List[str]]
            Model to be analyzed. RuleXai supports the following rule models:
             - RuleKit (https://adaa-polsl.github.io/RuleKit-python/): RuleClassifier, RuleRegressor, SurvivalRules
             - Orange (https://orangedatamining.com/): CN2UnorderedClassifier, CN2SDUnorderedClassifier
            It can also extract rules from decision trees:
             - scikit-learn (https://scikit-learn.org/stable/): DecisionTreeClassifier, DecisionTreeRegressor
             - scikit-survival (https://scikit-survival.readthedocs.io/en/stable/): SurvivalTree
            Or you can provide a list of rules written as:
             - classification:
                IF attribute1 = (-inf, value) AND  ...  AND attribute2 = <value1, value2) THEN label_attribute = {class_name}
             - regression:
                IF attribute1 = (-inf, value) AND  ...  AND attribute2 = <value1, value2) THEN target_attribute = {value}
             - survival:
                IF attribute1 = (-inf, value) AND  ...  AND attribute2 = <value1, value2) THEN survival_status_attribute = {survival_status}
        X : pd.DataFrame
            The training dataset used to train the provided model
        y : Union[pd.DataFrame, pd.Series]
            The target values (class labels, real numbers, survival status) used to train the provided model
        type : str
            The type of problem that the provided model solves. You can choose between:
                - "classification"
                - "regression"
                - "survival"
            Any other value is handled as classification. Default: "classification"

        Attributes
        ----------
        condition_importances_ : pd.DataFrame
            Computed conditions importances
        feature_importances_ : pd.DataFrame
            Feature importances computed based on conditions importances
        """

        # The label name is the first column of a DataFrame, or the Series name.
        label_name = y.columns[0] if isinstance(y, pd.DataFrame) else y.name
        feature_names = X.columns

        if type == "regression":
            wrapped_model = RegressionModel(
                model = model, feature_names = feature_names, label_name = label_name
            )
        elif type == "survival":
            wrapped_model = SurvivalModel(
                model = model, feature_names = feature_names, survival_status_name = label_name
            )
        else:
            wrapped_model = ClassificationModel(
                model = model,
                feature_names = feature_names,
                class_names = np.unique(y),
                label_name = label_name,
            )

        super().__init__(wrapped_model, X, y, type)
516 | 
class Explainer(BaseExplainer):
    def __init__(
        self, X: pd.DataFrame, model_predictions: Labels, type: str = "classification"
    ) -> None:
        """Explainer

        Parameters
        ----------
        X : pd.DataFrame
            The training dataset used during provided model training
        model_predictions : Union[pd.DataFrame, pd.Series]
            The predictions of the explained (black-box) model for the examples in X.
            Anything that is not a DataFrame/Series is wrapped in a single-column
            DataFrame named "class" that shares the index of X.
        type : str
            The type of problem that the provided model solves. You can choose between:
                - "classification"
                - "regression"
            default: "classification"

        Attributes
        ----------
        condition_importances_ : pd.DataFrame
            Computed conditions importances on given dataset
        feature_importances_ : pd.DataFrame
            Feature importances computed based on conditions importances
        """

        if not isinstance(model_predictions, (pd.DataFrame, pd.Series)):
            # Reuse the index of X: the predictions are later combined with the
            # data column-wise, which aligns on the index, so a fresh 0..n-1
            # index would misalign them whenever X is not indexed that way.
            model_predictions = pd.DataFrame(
                model_predictions, columns=["class"], index=X.index
            )
        self._bb_model = BlackBoxModel(X, model_predictions, type)
        super().__init__(None, X, model_predictions, type)

    def explain(self, measure: str = "C2", basic_conditions: bool = False, X_org = None):
        """Compute conditions importances. The importances of a conditions are computed base on: \n
        Marek Sikora: Redefinition of Decision Rules Based on the Importance of Elementary Conditions Evaluation. Fundam. Informaticae 123(2): 171-197 (2013) \n
        https://dblp.org/rec/journals/fuin/Sikora13.html

        Parameters
        ----------
        measure: str
            Specifies the measure that is used to evaluate the quality of the rules. Possible measures for classification and regression problem are: C2, Lift, Correlation. Default: C2. It is not possible to select a measure for the survival problem, the LogRank test is used by default
        basic_conditions : bool
            Specifies whether to evaluate the conditions contained in the input rules, or to break the conditions in the rules into base conditions so that individual conditions do not overlap
        X_org:
            The dataset on which the rule-based model should be built. It can be the set on which the black-box model was learned or this set before preprocessing (imputation of missing values, dummification, scaling), because such a set can be handled by the rule model. When omitted, the dataset given to the constructor is used.

        Returns
        -------
        self: Explainer
            Fitted explainer with calculated conditions

        """
        self.if_basic_conditions = basic_conditions

        if X_org is None:
            self.model = self._bb_model.get_rules_model(self.X)
            self.condition_importances_ = self._determine_condition_importances(measure)
        else:
            # Rules are induced on, and evaluated against, the original dataset.
            self.model = self._bb_model.get_rules_model(X_org)
            self.condition_importances_ = self._determine_condition_importances(measure, X_org)

        self.feature_importances_ = self._determine_feature_importances(self.condition_importances_)

        return self


--------------------------------------------------------------------------------
/rulexai/importances.py:
--------------------------------------------------------------------------------
  1 | from .rule import (
  2 |     CompoundCondition,
  3 |     CompoundConditionWithCombiningOperators,
  4 |     ClassificationRule,
  5 |     ElementaryCondition,
  6 |     RegressionRule,
  7 |     Rule,
  8 |     SurvivalRule,
  9 | )
 10 | import pandas as pd
 11 | import numpy as np
 12 | from typing import List
 13 | from operator import attrgetter
 14 | import math
 15 | from typing import Dict
 16 | import importlib
 17 | 
 18 | class ConditionImportance:
 19 |     def __init__(self, rules, data, labels, if_split, measure = None) -> None:
 20 |         self.data = data
 21 |         self.labels = labels
 22 |         self.dataset = pd.concat([self.data, self.labels], axis=1)
 23 |         self.if_split = if_split
 24 |         self.measure = measure
 25 |         self.column_indexes: Dict[str, int] = {column_name:i for i, column_name in enumerate(list(data.columns))}
 26 |         if if_split:
 27 |             self.rules = self.split_conditions_in_rules(rules)
 28 |         else:
 29 |             self.rules = rules
 30 | 
 31 |     def _get_conditions_with_rules(self, rules):
 32 |         conditions_with_rules = dict()
 33 | 
 34 |         for rule in rules:
 35 |             rule_conditions = rule.premise.get_subconditions()
 36 | 
 37 |             for condition in rule_conditions:
 38 |                 if condition in conditions_with_rules.keys():
 39 |                     conditions_rules = conditions_with_rules[condition]
 40 |                 else:
 41 |                     conditions_rules = []
 42 |                 conditions_rules.append(rule)
 43 |                 conditions_with_rules[condition] = conditions_rules
 44 | 
 45 |         return conditions_with_rules
 46 | 
 47 |     def _calculate_conditions_qualities(self, conditions_with_rules):
 48 |         conditions_qualities = []
 49 |         for condition in conditions_with_rules.keys():
 50 |             sum = 0
 51 |             for rule in conditions_with_rules[condition]:
 52 |                 sum += self._calculate_index_simplified(condition, rule)
 53 |             conditions_qualities.append(ConditionEvaluation(condition, sum))
 54 | 
 55 |         return conditions_qualities
 56 | 
 57 | 
 58 |     def _calculateMeasure(self, rule):
 59 |         p, n, P, N = rule.covers(self.dataset)
 60 | 
 61 |         if self.measure == "Correlation":
 62 |             if (P - p + N - n == 0):
 63 |                 return  0
 64 |             else: 
 65 |                 return (p * N - P * n) / math.sqrt(P * N * (p + n) * (P - p + N - n))
 66 | 
 67 |         elif self.measure == "Lift":
 68 |             if (p == 0 and n == 0) or P == 0:
 69 |                 return 0
 70 |             else:
 71 |                 return p * (P + N) / ((p + n) * P)
 72 | 
 73 |         else: # C2
 74 |             if (p == 0 and n == 0) or P == 0 or N == 0:
 75 |                 return 0
 76 |             else:
 77 |                 return (((P + N) * p / (p + n) - P) / N) * ((1 + p / P) / 2)  # C2
 78 | 
 79 |     def _condition_importances_to_DataFrame(self, condition_importances):
 80 |         importances_df = pd.DataFrame()
 81 |         importances_df["conditions"] = pd.Series(
 82 |             [str(cnd) for cnd in condition_importances.keys()]
 83 |         )
 84 |         importances_df["importances"] = pd.Series(condition_importances.values())
 85 | 
 86 |         return importances_df
 87 | 
 88 |     def split_conditions_in_rules(self, rules):
 89 |         conditions_with_rules = self._get_conditions_with_rules(rules)
 90 |         conditions = self._split_conditions_into_basic(conditions_with_rules.keys())
 91 | 
 92 |         return self._get_rules_with_splitted_conditions(conditions, rules)
 93 | 
 94 |     def _split_conditions_into_basic(self, conditions: List[ElementaryCondition]):
 95 |         splittedConditions = []
 96 |         conditions_for_attributes = dict()
 97 |         for condition in conditions:
 98 |             if condition.right is None: #attribute is Nominal
 99 |                 splittedConditions.append(condition)
100 |             else:
101 |                 attr = condition.attribute
102 |                 if attr in conditions_for_attributes:
103 |                     attribute_conditions = conditions_for_attributes[attr]
104 |                 else: 
105 |                     attribute_conditions = []
106 |                 attribute_conditions.append(condition)
107 |                 conditions_for_attributes[attr] = attribute_conditions
108 | 
109 |         for conditions_for_attribute in conditions_for_attributes.values():
110 |             if len(conditions_for_attribute) > 1:
111 |                 basic_conditions = self._get_basic_conditions_for_attribute(conditions_for_attribute)
112 |                 splittedConditions.extend(basic_conditions)
113 |             else:
114 |                 splittedConditions.extend(conditions_for_attribute)
115 | 
116 |         return splittedConditions
117 |     
118 |     def _get_basic_conditions_for_attribute(self, conditions: List[ElementaryCondition]):
119 |         basic_conditions = []
120 |         points = []
121 |         id = 0 
122 |         attribute = conditions[0].attribute
123 |         for condition in conditions:
124 |             point = Point(id, condition.left, "Left", condition.leftClosed)
125 |             points.append(point)
126 |             point = Point(id, condition.right, "Right", condition.rightClosed)
127 |             points.append(point)
128 |             id += 1
129 | 
130 |         min_point_first = min(points, key = attrgetter('value'))
131 |         all_points_len = len(points)
132 |         points = [point for point in points if point.value != min_point_first.value]
133 |         number_of_firts_mins = all_points_len - len(points)
134 | 
135 |         while(len(points) > 0):
136 |             min_point_second = min(points, key = attrgetter('value'))
137 |             points_with_second_min_len = len(points)
138 |             points = [point for point in points if point.value != min_point_second.value]
139 |             number_of_second_mins = points_with_second_min_len - len(points)
140 | 
141 |             if(min_point_first.value == float('-inf')):
142 |                 leftClosed = False
143 |             else:
144 |                 leftClosed = True
145 | 
146 |             condition = ElementaryCondition(attribute = attribute, left = min_point_first.value, right= min_point_second.value, leftClosed = leftClosed, rightClosed= False, column_index=self.column_indexes[attribute])
147 |             basic_conditions.append(condition)
148 | 
149 |             if (min_point_first.condition_id == min_point_second.condition_id) and number_of_firts_mins == 1 and number_of_second_mins == 1 and len(points) > 1:
150 |                 min_point_first = min(points, key = attrgetter('value'))
151 |                 points_with_first_min_len = len(points)
152 |                 points = [point for point in points if point.value != min_point_first.value]
153 |                 number_of_firts_mins = points_with_first_min_len - len(points)
154 |             else:
155 |                 min_point_first = min_point_second
156 |                 min_point_first.is_closed = not min_point_second.is_closed
157 | 
158 |         return basic_conditions
159 | 
160 |     
161 |     def _get_rules_with_splitted_conditions(self, basic_conditions, rules: List[ClassificationRule]):
162 |         rules_with_basic_conditions = []
163 |         for rule in rules:
164 |             compoundCondition = self._create_compound_condition_for_rule(basic_conditions, rule)
165 |             rules_with_basic_conditions.append(Rule(compoundCondition, rule.consequence))
166 |         return rules_with_basic_conditions
167 | 
168 |     def _create_compound_condition_for_rule(self,basic_conditions, rule: Rule):
169 |         subconditions = rule.premise.get_subconditions()
170 |         compoundCondition = CompoundConditionWithCombiningOperators()
171 |         for condition_basic in basic_conditions:
172 |             i = 0
173 |             condition_added = False
174 |             while(i<len(subconditions) and not condition_added):
175 |                 condition_from_rule = subconditions[i]
176 |                 if self._check_if_condition_contains_basic_condition(condition_from_rule, condition_basic):
177 |                     compoundCondition.add_subcondition(condition_basic)
178 |                     condition_added = True
179 |                 i += 1
180 |         return compoundCondition
181 | 
182 |     def _check_if_condition_contains_basic_condition(self,condition_from_rule: ElementaryCondition, basic_condition: ElementaryCondition):
183 |         if not condition_from_rule.attribute == basic_condition.attribute:
184 |             return False
185 |         elif condition_from_rule.right == None: #nominal attribute
186 |             if condition_from_rule.left == basic_condition.left:
187 |                 return True
188 |             else:
189 |                 return False
190 |         else:
191 |             if ((condition_from_rule.left <= basic_condition.left) and (condition_from_rule.right >= basic_condition.right)):
192 |                 return True
193 |             else:
194 |                 return False
195 |             
196 | 
197 | class ClassificationConditionImportance(ConditionImportance):
198 |     def __init__(self, rules, data, labels, if_split, measure) -> None:
199 |         self.column_indexes: Dict[str, int] = {column_name:i for i, column_name in enumerate(list(data.columns))}
200 |         super().__init__(rules, data, labels, if_split, measure)
201 | 
202 |     def condition_importances(self):
203 |         rules_by_class = self._split_rules_by_decision_class(self.rules)
204 | 
205 |         condition_importances_for_classes = dict()
206 | 
207 |         for class_name in rules_by_class.keys():
208 |             class_rules = rules_by_class[class_name]
209 |             conditions_with_rules = self._get_conditions_with_rules(class_rules)
210 |             conditions_qualities = self._calculate_conditions_qualities(
211 |                 conditions_with_rules
212 |             )
213 |             condition_importances_for_classes[class_name] = conditions_qualities
214 | 
215 |         conditions_importances = self._calculate_conditions_importances(
216 |             condition_importances_for_classes
217 |         )
218 |         return self._condition_importances_to_DataFrame(conditions_importances)
219 | 
220 |     def _split_rules_by_decision_class(self, rules):
221 |         rules_by_class = dict()
222 | 
223 |         for rule in rules:
224 |             class_name = rule.consequence.left
225 |             if class_name in rules_by_class.keys():
226 |                 class_rules = rules_by_class[class_name]
227 |             else:
228 |                 class_rules = []
229 | 
230 |             class_rules.append(rule)
231 |             rules_by_class[class_name] = class_rules
232 | 
233 |         return rules_by_class
234 | 
235 |     def _calculate_index_simplified(self, condition, rule):
236 |         rule = ClassificationRule(rule.premise, rule.consequence)
237 | 
238 |         rule_conditions = []
239 |         rule_conditions.extend(rule.premise.get_subconditions())
240 |         number_of_conditions = len(rule_conditions)
241 |         rule_conditions.remove(condition)
242 | 
243 |         if self.if_split:
244 |             premise_without_evaluated_condition = CompoundConditionWithCombiningOperators()
245 |         else:
246 |             premise_without_evaluated_condition = CompoundCondition()
247 |         
248 |         premise_without_evaluated_condition.add_subconditions(rule_conditions)
249 |         rule_without_evaluated_condition = ClassificationRule(
250 |             premise_without_evaluated_condition, rule.consequence
251 |         )
252 | 
253 |         factor = 1.0 / number_of_conditions
254 | 
255 |         if len(rule_conditions) == 0:
256 |             return factor * (
257 |                 self._calculateMeasure(rule)
258 |                 - self._calculateMeasure(rule_without_evaluated_condition)
259 |             )
260 |         else:
261 |             premise_with_only_evaluated_condition = CompoundCondition()
262 |             premise_with_only_evaluated_condition.add_subcondition(condition)
263 |             rule_with_only_evaluated_condition = ClassificationRule(
264 |                 premise_with_only_evaluated_condition, rule.consequence
265 |             )
266 | 
267 |             return factor * (
268 |                 self._calculateMeasure(rule)
269 |                 - self._calculateMeasure(rule_without_evaluated_condition)
270 |                 + self._calculateMeasure(rule_with_only_evaluated_condition)
271 |             )
272 | 
273 |     def _calculate_conditions_importances(self, condition_qualities_for_classes):
274 |         conditions_importances = dict()
275 | 
276 |         for evaluated_class in condition_qualities_for_classes.keys():
277 | 
278 |             conditions_importances_for_class = dict()
279 |             conditions_for_evaluated_class = condition_qualities_for_classes[
280 |                 evaluated_class
281 |             ]
282 | 
283 |             for condition in conditions_for_evaluated_class:
284 |                 sum = condition.quality
285 |                 """ 
286 |                 for class_name in condition_qualities_for_classes.keys():
287 |                     if class_name != evaluated_class:
288 |                         conditions_from_other_class = condition_qualities_for_classes[
289 |                             class_name
290 |                         ]
291 |                         evaluated_condition_from_other_class = list(
292 |                             filter(
293 |                                 lambda cnd: cnd.condition == condition.condition,
294 |                                 conditions_from_other_class,
295 |                             )
296 |                         )
297 |                         if len(evaluated_condition_from_other_class) > 0:
298 |                             sum -= evaluated_condition_from_other_class[0].quality
299 |                 """
300 |                 conditions_importances_for_class[condition.condition] = sum
301 | 
302 |             conditions_importances[evaluated_class] = dict(
303 |                 sorted(
304 |                     conditions_importances_for_class.items(),
305 |                     key=lambda item: item[1],
306 |                     reverse=True,
307 |                 )
308 |             )
309 | 
310 |         return conditions_importances
311 | 
312 |     def _condition_importances_to_DataFrame(self, condition_importances):
313 |         importances_df = pd.DataFrame()
314 |         for class_name in condition_importances.keys():
315 |             importances_df_tmp = pd.DataFrame()
316 |             importances_df_tmp[class_name + " | conditions_names"] = pd.Series(
317 |                 [str(cnd) for cnd in condition_importances[class_name].keys()]
318 |             )
319 |             importances_df_tmp[class_name + " | importances"] = pd.Series(
320 |                 condition_importances[class_name].values()
321 |             )
322 |             importances_df = pd.concat(
323 |                 [importances_df, importances_df_tmp], ignore_index=False, axis=1
324 |             )
325 |         return importances_df.replace(np.nan, "-")
326 | 
327 |     def _get_rules_with_splitted_conditions(self, basic_conditions, rules: List[ClassificationRule]):
328 |         rules_with_basic_conditions = []
329 |         for rule in rules:
330 |             compoundCondition = self._create_compound_condition_for_rule(basic_conditions, rule)
331 |             rules_with_basic_conditions.append(ClassificationRule(compoundCondition, rule.consequence))
332 |         return rules_with_basic_conditions
333 | 
334 | 
335 | 
class RegressionConditionImportance(ConditionImportance):
    """Condition-importance calculator that wraps rules as ``RegressionRule`` objects."""

    def __init__(self, rules, data, labels, if_split, measure) -> None:
        # Map every column name of ``data`` to its position; this is assigned
        # before the base initializer runs.
        column_names = list(data.columns)
        self.column_indexes: Dict[str, int] = {
            name: position for position, name in enumerate(column_names)
        }
        super().__init__(rules, data, labels, if_split, measure)

    def condition_importances(self):
        """Evaluate all conditions of ``self.rules`` and rank them by quality.

        Returns whatever ``_condition_importances_to_DataFrame`` builds from
        the ``condition -> quality`` mapping sorted by descending quality.
        """
        evaluations = self._calculate_conditions_qualities(
            self._get_conditions_with_rules(self.rules)
        )

        # When a condition is evaluated more than once the last quality wins.
        quality_by_condition = {
            evaluation.condition: evaluation.quality for evaluation in evaluations
        }
        ranked = dict(
            sorted(
                quality_by_condition.items(),
                key=lambda entry: entry[1],
                reverse=True,
            )
        )

        return self._condition_importances_to_DataFrame(ranked)

    def _calculate_index_simplified(self, condition, rule):
        """Score ``condition`` inside ``rule``.

        With ``M`` being ``self._calculateMeasure`` and ``k`` the number of
        subconditions of the rule premise, the result is
        ``(M(rule) - M(rule without condition)) / k`` when the condition is the
        only one, and ``(M(rule) - M(rule without condition)
        + M(rule with only condition)) / k`` otherwise.

        Raises ``ValueError`` if ``condition`` is not a subcondition of the
        rule premise.
        """
        rule = RegressionRule(rule.premise, rule.consequence)

        remaining = list(rule.premise.get_subconditions())
        total_conditions = len(remaining)
        remaining.remove(condition)

        # Split rules need the premise type that keeps combining operators.
        premise_type = (
            CompoundConditionWithCombiningOperators if self.if_split else CompoundCondition
        )
        reduced_premise = premise_type()
        reduced_premise.add_subconditions(remaining)
        reduced_rule = RegressionRule(reduced_premise, rule.consequence)

        weight = 1.0 / total_conditions

        if not remaining:
            return weight * (
                self._calculateMeasure(rule) - self._calculateMeasure(reduced_rule)
            )

        single_premise = CompoundCondition()
        single_premise.add_subcondition(condition)
        single_rule = RegressionRule(single_premise, rule.consequence)

        full_quality = self._calculateMeasure(rule)
        reduced_quality = self._calculateMeasure(reduced_rule)
        single_quality = self._calculateMeasure(single_rule)
        return weight * (full_quality - reduced_quality + single_quality)

    def _get_rules_with_splitted_conditions(self, basic_conditions, rules: List[RegressionRule]):
        """Rebuild every rule as a ``RegressionRule`` over the basic conditions.

        The premise of each new rule comes from
        ``_create_compound_condition_for_rule``; the consequence is reused.
        """
        return [
            RegressionRule(
                self._create_compound_condition_for_rule(basic_conditions, rule),
                rule.consequence,
            )
            for rule in rules
        ]
404 | 
class SurvivalConditionImportance(ConditionImportance):
    """Condition-importance calculator for survival rules, scored with a log-rank test."""

    def __init__(self, rules, data, labels, if_split, measure) -> None:
        # Column name -> position lookup, assigned before the base initializer runs.
        self.column_indexes: Dict[str, int] = {
            column_name: i for i, column_name in enumerate(list(data.columns))
        }
        super().__init__(rules, data, labels, if_split, measure)
        # lifelines is imported only when this class is instantiated.
        lifelines = importlib.import_module("lifelines.statistics")
        self.logrank_test = getattr(lifelines, "logrank_test")

    def condition_importances(self):
        """Evaluate all conditions of ``self.rules`` and rank them by quality.

        Returns whatever ``_condition_importances_to_DataFrame`` builds from
        the ``condition -> quality`` mapping sorted by descending quality.
        """
        conditions_with_rules = self._get_conditions_with_rules(self.rules)
        conditions_qualities = self._calculate_conditions_qualities(
            conditions_with_rules
        )

        # When a condition is evaluated more than once the last quality wins.
        conditions_importances = {
            evaluation.condition: evaluation.quality
            for evaluation in conditions_qualities
        }
        conditions_importances = dict(
            sorted(
                conditions_importances.items(), key=lambda item: item[1], reverse=True
            )
        )

        return self._condition_importances_to_DataFrame(conditions_importances)

    def _calculate_index_simplified(self, condition, rule):
        """Score ``condition`` inside ``rule``.

        With ``M`` being ``self._calculateMeasure`` and ``k`` the number of
        subconditions of the rule premise, the result is
        ``(M(rule) - M(rule without condition)) / k`` when the condition is the
        only one, and ``(M(rule) - M(rule without condition)
        + M(rule with only condition)) / k`` otherwise.

        Raises ``ValueError`` if ``condition`` is not a subcondition of the
        rule premise.
        """
        rule = SurvivalRule(rule.premise, rule.consequence)

        rule_conditions = list(rule.premise.get_subconditions())
        number_of_conditions = len(rule_conditions)
        rule_conditions.remove(condition)

        # Split rules need the premise type that keeps combining operators.
        if self.if_split:
            premise_without_evaluated_condition = CompoundConditionWithCombiningOperators()
        else:
            premise_without_evaluated_condition = CompoundCondition()

        premise_without_evaluated_condition.add_subconditions(rule_conditions)
        rule_without_evaluated_condition = SurvivalRule(
            premise_without_evaluated_condition, rule.consequence
        )

        factor = 1.0 / number_of_conditions

        if len(rule_conditions) == 0:
            return factor * (
                self._calculateMeasure(rule)
                - self._calculateMeasure(rule_without_evaluated_condition)
            )

        premise_with_only_evaluated_condition = CompoundCondition()
        premise_with_only_evaluated_condition.add_subcondition(condition)
        rule_with_only_evaluated_condition = SurvivalRule(
            premise_with_only_evaluated_condition, rule.consequence
        )

        return factor * (
            self._calculateMeasure(rule)
            - self._calculateMeasure(rule_without_evaluated_condition)
            + self._calculateMeasure(rule_with_only_evaluated_condition)
        )

    def _calculateMeasure(self, rule):
        """Return the log-rank test statistic of covered vs. uncovered examples.

        The examples covered by ``rule`` (as reported by ``rule.covers``) form
        group A, all remaining row numbers of ``self.data`` form group B; the
        ``"survival_time"`` column supplies the durations and ``self.labels``
        the event indicators.
        """
        # Only the covered indices are needed; in survival rules all covered
        # examples are classified as positives.
        _, _, _, _, covered_indices = rule.covers(self.dataset, return_positives=True)

        # A set makes the complement a linear pass instead of rescanning the
        # covered sequence for every row.
        covered_lookup = set(covered_indices)
        uncovered_indices = [
            row for row in range(self.data.shape[0]) if row not in covered_lookup
        ]

        # NOTE(review): row numbers are matched against index labels below, so
        # this presumably expects a default 0..n-1 index — confirm with callers.
        survival_time = self.data["survival_time"]
        results = self.logrank_test(
            survival_time[survival_time.index.isin(covered_indices)],
            survival_time[survival_time.index.isin(uncovered_indices)],
            event_observed_A=self.labels[self.labels.index.isin(covered_indices)],
            event_observed_B=self.labels[self.labels.index.isin(uncovered_indices)],
        )

        return results.test_statistic

    def _get_rules_with_splitted_conditions(self, basic_conditions, rules: List[SurvivalRule]):
        """Rebuild every rule as a ``SurvivalRule`` over the basic conditions.

        The premise of each new rule comes from
        ``_create_compound_condition_for_rule``; the consequence is reused.
        """
        rules_with_basic_conditions = []
        for rule in rules:
            compoundCondition = self._create_compound_condition_for_rule(basic_conditions, rule)
            rules_with_basic_conditions.append(SurvivalRule(compoundCondition, rule.consequence))
        return rules_with_basic_conditions
499 | 
500 | 
501 | 
class ConditionEvaluation:
    """Pairs a condition with the quality value computed for it."""

    def __init__(self, condition, quality) -> None:
        # Both values are stored as given, without validation or copying.
        self.quality = quality
        self.condition = condition
506 | 
507 | 
class Point:
    """Plain record holding a condition id, a value, a side label and a closedness flag."""

    def __init__(self, condition_id: int, value: float, side: str, is_closed: bool) -> None:
        # All four arguments are stored unchanged under the same names.
        self.condition_id, self.value = condition_id, value
        self.side, self.is_closed = side, is_closed


--------------------------------------------------------------------------------
/rulexai/models.py:
--------------------------------------------------------------------------------
  1 | from rulekit._operator import BaseOperator
  2 | import numpy as np
  3 | from .rule import Rule, CompoundCondition, ElementaryCondition
  4 | from rulekit import RuleKit
  5 | from rulekit.classification import RuleClassifier
  6 | from rulekit.regression import RuleRegressor
  7 | from rulekit.params import Measures
  8 | import pandas as pd
  9 | from typing import Dict
 10 | import importlib
 11 | 
 12 | class Model:
 13 |     def __init__(
 14 |         self, model, feature_names=None, class_names=None, label_name=None
 15 |     ) -> None:
 16 |         self.model = model
 17 |         self.rules = self.get_rules(self.model, feature_names, class_names, label_name)
 18 |         self.column_indexes: Dict[str, int] = {column_name:i for i, column_name in enumerate(list(feature_names))}
 19 | 
 20 | 
 21 |     def _map_rules_from_RuleKit(self, rules):
 22 |         preprocessed_Rules = []
 23 |         for rule in rules:
 24 |                                       
 25 |             preprocessed_Rules.append(self._map_rule_from_RuleKit(rule.__str__()))
 26 |         return preprocessed_Rules
 27 | 
 28 |     def _map_rules_from_sklearn(self, rules):
 29 |         preprocessed_Rules = []
 30 |         for rule in rules:
 31 |                                       
 32 |             preprocessed_Rules.append(self._map_rule_from_sklearn(rule.__str__()))
 33 |         return preprocessed_Rules
 34 | 
 35 |     def _map_rule_from_sklearn(self, rule):
 36 |         rule = rule[3:]
 37 |         premise, consequence = rule.split(" THEN ")
 38 |         conditions = premise.split(" AND ")
 39 |         compoundCondition = CompoundCondition()
 40 | 
 41 |         for condition in conditions:
 42 |             if "<=" in condition:
 43 |                 attribute, value = condition.split(" <= ")
 44 |                 elementaryCondition = ElementaryCondition(
 45 |                     attribute, ElementaryCondition.minus_inf, float(value), False, True, column_index=self.column_indexes[attribute]
 46 |                                            
 47 |                 )
 48 |             elif "<" in condition:
 49 |                 attribute, value = condition.split(" < ")
 50 |                 elementaryCondition = ElementaryCondition(
 51 |                     attribute, ElementaryCondition.minus_inf, float(value), False, False, column_index=self.column_indexes[attribute]
 52 |                                             
 53 |                 )
 54 |             elif ">=" in condition:
 55 |                 attribute, value = condition.split(" >= ")
 56 |                 elementaryCondition = ElementaryCondition(
 57 |                                      
 58 |                     attribute, float(value), ElementaryCondition.inf, True, False, column_index=self.column_indexes[attribute]
 59 |                 )
 60 |             elif ">" in condition:
 61 |                 attribute, value = condition.split(" > ")
 62 |                 elementaryCondition = ElementaryCondition(
 63 |                                      
 64 |                     attribute, float(value), ElementaryCondition.inf, False, False, column_index=self.column_indexes[attribute]
 65 |                 )
 66 |             else:
 67 |                 attribute, value = condition.split(" = ")
 68 |                 elementaryCondition = ElementaryCondition(attribute, str(value), column_index=self.column_indexes[attribute])
 69 |                                           
 70 | 
 71 |             compoundCondition.add_subcondition(elementaryCondition)
 72 | 
 73 |         consequence_att, consequence_value = consequence.split(" = ")
 74 |         consequence_value = consequence_value[1:-1]
 75 |         consequence = ElementaryCondition(consequence_att, consequence_value, column_index=self.column_indexes[attribute])
 76 | 
 77 |         return self._preprocessRule(Rule(compoundCondition, consequence))
 78 | 
 79 |     def _map_rules_from_list(self, rules):
 80 |         preprocessed_Rules = []
 81 |         for rule in rules:
 82 |             preprocessed_Rules.append(self._map_string_rule(rule))
 83 |         return preprocessed_Rules
 84 | 
 85 |     def _map_string_rule(self, rule):
 86 |         rule = rule[3:]
 87 |         premise, consequence = rule.split(" THEN ")
 88 |         conditions = premise.split(" AND ")
 89 |         compoundCondition = CompoundCondition()
 90 | 
 91 |         for condition in conditions:
 92 |             attribute, valueset = condition.split(" = ")
 93 |             if "," in valueset:
 94 |                 left, right = valueset.split(",")
 95 |                 leftClosed = False if left[0] == "(" else True
 96 |                 rightClosed = False if right[-1:] == ")" else True
 97 |                 left = left[1:]
 98 |                 right = right[1:-1]
 99 |                 elementaryCondition = ElementaryCondition(
100 |                                                   
101 |                     attribute, float(left), float(right), leftClosed, rightClosed, column_index=self.column_indexes[attribute]
102 |                 )
103 |             else:
104 |                 value = valueset[1:-1]
105 |                 elementaryCondition = ElementaryCondition(attribute, str(value), column_index=self.column_indexes[attribute])
106 |                                           
107 | 
108 |             compoundCondition.add_subcondition(elementaryCondition)
109 | 
110 |         consequence_att, consequence_value = consequence.split(" = ")
111 |         consequence_value = consequence_value[1:-1]
112 |         consequence = ElementaryCondition(consequence_att, consequence_value, column_index=self.column_indexes[attribute])
113 | 
114 |         return self._preprocessRule(Rule(compoundCondition, consequence))
115 | 
116 |     def _preprocessRule(self, rule):
117 |         conditions_for_attributes = {}
118 | 
119 |         for condition in rule.premise.subconditions:
120 |             attr = condition.attribute
121 | 
122 |             if attr in conditions_for_attributes.keys():
123 |                 old_condition = conditions_for_attributes[attr]
124 |                 condition = old_condition.get_intersection(condition)
125 |             conditions_for_attributes[attr] = condition
126 | 
127 |         premise = CompoundCondition()
128 |         premise.add_subconditions(conditions_for_attributes.values())
129 |         rule.premise = premise
130 | 
131 |         return rule
132 | 
133 |     def _get_rules_from_DT(self, tree, feature_names, class_names, label_name):
134 |         tree_ = tree.tree_
135 |         feature_name = [
136 |             feature_names[i] if i != self._tree.TREE_UNDEFINED else "undefined!"
137 |             for i in tree_.feature
138 |         ]
139 |         paths = []
140 |         path = []
141 | 
142 |         def recurse(node, path, paths):
143 |             if tree_.feature[node] != self._tree.TREE_UNDEFINED:
144 |                 name = feature_name[node]
145 |                 threshold = tree_.threshold[node]
146 |                 p1, p2 = list(path), list(path)
147 |                 p1 += [f"{name} <= {threshold}"]
148 |                 recurse(tree_.children_left[node], p1, paths)
149 |                 p2 += [f"{name} > {threshold}"]
150 |                 recurse(tree_.children_right[node], p2, paths)
151 |             else:
152 |                 path += [(tree_.value[node], tree_.n_node_samples[node])]
153 |                 paths += [path]
154 | 
155 |         recurse(0, path, paths)
156 | 
157 |         # sort by samples count
158 |         samples_count = [p[-1][1] for p in paths]
159 |         ii = list(np.argsort(samples_count))
160 |         paths = [paths[i] for i in reversed(ii)]
161 | 
162 |         rules = []
163 |         for path in paths:
164 |             rule = "IF "
165 |             for p in path[:-1]:
166 |                 if rule != "IF ":
167 |                     rule += " AND "
168 |                 rule += str(p)
169 |             rule += " THEN "
170 |             if class_names is None:
171 |                 rule += (
172 |                                         
173 |                     f"{label_name} = " + "{" + str(path[-1][0][0][0]) + "}"
174 |                 )
175 |             else:
176 |                 classes = path[-1][0][0]
177 |                 l = np.argmax(classes)
178 |                 rule += f"{label_name} = " + "{" + f"{class_names[l]}" + "}"
179 |             rules += [rule]
180 | 
181 |         return rules
182 | 
183 | 
class ClassificationModel(Model):
    """Rule extraction for classifiers: RuleKit, rule-string lists, sklearn trees, Orange."""

    def __init__(
        self, model, feature_names=None, class_names=None, label_name=None
    ) -> None:
        # Assigned before the base initializer runs, because that initializer
        # triggers ``get_rules`` and the parsers read this lookup.
        self.column_indexes: Dict[str, int] = {column_name:i for i, column_name in enumerate(list(feature_names))}
        super().__init__(
            model,
            feature_names=feature_names,
            class_names=class_names,
            label_name=label_name,
        )

    def get_rules(self, model, feature_names=None, class_names=None, label_name=None):
        """Dispatch on the model type and return the parsed rules.

        Checked in order: RuleKit ``BaseOperator``, ``list`` of rule strings,
        sklearn ``BaseDecisionTree``, Orange ``_RuleClassifier``. sklearn and
        Orange are imported only when the earlier checks did not match.
        Returns ``None`` when no check matches.
        """
        if isinstance(model, BaseOperator):
            return self._map_rules_from_RuleKit(model.model.rules)
        if isinstance(model, list):
            return self._map_rules_from_list(model)
        sklearn_tree_module = importlib.import_module("sklearn.tree")
        BaseDecisionTree = getattr(sklearn_tree_module, "BaseDecisionTree")
        if isinstance(model, BaseDecisionTree):
            # ``_get_rules_from_DT`` needs the private ``_tree`` module for
            # its TREE_UNDEFINED sentinel.
            self._tree = getattr(sklearn_tree_module, "_tree")
            rules = self._get_rules_from_DT(
                self.model, feature_names, class_names, label_name
            )
            return self._map_rules_from_sklearn(rules)
        orange_module = importlib.import_module("Orange.classification.rules")
        _RuleClassifier = getattr(orange_module, "_RuleClassifier")
        if isinstance(model, _RuleClassifier):
            return self._map_rules_from_Orange(model.rule_list)

    def _map_rule_from_RuleKit(self, rule):
        """Parse ``"IF a = (1, 2> AND b = {x} THEN label = {value}"`` into a ``Rule``.

        A value set containing a comma is read as a numeric interval whose
        bound is open when the bracket is ``(`` / ``)`` and closed otherwise;
        any other value set is a single value with its first and last
        character stripped.
        """
        rule = rule[3:]  # drop the leading "IF "
        premise, consequence = rule.split(" THEN ")
        conditions = premise.split(" AND ")

        compoundCondition = CompoundCondition()

        for condition in conditions:
            attribute, valueset = condition.split(" = ")
            if "," in valueset:
                left, right = valueset.split(",")
                leftClosed = left[0] != "("
                rightClosed = right[-1:] != ")"
                left = left[1:]
                right = right[1:-1]
                elementaryCondition = ElementaryCondition(
                    attribute, float(left), float(right), leftClosed, rightClosed, column_index=self.column_indexes[attribute]
                )
            else:
                value = valueset[1:-1]
                elementaryCondition = ElementaryCondition(attribute, str(value), column_index=self.column_indexes[attribute])

            compoundCondition.add_subcondition(elementaryCondition)

        consequence_att, consequence_value = consequence.split(" = ")
        consequence_value = consequence_value[1:-1]  # strip the surrounding braces
        # NOTE(review): the consequence reuses the column index of the last
        # premise attribute — confirm this is intended.
        consequence = ElementaryCondition(consequence_att, consequence_value, column_index=self.column_indexes[attribute])

        return Rule(compoundCondition, consequence)

    def _map_rules_from_Orange(self, rules):
        """Convert Orange rules, skipping any whose string form contains ``"TRUE"``."""
        preprocessed_Rules = []
        for rule in rules:
            rule_text = str(rule)
            if "TRUE" not in rule_text:
                preprocessed_Rules.append(self._map_rule_from_Orange(rule_text))
        return preprocessed_Rules

    def _map_rule_from_Orange(self, rule):
        """Parse an Orange rule string such as ``"IF a<=1.0 AND b==x THEN y=c "``.

        Operators are written without surrounding spaces. Comparison
        conditions become one-sided intervals; equality conditions (``==``, or
        a single ``=``) keep their value as a string. The parsed rule is
        passed through ``_preprocessRule`` before it is returned.
        """
        rule = rule[3:]  # drop the leading "IF "
        premise, consequence = rule.split(" THEN ")
        conditions = premise.split(" AND ")
        compoundCondition = CompoundCondition()

        for condition in conditions:
            if "<=" in condition:
                attribute, value = condition.split("<=")
                elementaryCondition = ElementaryCondition(
                    attribute, ElementaryCondition.minus_inf, float(value), False, True, column_index=self.column_indexes[attribute]
                )
            elif "<" in condition:
                attribute, value = condition.split("<")
                elementaryCondition = ElementaryCondition(
                    attribute, ElementaryCondition.minus_inf, float(value), False, False, column_index=self.column_indexes[attribute]
                )
            elif ">=" in condition:
                attribute, value = condition.split(">=")
                elementaryCondition = ElementaryCondition(
                    attribute, float(value), ElementaryCondition.inf, True, False, column_index=self.column_indexes[attribute]
                )
            elif ">" in condition:
                attribute, value = condition.split(">")
                elementaryCondition = ElementaryCondition(
                    attribute, float(value), ElementaryCondition.inf, False, False, column_index=self.column_indexes[attribute]
                )
            elif "==" in condition:
                # Orange prints nominal selectors with a double equals sign;
                # splitting on a single "=" would yield three parts and fail.
                attribute, value = condition.split("==", 1)
                elementaryCondition = ElementaryCondition(attribute, str(value), column_index=self.column_indexes[attribute])
            elif "=" in condition:
                attribute, value = condition.split("=")
                elementaryCondition = ElementaryCondition(attribute, str(value), column_index=self.column_indexes[attribute])
            else:
                elementaryCondition = ElementaryCondition("all", "TRUE")

            compoundCondition.add_subcondition(elementaryCondition)

        consequence_att, consequence_value = consequence.split("=")
        consequence_value = consequence_value[:-1]  # drop the trailing character
        # NOTE(review): the consequence reuses the column index of the last
        # premise attribute — confirm this is intended.
        consequence = ElementaryCondition(consequence_att, consequence_value, column_index=self.column_indexes[attribute])

        return self._preprocessRule(Rule(compoundCondition, consequence))
300 | 
301 | 
class RegressionModel(Model):
    """Rule extraction for regressors: RuleKit, rule-string lists and sklearn trees."""

    def __init__(self, model, feature_names=None, label_name=None) -> None:
        # Assigned before the base initializer runs, because that initializer
        # triggers ``get_rules`` and the parsers read this lookup.
        self.column_indexes: Dict[str, int] = {
            name: position for position, name in enumerate(list(feature_names))
        }
        super().__init__(
            model, feature_names=feature_names, class_names=None, label_name=label_name
        )

    def get_rules(self, model, feature_names=None, class_names=None, label_name=None):
        """Dispatch on the model type and return the parsed rules.

        Checked in order: RuleKit ``BaseOperator``, ``list`` of rule strings,
        sklearn ``BaseDecisionTree`` (sklearn is imported only when the first
        two checks did not match). Returns ``None`` when no check matches.
        """
        if isinstance(model, BaseOperator):
            return self._map_rules_from_RuleKit(model.model.rules)
        if isinstance(model, list):
            return self._map_rules_from_list(model)

        tree_module = importlib.import_module("sklearn.tree")
        decision_tree_type = getattr(tree_module, "BaseDecisionTree")
        if not isinstance(model, decision_tree_type):
            return None

        # ``_get_rules_from_DT`` needs the private ``_tree`` module.
        self._tree = getattr(tree_module, "_tree")
        textual_rules = self._get_rules_from_DT(
            self.model, feature_names, class_names, label_name
        )
        return self._map_rules_from_sklearn(textual_rules)

    def _map_rule_from_RuleKit(self, rule):
        """Parse ``"IF a = (1, 2> THEN label = {value} [range]"`` into a ``Rule``.

        The consequence must consist of exactly two space-separated parts; the
        first one (without its first and last character) is converted to
        ``float``, the second one is ignored.
        """
        premise_text, consequence_text = rule[3:].split(" THEN ")
        clauses = premise_text.split(" AND ")
        lookup = self.column_indexes

        premise = CompoundCondition()
        for clause in clauses:
            attribute, valueset = clause.split(" = ")
            if "," in valueset:
                lower, upper = valueset.split(",")
                # "(" / ")" mark an open bound, anything else a closed one.
                lower_closed = lower[0] != "("
                upper_closed = upper[-1:] != ")"
                parsed = ElementaryCondition(
                    attribute,
                    float(lower[1:]),
                    float(upper[1:-1]),
                    lower_closed,
                    upper_closed,
                    column_index=lookup[attribute],
                )
            else:
                parsed = ElementaryCondition(
                    attribute, str(valueset[1:-1]), column_index=lookup[attribute]
                )
            premise.add_subcondition(parsed)

        consequence_att, consequence_value = consequence_text.split(" = ")
        value_part, _range_part = consequence_value.split(" ")
        # The consequence reuses the column index of the last premise attribute.
        conclusion = ElementaryCondition(
            consequence_att, float(value_part[1:-1]), column_index=lookup[attribute]
        )

        return Rule(premise, conclusion)

    def _map_rule_from_sklearn(self, rule):
        """Parse ``"IF a <= 1 AND b > 2 THEN label = {value}"`` into a ``Rule``.

        Comparison conditions become one-sided intervals, ``" = "`` conditions
        keep their value as a string, and the consequence value is converted
        to ``float``. The parsed rule is passed through ``_preprocessRule``.
        """
        premise_text, consequence_text = rule[3:].split(" THEN ")
        clauses = premise_text.split(" AND ")
        lookup = self.column_indexes

        premise = CompoundCondition()
        for clause in clauses:
            # Two-character operators are tested first because "<" and ">"
            # are substrings of them.
            if "<=" in clause:
                attribute, value = clause.split(" <= ")
                parsed = ElementaryCondition(
                    attribute, ElementaryCondition.minus_inf, float(value),
                    False, True, column_index=lookup[attribute],
                )
            elif "<" in clause:
                attribute, value = clause.split(" < ")
                parsed = ElementaryCondition(
                    attribute, ElementaryCondition.minus_inf, float(value),
                    False, False, column_index=lookup[attribute],
                )
            elif ">=" in clause:
                attribute, value = clause.split(" >= ")
                parsed = ElementaryCondition(
                    attribute, float(value), ElementaryCondition.inf,
                    True, False, column_index=lookup[attribute],
                )
            elif ">" in clause:
                attribute, value = clause.split(" > ")
                parsed = ElementaryCondition(
                    attribute, float(value), ElementaryCondition.inf,
                    False, False, column_index=lookup[attribute],
                )
            else:
                attribute, value = clause.split(" = ")
                parsed = ElementaryCondition(
                    attribute, str(value), column_index=lookup[attribute]
                )
            premise.add_subcondition(parsed)

        consequence_att, consequence_value = consequence_text.split(" = ")
        # The consequence reuses the column index of the last premise attribute.
        conclusion = ElementaryCondition(
            consequence_att, float(consequence_value[1:-1]), column_index=lookup[attribute]
        )

        return self._preprocessRule(Rule(premise, conclusion))

    def _map_string_rule(self, rule):
        """Parse ``"IF a = (1, 2> AND b = {x} THEN label = {value}"`` into a ``Rule``.

        Same value-set syntax as the RuleKit parser; the consequence value
        (without its first and last character) is converted to ``float``. The
        parsed rule is passed through ``_preprocessRule``.
        """
        premise_text, consequence_text = rule[3:].split(" THEN ")
        clauses = premise_text.split(" AND ")
        lookup = self.column_indexes

        premise = CompoundCondition()
        for clause in clauses:
            attribute, valueset = clause.split(" = ")
            if "," in valueset:
                lower, upper = valueset.split(",")
                # "(" / ")" mark an open bound, anything else a closed one.
                lower_closed = lower[0] != "("
                upper_closed = upper[-1:] != ")"
                parsed = ElementaryCondition(
                    attribute,
                    float(lower[1:]),
                    float(upper[1:-1]),
                    lower_closed,
                    upper_closed,
                    column_index=lookup[attribute],
                )
            else:
                parsed = ElementaryCondition(
                    attribute, str(valueset[1:-1]), column_index=lookup[attribute]
                )
            premise.add_subcondition(parsed)

        consequence_att, consequence_value = consequence_text.split(" = ")
        # The consequence reuses the column index of the last premise attribute.
        conclusion = ElementaryCondition(
            consequence_att, float(consequence_value[1:-1]), column_index=lookup[attribute]
        )

        return self._preprocessRule(Rule(premise, conclusion))
434 | 
435 | 
class SurvivalModel(Model):
    """Rule model wrapper for survival analysis.

    Rules can come from a RuleKit operator, from a plain list, or from an
    ``sksurv.tree.SurvivalTree``; see :meth:`get_rules`.
    """

    def __init__(self, model, feature_names=None, survival_status_name=None) -> None:
        """Index the feature columns and delegate to the base constructor.

        ``feature_names`` must be iterable: the column index map is built from
        it, so the default ``None`` raises ``TypeError`` here.
        """
        # Built before the base constructor runs.
        # NOTE(review): presumably the base constructor triggers rule mapping,
        # which reads this map; verify against ``Model.__init__``.
        self.column_indexes: Dict[str, int] = {
            column_name: i for i, column_name in enumerate(list(feature_names))
        }
        super().__init__(
            model,
            feature_names=feature_names,
            class_names=None,
            label_name=survival_status_name,
        )

    def get_rules(self, model, feature_names=None, class_names=None, label_name=None):
        """Return the rules extracted from ``model``.

        Supported inputs are a RuleKit ``BaseOperator``, a ``list`` and an
        ``sksurv.tree.SurvivalTree``. For any other object the method falls
        through and implicitly returns ``None``.

        ``sksurv`` and ``sklearn`` are imported lazily, only when the model is
        neither a RuleKit operator nor a list.
        """
        if isinstance(model, BaseOperator):
            return self._map_rules_from_RuleKit(model.model.rules)
        if isinstance(model, list):
            return self._map_rules_from_list(model)
        sksurv_tree_module = importlib.import_module("sksurv.tree")
        SurvivalTree = getattr(sksurv_tree_module, "SurvivalTree")
        if isinstance(model, SurvivalTree):
            sklearn_tree_module = importlib.import_module("sklearn.tree")
            self._tree = getattr(sklearn_tree_module, "_tree")
            rules = self._get_rules_from_DT(
                self.model, feature_names, class_names, label_name
            )
            return self._map_rules_from_sklearn(rules)

    def _map_rule_from_RuleKit(self, rule):
        """Parse one textual ``IF ... THEN ...`` rule into a ``Rule``.

        Only the premise is parsed. The consequence is always the placeholder
        condition ``survival_curve`` with an empty value and column index 0.

        Raises ``KeyError`` when a premise attribute is missing from
        ``self.column_indexes`` and ``ValueError`` when the text does not
        follow the expected layout.
        """
        rule = rule[3:]  # drop the leading "IF "
        # The consequence text is split off but not used.
        premise, consequence = rule.split(" THEN ")
        compoundCondition = CompoundCondition()

        for condition in premise.split(" AND "):
            # Split on the first " = " only, so a nominal value containing
            # " = " no longer breaks the unpacking.
            attribute, valueset = condition.split(" = ", 1)
            column_index = self.column_indexes[attribute]

            # A value wrapped in braces is nominal even when it contains a
            # comma; previously such a value was misread as an interval.
            if "," in valueset and not valueset.startswith("{"):
                left, right = valueset.split(",")
                leftClosed = left[0] != "("
                rightClosed = right[-1:] != ")"
                elementaryCondition = ElementaryCondition(
                    attribute,
                    float(left[1:]),  # strip the opening bracket
                    float(right[1:-1]),  # strip the separator space and closing bracket
                    leftClosed,
                    rightClosed,
                    column_index=column_index,
                )
            else:
                elementaryCondition = ElementaryCondition(
                    attribute, str(valueset[1:-1]), column_index=column_index
                )

            compoundCondition.add_subcondition(elementaryCondition)

        consequence_att = "survival_curve"
        consequence_val = ""
        consequence = ElementaryCondition(consequence_att, consequence_val, column_index=0)

        return Rule(compoundCondition, consequence)
492 | 
493 | 
class BlackBoxModel:
    """Rule-based surrogate fitted to the predictions of another model.

    A RuleKit learner (regressor or classifier, chosen by ``type``) is trained
    on the supplied predictions and then wrapped in the matching rule model.
    """

    def __init__(self, X_model, model_predictions, type) -> None:
        """Create the RuleKit learner.

        Parameters
        ----------
        X_model
            Accepted for interface compatibility; not used by this constructor.
        model_predictions
            Predictions of the explained model. A ``pd.DataFrame`` or an object
            exposing ``.name`` is expected by :meth:`get_rules_model`.
        type : str
            ``"regression"`` selects ``RuleRegressor``; any other value selects
            ``RuleClassifier``.
        """
        RuleKit.init()

        learner_cls = RuleRegressor if type == "regression" else RuleClassifier
        rulekit_model = learner_cls(
            induction_measure=Measures.C2,
            pruning_measure=Measures.C2,
            voting_measure=Measures.C2,
        )
        self.type = type
        self.y = model_predictions
        self.rulekit_model = rulekit_model

    def get_rules_model(self, X_org):
        """Fit the surrogate on ``X_org`` and return it wrapped as a rule model.

        Note that ``X_org`` is modified in place: in every object-typed column
        ``NaN`` is replaced with ``None`` before fitting.

        Returns
        -------
        ``RegressionModel`` when the instance was created with
        ``type == "regression"``, otherwise ``ClassificationModel``.
        """
        if isinstance(self.y, pd.DataFrame):
            label_name = self.y.columns[0]
            y = self.y.to_numpy().reshape(self.y.size)
        else:
            label_name = self.y.name
            y = self.y

        for column in X_org.select_dtypes('object').columns.tolist():
            X_org[column] = X_org[column].replace({np.nan: None})
        self.rulekit_model.fit(X_org, y)

        # The stored ``self.type`` must be tested here. The bare name ``type``
        # is the builtin in this scope, so the old comparison was always False
        # and regression surrogates were wrapped as ClassificationModel.
        if self.type == "regression":
            # NOTE(review): assumes RegressionModel takes
            # (model, feature_names, label_name) like the sibling model
            # classes; the previous call passed label_name in second position.
            model = RegressionModel(self.rulekit_model, X_org.columns, label_name)
        else:
            model = ClassificationModel(
                self.rulekit_model, X_org.columns, np.unique(self.y), label_name
            )

        return model
534 | 
535 | 
536 | 
537 | 
538 | 
539 | 
540 |         
541 | 
542 | 
543 | 
544 | 


--------------------------------------------------------------------------------
/rulexai/reduct.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | 
  3 | 
  4 | 
  5 | class Reduct:
  6 | 
  7 |     def __init__(self) -> None:
  8 |         pass
  9 | 
 10 |     def calculate_POS(self, reduct: pd.DataFrame, y: pd.DataFrame):
 11 |         POS = []
 12 |         
 13 |         reduct["if_duplicated"] = reduct.duplicated(keep=False)
 14 |         positive_region = [reduct.iloc[i,:].to_numpy() for i in range(reduct.shape[0]) if reduct.if_duplicated.iloc[i] == False]
 15 | 
 16 |         reduct["label"] = y
 17 |         df_duplicated = reduct[reduct.if_duplicated == True]
 18 |         df_duplicated = df_duplicated.drop(["if_duplicated"], axis=1)
 19 | 
 20 |         df_duplicated.sort_values(by = df_duplicated.columns.to_list(), inplace = True)
 21 |         df_duplicated.reset_index(inplace= True, drop = True)
 22 | 
 23 |         df_labels = df_duplicated["label"]
 24 |         df_duplicated = df_duplicated.drop(["label"], axis = 1)
 25 | 
 26 |         start_equivalence = 0
 27 |         for i in range(1, df_duplicated.shape[0]):
 28 |             if (not df_duplicated.iloc[i-1,:].equals(df_duplicated.iloc[i,:])) or i == (df_duplicated.shape[0] - 1):
 29 |                 
 30 |                 if i == (df_duplicated.shape[0] - 1):
 31 |                     end_equivalence = i + 1
 32 |                 else: 
 33 |                     end_equivalence = i
 34 |                 
 35 |                 Xj = df_duplicated.iloc[start_equivalence:end_equivalence, :]
 36 |                 Xj = Xj.assign(label = df_labels.iloc[start_equivalence:end_equivalence])
 37 | 
 38 |                 if len(Xj["label"].unique()) == 1:
 39 |                     positive_region.extend([Xj.iloc[i,:].to_numpy() for i in range(Xj.shape[0])])
 40 |                 
 41 |                 start_equivalence = end_equivalence
 42 | 
 43 |         return positive_region
 44 | 
 45 |     
 46 |     def eliminate_irrelevant_attributes(self, R: pd.DataFrame, y: pd.DataFrame):
 47 | 
 48 |         POS_R = self.calculate_POS(R.copy(), y)
 49 |         m = len(POS_R)
 50 |         A = R.columns.to_list()
 51 |         for a in A:
 52 |             POS_R_a = self.calculate_POS(R.drop([a], axis = 1), y)
 53 |             m_a = len(POS_R_a)
 54 |             if m_a == m:
 55 |                 R = R.drop([a], axis = 1)
 56 |         
 57 |         return R
 58 | 
 59 |     def eliminate_irrelevant_attributes_based_on_user_POS(self,R: pd.DataFrame, y: pd.DataFrame, user_POS: float):
 60 | 
 61 |         POS_R = self.calculate_POS(R.copy(), y)
 62 |         all_objects_number = len(R)
 63 |         m = len(POS_R)/all_objects_number
 64 |         
 65 |         if user_POS > m:
 66 |             print("Warning! The set POS is greater than the reduct POS. The reduct will not be limited")
 67 |             return R 
 68 | 
 69 |         A = R.columns.to_list()
 70 |         for a in A:
 71 |             POS_R_a = self.calculate_POS(R.drop([a], axis = 1), y)
 72 |             m_a = len(POS_R_a)/all_objects_number
 73 |             if m_a >= user_POS:
 74 |                 R = R.drop([a], axis = 1)
 75 |         return R
 76 | 
 77 |     # Jonhson's algorithm
 78 |     def semi_minimal_reduct(self, x: pd.DataFrame):
 79 | 
 80 |         repeate = True
 81 |         R = pd.DataFrame()
 82 |         L = [x]
 83 |         A = x.columns.to_list()
 84 | 
 85 |         while(repeate):
 86 | 
 87 |             W_U_R = dict()
 88 |             for a in A:            
 89 |                 W_U_R[a] = 0
 90 |                 for Xi in L:
 91 |                     cardinalities = Xi[a].value_counts()
 92 |                     W_X_i = (pow(cardinalities.sum(), 2) -
 93 |                             sum([pow(xi, 2) for xi in cardinalities]))/2
 94 |                     W_U_R[a] += W_X_i
 95 | 
 96 |             # attribute with the largest W_U_R(a)
 97 |             a = max(W_U_R, key=lambda key: W_U_R[key])
 98 | 
 99 |             A.remove(a)
100 |             R[a] = x[a].copy()
101 | 
102 |             tmp_L = []
103 |             for Xi in L:
104 |                 tmp_L.extend([Xi[Xi[a] == value] for value in Xi[a].unique()])
105 |             L = tmp_L
106 | 
107 |             if W_U_R[a] == 0 or len(A) == 0:
108 |                 repeate = False
109 | 
110 |         return R
111 | 
112 | 
113 |     def get_reduct(self, x: pd.DataFrame, y: pd.DataFrame = None, POS: float = None):
114 |         
115 |         R = self.semi_minimal_reduct(x)
116 |         if y is None:
117 |             return R 
118 | 
119 |         if POS is None:
120 |             R = self.eliminate_irrelevant_attributes(R, y)
121 |         else:
122 |             R = self.eliminate_irrelevant_attributes_based_on_user_POS(R,y,POS)
123 |         return R
124 | 
125 |   


--------------------------------------------------------------------------------
/rulexai/rule.py:
--------------------------------------------------------------------------------
  1 | import enum
  2 | import numpy as np
  3 | from typing import List
  4 | import warnings
  5 | warnings.filterwarnings('ignore')
  6 | 
  7 | # creating enumerations using class
  8 | class LogicalOperator(enum.Enum):
  9 |     conjuction = 1
 10 |     alternative = 2
 11 | 
 12 | 
 13 | class ElementaryCondition:
 14 | 
 15 |     inf = float("inf")
 16 |     minus_inf = float("-inf")
 17 | 
 18 |     def __init__(
 19 |         self,
 20 |         attribute: str,
 21 |         left: float,
 22 |         right: float = None,
 23 |         leftClosed: bool = None,
 24 |         rightClosed: bool = None,
 25 |         column_index: int = None,
 26 |     ) -> None:
 27 |         self.attribute = attribute
 28 |         self.left = left
 29 |         self.right = right
 30 |         self.leftClosed = leftClosed
 31 |         # if right is None its means that attribute is Nominal
 32 |         self.rightClosed = rightClosed
 33 |         self._column_index = column_index
 34 | 
 35 |     def covered_mask(self, X: np.ndarray) -> np.ndarray:
 36 |         if self.right is not None:
 37 |             left_part: np.ndarray = (X[:, self._column_index] >= self.left) if self.leftClosed else (X[:, self._column_index] > self.left)
 38 |             right_part: np.ndarray = (X[:, self._column_index] <= self.right) if self.rightClosed else (X[:, self._column_index] < self.right)
 39 |             return left_part & right_part
 40 |         else:
 41 |             return (X[:, self._column_index] == self.left)
 42 | 
 43 |     def uncovered_mask(self, X: np.ndarray) -> np.ndarray:
 44 |         return np.logical_not(self.covered_mask(X))
 45 | 
 46 |     def negative_covered_mask(self, X: np.ndarray, y: np.ndarray, decision) -> np.ndarray:
 47 |         return self.covered_mask(X) & (y[:] != decision)
 48 | 
 49 |     def positive_covered_mask(self, X: np.ndarray, y: np.ndarray, decision) -> np.ndarray:
 50 |         a =  self.covered_mask(X) & (y[:] == decision)
 51 |         return a
 52 |     def get_intersection(self, other_condition):
 53 |         return ElementaryCondition(
 54 |             self.attribute,
 55 |             np.max([self.left, other_condition.left]),
 56 |             np.min([self.right, other_condition.right]),
 57 |             other_condition.leftClosed
 58 |             if self.left < other_condition.left
 59 |             else self.leftClosed,
 60 |             other_condition.rightClosed
 61 |             if self.right > other_condition.right
 62 |             else self.rightClosed,
 63 |             column_index=self._column_index
 64 |         )
 65 | 
 66 |     def evaluate(self, ex):
 67 |         value = ex[self.attribute]
 68 |         if self.right == None:
 69 |             return str(value) == self.left
 70 |         else:
 71 |             return ((value >= self.left and self.leftClosed) or value > self.left) and (
 72 |                 (value <= self.right and self.rightClosed) or value < self.right
 73 |             )
 74 |     def evaluate_mask(self, X_t: np.ndarray, X: np.ndarray, column_index: int = None):
 75 |         X_t[:, column_index] = self.covered_mask(X)
 76 |         return X_t
 77 | 
 78 |     def __str__(self):
 79 |         if self.right == None:
 80 |             s = "".join(["{", str(self.left), "}"])
 81 |         else:
 82 |             s = "".join(
 83 |                 [
 84 |                     ("<" if self.leftClosed else "("),
 85 |                     ("-inf" if (self.left == self.minus_inf) else str(self.left)),
 86 |                     ", ",
 87 |                     ("inf" if (self.right == self.inf) else str(self.right)),
 88 |                     (">" if self.rightClosed else ")"),
 89 |                 ]
 90 |             )
 91 |         return "".join([self.attribute, " = ", s])
 92 | 
 93 |     def __eq__(self, other):
 94 |         if not isinstance(other, ElementaryCondition):
 95 |             return False
 96 |         return (
 97 |             (self.attribute == other.attribute)
 98 |             and (self.left == other.left)
 99 |             and (self.right == other.right)
100 |             and (self.leftClosed == other.leftClosed)
101 |             and (self.rightClosed == other.rightClosed)
102 |         )
103 | 
104 |     def __hash__(self):
105 |         return hash(
106 |             (self.attribute, self.left, self.right,
107 |              self.leftClosed, self.rightClosed)
108 |         )
109 | 
110 | 
class CompoundCondition:
    """A list of sub-conditions joined by one logical operator.

    The operator defaults to conjunction and can be changed with
    :meth:`set_logical_operator`.
    """

    def __init__(self) -> None:
        self.operator = LogicalOperator.conjuction
        self.subconditions = []

    def _combine(self, masks):
        """Fold a non-empty list of masks with ``&`` (conjunction) or ``|``
        (alternative); the first mask is updated in place and returned."""
        combined = masks[0]
        if self.operator == LogicalOperator.conjuction:
            for mask in masks[1:]:
                combined &= mask
        elif self.operator == LogicalOperator.alternative:
            for mask in masks[1:]:
                combined |= mask
        return combined

    def covered_mask(self, X: np.ndarray) -> np.ndarray:
        """Rows of ``X`` satisfying the compound condition (all ``False`` when
        there are no sub-conditions)."""
        if len(self.subconditions) == 0:
            return np.full(X.shape[0], fill_value=False)
        return self._combine([cnd.covered_mask(X) for cnd in self.subconditions])

    def uncovered_mask(self, X: np.ndarray) -> np.ndarray:
        """Rows of ``X`` NOT satisfying the compound condition.

        This is the exact complement of :meth:`covered_mask`. The previous
        implementation joined the negated sub-masks with the same operator
        that joins the sub-conditions, which by De Morgan's laws is not the
        complement (for a conjunction it only flagged rows failing every
        sub-condition). With no sub-conditions every row is uncovered.
        """
        return np.logical_not(self.covered_mask(X))

    def positive_covered_mask(self, X: np.ndarray, y: np.ndarray, decision) -> np.ndarray:
        """Covered rows whose label equals ``decision``."""
        if len(self.subconditions) == 0:
            return np.full(X.shape[0], fill_value=False)
        return self._combine(
            [cnd.positive_covered_mask(X, y, decision) for cnd in self.subconditions]
        )

    def negative_covered_mask(self, X: np.ndarray, y: np.ndarray, decision) -> np.ndarray:
        """Covered rows whose label differs from ``decision``."""
        if len(self.subconditions) == 0:
            return np.full(X.shape[0], fill_value=False)
        return self._combine(
            [cnd.negative_covered_mask(X, y, decision) for cnd in self.subconditions]
        )

    def add_subcondition(self, cnd: ElementaryCondition):
        self.subconditions.append(cnd)

    def add_subconditions(self, cnds: List[ElementaryCondition]):
        self.subconditions.extend(cnds)

    def get_subconditions(self):
        return self.subconditions

    def set_logical_operator(self, operator: LogicalOperator):
        self.operator = operator

    def __str__(self):
        operator = " AND " if (
            self.operator == LogicalOperator.conjuction) else " OR "
        return operator.join(cnd.__str__() for cnd in self.subconditions)

    def evaluate(self, ex):
        """Evaluate a single example, stopping at the first sub-condition that
        decides the result.

        With no sub-conditions a conjunction is ``True`` and an alternative is
        ``False``.
        """
        for condition in self.subconditions:
            partial = condition.evaluate(ex)
            if self.operator == LogicalOperator.conjuction and partial == False:
                return False
            elif self.operator == LogicalOperator.alternative and partial == True:
                return True
        return True if (self.operator == LogicalOperator.conjuction) else False
202 | 
203 | 
class CompoundConditionWithCombiningOperators(CompoundCondition):
    """Compound condition that groups its elementary conditions by attribute.

    Conditions on the same attribute are collected in an inner
    ``CompoundCondition`` joined by alternative; the per-attribute groups are
    then joined by this object's own operator (conjunction by default).
    """

    def __init__(self) -> None:
        self.operator = LogicalOperator.conjuction
        # attribute name -> CompoundCondition holding that attribute's conditions
        self.subCompoundConditions = dict()

    def _combine_groups(self, masks):
        """Fold a non-empty list of per-attribute masks with ``&`` (conjunction)
        or ``|`` (alternative); the first mask is updated in place and returned."""
        combined = masks[0]
        if self.operator == LogicalOperator.conjuction:
            for mask in masks[1:]:
                combined &= mask
        elif self.operator == LogicalOperator.alternative:
            for mask in masks[1:]:
                combined |= mask
        return combined

    def covered_mask(self, X: np.ndarray) -> np.ndarray:
        """Rows of ``X`` satisfying the condition (all ``False`` when empty)."""
        if len(self.subCompoundConditions) == 0:
            return np.full(X.shape[0], fill_value=False)
        return self._combine_groups(
            [group.covered_mask(X) for group in self.subCompoundConditions.values()]
        )

    def uncovered_mask(self, X: np.ndarray) -> np.ndarray:
        """Rows of ``X`` NOT satisfying the condition.

        This is the exact complement of :meth:`covered_mask`. The previous
        implementation joined the groups' uncovered masks with the same
        operator used for coverage, which by De Morgan's laws is not the
        complement. With no groups every row is uncovered.
        """
        return np.logical_not(self.covered_mask(X))

    def positive_covered_mask(self, X: np.ndarray, y: np.ndarray, decision) -> np.ndarray:
        """Covered rows whose label equals ``decision``."""
        if len(self.subCompoundConditions) == 0:
            return np.full(X.shape[0], fill_value=False)
        return self._combine_groups(
            [
                group.positive_covered_mask(X, y, decision)
                for group in self.subCompoundConditions.values()
            ]
        )

    def negative_covered_mask(self, X: np.ndarray, y: np.ndarray, decision) -> np.ndarray:
        """Covered rows whose label differs from ``decision``."""
        if len(self.subCompoundConditions) == 0:
            return np.full(X.shape[0], fill_value=False)
        return self._combine_groups(
            [
                group.negative_covered_mask(X, y, decision)
                for group in self.subCompoundConditions.values()
            ]
        )

    def add_subcondition(self, cnd: ElementaryCondition):
        """Add ``cnd`` to the group of its attribute, creating the group
        (joined by alternative) on first use."""
        attr = cnd.attribute
        if attr in self.subCompoundConditions.keys():
            compoundCondition = self.subCompoundConditions[attr]
        else:
            compoundCondition = CompoundCondition()
            compoundCondition.set_logical_operator(LogicalOperator.alternative)
        compoundCondition.add_subcondition(cnd)
        self.subCompoundConditions[attr] = compoundCondition

    def add_subconditions(self, cnds: List[ElementaryCondition]):
        for condition in cnds:
            self.add_subcondition(condition)

    def get_subconditions(self):
        """Return all elementary conditions, flattened across the groups."""
        all_conditions_list = []
        for compundCondition in self.subCompoundConditions.values():
            all_conditions_list.extend(compundCondition.get_subconditions())
        return all_conditions_list

    def set_logical_operator(self, operator: LogicalOperator):
        self.operator = operator

    def __str__(self):
        operator = " AND " if (
            self.operator == LogicalOperator.conjuction) else " OR "
        operator_internal = " OR " if (
            self.operator == LogicalOperator.conjuction) else " AND "

        # Each attribute group is rendered in square brackets.
        groups = [
            "[" + operator_internal.join(
                cnd.__str__() for cnd in compound_condition.get_subconditions()
            ) + "]"
            for compound_condition in self.subCompoundConditions.values()
        ]
        return operator.join(groups)

    def evaluate(self, ex):
        """Evaluate a single example, stopping at the first group that decides
        the result."""
        for compound_condition in self.subCompoundConditions.values():
            partial = compound_condition.evaluate(ex)
            if self.operator == LogicalOperator.conjuction and partial == False:
                return False
            elif self.operator == LogicalOperator.alternative and partial == True:
                return True
        return True if (self.operator == LogicalOperator.conjuction) else False
316 | 
317 | 
class Rule:
    """A premise paired with a consequence, printed as ``IF <premise> THEN <consequence>``."""

    def __init__(
        self, premise: CompoundCondition, consequence: ElementaryCondition
    ) -> None:
        self.premise = premise
        self.consequence = consequence

    def __str__(self) -> str:
        premise_text = self.premise.__str__()
        consequence_text = self.consequence.__str__()
        return "IF " + premise_text + " THEN " + consequence_text
330 | 
331 | 
class ClassificationRule(Rule):
    """Rule whose consequence names a class; ``decision`` is that class value."""

    def __init__(
        self, premise: CompoundCondition, consequence: ElementaryCondition
    ) -> None:
        super().__init__(premise, consequence)
        self.decision = consequence.left

    def covers(self, X_df):
        """Count coverage on a frame whose LAST column holds the labels.

        Labels are compared as strings. Returns ``(p, n, P, N)``: covered rows
        of the decision class, covered rows of other classes, all rows of the
        decision class, and all remaining rows.
        """
        data = X_df.to_numpy()
        labels = data[:, -1].astype(str)
        features = data[:, 0:-1]

        P = len(labels[labels == self.decision])
        N = labels.shape[0] - P

        positive_mask = self.premise.positive_covered_mask(features, labels, self.decision)
        p = len(features[positive_mask])
        negative_mask = self.premise.negative_covered_mask(features, labels, self.decision)
        n = len(features[negative_mask])

        return p, n, P, N
351 | 
352 | 
class RegressionRule(Rule):
    """Rule with a numeric consequence; the label column is named by
    ``consequence.attribute``."""

    def __init__(
        self, premise: CompoundCondition, consequence: ElementaryCondition
    ) -> None:
        super().__init__(premise, consequence)

    def covers(self, x):
        """Count coverage of the rule on the DataFrame ``x``.

        Examples are "positive" when their label lies within one standard
        deviation of the median label of the covered examples (both statistics
        taken over covered rows only); all others are "negative".

        Note that ``x`` is sorted by the label column IN PLACE, as before.

        Returns
        -------
        ``(p, n, P, N)``: covered positives, covered negatives, all positives,
        all negatives. When nothing is covered the result is
        ``(0, 0, 0, number_of_rows)``.
        """
        label_name = self.consequence.attribute
        x.sort_values(label_name, inplace=True)

        total = x.shape[0]

        # Single pass over the rows: the premise is evaluated once per example
        # and the outcome reused below (it used to be evaluated twice).
        covered = []
        labels = []
        for i in range(total):
            ex = x.iloc[i]
            covered.append(bool(self.premise.evaluate(ex)))
            labels.append(ex[label_name])

        covered_ids = [i for i in range(total) if covered[i]]

        p = 0
        P = 0
        n = len(covered_ids)
        N = total

        if n == 0:
            return p, n, P, N

        sum_y = 0.0
        sum_y2 = 0.0
        for i in covered_ids:
            y = labels[i]
            sum_y += y
            sum_y2 += y * y

        mean_y = sum_y / n
        # Rounding can push the variance of (nearly) constant labels slightly
        # below zero; without the clamp sqrt yields NaN and no example would
        # ever count as positive.
        variance = sum_y2 / n - mean_y * mean_y
        stddev_y = np.sqrt(max(variance, 0.0))

        # Rows are sorted by label, so the middle covered row holds the median.
        median_y = labels[covered_ids[len(covered_ids) // 2]]

        # update positives
        for i in range(total):
            # if inside epsilon
            if np.abs(labels[i] - median_y) <= stddev_y:
                N -= 1
                P += 1

                # if covered
                if covered[i]:
                    n -= 1
                    p += 1

        return p, n, P, N
417 | 
418 | 
class SurvivalRule(Rule):
    """Rule used for survival data; every example counts as positive."""

    def __init__(
        self, premise: CompoundCondition, consequence: ElementaryCondition
    ) -> None:
        super().__init__(premise, consequence)

    def covers(self, x, return_positives: bool = False):
        """Count the rows of ``x`` covered by the premise.

        Returns ``(p, n, P, N)`` where ``P`` is the number of rows, ``p`` the
        number of covered rows and ``n`` / ``N`` are always 0. With
        ``return_positives=True`` the positional indexes of the covered rows
        are appended as a fifth element.
        """
        row_count = x.shape[0]
        covered_rows = [
            idx for idx in range(row_count) if self.premise.evaluate(x.iloc[idx])
        ]

        P, p = row_count, len(covered_rows)
        n = N = 0

        if not return_positives:
            return p, n, P, N
        return p, n, P, N, covered_rows
445 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools
 2 | import os
 3 | import io
 4 | 
 5 | current_path = os.path.dirname(os.path.realpath(__file__))
 6 | 
 7 | with io.open(f"{current_path}/README.md", mode="r", encoding="utf-8") as fh:
 8 |     long_description = fh.read()
 9 | 
10 | 
11 | setuptools.setup(
12 |     name="rulexai",
13 |     version="1.1.0",
14 |     author="Dawid Macha",
15 |     author_email="dawid.m.macha@gmail.com",
16 |     description="RuleXAI is a rule-based aproach to explain the output of any machine learning model. It is suitable for classification, regression and survival tasks.",
17 |     long_description=long_description,
18 |     long_description_content_type="text/markdown",
19 |     url="https://github.com/adaa-polsl/RuleXAI",
20 |     packages=setuptools.find_packages(),
21 |     classifiers=[
22 |         "Development Status :: 5 - Production/Stable",
23 |         "License :: OSI Approved :: GNU Affero General Public License v3",
24 |         "Programming Language :: Python :: 3",
25 |         "Operating System :: Microsoft :: Windows",
26 |         "Operating System :: Unix",
27 |         "Topic :: Scientific/Engineering",
28 |         "Topic :: Scientific/Engineering :: Artificial Intelligence",
29 |         "Intended Audience :: Developers",
30 |         "Intended Audience :: Science/Research",
31 |     ],
32 |     include_package_data=True,
33 |     python_requires=">=3.9",
34 |     install_requires=[
35 |         "pandas >= 1.5.0, < 2.3.0",
36 |         "numpy ~= 1.26.4",
37 |         "matplotlib ~= 3.8.3",
38 |         "rulekit ~= 1.7.6",
39 |         "lifelines ~= 0.28.0"
40 |     ],
41 |     test_suite="tests",
42 | )
43 | 


--------------------------------------------------------------------------------
/tests/resources/classification/iris.arff:
--------------------------------------------------------------------------------
  1 | % 1. Title: Iris Plants Database
  2 | % 
  3 | % 2. Sources:
  4 | %      (a) Creator: R.A. Fisher
  5 | %      (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
  6 | %      (c) Date: July, 1988
  7 | % 
  8 | % 3. Past Usage:
  9 | %    - Publications: too many to mention!!!  Here are a few.
 10 | %    1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
 11 | %       Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
 12 | %       to Mathematical Statistics" (John Wiley, NY, 1950).
 13 | %    2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
 14 | %       (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.
 15 | %    3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
 16 | %       Structure and Classification Rule for Recognition in Partially Exposed
 17 | %       Environments".  IEEE Transactions on Pattern Analysis and Machine
 18 | %       Intelligence, Vol. PAMI-2, No. 1, 67-71.
 19 | %       -- Results:
 20 | %          -- very low misclassification rates (0% for the setosa class)
 21 | %    4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule".  IEEE 
 22 | %       Transactions on Information Theory, May 1972, 431-433.
 23 | %       -- Results:
 24 | %          -- very low misclassification rates again
 25 | %    5. See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al's AUTOCLASS II
 26 | %       conceptual clustering system finds 3 classes in the data.
 27 | % 
 28 | % 4. Relevant Information:
 29 | %    --- This is perhaps the best known database to be found in the pattern
 30 | %        recognition literature.  Fisher's paper is a classic in the field
 31 | %        and is referenced frequently to this day.  (See Duda & Hart, for
 32 | %        example.)  The data set contains 3 classes of 50 instances each,
 33 | %        where each class refers to a type of iris plant.  One class is
 34 | %        linearly separable from the other 2; the latter are NOT linearly
 35 | %        separable from each other.
 36 | %    --- Predicted attribute: class of iris plant.
 37 | %    --- This is an exceedingly simple domain.
 38 | % 
 39 | % 5. Number of Instances: 150 (50 in each of three classes)
 40 | % 
 41 | % 6. Number of Attributes: 4 numeric, predictive attributes and the class
 42 | % 
 43 | % 7. Attribute Information:
 44 | %    1. sepal length in cm
 45 | %    2. sepal width in cm
 46 | %    3. petal length in cm
 47 | %    4. petal width in cm
 48 | %    5. class: 
 49 | %       -- Iris Setosa
 50 | %       -- Iris Versicolour
 51 | %       -- Iris Virginica
 52 | % 
 53 | % 8. Missing Attribute Values: None
 54 | % 
 55 | % Summary Statistics:
 56 | %  	           Min  Max   Mean    SD   Class Correlation
 57 | %    sepal length: 4.3  7.9   5.84  0.83    0.7826   
 58 | %     sepal width: 2.0  4.4   3.05  0.43   -0.4194
 59 | %    petal length: 1.0  6.9   3.76  1.76    0.9490  (high!)
 60 | %     petal width: 0.1  2.5   1.20  0.76    0.9565  (high!)
 61 | % 
 62 | % 9. Class Distribution: 33.3% for each of 3 classes.
 63 | 
 64 | @RELATION iris
 65 | 
 66 | @ATTRIBUTE sepallength	REAL
 67 | @ATTRIBUTE sepalwidth 	REAL
 68 | @ATTRIBUTE petallength 	REAL
 69 | @ATTRIBUTE petalwidth	REAL
 70 | @ATTRIBUTE class 	{Iris-setosa,Iris-versicolor,Iris-virginica}
 71 | 
 72 | @DATA
 73 | 5.1,3.5,1.4,0.2,Iris-setosa
 74 | 4.9,3.0,1.4,0.2,Iris-setosa
 75 | 4.7,3.2,1.3,0.2,Iris-setosa
 76 | 4.6,3.1,1.5,0.2,Iris-setosa
 77 | 5.0,3.6,1.4,0.2,Iris-setosa
 78 | 5.4,3.9,1.7,0.4,Iris-setosa
 79 | 4.6,3.4,1.4,0.3,Iris-setosa
 80 | 5.0,3.4,1.5,0.2,Iris-setosa
 81 | 4.4,2.9,1.4,0.2,Iris-setosa
 82 | 4.9,3.1,1.5,0.1,Iris-setosa
 83 | 5.4,3.7,1.5,0.2,Iris-setosa
 84 | 4.8,3.4,1.6,0.2,Iris-setosa
 85 | 4.8,3.0,1.4,0.1,Iris-setosa
 86 | 4.3,3.0,1.1,0.1,Iris-setosa
 87 | 5.8,4.0,1.2,0.2,Iris-setosa
 88 | 5.7,4.4,1.5,0.4,Iris-setosa
 89 | 5.4,3.9,1.3,0.4,Iris-setosa
 90 | 5.1,3.5,1.4,0.3,Iris-setosa
 91 | 5.7,3.8,1.7,0.3,Iris-setosa
 92 | 5.1,3.8,1.5,0.3,Iris-setosa
 93 | 5.4,3.4,1.7,0.2,Iris-setosa
 94 | 5.1,3.7,1.5,0.4,Iris-setosa
 95 | 4.6,3.6,1.0,0.2,Iris-setosa
 96 | 5.1,3.3,1.7,0.5,Iris-setosa
 97 | 4.8,3.4,1.9,0.2,Iris-setosa
 98 | 5.0,3.0,1.6,0.2,Iris-setosa
 99 | 5.0,3.4,1.6,0.4,Iris-setosa
100 | 5.2,3.5,1.5,0.2,Iris-setosa
101 | 5.2,3.4,1.4,0.2,Iris-setosa
102 | 4.7,3.2,1.6,0.2,Iris-setosa
103 | 4.8,3.1,1.6,0.2,Iris-setosa
104 | 5.4,3.4,1.5,0.4,Iris-setosa
105 | 5.2,4.1,1.5,0.1,Iris-setosa
106 | 5.5,4.2,1.4,0.2,Iris-setosa
107 | 4.9,3.1,1.5,0.1,Iris-setosa
108 | 5.0,3.2,1.2,0.2,Iris-setosa
109 | 5.5,3.5,1.3,0.2,Iris-setosa
110 | 4.9,3.1,1.5,0.1,Iris-setosa
111 | 4.4,3.0,1.3,0.2,Iris-setosa
112 | 5.1,3.4,1.5,0.2,Iris-setosa
113 | 5.0,3.5,1.3,0.3,Iris-setosa
114 | 4.5,2.3,1.3,0.3,Iris-setosa
115 | 4.4,3.2,1.3,0.2,Iris-setosa
116 | 5.0,3.5,1.6,0.6,Iris-setosa
117 | 5.1,3.8,1.9,0.4,Iris-setosa
118 | 4.8,3.0,1.4,0.3,Iris-setosa
119 | 5.1,3.8,1.6,0.2,Iris-setosa
120 | 4.6,3.2,1.4,0.2,Iris-setosa
121 | 5.3,3.7,1.5,0.2,Iris-setosa
122 | 5.0,3.3,1.4,0.2,Iris-setosa
123 | 7.0,3.2,4.7,1.4,Iris-versicolor
124 | 6.4,3.2,4.5,1.5,Iris-versicolor
125 | 6.9,3.1,4.9,1.5,Iris-versicolor
126 | 5.5,2.3,4.0,1.3,Iris-versicolor
127 | 6.5,2.8,4.6,1.5,Iris-versicolor
128 | 5.7,2.8,4.5,1.3,Iris-versicolor
129 | 6.3,3.3,4.7,1.6,Iris-versicolor
130 | 4.9,2.4,3.3,1.0,Iris-versicolor
131 | 6.6,2.9,4.6,1.3,Iris-versicolor
132 | 5.2,2.7,3.9,1.4,Iris-versicolor
133 | 5.0,2.0,3.5,1.0,Iris-versicolor
134 | 5.9,3.0,4.2,1.5,Iris-versicolor
135 | 6.0,2.2,4.0,1.0,Iris-versicolor
136 | 6.1,2.9,4.7,1.4,Iris-versicolor
137 | 5.6,2.9,3.6,1.3,Iris-versicolor
138 | 6.7,3.1,4.4,1.4,Iris-versicolor
139 | 5.6,3.0,4.5,1.5,Iris-versicolor
140 | 5.8,2.7,4.1,1.0,Iris-versicolor
141 | 6.2,2.2,4.5,1.5,Iris-versicolor
142 | 5.6,2.5,3.9,1.1,Iris-versicolor
143 | 5.9,3.2,4.8,1.8,Iris-versicolor
144 | 6.1,2.8,4.0,1.3,Iris-versicolor
145 | 6.3,2.5,4.9,1.5,Iris-versicolor
146 | 6.1,2.8,4.7,1.2,Iris-versicolor
147 | 6.4,2.9,4.3,1.3,Iris-versicolor
148 | 6.6,3.0,4.4,1.4,Iris-versicolor
149 | 6.8,2.8,4.8,1.4,Iris-versicolor
150 | 6.7,3.0,5.0,1.7,Iris-versicolor
151 | 6.0,2.9,4.5,1.5,Iris-versicolor
152 | 5.7,2.6,3.5,1.0,Iris-versicolor
153 | 5.5,2.4,3.8,1.1,Iris-versicolor
154 | 5.5,2.4,3.7,1.0,Iris-versicolor
155 | 5.8,2.7,3.9,1.2,Iris-versicolor
156 | 6.0,2.7,5.1,1.6,Iris-versicolor
157 | 5.4,3.0,4.5,1.5,Iris-versicolor
158 | 6.0,3.4,4.5,1.6,Iris-versicolor
159 | 6.7,3.1,4.7,1.5,Iris-versicolor
160 | 6.3,2.3,4.4,1.3,Iris-versicolor
161 | 5.6,3.0,4.1,1.3,Iris-versicolor
162 | 5.5,2.5,4.0,1.3,Iris-versicolor
163 | 5.5,2.6,4.4,1.2,Iris-versicolor
164 | 6.1,3.0,4.6,1.4,Iris-versicolor
165 | 5.8,2.6,4.0,1.2,Iris-versicolor
166 | 5.0,2.3,3.3,1.0,Iris-versicolor
167 | 5.6,2.7,4.2,1.3,Iris-versicolor
168 | 5.7,3.0,4.2,1.2,Iris-versicolor
169 | 5.7,2.9,4.2,1.3,Iris-versicolor
170 | 6.2,2.9,4.3,1.3,Iris-versicolor
171 | 5.1,2.5,3.0,1.1,Iris-versicolor
172 | 5.7,2.8,4.1,1.3,Iris-versicolor
173 | 6.3,3.3,6.0,2.5,Iris-virginica
174 | 5.8,2.7,5.1,1.9,Iris-virginica
175 | 7.1,3.0,5.9,2.1,Iris-virginica
176 | 6.3,2.9,5.6,1.8,Iris-virginica
177 | 6.5,3.0,5.8,2.2,Iris-virginica
178 | 7.6,3.0,6.6,2.1,Iris-virginica
179 | 4.9,2.5,4.5,1.7,Iris-virginica
180 | 7.3,2.9,6.3,1.8,Iris-virginica
181 | 6.7,2.5,5.8,1.8,Iris-virginica
182 | 7.2,3.6,6.1,2.5,Iris-virginica
183 | 6.5,3.2,5.1,2.0,Iris-virginica
184 | 6.4,2.7,5.3,1.9,Iris-virginica
185 | 6.8,3.0,5.5,2.1,Iris-virginica
186 | 5.7,2.5,5.0,2.0,Iris-virginica
187 | 5.8,2.8,5.1,2.4,Iris-virginica
188 | 6.4,3.2,5.3,2.3,Iris-virginica
189 | 6.5,3.0,5.5,1.8,Iris-virginica
190 | 7.7,3.8,6.7,2.2,Iris-virginica
191 | 7.7,2.6,6.9,2.3,Iris-virginica
192 | 6.0,2.2,5.0,1.5,Iris-virginica
193 | 6.9,3.2,5.7,2.3,Iris-virginica
194 | 5.6,2.8,4.9,2.0,Iris-virginica
195 | 7.7,2.8,6.7,2.0,Iris-virginica
196 | 6.3,2.7,4.9,1.8,Iris-virginica
197 | 6.7,3.3,5.7,2.1,Iris-virginica
198 | 7.2,3.2,6.0,1.8,Iris-virginica
199 | 6.2,2.8,4.8,1.8,Iris-virginica
200 | 6.1,3.0,4.9,1.8,Iris-virginica
201 | 6.4,2.8,5.6,2.1,Iris-virginica
202 | 7.2,3.0,5.8,1.6,Iris-virginica
203 | 7.4,2.8,6.1,1.9,Iris-virginica
204 | 7.9,3.8,6.4,2.0,Iris-virginica
205 | 6.4,2.8,5.6,2.2,Iris-virginica
206 | 6.3,2.8,5.1,1.5,Iris-virginica
207 | 6.1,2.6,5.6,1.4,Iris-virginica
208 | 7.7,3.0,6.1,2.3,Iris-virginica
209 | 6.3,3.4,5.6,2.4,Iris-virginica
210 | 6.4,3.1,5.5,1.8,Iris-virginica
211 | 6.0,3.0,4.8,1.8,Iris-virginica
212 | 6.9,3.1,5.4,2.1,Iris-virginica
213 | 6.7,3.1,5.6,2.4,Iris-virginica
214 | 6.9,3.1,5.1,2.3,Iris-virginica
215 | 5.8,2.7,5.1,1.9,Iris-virginica
216 | 6.8,3.2,5.9,2.3,Iris-virginica
217 | 6.7,3.3,5.7,2.5,Iris-virginica
218 | 6.7,3.0,5.2,2.3,Iris-virginica
219 | 6.3,2.5,5.0,1.9,Iris-virginica
220 | 6.5,3.0,5.2,2.0,Iris-virginica
221 | 6.2,3.4,5.4,2.3,Iris-virginica
222 | 5.9,3.0,5.1,1.8,Iris-virginica
223 | %
224 | %
225 | %
226 | 


--------------------------------------------------------------------------------
/tests/resources/classification/results.csv:
--------------------------------------------------------------------------------
 1 | Iris-setosa | conditions_names;Iris-setosa | importances;Iris-versicolor | conditions_names;Iris-versicolor | importances;Iris-virginica | conditions_names;Iris-virginica | importances
 2 | petallength = (-inf, 2.45);1.0;petallength = <2.45, 4.75);0.8234509803921568;petallength = <4.75, inf);0.5136838148584906
 3 | ;;petallength = <2.45, 5.35);0.3740469221835075;petalwidth = <1.75, inf);0.496569693094629
 4 | ;;petallength = (-inf, 4.75);0.2981388613042039;petallength = <4.85, inf);0.43343030690537093
 5 | ;;sepalwidth = (-inf, 2.95);0.25828887286709923;petalwidth = <1.45, inf);0.32763693985849063
 6 | ;;sepalwidth = (-inf, 2.45);0.25589852008456654;;
 7 | ;;petalwidth = (-inf, 1.75);0.18359664404223222;;
 8 | ;;petalwidth = (-inf, 1.65);0.11654901960784321;;
 9 | ;;sepallength = <4.95, inf);0.0870106947586727;;
10 | ;;sepallength = <4.7, inf);0.04013829787234042;;
11 | 


--------------------------------------------------------------------------------
/tests/resources/classification/results_split.csv:
--------------------------------------------------------------------------------
 1 | Iris-setosa | conditions_names;Iris-setosa | importances;Iris-versicolor | conditions_names;Iris-versicolor | importances;Iris-virginica | conditions_names;Iris-virginica | importances
 2 | petallength = (-inf, 2.45);1.0;petallength = <2.45, 4.75);1.4069;petalwidth = <1.75, inf);0.6118
 3 | ;;petalwidth = <1.45, 1.65);0.2591;petallength = <5.35, inf);0.5545
 4 | ;;sepalwidth = (-inf, 2.45);0.2554;petallength = <4.85, 5.35);0.3008
 5 | ;;petalwidth = (-inf, 1.45);0.2154;petallength = <4.75, 4.85);0.0211
 6 | ;;sepalwidth = <2.45, 2.95);0.0913;petalwidth = <1.65, 1.75);0.017
 7 | ;;sepallength = <4.95, inf);0.0777;petalwidth = <1.45, 1.65);-0.0273
 8 | ;;petallength = <4.75, 4.85);0.0201;;
 9 | ;;petalwidth = <1.65, 1.75);0.0197;;
10 | ;;petallength = <4.85, 5.35);-0.0192;;
11 | ;;sepallength = <4.7, 4.95);-0.0372;;
12 | ;;petallength = (-inf, 2.45);-0.1;;
13 | 


--------------------------------------------------------------------------------
/tests/resources/features_importances.csv:
--------------------------------------------------------------------------------
1 | 0 | attributes;0 | importances;1 | attributes;1 | importances;2 | attributes;2 | importances
2 | petal length (cm);1.0;petal length (cm);1.4956367638798682;petal length (cm);0.9471141217638614
3 | ;;sepal width (cm);0.5141873929516658;petal width (cm);0.8242066329531197
4 | ;;petal width (cm);0.30014566365007544;;
5 | ;;sepal length (cm);0.12714899263101312;;
6 | 


--------------------------------------------------------------------------------
/tests/resources/regression/diabetes.arff:
--------------------------------------------------------------------------------
 1 | @relation diabetes
 2 | @attribute age numeric
 3 | @attribute deficit numeric
 4 | @attribute class numeric
 5 | @data
 6 | 5.2,-8.1,4.8
 7 | 8.8,-16.1,4.1
 8 | 10.5,-0.9,5.2
 9 | 10.6,-7.8,5.5
10 | 10.4,-29,5
11 | 1.8,-19.2,3.4
12 | 12.7,-18.9,3.4
13 | 15.6,-10.6,4.9
14 | 5.8,-2.8,5.6
15 | 1.9,-25,3.7
16 | 2.2,-3.1,3.9
17 | 4.8,-7.8,4.5
18 | 7.9,-13.9,4.8
19 | 5.2,-4.5,4.9
20 | 0.9,-11.6,3
21 | 11.8,-2.1,4.6
22 | 7.9,-2,4.8
23 | 11.5,-9,5.5
24 | 10.6,-11.2,4.5
25 | 8.5,-0.2,5.3
26 | 11.1,-6.1,4.7
27 | 12.8,-1,6.6
28 | 11.3,-3.6,5.1
29 | 1,-8.2,3.9
30 | 14.5,-0.5,5.7
31 | 11.9,-2,5.1
32 | 8.1,-1.6,5.2
33 | 13.8,-11.9,3.7
34 | 15.5,-0.7,4.9
35 | 9.8,-1.2,4.8
36 | 11,-14.3,4.4
37 | 12.4,-0.8,5.2
38 | 11.1,-16.8,5.1
39 | 5.1,-5.1,4.6
40 | 4.8,-9.5,3.9
41 | 4.2,-17,5.1
42 | 6.9,-3.3,5.1
43 | 13.2,-0.7,6
44 | 9.9,-3.3,4.9
45 | 12.5,-13.6,4.1
46 | 13.2,-1.9,4.6
47 | 8.9,-10,4.9
48 | 10.8,-13.5,5.1
49 | 


--------------------------------------------------------------------------------
/tests/resources/regression/results.csv:
--------------------------------------------------------------------------------
 1 | conditions;importances
 2 | age = (-inf, 3.2);0.598015873015873
 3 | age = <6.35, 11.4);0.47138047138047134
 4 | deficit = <-6.95, inf);0.4384617898711036
 5 | age = <3.5, 13.85);0.26523427274457995
 6 | age = <6.35, inf);0.2532969417601067
 7 | deficit = <-8.15, -0.45);0.22110831984801255
 8 | deficit = <-13.55, inf);0.12160660762260922
 9 | age = (-inf, 13.0);0.0934009552269355
10 | 


--------------------------------------------------------------------------------
/tests/resources/regression/results_split.csv:
--------------------------------------------------------------------------------
 1 | conditions;importances
 2 | age = <6.35, 11.4);0.8521793135064287
 3 | age = (-inf, 3.2);0.7091553650445686
 4 | deficit = <-0.45, inf);0.2753641116521085
 5 | deficit = <-6.95, -0.45);0.1819302480607627
 6 | age = <3.5, 6.35);0.06542692694361696
 7 | deficit = <-8.15, -6.95);0.02060332289928545
 8 | age = <13.85, inf);0.01639772819856604
 9 | age = <3.2, 3.5);0.0
10 | deficit = <-13.55, -8.15);-0.007044158555616169
11 | age = <11.4, 13.0);-0.11752390907377938
12 | age = <13.0, 13.85);-0.15006381616152903
13 | 


--------------------------------------------------------------------------------
/tests/resources/survival/pbc.arff:
--------------------------------------------------------------------------------
  1 | @relation pbc
  2 | @attribute survival_time numeric
  3 | @attribute survival_status numeric
  4 | @attribute trt numeric
  5 | @attribute age numeric
  6 | @attribute sex {'f','m'}
  7 | @attribute ascites numeric
  8 | @attribute hepato numeric
  9 | @attribute spiders numeric
 10 | @attribute edema numeric
 11 | @attribute bili numeric
 12 | @attribute chol numeric
 13 | @attribute albumin numeric
 14 | @attribute copper numeric
 15 | @attribute alk.phos numeric
 16 | @attribute ast numeric
 17 | @attribute trig numeric
 18 | @attribute platelet numeric
 19 | @attribute protime numeric
 20 | @attribute stage numeric
 21 | @data
 22 | 400,1,1,58.765229,'f',1,1,1,1,14.5,261,2.6,156,1718,137.95,172,190,12.2,4
 23 | 4500,0,1,56.44627,'f',0,1,1,0,1.1,302,4.14,54,7394.8,113.52,88,221,10.6,3
 24 | 1012,1,1,70.072553,'m',0,0,0,0.5,1.4,176,3.48,210,516,96.1,55,151,12,4
 25 | 1925,1,1,54.740589,'f',0,1,1,0.5,1.8,244,2.54,64,6121.8,60.63,92,183,10.3,4
 26 | 2503,1,2,66.258727,'f',0,1,0,0,0.8,248,3.98,50,944,93,63,?,11,3
 27 | 1832,0,2,55.534565,'f',0,1,0,0,1,322,4.09,52,824,60.45,213,204,9.7,3
 28 | 2466,1,2,53.05681,'f',0,0,0,0,0.3,280,4,52,4651.2,28.38,189,373,11,3
 29 | 2400,1,1,42.507871,'f',0,0,1,0,3.2,562,3.08,79,2276,144.15,88,251,11,2
 30 | 51,1,2,70.55989,'f',1,0,1,1,12.6,200,2.74,140,918,147.25,143,302,11.5,4
 31 | 3762,1,2,53.713895,'f',0,1,1,0,1.4,259,4.16,46,1104,79.05,79,258,12,4
 32 | 304,1,2,59.137577,'f',0,0,1,0,3.6,236,3.52,94,591,82.15,95,71,13.6,4
 33 | 3577,0,2,45.689254,'f',0,0,0,0,0.7,281,3.85,40,1181,88.35,130,244,10.6,3
 34 | 1217,1,2,56.221766,'m',1,1,0,1,0.8,?,2.27,43,728,71,?,156,11,4
 35 | 3584,1,1,64.646133,'f',0,0,0,0,0.8,231,3.87,173,9009.8,127.71,96,295,11,3
 36 | 3672,0,2,40.443532,'f',0,0,0,0,0.7,204,3.66,28,685,72.85,58,198,10.8,3
 37 | 769,1,2,52.183436,'f',0,1,0,0,2.7,274,3.15,159,1533,117.8,128,224,10.5,4
 38 | 131,1,1,53.930185,'f',0,1,1,1,11.4,178,2.8,588,961,280.55,200,283,12.4,4
 39 | 4232,0,1,49.560575,'f',0,1,0,0.5,0.7,235,3.56,39,1881,93,123,209,11,3
 40 | 1356,1,2,59.953457,'f',0,1,0,0,5.1,374,3.51,140,1919,122.45,135,322,13,4
 41 | 3445,0,2,64.188912,'m',0,1,1,0,0.6,252,3.83,41,843,65.1,83,336,11.4,4
 42 | 673,1,1,56.276523,'f',0,0,1,0,3.4,271,3.63,464,1376,120.9,55,173,11.6,4
 43 | 264,1,2,55.967146,'f',1,1,1,1,17.4,395,2.94,558,6064.8,227.04,191,214,11.7,4
 44 | 4079,1,1,44.520192,'m',0,1,0,0,2.1,456,4,124,5719,221.88,230,70,9.9,2
 45 | 4127,0,2,45.073238,'f',0,0,0,0,0.7,298,4.1,40,661,106.95,66,324,11.3,2
 46 | 1444,1,2,52.024641,'f',0,1,1,0,5.2,1128,3.68,53,3228,165.85,166,421,9.9,3
 47 | 77,1,2,54.439425,'f',1,1,1,0.5,21.6,175,3.31,221,3697.4,101.91,168,80,12,4
 48 | 549,1,2,44.947296,'f',1,1,1,1,17.2,222,3.23,209,1975,189.1,195,144,13,4
 49 | 4509,0,2,63.876797,'f',0,0,0,0,0.7,370,3.78,24,5833,73.53,86,390,10.6,2
 50 | 321,1,2,41.385352,'f',0,1,1,0,3.6,260,2.54,172,7277,121.26,158,124,11,4
 51 | 3839,1,2,41.552361,'f',0,1,0,0,4.7,296,3.44,114,9933.2,206.4,101,195,10.3,2
 52 | 4523,0,2,53.995893,'f',0,1,0,0,1.8,262,3.34,101,7277,82.56,158,286,10.6,4
 53 | 3170,1,2,51.282683,'f',0,0,0,0,0.8,210,3.19,82,1592,218.55,113,180,12,3
 54 | 3933,0,1,52.060233,'f',0,0,0,0,0.8,364,3.7,37,1840,170.5,64,273,10.5,2
 55 | 2847,1,2,48.618754,'f',0,0,0,0,1.2,314,3.2,201,12258.8,72.24,151,431,10.6,3
 56 | 3611,0,2,56.410678,'f',0,0,0,0,0.3,172,3.39,18,558,71.3,96,311,10.6,2
 57 | 223,1,1,61.727584,'f',1,1,0,1,7.1,334,3.01,150,6931.2,180.6,118,102,12,4
 58 | 3244,1,2,36.626968,'f',0,1,1,0,3.3,383,3.53,102,1234,137.95,87,234,11,4
 59 | 2297,1,1,55.392197,'f',0,1,0,0,0.7,282,3,52,9066.8,72.24,111,563,10.6,4
 60 | 4467,0,1,46.669405,'f',0,0,0,0,1.3,?,3.34,105,11046.6,104.49,?,358,11,4
 61 | 1350,1,1,33.634497,'f',0,1,0,0,6.8,?,3.26,96,1215,151.9,?,226,11.7,4
 62 | 4453,0,2,33.69473,'f',0,1,1,0,2.1,?,3.54,122,8778,56.76,?,344,11,4
 63 | 4556,0,1,48.870637,'f',0,0,0,0,1.1,361,3.64,36,5430.2,67.08,89,203,10.6,2
 64 | 3428,1,2,37.582478,'f',0,1,1,1,3.3,299,3.55,131,1029,119.35,50,199,11.7,3
 65 | 4025,0,2,41.793292,'f',0,0,0,0,0.6,?,3.93,19,1826,71.3,?,474,10.9,2
 66 | 2256,1,1,45.798768,'f',0,1,0,0,5.7,482,2.84,161,11552,136.74,165,518,12.7,3
 67 | 2576,0,2,47.427789,'f',0,0,0,0,0.5,316,3.65,68,1716,187.55,71,356,9.8,3
 68 | 4427,0,2,49.136208,'m',0,0,0,0,1.9,259,3.7,281,10396.8,188.34,178,214,11,3
 69 | 708,1,2,61.152635,'f',0,1,0,0,0.8,?,3.82,58,678,97.65,?,233,11,4
 70 | 2598,1,1,53.508556,'f',0,1,0,0,1.1,257,3.36,43,1080,106.95,73,128,10.6,4
 71 | 3853,1,2,52.087611,'f',0,0,0,0,0.8,276,3.6,54,4332,99.33,143,273,10.6,2
 72 | 2386,1,1,50.540726,'m',0,0,0,0,6,614,3.7,158,5084.4,206.4,93,362,10.6,1
 73 | 1000,1,1,67.408624,'f',0,1,0,0,2.6,?,3.1,94,6456.2,56.76,?,214,11,4
 74 | 1434,1,1,39.19781,'f',1,1,1,1,1.3,288,3.4,262,5487.2,73.53,125,254,11,4
 75 | 1360,1,1,65.763176,'m',0,0,0,0,1.8,416,3.94,121,10165,79.98,219,213,11,3
 76 | 1847,1,2,33.61807,'f',0,1,1,0,1.1,498,3.8,88,13862.4,95.46,319,365,10.6,2
 77 | 3282,1,1,53.571526,'f',0,1,0,0.5,2.3,260,3.18,231,11320.2,105.78,94,216,12.4,3
 78 | 4459,0,1,44.569473,'m',0,0,0,0,0.7,242,4.08,73,5890,56.76,118,?,10.6,1
 79 | 2224,1,1,40.394251,'f',0,1,1,0,0.8,329,3.5,49,7622.8,126.42,124,321,10.6,3
 80 | 4365,0,1,58.38193,'f',0,0,0,0,0.9,604,3.4,82,876,71.3,58,228,10.3,3
 81 | 4256,0,2,43.8987,'m',0,0,0,0,0.6,216,3.94,28,601,60.45,188,211,13,1
 82 | 3090,1,2,60.706366,'f',1,0,0,0,1.3,302,2.75,58,1523,43.4,112,329,13.2,4
 83 | 859,1,2,46.628337,'f',0,0,1,1,22.5,932,3.12,95,5396,244.9,133,165,11.6,3
 84 | 1487,1,2,62.907598,'f',0,1,0,0,2.1,373,3.5,52,1009,150.35,188,178,11,3
 85 | 3992,0,1,40.202601,'f',0,0,0,0,1.2,256,3.6,74,724,141.05,108,430,10,1
 86 | 4191,1,1,46.453114,'m',0,1,0,0,1.4,427,3.7,105,1909,182.9,171,123,11,3
 87 | 2769,1,2,51.288159,'f',0,0,0,0,1.1,466,3.91,84,1787,328.6,185,261,10,3
 88 | 4039,0,1,32.613279,'f',0,0,0,0,0.7,174,4.09,58,642,71.3,46,203,10.6,3
 89 | 1170,1,1,49.338809,'f',0,1,1,0.5,20,652,3.46,159,3292,215.45,184,227,12.4,3
 90 | 3458,0,1,56.399726,'f',0,0,0,0,0.6,?,4.64,20,666,54.25,?,265,10.6,2
 91 | 4196,0,2,48.845996,'f',0,1,0,0,1.2,258,3.57,79,2201,120.9,76,410,11.5,4
 92 | 4184,0,2,32.492813,'f',0,0,0,0,0.5,320,3.54,51,1243,122.45,80,225,10,3
 93 | 4190,0,2,38.494182,'f',0,0,0,0,0.7,132,3.6,17,423,49.6,56,265,11,1
 94 | 1827,1,1,51.920602,'f',0,1,1,0,8.4,558,3.99,280,967,89.9,309,278,11,4
 95 | 1191,1,1,43.518138,'f',1,1,1,0.5,17.1,674,2.53,207,2078,182.9,598,268,11.5,4
 96 | 71,1,1,51.942505,'f',0,1,1,0.5,12.2,394,3.08,111,2132,155,243,165,11.6,4
 97 | 326,1,2,49.826146,'f',0,1,1,0.5,6.6,244,3.41,199,1819,170.5,91,132,12.1,3
 98 | 1690,1,1,47.945243,'f',0,1,0,0,6.3,436,3.02,75,2176,170.5,104,236,10.6,4
 99 | 3707,0,1,46.516085,'f',0,1,0,0,0.8,315,4.24,13,1637,170.5,70,426,10.9,3
100 | 890,1,2,67.411362,'m',0,1,0,0,7.2,247,3.72,269,1303,176.7,91,360,11.2,4
101 | 2540,1,1,63.263518,'f',0,1,1,0,14.4,448,3.65,34,1218,60.45,318,385,11.7,4
102 | 3574,1,1,67.310062,'f',0,0,0,0,4.5,472,4.09,154,1580,117.8,272,412,11.1,3
103 | 4050,0,1,56.013689,'f',0,1,0,0.5,1.3,250,3.5,48,1138,71.3,100,81,12.9,4
104 | 4032,0,2,55.830253,'f',0,0,0,0,0.4,263,3.76,29,1345,137.95,74,181,11.2,3
105 | 3358,1,2,47.216975,'f',0,1,0,0,2.1,262,3.48,58,2045,89.9,84,225,11.5,4
106 | 1657,1,1,52.758385,'f',0,1,1,0,5,1600,3.21,75,2656,82.15,174,181,10.9,3
107 | 198,1,1,37.278576,'f',0,0,0,0,1.1,345,4.4,75,1860,218.55,72,447,10.7,3
108 | 2452,0,2,41.393566,'f',0,0,0,0.5,0.6,296,4.06,37,1032,80.6,83,442,12,3
109 | 1741,1,1,52.443532,'f',0,1,0,0,2,408,3.65,50,1083,110.05,98,200,11.4,2
110 | 2689,1,1,33.475702,'m',0,0,0,0,1.6,660,4.22,94,1857,151.9,155,337,11,2
111 | 460,1,2,45.607118,'f',0,1,1,0.5,5,325,3.47,110,2460,246.45,56,430,11.9,4
112 | 388,1,1,76.709103,'f',1,0,0,1,1.4,206,3.13,36,1626,86.8,70,145,12.2,4
113 | 3913,0,1,36.533881,'f',0,0,0,0,1.3,353,3.67,73,2039,232.5,68,380,11.1,2
114 | 750,1,1,53.916496,'f',0,1,1,0,3.2,201,3.11,178,1212,159.65,69,188,11.8,4
115 | 130,1,2,46.390144,'f',1,1,1,1,17.4,?,2.64,182,559,119.35,?,401,11.7,2
116 | 3850,0,1,48.845996,'f',0,0,0,0,1,?,3.7,33,1258,99.2,?,338,10.4,3
117 | 611,1,2,71.893224,'m',0,1,0,0.5,2,420,3.26,62,3196,77.5,91,344,11.4,3
118 | 3823,0,1,28.884326,'f',0,0,0,0,1,239,3.77,77,1877,97.65,101,312,10.2,1
119 | 3820,0,2,48.468172,'m',0,0,0,0,1.8,460,3.35,148,1472,108.5,118,172,10.2,2
120 | 552,1,2,51.468857,'m',0,1,0,0,2.3,178,3,145,746,178.25,122,119,12,4
121 | 3581,0,2,44.950034,'f',0,0,0,0,0.9,400,3.6,31,1689,164.3,166,327,10.4,3
122 | 3099,0,1,56.569473,'f',0,0,0,0,0.9,248,3.97,172,646,62,84,128,10.1,1
123 | 110,1,2,48.963723,'f',1,1,1,1,2.5,188,3.67,57,1273,119.35,102,110,11.1,4
124 | 3086,1,1,43.017112,'f',0,0,0,0,1.1,303,3.64,20,2108,128.65,53,349,11.1,2
125 | 3092,0,2,34.039699,'f',0,1,0,0,1.1,464,4.2,38,1644,151.9,102,348,10.3,3
126 | 3222,1,1,68.50924,'f',1,1,0,0,2.1,?,3.9,50,1087,103.85,?,137,10.6,2
127 | 3388,0,2,62.521561,'f',0,0,0,0,0.6,212,4.03,10,648,71.3,77,316,17.1,1
128 | 2583,1,1,50.35729,'f',0,0,0,0,0.4,127,3.5,14,1062,49.6,84,334,10.3,2
129 | 2504,0,2,44.062971,'f',0,0,0,0,0.5,120,3.61,53,804,110.05,52,271,10.6,3
130 | 2105,1,1,38.910335,'f',0,1,1,0,1.9,486,3.54,74,1052,108.5,109,141,10.9,3
131 | 2350,0,1,41.152635,'f',0,0,0,0,5.5,528,4.18,77,2404,172.05,78,467,10.7,3
132 | 3445,1,2,55.457906,'f',0,1,1,0,2,267,3.67,89,754,196.85,90,136,11.8,4
133 | 980,1,1,51.233402,'f',0,1,1,0,6.7,374,3.74,103,979,128.65,100,266,11.1,4
134 | 3395,1,2,52.826831,'m',0,0,0,0,3.2,259,4.3,208,1040,110.05,78,268,11.7,3
135 | 3422,0,2,42.639288,'f',0,0,1,0,0.7,303,4.19,81,1584,111.6,156,307,10.3,3
136 | 3336,0,1,61.0705,'f',0,0,1,0.5,3,458,3.63,74,1588,106.95,382,438,9.9,3
137 | 1083,1,1,49.6564,'f',0,1,1,0,6.5,950,3.11,111,2374,170.5,149,354,11,4
138 | 2288,1,1,48.854209,'f',0,1,0,0,3.5,390,3.3,67,878,137.95,93,207,10.2,3
139 | 515,1,1,54.255989,'f',0,0,1,0,0.6,636,3.83,129,944,97.65,114,306,9.5,3
140 | 2033,0,1,35.151266,'m',0,0,0,0,3.5,325,3.98,444,766,130.2,210,344,10.6,3
141 | 191,1,2,67.906913,'m',1,1,0,1,1.3,151,3.08,73,1112,46.5,49,213,13.2,4
142 | 3297,0,1,55.436003,'f',0,0,0,0,0.6,298,4.13,29,758,65.1,85,256,10.7,3
143 | 971,1,1,45.820671,'f',0,1,1,1,5.1,?,3.23,18,790,179.8,?,104,13,4
144 | 3069,0,1,52.889802,'m',0,1,0,0,0.6,251,3.9,25,681,57.35,107,182,10.8,4
145 | 2468,0,2,47.181383,'f',0,1,0,0,1.3,316,3.51,75,1162,147.25,137,238,10,4
146 | 824,1,1,53.598905,'f',1,1,1,0,1.2,269,3.12,?,1441,165.85,68,166,11.1,4
147 | 3255,0,2,44.104038,'f',0,0,0,0,0.5,268,4.08,9,1174,86.8,95,453,10,2
148 | 1037,1,1,41.94935,'f',0,1,1,0,16.2,?,2.89,42,1828,299.15,?,123,12.6,4
149 | 3239,0,1,63.613963,'f',0,1,0,0,0.9,420,3.87,30,1009,57.35,232,?,9.7,3
150 | 1413,1,2,44.227242,'f',0,1,1,0,17.4,1775,3.43,205,2065,165.85,97,418,11.5,3
151 | 850,1,2,62.001369,'f',0,1,1,0,2.8,242,3.8,74,614,136.4,104,121,13.2,4
152 | 2944,0,1,40.553046,'f',0,0,0,0,1.9,448,3.83,60,1052,127.1,175,181,9.8,3
153 | 2796,1,2,62.644764,'m',0,0,0,0,1.5,331,3.95,13,577,128.65,99,165,10.1,4
154 | 3149,0,2,42.335387,'f',0,0,0,0,0.7,578,3.67,35,1353,127.1,105,427,10.7,2
155 | 3150,0,1,42.96783,'f',0,0,0,0,0.4,263,3.57,123,836,74.4,121,445,11,2
156 | 3098,0,1,55.96167,'f',0,0,0,0,0.8,263,3.35,27,1636,116.25,69,206,9.8,2
157 | 2990,0,1,62.861054,'f',0,0,0,0,1.1,399,3.6,79,3472,155,152,344,10.1,2
158 | 1297,1,1,51.249829,'m',0,1,0,0,7.3,426,3.93,262,2424,145.7,218,252,10.5,3
159 | 2106,0,2,46.762491,'f',0,1,0,0,1.1,328,3.31,159,1260,94.55,134,142,11.6,4
160 | 3059,0,1,54.075291,'f',0,1,0,0,1.1,290,4.09,38,2120,186,146,318,10,3
161 | 3050,0,1,47.036277,'f',0,0,0,0,0.9,346,3.77,59,794,125.55,56,336,10.6,2
162 | 2419,1,2,55.726215,'f',0,1,0,0,1,364,3.48,20,720,134.85,88,283,9.9,2
163 | 786,1,2,46.102669,'f',0,1,0,0,2.9,332,3.6,86,1492,134.85,103,277,11,4
164 | 943,1,2,52.287474,'f',0,1,0,0.5,28,556,3.26,152,3896,198.4,171,335,10,3
165 | 2976,0,2,51.200548,'f',0,0,1,0,0.7,309,3.84,96,858,41.85,106,253,11.4,3
166 | 2615,0,2,33.864476,'f',0,0,0,0.5,1.2,?,3.89,58,1284,173.6,?,239,9.4,3
167 | 2995,0,1,75.011636,'f',0,0,0,0.5,1.2,288,3.37,32,791,57.35,114,213,10.7,2
168 | 1427,1,2,30.863792,'f',0,1,0,0,7.2,1015,3.26,247,3836,198.4,280,330,9.8,3
169 | 762,1,1,61.804244,'m',0,1,1,0.5,3,257,3.79,290,1664,102.3,112,140,9.9,4
170 | 2891,0,2,34.986995,'f',0,0,1,0,1,?,3.63,57,1536,134.85,?,233,10,1
171 | 2870,0,1,55.041752,'f',0,0,0,0,0.9,460,3.03,57,721,85.25,174,301,9.4,2
172 | 1152,1,1,69.941136,'m',0,1,0,0,2.3,586,3.01,243,2276,114.7,126,339,10.9,3
173 | 2863,0,1,49.604381,'f',0,0,0,0,0.5,217,3.85,68,453,54.25,68,270,11.1,1
174 | 140,1,1,69.377139,'m',0,0,1,1,2.4,168,2.56,225,1056,120.9,75,108,14.1,3
175 | 2666,0,2,43.556468,'f',0,1,1,0.5,0.6,220,3.35,57,1620,153.45,80,311,11.2,4
176 | 853,1,2,59.408624,'f',0,1,0,0,25.5,358,3.52,219,2468,201.5,205,151,11.5,2
177 | 2835,0,2,48.758385,'f',0,0,0,0,0.6,286,3.42,34,1868,77.5,206,487,10,2
178 | 2475,0,1,36.492813,'f',0,0,0,0,3.4,450,3.37,32,1408,116.25,118,313,11.2,2
179 | 1536,1,2,45.760438,'m',0,0,0,0,2.5,317,3.46,217,714,130.2,140,207,10.1,3
180 | 2772,0,2,57.371663,'f',0,0,0,0,0.6,217,3.62,13,414,75.95,119,224,10.5,3
181 | 2797,0,2,42.743326,'f',0,0,0,0,2.3,502,3.56,4,964,120.9,180,269,9.6,2
182 | 186,1,2,58.817248,'f',0,1,1,0,3.2,260,3.19,91,815,127.1,101,160,12,4
183 | 2055,1,1,53.497604,'f',0,0,0,0,0.3,233,4.08,20,622,66.65,68,358,9.9,3
184 | 264,1,2,43.4141,'f',0,1,1,0.5,8.5,?,3.34,161,1428,181.35,?,88,13.3,4
185 | 1077,1,1,53.305955,'m',0,1,0,0,4,196,3.45,80,2496,133.3,142,212,11.3,4
186 | 2721,0,2,41.355236,'f',0,1,0,0,5.7,1480,3.26,84,1960,457.25,108,213,9.5,2
187 | 1682,1,1,60.958248,'m',0,1,0,0,0.9,376,3.86,200,1015,83.7,154,238,10.3,4
188 | 2713,0,2,47.753593,'f',0,1,0,0,0.4,257,3.8,44,842,97.65,110,?,9.2,2
189 | 1212,1,2,35.49076,'f',0,0,0,0,1.3,408,4.22,67,1387,142.6,137,295,10.1,3
190 | 2692,0,1,48.66256,'f',0,0,0,0,1.2,390,3.61,32,1509,88.35,52,263,9,3
191 | 2574,0,1,52.668036,'f',0,0,0,0,0.5,?,4.52,31,784,74.4,?,361,10.1,3
192 | 2301,0,2,49.869952,'f',0,0,1,0,1.3,205,3.34,65,1031,91.45,126,217,9.8,3
193 | 2657,0,1,30.275154,'f',0,1,1,0,3,236,3.42,76,1403,89.9,86,493,9.8,2
194 | 2644,0,1,55.56742,'f',0,0,0,0,0.5,?,3.85,63,663,79.05,?,311,9.7,1
195 | 2624,0,2,52.15332,'f',0,0,0,0,0.8,283,3.8,152,718,108.5,168,340,10.1,3
196 | 1492,1,1,41.609856,'f',0,1,1,0,3.2,?,3.56,77,1790,139.5,?,149,10.1,4
197 | 2609,0,2,55.45243,'f',0,0,0,0,0.9,258,4.01,49,559,43.4,133,277,10.4,2
198 | 2580,0,1,70.004107,'f',0,0,0,0,0.6,?,4.08,51,665,74.4,?,325,10.2,4
199 | 2573,0,2,43.942505,'f',0,1,0,0,1.8,396,3.83,39,2148,102.3,133,278,9.9,4
200 | 2563,0,2,42.568104,'f',0,0,0,0,4.7,478,4.38,44,1629,237.15,76,175,10.4,3
201 | 2556,0,1,44.569473,'f',0,1,1,0,1.4,248,3.58,63,554,75.95,106,79,10.3,4
202 | 2555,0,1,56.944559,'f',0,1,0,0,0.6,?,3.69,161,674,26.35,?,539,9.9,2
203 | 2241,0,2,40.260096,'f',0,0,0,0,0.5,201,3.73,44,1345,54.25,145,445,10.1,2
204 | 974,1,2,37.607118,'f',0,1,0,0,11,674,3.55,358,2412,167.4,140,471,9.8,3
205 | 2527,0,1,48.361396,'f',0,0,0,0,0.8,256,3.54,42,1132,74.4,94,192,10.5,3
206 | 1576,1,1,70.836413,'f',0,0,1,0.5,2,225,3.53,51,933,69.75,62,200,12.7,3
207 | 733,1,2,35.791923,'f',0,1,0,0,14,808,3.43,251,2870,153.45,137,268,11.5,3
208 | 2332,0,1,62.622861,'f',0,1,0,0,0.7,187,3.48,41,654,120.9,98,164,11,4
209 | 2456,0,2,50.647502,'f',0,1,0,0,1.3,360,3.63,52,1812,97.65,164,256,9.9,3
210 | 2504,0,1,54.527036,'f',0,0,1,0,2.3,?,3.93,24,1828,133.3,?,327,10.2,2
211 | 216,1,2,52.692676,'f',1,1,1,0,24.5,1092,3.35,233,3740,147.25,432,399,15.2,4
212 | 2443,0,1,52.720055,'f',0,1,0,0,0.9,308,3.69,67,696,51.15,101,344,9.8,4
213 | 797,1,2,56.772074,'f',0,0,0,0,10.8,932,3.19,267,2184,161.2,157,382,10.4,4
214 | 2449,0,1,44.396988,'f',0,0,0,0,1.5,293,4.3,50,975,125.55,56,336,9.1,2
215 | 2330,0,1,29.555099,'f',0,1,0,0,3.7,347,3.9,76,2544,221.65,90,129,11.5,4
216 | 2363,0,1,57.040383,'f',0,1,1,0,1.4,226,3.36,13,810,72.85,62,117,11.6,4
217 | 2365,0,1,44.626968,'f',0,0,0,0,0.6,266,3.97,25,1164,102.3,102,201,10.1,2
218 | 2357,0,2,35.797399,'f',0,0,1,0,0.7,286,2.9,38,1692,141.05,90,381,9.6,2
219 | 2318,0,2,32.232717,'f',0,0,1,0,4.7,236,3.55,112,1391,137.95,114,332,9.9,3
220 | 2294,0,2,41.092402,'f',0,1,0,0,0.6,235,3.2,26,1758,106.95,67,228,10.8,4
221 | 2272,0,1,61.639973,'f',0,0,0,0,0.5,223,3.8,15,1044,80.6,89,514,10,2
222 | 2221,0,2,37.05681,'f',0,1,0,0,0.5,149,4.04,227,598,52.7,57,166,9.9,2
223 | 2090,1,2,62.579055,'f',0,0,0,0,0.7,255,3.74,23,1024,77.5,58,281,10.2,3
224 | 2081,1,1,48.977413,'f',1,0,0,0,2.5,382,3.55,108,1516,238.7,?,126,10.3,3
225 | 2255,0,1,61.990418,'f',0,0,0,0,0.6,213,4.07,12,5300,57.35,68,240,11,1
226 | 2171,0,1,72.772074,'f',0,0,0,0.5,0.6,?,3.33,14,733,85.25,?,259,10.1,4
227 | 904,1,1,61.295003,'f',0,1,0,0,3.9,396,3.2,58,1440,153.45,131,156,10,4
228 | 2216,0,2,52.62423,'f',0,1,1,0,0.7,252,4.01,11,1210,72.85,58,309,9.5,2
229 | 2224,0,2,49.763176,'m',0,1,0,0,0.9,346,3.37,81,1098,122.45,90,298,10,2
230 | 2195,0,2,52.914442,'f',0,0,0,0,1.3,?,3.76,27,1282,100.75,?,114,10.3,3
231 | 2176,0,2,47.263518,'f',0,0,0,0,1.2,232,3.98,11,1074,100.75,99,223,9.9,3
232 | 2178,0,1,50.20397,'f',0,0,1,0,0.5,400,3.4,9,1134,96.1,55,356,10.2,3
233 | 1786,1,2,69.347023,'f',0,1,0,0,0.9,404,3.43,34,1866,79.05,224,236,9.9,3
234 | 1080,1,2,41.169062,'f',0,0,0,0,5.9,1276,3.85,141,1204,203.05,157,216,10.7,3
235 | 2168,0,1,59.164956,'f',0,0,0,0,0.5,?,3.68,20,856,55.8,?,146,10.4,3
236 | 790,1,2,36.079398,'f',0,1,0,0,11.4,608,3.31,65,1790,151.9,210,298,10.8,4
237 | 2170,0,1,34.595483,'f',0,0,0,0,0.5,?,3.89,29,897,66.65,?,423,10.1,1
238 | 2157,0,2,42.71321,'f',0,0,0,0,1.6,215,4.17,67,936,134.85,85,176,9.6,3
239 | 1235,1,1,63.63039,'f',0,0,1,0,3.8,426,3.22,96,2716,210.8,113,228,10.6,2
240 | 2050,0,2,56.629706,'f',0,1,0,0,0.9,360,3.65,72,3186,94.55,154,269,9.7,4
241 | 597,1,2,46.264203,'f',0,1,0,0,4.5,372,3.38,227,2310,167.4,135,240,12.4,3
242 | 334,1,1,61.242984,'f',1,1,0,1,14.1,448,2.43,123,1833,134,155,210,11,4
243 | 1945,0,1,38.620123,'f',0,0,0,0,1,309,3.66,67,1214,158.1,101,309,9.7,3
244 | 2022,0,1,38.770705,'f',0,0,0,0,0.7,274,3.66,108,1065,88.35,135,251,10.1,2
245 | 1978,0,2,56.695414,'f',0,1,0,0,0.5,223,3.7,39,884,75.95,104,231,9.6,3
246 | 999,1,1,58.951403,'m',0,0,0,0,2.3,316,3.35,172,1601,179.8,63,394,9.7,2
247 | 1967,0,2,36.922656,'f',0,0,0,0,0.7,215,3.35,41,645,93,74,165,9.6,3
248 | 348,1,1,62.414784,'f',1,1,0,0.5,4.5,191,3.05,200,1020,175.15,118,139,11.4,4
249 | 1979,0,2,34.609172,'f',0,1,1,0,3.3,302,3.41,51,310,83.7,44,95,11.5,4
250 | 1165,1,2,58.335387,'f',0,1,1,0,3.4,518,1.96,115,2250,203.05,90,190,10.7,4
251 | 1951,0,1,50.182067,'f',0,1,0,0,0.4,267,3.02,47,1001,133.3,87,265,10.6,3
252 | 1932,0,1,42.685832,'f',0,1,1,0,0.9,514,3.06,412,2622,105.4,87,284,9.8,4
253 | 1776,0,2,34.379192,'f',0,0,0,0,0.9,578,3.35,78,976,116.25,177,322,11.2,2
254 | 1882,0,2,33.182752,'f',0,1,0,0,13,1336,4.16,71,3510,209.25,111,338,11.9,3
255 | 1908,0,1,38.38193,'f',0,1,1,0,1.5,253,3.79,67,1006,139.5,106,341,9.7,3
256 | 1882,0,1,59.761807,'f',0,1,0,0,1.6,442,2.95,105,820,85.25,108,181,10.1,3
257 | 1874,0,2,66.412047,'f',0,0,0,0.5,0.6,280,3.35,?,1093,128.65,81,295,9.8,2
258 | 694,1,1,46.78987,'f',0,1,1,0,0.8,300,2.94,231,1794,130.2,99,319,11.2,4
259 | 1831,0,1,56.079398,'f',0,0,0,0,0.4,232,3.72,24,369,51.15,139,326,10.1,3
260 | 1810,0,1,64.572211,'f',0,1,0,0,1.9,354,2.97,86,1553,196.85,152,277,9.9,3
261 | 930,1,2,67.488022,'f',0,1,0,0,8,468,2.81,139,2009,198.4,139,233,10,4
262 | 1690,1,1,44.829569,'f',0,0,1,0,3.9,350,3.22,121,1268,272.8,231,270,9.6,3
263 | 1790,0,2,45.771389,'f',0,1,0,0,0.6,273,3.65,48,794,52.7,214,305,9.6,3
264 | 1785,0,2,55.416838,'f',0,1,0,0,0.8,324,3.51,39,1237,66.65,146,371,10,3
265 | 1783,0,1,47.980835,'f',0,0,1,0,1.3,242,3.2,35,1556,175.15,71,195,10.6,4
266 | 1769,0,2,40.791239,'f',0,1,0,0,0.6,299,3.36,23,2769,220.1,85,303,10.9,4
267 | 1770,0,1,68.462697,'f',0,1,1,0,1.1,246,3.35,116,924,113.15,90,317,10,4
268 | 1765,0,1,78.439425,'m',1,1,1,0,7.1,243,3.03,380,983,158.1,154,97,11.2,4
269 | 1735,0,2,35.310062,'f',0,1,1,0,0.7,193,3.85,35,466,53,118,156,10.3,3
270 | 179,1,1,70.907598,'f',1,1,1,1,6.6,222,2.33,138,620,106,91,195,12.1,4
271 | 1191,1,1,55.394935,'f',1,1,0,0.5,6.4,344,2.75,16,834,82,179,149,11,4
272 | 41,1,1,65.883641,'f',1,0,0,1,17.9,175,2.1,220,705,338,229,62,12.9,4
273 | 799,1,1,67.572895,'m',0,1,0,0.5,4,416,3.99,177,960,86,242,269,9.8,2
274 | 207,1,2,58.171116,'f',0,1,0,0,5.2,?,2.23,234,601,135,?,206,12.3,4
275 | 4062,0,?,60,'f',?,?,?,0,0.7,?,3.65,?,?,?,?,378,11,?
276 | 3561,1,?,64.999316,'f',?,?,?,0.5,1.4,?,3.04,?,?,?,?,331,12.1,4
277 | 2844,0,?,54.001369,'f',?,?,?,0,0.7,?,4.03,?,?,?,?,226,9.8,4
278 | 2071,1,?,75.000684,'f',?,?,?,0.5,0.7,?,3.96,?,?,?,?,?,11.3,4
279 | 3030,0,?,62.001369,'f',?,?,?,0,0.8,?,2.48,?,?,?,?,273,10,?
280 | 41,1,?,46.001369,'f',?,?,?,0,5,?,2.93,?,?,?,?,260,10.4,?
281 | 2403,0,?,44,'f',?,?,?,0.5,0.4,?,3.81,?,?,?,?,226,10.5,3
282 | 2011,1,?,64,'f',?,?,?,0,1.1,?,3.69,?,?,?,?,139,10.5,?
283 | 3523,0,?,40,'f',?,?,?,0,0.6,?,4.04,?,?,?,?,130,11.2,2
284 | 3468,0,?,63.000684,'f',?,?,?,0,0.6,?,3.94,?,?,?,?,234,11.5,2
285 | 4795,0,?,34.001369,'f',?,?,?,0,1.8,?,3.24,?,?,?,?,?,18,2
286 | 4214,0,?,48.999316,'f',?,?,?,0,1.2,?,3.99,?,?,?,?,?,11.2,2
287 | 2111,1,?,54.001369,'f',?,?,?,0,1,?,3.6,?,?,?,?,?,12.1,2
288 | 1462,1,?,63.000684,'f',?,?,?,0,0.7,?,3.4,?,?,?,?,371,10.1,4
289 | 1746,1,?,54.001369,'m',?,?,?,0,3.5,?,3.63,?,?,?,?,325,10.3,2
290 | 94,1,?,46.001369,'f',?,?,?,0.5,3.1,?,3.56,?,?,?,?,142,13.6,4
291 | 785,1,?,52.999316,'f',?,?,?,0,12.6,?,2.87,?,?,?,?,114,11.8,4
292 | 1518,1,?,56,'f',?,?,?,0,2.8,?,3.92,?,?,?,?,?,10.6,4
293 | 466,1,?,56,'f',?,?,?,0,7.1,?,3.51,?,?,?,?,721,11.8,?
294 | 3527,0,?,55.000684,'f',?,?,?,0,0.6,?,4.15,?,?,?,?,280,10.1,2
295 | 2635,0,?,64.999316,'f',?,?,?,0,2.1,?,3.34,?,?,?,?,155,10.1,4
296 | 2286,1,?,56,'f',?,?,?,0,1.8,?,3.64,?,?,?,?,141,10,?
297 | 791,1,?,47.000684,'f',?,?,?,0,16,?,3.42,?,?,?,?,475,13.8,2
298 | 3492,0,?,60,'f',?,?,?,0,0.6,?,4.38,?,?,?,?,269,10.6,2
299 | 3495,0,?,52.999316,'f',?,?,?,0,5.4,?,4.19,?,?,?,?,141,11.2,2
300 | 111,1,?,54.001369,'f',?,?,?,0,9,?,3.29,?,?,?,?,286,13.1,4
301 | 3231,0,?,50.001369,'f',?,?,?,0,0.9,?,4.01,?,?,?,?,244,10.5,3
302 | 625,1,?,48,'f',?,?,?,0,11.1,?,2.84,?,?,?,?,?,12.2,2
303 | 3157,0,?,36,'f',?,?,?,0,8.9,?,3.76,?,?,?,?,209,10.6,3
304 | 3021,0,?,48,'f',?,?,?,0,0.5,?,3.76,?,?,?,?,388,10.1,2
305 | 559,1,?,70.001369,'f',?,?,?,0.5,0.6,?,3.81,?,?,?,?,160,11,4
306 | 2812,1,?,51.000684,'f',?,?,?,0,3.4,?,3.92,?,?,?,?,?,9.3,2
307 | 2834,0,?,52,'m',?,?,?,0,0.9,?,3.14,?,?,?,?,191,12.3,2
308 | 2855,0,?,54.001369,'f',?,?,?,0,1.4,?,3.82,?,?,?,?,249,10.3,2
309 | 662,1,?,48,'f',?,?,?,0,2.1,?,4.1,?,?,?,?,200,9,3
310 | 727,1,?,66.001369,'f',?,?,?,0,15,?,3.4,?,?,?,?,150,11.1,4
311 | 2716,0,?,52.999316,'f',?,?,?,0,0.6,?,4.19,?,?,?,?,330,9.9,1
312 | 2698,0,?,62.001369,'f',?,?,?,0,1.3,?,3.4,?,?,?,?,167,10.6,4
313 | 990,1,?,59.000684,'f',?,?,?,0,1.3,?,3.12,?,?,?,?,125,9.6,2
314 | 2338,0,?,39.000684,'f',?,?,?,0,1.6,?,3.75,?,?,?,?,145,10.4,3
315 | 1616,1,?,67.000684,'f',?,?,?,0.5,2.2,?,3.26,?,?,?,?,171,11.1,4
316 | 2563,0,?,58.001369,'f',?,?,?,0,3,?,3.46,?,?,?,?,109,10.4,4
317 | 2537,0,?,64,'f',?,?,?,0,0.8,?,3.49,?,?,?,?,314,10.3,3
318 | 2534,0,?,46.001369,'f',?,?,?,0,0.8,?,2.89,?,?,?,?,419,?,1
319 | 778,1,?,64,'f',?,?,?,0,1.8,?,3.15,?,?,?,?,183,10.4,4
320 | 2267,0,?,48.999316,'f',?,?,?,0,18,?,3.04,?,?,?,?,432,9.7,2
321 | 2249,0,?,44,'f',?,?,?,0,0.6,?,3.5,?,?,?,?,150,9.9,3
322 | 359,1,?,59.000684,'f',?,?,?,0,2.7,?,3.35,?,?,?,?,142,11.5,4
323 | 1925,0,?,63.000684,'f',?,?,?,0,0.9,?,3.58,?,?,?,?,224,10,3
324 | 249,1,?,60.999316,'f',?,?,?,0,1.3,?,3.01,?,?,?,?,223,10.7,3
325 | 2202,0,?,64,'f',?,?,?,0,1.1,?,3.49,?,?,?,?,166,9.8,3
326 | 43,1,?,48.999316,'f',?,?,?,0,13.8,?,2.77,?,?,?,?,388,?,4
327 | 1197,1,?,42.001369,'f',?,?,?,0,4.4,?,4.52,?,?,?,?,102,10.8,4
328 | 1095,1,?,50.001369,'f',?,?,?,0,16,?,3.36,?,?,?,?,384,10,3
329 | 489,1,?,51.000684,'f',?,?,?,0.5,7.3,?,3.52,?,?,?,?,265,11.1,1
330 | 2149,0,?,36.999316,'f',?,?,?,0,0.6,?,3.55,?,?,?,?,248,10.3,2
331 | 2103,0,?,62.001369,'f',?,?,?,0,0.7,?,3.29,?,?,?,?,190,9.8,2
332 | 1980,0,?,51.000684,'f',?,?,?,0,0.7,?,3.1,?,?,?,?,274,10.6,3
333 | 1478,1,?,44,'m',?,?,?,0,9.5,?,3.63,?,?,?,?,292,10.2,3
334 | 1987,0,?,32.999316,'f',?,?,?,0,2.2,?,3.76,?,?,?,?,253,9.9,3
335 | 1168,1,?,60,'f',?,?,?,0.5,1.8,?,3.62,?,?,?,?,225,9.9,2
336 | 597,1,?,63.000684,'f',?,?,?,0.5,3.3,?,2.73,?,?,?,?,224,11.1,4
337 | 1899,0,?,40.999316,'m',?,?,?,0,1.7,?,3.66,?,?,?,?,92,11,4
338 | 221,1,?,51.000684,'f',?,?,?,0,14,?,2.58,?,?,?,?,190,11.6,4
339 | 193,1,?,52,'f',?,?,?,0.5,0.7,?,2.96,?,?,?,?,319,9.9,4
340 | 935,1,?,68.999316,'f',?,?,?,0,4.2,?,3.19,?,?,?,?,120,11.1,4
341 | 703,1,?,46.001369,'f',?,?,?,0,4.5,?,2.68,?,?,?,?,219,11.5,4
342 | 681,1,?,67.000684,'f',?,?,?,0,1.2,?,2.96,?,?,?,?,174,10.9,3
343 | 


--------------------------------------------------------------------------------
/tests/resources/survival/results.csv:
--------------------------------------------------------------------------------
1 | conditions;importances
2 | bili = <2.4, inf);156.62083444387682
3 | bili = <2.15, inf);153.35066738474507
4 | bili = <1.95, inf);146.35072783977444
5 | bili = (-inf, 1.95);146.35072783977438
6 | 


--------------------------------------------------------------------------------
/tests/resources/survival/results_split.csv:
--------------------------------------------------------------------------------
1 | conditions;importances
2 | bili = <2.4, inf);411.736
3 | bili = (-inf, 1.95);146.3507
4 | bili = <2.15, 2.4);-1.3703
5 | bili = <1.95, 2.15);-2.0269
6 | 


--------------------------------------------------------------------------------
/tests/test_conditions_importances.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import rulekit
  3 | 
  4 | from rulekit.main import RuleKit
  5 | import pandas as pd
  6 | from rulekit import RuleKit
  7 | from rulekit.classification import RuleClassifier
  8 | from rulekit.regression import RuleRegressor
  9 | from rulekit.survival import SurvivalRules
 10 | from rulekit.params import Measures
 11 | from scipy.io import arff
 12 | import numpy as np
 13 | import os 
 14 | from pathlib import Path
 15 | from rulexai.explainer import RuleExplainer
 16 | 
 17 | class TestConditionsImportances(unittest.TestCase):
 18 | 
 19 |     @classmethod
 20 |     def setUpClass(cls):
 21 |         RuleKit.init()
 22 | 
 23 |     
 24 |     def test_classification(self):
 25 |         classification_resources = "resources/classification/"
 26 |         dataset_path = classification_resources + "iris.arff"
 27 |         results_path = classification_resources + "results.csv" 
 28 |         results_split_path = classification_resources + "results_split.csv" 
 29 |         dataset_path = os.path.join(Path(__file__).resolve().parent, dataset_path)
 30 |         results_path = os.path.join(Path(__file__).resolve().parent, results_path)
 31 |         results_split_path = os.path.join(Path(__file__).resolve().parent, results_split_path)
 32 | 
 33 |         train_df = pd.DataFrame(arff.loadarff(dataset_path)[0])
 34 | 
 35 |         # code to change encoding of the file
 36 |         tmp_df = train_df.select_dtypes([object])
 37 |         tmp_df = tmp_df.stack().str.decode("utf-8").unstack()
 38 |         for col in tmp_df:
 39 |             train_df[col] = tmp_df[col].replace({"?": None})
 40 | 
 41 |         x = train_df.drop(["class"], axis=1)
 42 |         y = train_df["class"]
 43 | 
 44 |         # RuleKit
 45 |         clf = RuleClassifier(
 46 |             induction_measure=Measures.C2,
 47 |             pruning_measure=Measures.C2,
 48 |             voting_measure=Measures.C2,
 49 |         )
 50 |         clf.fit(x, y)
 51 | 
 52 | 
 53 |         # RuleXai
 54 |         explainer = RuleExplainer(clf, x, y, type="classification")
 55 |         explainer.explain()
 56 |         conditions_importances = explainer.condition_importances_
 57 |         conditions_importances.replace("-", np.nan, inplace = True)
 58 |         conditions_importances = conditions_importances.round(4)
 59 | 
 60 |         importances_ground_truth = pd.read_csv(results_path, sep = ";")
 61 |         importances_ground_truth = importances_ground_truth.round(4)
 62 | 
 63 |         # with splitting conditions to basic
 64 |         explainer.explain(basic_conditions=True)
 65 |         basic_conditions_importances= explainer.condition_importances_
 66 |         basic_conditions_importances.replace("-", np.nan, inplace = True)
 67 |         basic_conditions_importances = basic_conditions_importances.round(4)
 68 | 
 69 |         basic_importances_ground_truth = pd.read_csv(results_split_path, sep = ";")
 70 |         basic_importances_ground_truth = basic_importances_ground_truth.round(4)
 71 | 
 72 |         assert conditions_importances.equals(importances_ground_truth)
 73 |         assert basic_conditions_importances.equals(basic_importances_ground_truth)
 74 | 
 75 | 
 76 |     def test_regression(self):
 77 | 
 78 |         regression_resources = "resources/regression/"
 79 |         dataset_path = regression_resources + "diabetes.arff"
 80 |         results_path = regression_resources + "results.csv" 
 81 |         results_split_path = regression_resources + "results_split.csv" 
 82 |         dataset_path = os.path.join(Path(__file__).resolve().parent, dataset_path)
 83 |         results_path = os.path.join(Path(__file__).resolve().parent, results_path)
 84 |         results_split_path = os.path.join(Path(__file__).resolve().parent, results_split_path)
 85 | 
 86 |         train_df = pd.DataFrame(arff.loadarff(dataset_path)[0])
 87 | 
 88 |         # code to change encoding of the file
 89 |         tmp_df = train_df.select_dtypes([object])
 90 |         if (tmp_df.size != 0):  
 91 |             tmp_df = tmp_df.stack().str.decode("utf-8").unstack()
 92 |             for col in tmp_df:
 93 |                 train_df[col] = tmp_df[col].replace({"?": None})
 94 | 
 95 |         x = train_df.drop(["class"], axis=1)
 96 |         y = train_df["class"]
 97 | 
 98 |         df = train_df
 99 | 
100 |         # RuleKit
101 |         reg = RuleRegressor(
102 |             induction_measure=Measures.C2,
103 |             pruning_measure=Measures.C2,
104 |             voting_measure=Measures.C2,
105 |             mean_based_regression = False
106 |         )
107 |         reg.fit(x, y)
108 | 
109 | 
110 |         # RuleXai
111 |         explainer = RuleExplainer(reg, x, y, type="regression")
112 |         explainer.explain()
113 |         conditions_importances = explainer.condition_importances_
114 |         conditions_importances = conditions_importances.round(4)
115 | 
116 |         importances_ground_truth = pd.read_csv(results_path, sep = ";")
117 |         importances_ground_truth = importances_ground_truth.round(4)
118 | 
119 |         # with splitting conditions to basic
120 |         explainer.explain(basic_conditions=True)
121 |         basic_conditions_importances= explainer.condition_importances_
122 |         basic_conditions_importances = basic_conditions_importances.round(4)
123 | 
124 |         basic_importances_ground_truth = pd.read_csv(results_split_path, sep = ";")
125 |         basic_importances_ground_truth = basic_importances_ground_truth.round(4)
126 | 
127 |         assert conditions_importances.equals(importances_ground_truth)
128 |         assert basic_conditions_importances.equals(basic_importances_ground_truth)
129 | 
130 | 
131 |  
132 | 
# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()


--------------------------------------------------------------------------------
/tests/test_functionalities.py:
--------------------------------------------------------------------------------
  1 | from rulexai import rule
  2 | import unittest
  3 | import rulekit
  4 | from rulekit.main import RuleKit
  5 | from rulexai.explainer import RuleExplainer
  6 | from sklearn.datasets import load_iris
  7 | import pandas as pd
  8 | from rulekit.classification import RuleClassifier
  9 | from rulekit.params import Measures
 10 | from sklearn.tree import DecisionTreeClassifier
 11 | from Orange.data.table import Table
 12 | from Orange.data.pandas_compat import table_to_frame
 13 | from Orange.classification import CN2UnorderedLearner
 14 | 
 15 | import os 
 16 | from pathlib import Path
 17 | import numpy as np
 18 | 
 19 | class TestFunctionalities(unittest.TestCase):
 20 | 
 21 |     @classmethod
 22 |     def setUpClass(cls):
 23 |         RuleKit.init()
 24 | 
 25 |     def test_creating_model_with_rules_RuleKit(self):
 26 |         data = load_iris()
 27 |         x = pd.DataFrame(data.data, columns=data.feature_names)
 28 |         y = pd.DataFrame(data.target.astype(str), columns=["label"]) 
 29 | 
 30 |         clf = RuleClassifier(
 31 |             induction_measure=Measures.C2,
 32 |             pruning_measure=Measures.C2,
 33 |             voting_measure=Measures.C2,
 34 |         )
 35 |         clf.fit(x, data.target.astype(str))
 36 | 
 37 |         explainer = RuleExplainer(clf, x, y, type="classification")
 38 | 
 39 |         assert len(clf.model.rules) == len(explainer.model.rules)
 40 | 
 41 |     
 42 |     def test_creating_model_with_rules_Sklearn(self):
 43 |         clf = DecisionTreeClassifier(random_state=0)
 44 |         data = load_iris()
 45 |         clf.fit(data.data, data.target)
 46 | 
 47 |         x = pd.DataFrame(data.data, columns=data.feature_names)
 48 |         y = pd.DataFrame(data.target.astype(str), columns=["class"]) 
 49 | 
 50 |         explainer = RuleExplainer(clf, x, y, type="classification")
 51 |         
 52 |         assert len(explainer.model.rules) > 0 
 53 | 
 54 |     def test_creating_model_with_rules_Orange(self):
 55 |         data = Table("iris")
 56 |         learner = CN2UnorderedLearner()
 57 |         clf = learner(data)
 58 |         df= table_to_frame(data)
 59 |         x = df.drop(['iris'], axis=1)
 60 |         y = df['iris']
 61 | 
 62 |         explainer = RuleExplainer(clf, x, y, type="classification")
 63 |         
 64 |         assert len(explainer.model.rules) > 0 
 65 | 
 66 | 
 67 |     def test_features_importances(self):
 68 | 
 69 |         results_path = "resources/features_importances.csv"
 70 |         results_path = os.path.join(Path(__file__).resolve().parent, results_path)
 71 | 
 72 |         data = load_iris()
 73 |         x = pd.DataFrame(data.data, columns=data.feature_names)
 74 |         y = pd.DataFrame(data.target.astype(str), columns=["label"]) 
 75 | 
 76 |         clf = RuleClassifier(
 77 |             induction_measure=Measures.C2,
 78 |             pruning_measure=Measures.C2,
 79 |             voting_measure=Measures.C2,
 80 |         )
 81 |         clf.fit(x, data.target.astype(str))
 82 | 
 83 |         explainer = RuleExplainer(clf, x, y, type="classification")
 84 |         explainer.explain()
 85 | 
 86 |         features_importances = explainer.feature_importances_
 87 |         features_importances.replace("-", np.nan, inplace = True)
 88 |         features_importances = features_importances.round(4)
 89 | 
 90 | 
 91 |         features_importances_ground_truth = pd.read_csv(results_path, sep = ";")
 92 |         features_importances_ground_truth.replace("-", np.nan, inplace = True)
 93 |         features_importances_ground_truth = features_importances_ground_truth.round(4)
 94 | 
 95 |         assert features_importances.equals(features_importances_ground_truth)
 96 | 
 97 | 
 98 |     def test_prepare_binary_dataset(self):
 99 |         data = load_iris()
100 |         x = pd.DataFrame(data.data, columns=data.feature_names)
101 |         y = pd.DataFrame(data.target.astype(str), columns=["label"]) 
102 | 
103 |         clf = RuleClassifier(
104 |             induction_measure=Measures.C2,
105 |             pruning_measure=Measures.C2,
106 |             voting_measure=Measures.C2,
107 |         )
108 |         clf.fit(x, data.target.astype(str))
109 | 
110 |         explainer = RuleExplainer(clf, x, y, type="classification")
111 |         explainer.explain()
112 | 
113 |         binary_dataset = explainer.fit_transform(x)
114 |         binary_dataset_10 = explainer.transform(x.iloc[0:10])
115 | 
116 |  
117 |         binary_dataset_TOP_50 = explainer.fit_transform(x, selector=0.5)
118 | 
119 |         assert binary_dataset.shape[0] == x.shape[0]
120 |         assert binary_dataset_10.shape[0] == 10
121 |         assert binary_dataset.shape[1] == 14
122 |         assert binary_dataset_TOP_50.shape[0] == x.shape[0]
123 |         assert binary_dataset_TOP_50.shape[1] == 7
124 | 
125 | 
126 | 
127 |     def test_get_rules_covering_example_and_get_rules(self):
128 |         data = load_iris()
129 |         x = pd.DataFrame(data.data, columns=data.feature_names)
130 |         y = pd.DataFrame(data.target.astype(str), columns=["label"]) 
131 | 
132 |         clf = RuleClassifier(
133 |             induction_measure=Measures.C2,
134 |             pruning_measure=Measures.C2,
135 |             voting_measure=Measures.C2,
136 |         )
137 |         clf.fit(x, data.target.astype(str))
138 | 
139 |         explainer = RuleExplainer(clf, x, y, type="classification")
140 | 
141 |         rules_covering_example_0 = explainer.get_rules_covering_example(x.iloc[0,:], y.iloc[0,:])
142 |         rules_covering_example_100 = explainer.get_rules_covering_example(x.iloc[100,:], y.iloc[100,:])
143 | 
144 |         assert len(rules_covering_example_0) == 1
145 |         assert len(rules_covering_example_100) == 2
146 |         assert len(explainer.get_rules()) == len(explainer.get_rules_with_basic_conditions())
147 |         assert len(explainer.get_rules()) == len(clf.model.rules)
148 | 
149 | 
# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()


--------------------------------------------------------------------------------