├── papers └── readit.md ├── ycimpute ├── nn │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── gainnets.cpython-37.pyc │ │ └── autoencoder.cpython-37.pyc │ ├── autoencoder.py │ └── gainnets.py ├── tree │ ├── __init__.py │ └── tree.py ├── unsupervised │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-37.pyc │ └── knn │ │ ├── __pycache__ │ │ ├── common.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── reference.cpython-37.pyc │ │ ├── optimistic.cpython-37.pyc │ │ ├── argpartition.cpython-37.pyc │ │ ├── normalized_distance.cpython-37.pyc │ │ └── few_observed_entries.cpython-37.pyc │ │ ├── __init__.py │ │ ├── common.py │ │ ├── reference.py │ │ ├── few_observed_entries.py │ │ ├── argpartition.py │ │ ├── normalized_distance.py │ │ └── optimistic.py ├── utils │ ├── __init__.py │ ├── evaluator │ │ ├── __init__.py │ │ └── evaluate_em.py │ ├── shower │ │ ├── test_show.py │ │ ├── __init__.py │ │ └── show.py │ ├── __pycache__ │ │ ├── tools.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ └── normalizer.cpython-37.pyc │ ├── evaluate.py │ ├── normalizer.py │ └── tools.py ├── esemble │ ├── __init__.py │ └── random_forest.py ├── datasets │ ├── iris.hdf5 │ ├── wine.hdf5 │ ├── boston.hdf5 │ ├── load_data.py │ └── dpath.py ├── __pycache__ │ └── __init__.cpython-37.pyc ├── imputer │ ├── __pycache__ │ │ ├── gain.cpython-37.pyc │ │ ├── mice.cpython-37.pyc │ │ ├── mida.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── knnimput.cpython-37.pyc │ │ ├── iterforest.cpython-37.pyc │ │ └── expectation_maximization.cpython-37.pyc │ ├── __init__.py │ ├── expectation_maximization.py │ ├── knnimput.py │ ├── mida.py │ ├── gain.py │ ├── mice.py │ └── iterforest.py ├── __init__.py ├── doc_zh_cn.ipynb └── doc.ipynb ├── test_data └── readme.md ├── img ├── readme.md ├── 1.png ├── 2.png ├── 3.png ├── WINE.svg └── IRIS.svg ├── requirements.txt ├── test ├── metrics.py ├── test_em.py ├── test_gain.py ├── test_knn.py ├── test_mida.py ├── test_mice.py ├── test_missforest.py └── generate_data.py ├── setup.py ├── README.md ├── doc_eng.md └── LICENSE /papers/readit.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /ycimpute/nn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test_data/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /ycimpute/tree/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ycimpute/unsupervised/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ycimpute/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /ycimpute/utils/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ycimpute/utils/shower/test_show.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /img/readme.md: -------------------------------------------------------------------------------- 1 | ......................... 2 | -------------------------------------------------------------------------------- /ycimpute/esemble/__init__.py: -------------------------------------------------------------------------------- 1 | from ..tree import tree 2 | 3 | __all__=["tree"] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn>=0.17.1 2 | numpy 3 | pandas 4 | torch>=1.1.0 5 | -------------------------------------------------------------------------------- /img/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/img/1.png -------------------------------------------------------------------------------- /img/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/img/2.png -------------------------------------------------------------------------------- /img/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/img/3.png -------------------------------------------------------------------------------- /ycimpute/datasets/iris.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/datasets/iris.hdf5 -------------------------------------------------------------------------------- /ycimpute/datasets/wine.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/datasets/wine.hdf5 -------------------------------------------------------------------------------- /ycimpute/datasets/boston.hdf5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/datasets/boston.hdf5 -------------------------------------------------------------------------------- /ycimpute/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/nn/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/nn/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/nn/__pycache__/gainnets.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/nn/__pycache__/gainnets.cpython-37.pyc -------------------------------------------------------------------------------- 
/ycimpute/utils/__pycache__/tools.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/utils/__pycache__/tools.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/imputer/__pycache__/gain.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/imputer/__pycache__/gain.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/imputer/__pycache__/mice.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/imputer/__pycache__/mice.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/imputer/__pycache__/mida.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/imputer/__pycache__/mida.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/imputer/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/imputer/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/imputer/__pycache__/knnimput.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/imputer/__pycache__/knnimput.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/nn/__pycache__/autoencoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/nn/__pycache__/autoencoder.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/utils/__pycache__/normalizer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/utils/__pycache__/normalizer.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/imputer/__pycache__/iterforest.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/imputer/__pycache__/iterforest.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/unsupervised/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/unsupervised/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/__pycache__/common.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/unsupervised/knn/__pycache__/common.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/unsupervised/knn/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/__pycache__/reference.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/unsupervised/knn/__pycache__/reference.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/__pycache__/optimistic.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/unsupervised/knn/__pycache__/optimistic.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/__pycache__/argpartition.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/unsupervised/knn/__pycache__/argpartition.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/imputer/__pycache__/expectation_maximization.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/imputer/__pycache__/expectation_maximization.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/__pycache__/normalized_distance.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/unsupervised/knn/__pycache__/normalized_distance.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/__pycache__/few_observed_entries.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/ycimpute/unsupervised/knn/__pycache__/few_observed_entries.cpython-37.pyc -------------------------------------------------------------------------------- /ycimpute/utils/shower/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | from ...imputer.mice import MICE 3 | from ...imputer.knnimput import KNN 4 | from ...imputer.iterforest import IterImput 5 | from ...imputer.simple import SimpleFill 6 | 7 | __all__=["MICE", 8 | "KNN", 9 | "IterImput", 10 | "SimpleFill"] 11 | """ 
-------------------------------------------------------------------------------- /ycimpute/imputer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | #from ..unsupervised.expectation_maximization import EM 3 | from .mice import MICE 4 | from .iterforest import MissForest 5 | from .expectation_maximization import EM 6 | from .knnimput import KNN 7 | from .mida import MIDA 8 | from .gain import GAIN 9 | #from .simple import SimpleFill 10 | 11 | __all__=['MICE', 12 | 'MissForest', 13 | 'EM', 14 | 'KNN', 15 | 'MIDA', 16 | 'GAIN'] -------------------------------------------------------------------------------- /test/metrics.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from sklearn.metrics import accuracy_score 5 | 6 | 7 | def get_missing_index(mask_all): 8 | return np.where(mask_all==True) 9 | 10 | 11 | def accuracy(original, filled): 12 | score = accuracy_score(original, filled) 13 | return score 14 | 15 | def RMSE(original, filled): 16 | from sklearn.metrics import mean_squared_error 17 | score = np.sqrt(mean_squared_error(original, filled)) 18 | return score 19 | -------------------------------------------------------------------------------- /ycimpute/utils/evaluate.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from sklearn.metrics import accuracy_score 5 | 6 | def get_missing_index(mask_all): 7 | return np.where(mask_all==True) 8 | 9 | 10 | def accuracy(original, filled): 11 | score = accuracy_score(original, filled) 12 | return score 13 | 14 | def RMSE(original, filled): 15 | from sklearn.metrics import mean_squared_error 16 | score = np.sqrt(mean_squared_error(original, filled)) 17 | return score 18 | -------------------------------------------------------------------------------- /test/test_em.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from ycimpute.imputer import EM 5 | from .metrics import RMSE 6 | from .generate_data import missing_data,missing_mask,complete_data 7 | from ycimpute.utils.normalizer import min_max_scale 8 | 9 | def test_em(): 10 | X_filled = EM().complete(missing_data) 11 | complete_data_, _, _ = min_max_scale(complete_data) 12 | X_filled, _, _ = min_max_scale(X_filled) 13 | 14 | score = RMSE(complete_data_[missing_mask], 15 | X_filled[missing_mask]) 16 | print(score) 17 | -------------------------------------------------------------------------------- /test/test_gain.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from ycimpute.imputer import GAIN 5 | from .metrics import RMSE 6 | from .generate_data import missing_mask,missing_data,complete_data 7 | 8 | from ycimpute.utils.normalizer import min_max_scale 9 | def test_gain(): 10 | X_filled = GAIN().complete(missing_data) 11 | complete_data_, _, _ = min_max_scale(complete_data) 12 | X_filled, _, _ = min_max_scale(X_filled) 13 | 14 | score = RMSE(complete_data_[missing_mask], 15 | X_filled[missing_mask]) 16 | print(score) 17 | -------------------------------------------------------------------------------- /test/test_knn.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from ycimpute.imputer import KNN 5 | from .metrics import RMSE 6 | from 
.generate_data import missing_mask,missing_data,complete_data 7 | from ycimpute.utils.normalizer import min_max_scale 8 | 9 | def test_knn(): 10 | X_filled = KNN().complete(missing_data) 11 | complete_data_,_ ,_ = min_max_scale(complete_data) 12 | X_filled , _ ,_ = min_max_scale(X_filled) 13 | 14 | score = RMSE(complete_data_[missing_mask], 15 | X_filled[missing_mask]) 16 | print(score) 17 | -------------------------------------------------------------------------------- /test/test_mida.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from ycimpute.imputer import MIDA 5 | from .metrics import RMSE 6 | from .generate_data import missing_mask,missing_data,complete_data 7 | 8 | from ycimpute.utils.normalizer import min_max_scale 9 | def test_mida(): 10 | X_filled = MIDA().complete(missing_data) 11 | complete_data_, _, _ = min_max_scale(complete_data) 12 | X_filled, _, _ = min_max_scale(X_filled) 13 | 14 | score = RMSE(complete_data_[missing_mask], 15 | X_filled[missing_mask]) 16 | print(score) 17 | -------------------------------------------------------------------------------- /test/test_mice.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from ycimpute.imputer import MICE 5 | from .metrics import RMSE 6 | from .generate_data import missing_mask,missing_data,complete_data 7 | 8 | from ycimpute.utils.normalizer import min_max_scale 9 | 10 | def test_mice(): 11 | X_filled = MICE().complete(missing_data) 12 | complete_data_, _, _ = min_max_scale(complete_data) 13 | X_filled, _, _ = min_max_scale(X_filled) 14 | 15 | score = RMSE(complete_data_[missing_mask], 16 | X_filled[missing_mask]) 17 | print(score) 18 | 19 | -------------------------------------------------------------------------------- /test/test_missforest.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from ycimpute.imputer import MissForest 5 | from .metrics import RMSE 6 | from .generate_data import missing_data,missing_mask,complete_data 7 | from ycimpute.utils.normalizer import min_max_scale 8 | 9 | def test_missforest(): 10 | X_filled = MissForest().complete(missing_data) 11 | complete_data_, _, _ = min_max_scale(complete_data) 12 | X_filled, _, _ = min_max_scale(X_filled) 13 | 14 | score = RMSE(complete_data_[missing_mask], 15 | X_filled[missing_mask]) 16 | print(score) 17 | 18 | -------------------------------------------------------------------------------- /ycimpute/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright, the CVXPY authors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | from ycimpute.imputer import * 18 | 19 | -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/__init__.py: -------------------------------------------------------------------------------- 1 | from .normalized_distance import ( 2 | all_pairs_normalized_distances, 3 | all_pairs_normalized_distances_reference 4 | ) 5 | from .reference import knn_impute_reference 6 | from .optimistic import knn_impute_optimistic 7 | from .common import knn_initialize 8 | from .few_observed_entries import knn_impute_few_observed 9 | from .argpartition import knn_impute_with_argpartition 10 | 11 | __version__ = "0.1.0" 12 | 13 | __all__ = [ 14 | "all_pairs_normalized_distances", 15 | "all_pairs_normalized_distances_reference", 16 | "knn_initialize", 17 | "knn_impute_reference", 18 | "knn_impute_optimistic", 19 | "knn_impute_few_observed", 20 | "knn_impute_with_argpartition", 21 | ] -------------------------------------------------------------------------------- /ycimpute/doc_zh_cn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [] 11 | } 12 | ], 13 | "metadata": { 14 | "kernelspec": { 15 | "display_name": "Python 3", 16 | "language": "python", 17 | "name": "python3" 18 | }, 19 | "language_info": { 20 | "codemirror_mode": { 21 | "name": "ipython", 22 | "version": 3 23 | }, 24 | "file_extension": ".py", 25 | "mimetype": "text/x-python", 26 | "name": "python", 27 | "nbconvert_exporter": "python", 28 | "pygments_lexer": "ipython3", 29 | "version": "3.6.3" 30 | } 31 | }, 32 | "nbformat": 4, 33 | "nbformat_minor": 2 34 | } 35 | -------------------------------------------------------------------------------- /ycimpute/utils/evaluator/evaluate_em.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import numpy as np 3 | 4 | from ycimpute.utils import evaluate 5 | from ycimpute.imputer import EM 6 | from ycimpute.datasets import load_data 7 | 8 | class Evaluate(object): 9 | def __init__(self): 10 | pass 11 | 12 | def evaluate(self, X_mis,X_full): 13 | missing_index = evaluate.get_missing_index(np.isnan(X_mis)) 14 | original_arr = X_full[missing_index] 15 | em_X_filled = EM().complete(copy.copy(X_mis)) 16 | em_filled_arr = em_X_filled[missing_index] 17 | rmse_em_score = evaluate.RMSE(original_arr, em_filled_arr) 18 | return rmse_em_score 19 | 20 | if __name__ == '__main__': 21 | boston_mis, boston_full = load_data.load_boston() 22 | iris_mis, iris_ful = load_data.load_iris() 23 | 24 | boston_score = Evaluate().evaluate(boston_mis, boston_full) 25 | iris_score = Evaluate().evaluate(iris_mis, iris_ful) 26 | print(boston_score) 27 | print(iris_score) -------------------------------------------------------------------------------- /ycimpute/utils/normalizer.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import numpy as np 4 | 5 | def min_max_scale(x): 6 | cols = x.shape[1] 7 | min_record = [] 8 | max_record = [] 9 | 10 | for col in range(cols): 11 | min_val= np.min(x[:,col]) 12 | max_val = np.max(x[:,col]) 13 | x[:,col] = (x[:,col] - min_val)/(max_val - min_val) 14 | min_record.append(min_val) 15 | max_record.append(max_val) 16 | 17 | return x, min_record,max_record 18 | 19 | def zero_score_scale(x): 20 | cols = x.shape[1] 21 | for col in range(cols): 22 
| x[:,col] = (x[:,col]-np.mean(x[:,col]))/(np.std(x[:,col])) 23 | 24 | return x 25 | 26 | def min_max_recover(X, min_vec, max_vec): 27 | cols = X.shape[1] 28 | for col in range(cols): 29 | X[:,col] = X[:,col]*(max_vec[col]-min_vec[col])+min_vec[col] 30 | return X 31 | 32 | 33 | NORMALIZERS = {'min_max':min_max_scale, 34 | 'zero_score':zero_score_scale} 35 | 36 | RECOVER = {'min_max':min_max_recover} -------------------------------------------------------------------------------- /ycimpute/nn/autoencoder.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | 4 | class Autoencoder(nn.Module): 5 | def __init__(self, dim,theta): 6 | super(Autoencoder, self).__init__() 7 | self.dim = dim 8 | 9 | self.drop_out = nn.Dropout(p=0.1) 10 | 11 | self.encoder = nn.Sequential( 12 | nn.Linear(dim + theta * 0, dim + theta * 1), 13 | nn.Tanh(), 14 | nn.Linear(dim + theta * 1, dim + theta * 2), 15 | nn.Tanh(), 16 | nn.Linear(dim + theta * 2, dim + theta * 3) 17 | ) 18 | 19 | self.decoder = nn.Sequential( 20 | nn.Linear(dim + theta * 3, dim + theta * 2), 21 | nn.Tanh(), 22 | nn.Linear(dim + theta * 2, dim + theta * 1), 23 | nn.Tanh(), 24 | nn.Linear(dim + theta * 1, dim + theta * 0) 25 | ) 26 | 27 | def forward(self, x): 28 | x = x.view(-1, self.dim) 29 | x_missed = self.drop_out(x) 30 | 31 | z = self.encoder(x_missed) 32 | out = self.decoder(z) 33 | 34 | out = out.view(-1, self.dim) 35 | 36 | return out -------------------------------------------------------------------------------- /ycimpute/doc.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Welcome to ycimpute!" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": { 13 | "collapsed": true 14 | }, 15 | "source": [ 16 | "## ycimpute Overview\n", 17 | "### yc impute is a high-level missing value imputation methods collection API. 
It's written in Python and capable of running on [scikit-learn](http://scikit-learn.org/stable/)\n", 18 | "### It was developed with a focus on solving the common problem of " 19 | ] 20 | } 21 | ], 22 | "metadata": { 23 | "kernelspec": { 24 | "display_name": "Python 3", 25 | "language": "python", 26 | "name": "python3" 27 | }, 28 | "language_info": { 29 | "codemirror_mode": { 30 | "name": "ipython", 31 | "version": 3 32 | }, 33 | "file_extension": ".py", 34 | "mimetype": "text/x-python", 35 | "name": "python", 36 | "nbconvert_exporter": "python", 37 | "pygments_lexer": "ipython3", 38 | "version": "3.6.3" 39 | } 40 | }, 41 | "nbformat": 4, 42 | "nbformat_minor": 2 43 | } 44 | -------------------------------------------------------------------------------- /ycimpute/datasets/load_data.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | from os.path import dirname 3 | 4 | import numpy as np 5 | 6 | def load_iris(): 7 | abs_path = dirname(__file__).__add__('/iris.hdf5') 8 | try: 9 | file = h5py.File(abs_path,'r') 10 | missing_x = file['missing'] 11 | original_x = file['full'] 12 | return np.asarray(missing_x), np.asarray(original_x) 13 | except: 14 | file.close() 15 | raise IOError("can't load data") 16 | 17 | def load_boston(): 18 | abs_path = dirname(__file__).__add__('/boston.hdf5') 19 | try: 20 | file = h5py.File(abs_path,'r') 21 | missing_x = file['missing'] 22 | original_x = file['full'] 23 | return np.asarray(missing_x), np.asarray(original_x) 24 | except: 25 | file.close() 26 | raise IOError("can't load data") 27 | 28 | 29 | 30 | def load_wine(): 31 | abs_path = dirname(__file__).__add__('/wine.hdf5') 32 | try: 33 | file = h5py.File(abs_path,'r') 34 | missing_x = file['missing'] 35 | original_x = file['full'] 36 | return np.asarray(missing_x), np.asarray(original_x) 37 | except: 38 | file.close() 39 | raise IOError("can't load data") -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/common.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | import numpy as np 6 | 7 | from .normalized_distance import all_pairs_normalized_distances 8 | 9 | 10 | def knn_initialize( 11 | X, 12 | missing_mask, 13 | min_dist=1e-6, 14 | max_dist_multiplier=1e6): 15 | """ 16 | Fill X with NaN values if necessary, construct the n_samples x n_samples 17 | distance matrix and set the self-distance of each row to infinity. 18 | 19 | Returns contents of X laid out in row-major, the distance matrix, 20 | and an "effective infinity" which is larger than any entry of the 21 | distance matrix.
22 | """ 23 | X_row_major = X.copy("C") 24 | if missing_mask.sum() != np.isnan(X_row_major).sum(): 25 | # if the missing values have already been zero-filled we need 26 | # to put NaN's back in the data matrix for the distances function 27 | X_row_major[missing_mask] = np.nan 28 | D = all_pairs_normalized_distances(X_row_major) 29 | D_finite_flat = D[np.isfinite(D)] 30 | if len(D_finite_flat) > 0: 31 | max_dist = max_dist_multiplier * max(1, D_finite_flat.max()) 32 | else: 33 | max_dist = max_dist_multiplier 34 | # set diagonal of distance matrix to a large value since we don't want 35 | # points considering themselves as neighbors 36 | np.fill_diagonal(D, max_dist) 37 | D[D < min_dist] = min_dist # prevents 0s 38 | D[D > max_dist] = max_dist # prevents infinities 39 | return X_row_major, D, max_dist 40 | -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/reference.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import absolute_import, print_function, division 3 | 4 | import numpy as np 5 | from six.moves import range 6 | 7 | from .common import knn_initialize 8 | 9 | def knn_impute_reference( 10 | X, 11 | missing_mask, 12 | k, 13 | verbose=False, 14 | print_interval=100): 15 | """ 16 | Reference implementation of kNN imputation logic. 17 | """ 18 | n_rows, n_cols = X.shape 19 | X_result, D, effective_infinity = \ 20 | knn_initialize(X, missing_mask) 21 | 22 | for i in range(n_rows): 23 | for j in np.where(missing_mask[i, :])[0]: 24 | distances = D[i, :].copy() 25 | 26 | # any rows that don't have the value we're currently trying 27 | # to impute are set to infinite distances 28 | distances[missing_mask[:, j]] = effective_infinity 29 | neighbor_indices = np.argsort(distances) 30 | neighbor_distances = distances[neighbor_indices] 31 | 32 | # get rid of any infinite distance neighbors in the top k 33 | valid_distances = neighbor_distances < effective_infinity 34 | neighbor_distances = neighbor_distances[valid_distances][:k] 35 | neighbor_indices = neighbor_indices[valid_distances][:k] 36 | 37 | weights = 1.0 / neighbor_distances 38 | weight_sum = weights.sum() 39 | 40 | if weight_sum > 0: 41 | column = X[:, j] 42 | values = column[neighbor_indices] 43 | X_result[i, j] = np.dot(values, weights) / weight_sum 44 | return X_result 45 | -------------------------------------------------------------------------------- /ycimpute/datasets/dpath.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import h5py 4 | 5 | 6 | 7 | def make_missing(npdata): 8 | import random 9 | import numpy as np 10 | rows, cols = npdata.shape 11 | random_cols = range(cols) 12 | for col in random_cols: 13 | random_rows = random.sample(range(rows - 1), int(0.1 * rows)) 14 | npdata[random_rows, col] = np.nan 15 | return npdata 16 | 17 | 18 | def create_data(data): 19 | import copy 20 | full_data = copy.copy(data) 21 | missing_data = make_missing(data) 22 | 23 | return missing_data, full_data 24 | 25 | 26 | def boston(): 27 | from sklearn.datasets import load_boston 28 | boston = load_boston() 29 | data = boston.data 30 | missing_data, full_data = create_data(data) 31 | h5_file = h5py.File('boston.hdf5','w') 32 | h5_file['missing'] = missing_data 33 | h5_file['full'] = full_data 34 | h5_file.close() 35 | 36 | 37 | def diabetes(): 38 | """ 39 | Pima Indians Diabetes Dataset 40 | :return: 41 | """ 42 | from sklearn.datasets import load_diabetes 43 |
load_diabetes = load_diabetes() 44 | data = load_diabetes.data 45 | missing_data, full_data = create_data(data) 46 | h5_file = h5py.File('diabetes.hdf5', 'w') 47 | h5_file['missing'] = missing_data 48 | h5_file['full'] = full_data 49 | h5_file.close() 50 | 51 | 52 | def iris(): 53 | from sklearn.datasets import load_iris 54 | data = load_iris().data 55 | missing_data, full_data = create_data(data) 56 | h5_file = h5py.File('iris.hdf5', 'w') 57 | h5_file['missing'] = missing_data 58 | h5_file['full'] = full_data 59 | h5_file.close() 60 | 61 | def wine(): 62 | from sklearn.datasets import load_wine 63 | data = load_wine().data 64 | missing_data, full_data = create_data(data) 65 | h5_file = h5py.File('wine.hdf5', 'w') 66 | h5_file['missing'] = missing_data 67 | h5_file['full'] = full_data 68 | h5_file.close() 69 | 70 | if __name__=="__main__": 71 | #boston() 72 | #diabetes() 73 | #iris() 74 | wine() -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | import os 14 | import logging 15 | 16 | from setuptools import setup, find_packages 17 | 18 | package_name = 'ycimpute' 19 | 20 | readme_dir = os.path.dirname(__file__) 21 | readme_filename = os.path.join(readme_dir, 'README.md') 22 | 23 | try: 24 | with open(readme_filename, 'r') as f: 25 | readme = f.read() 26 | except: 27 | logging.warning("Failed to load %s" % readme_filename) 28 | readme = "" 29 | 30 | try: 31 | import pypandoc 32 | readme = pypandoc.convert(readme, to='rst', format='md') 33 | except: 34 | logging.warning("Conversion of long_description from MD to RST failed") 35 | pass 36 | 37 | if __name__ == '__main__': 38 | setup( 39 | name=package_name, 40 | version="0.2", 41 | description="Matrix completion and feature imputation algorithms", 42 | author="zhouyc", 43 | author_email="yuanchenzhouhcmy@gmail.com", 44 | url="https://github.com/OpenIDEA-YunanUniversity/ycimpute", 45 | license="http://www.apache.org/licenses/LICENSE-2.0.html", 46 | classifiers=[ 47 | 'Development Status :: 3 - Alpha', 48 | 'Environment :: Console', 49 | 'Operating System :: OS Independent', 50 | 'Intended Audience :: Science/Research', 51 | 'License :: OSI Approved :: Apache Software License', 52 | 'Programming Language :: Python', 53 | 'Topic :: Missing Value Imputation', 54 | ], 55 | install_requires=[ 56 | 'six', 57 | 'numpy>=1.10', 58 | 'scipy', 59 | 'scikit-learn>=0.17.1', 60 | 'torch>=1.1.0', 61 | ], 62 | long_description=readme, 63 | packages=find_packages(), 64 | ) 65 | -------------------------------------------------------------------------------- /ycimpute/nn/gainnets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class NetD(torch.nn.Module): 4 | def __init__(self, feature_dim): 5 | """ 6 | 7 | :param feature_dim: 8 | """ 9 | super(NetD, self).__init__() 10 | self.fc1 = 
torch.nn.Linear(feature_dim * 2, 256) 11 | self.fc2 = torch.nn.Linear(256, 128) 12 | self.fc3 = torch.nn.Linear(128, feature_dim) 13 | self.relu = torch.nn.ReLU() 14 | self.sigmoid = torch.nn.Sigmoid() 15 | self.init_weight() 16 | 17 | def init_weight(self): 18 | layers = [self.fc1, self.fc2, self.fc3] 19 | [torch.nn.init.xavier_normal_(layer.weight) for layer in layers] 20 | 21 | def forward(self, x, m, g, h): 22 | """ 23 | see equation (4) in the paper 24 | 25 | :param x: original data 26 | :param m: missing mask 27 | :param g: generated data by Generator 28 | :param h: hint, see paper 29 | :return: a probability matrix indicating which entries are missing 30 | """ 31 | # weights are initialized once in __init__; re-initializing them here on every forward pass would undo all training 32 | inp = m * x + (1 - m) * g 33 | inp = torch.cat((inp, h), dim=1) 34 | out = self.relu(self.fc1(inp)) 35 | out = self.relu(self.fc2(out)) 36 | out = self.sigmoid(self.fc3(out)) 37 | 38 | return out 39 | 40 | 41 | class NetG(torch.nn.Module): 42 | def __init__(self,feature_dim): 43 | """ 44 | 45 | :param feature_dim: 46 | """ 47 | super(NetG, self).__init__() 48 | self.fc1 = torch.nn.Linear(feature_dim * 2, 256) 49 | self.fc2 = torch.nn.Linear(256, 128) 50 | self.fc3 = torch.nn.Linear(128, feature_dim) 51 | self.relu = torch.nn.ReLU() 52 | self.sigmoid = torch.nn.Sigmoid() 53 | self.init_weight() 54 | 55 | def init_weight(self): 56 | layers = [self.fc1, self.fc2, self.fc3] 57 | [torch.nn.init.xavier_normal_(layer.weight) for layer in layers] 58 | 59 | def forward(self, x, z, m): 60 | """ 61 | 62 | see equations (2) and (3) in the paper 63 | 64 | :param x: missing data 65 | :param z: noise 66 | :param m: missing mask, used to replace the missing part with noise 67 | :return: generated data, same size as the original data 68 | """ 69 | # weights are initialized once in __init__; see the note in NetD.forward 70 | inp = m * x + (1 - m) * z 71 | inp = torch.cat((inp, m), dim=1) 72 | out = self.relu(self.fc1(inp)) 73 | out = self.relu(self.fc2(out)) 74 | out = self.sigmoid(self.fc3(out)) 75 | 76 | return out -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/few_observed_entries.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from __future__ import absolute_import, print_function, division 4 | import time 5 | 6 | import numpy as np 7 | from six.moves import range 8 | 9 | from .common import knn_initialize 10 | 11 | def knn_impute_few_observed( 12 | X, missing_mask, k, verbose=False, print_interval=100): 13 | """ 14 | Seems to be the fastest kNN implementation. Pre-sorts each row's neighbors 15 | and then filters these sorted indices using each column's mask of 16 | observed values. 17 | 18 | Important detail: If k observed values are not available then uses fewer 19 | than k neighboring rows.
20 | 21 | Parameters 22 | ---------- 23 | X : np.ndarray 24 | Matrix to fill of shape (n_samples, n_features) 25 | 26 | missing_mask : np.ndarray 27 | Boolean array of same shape as X 28 | 29 | k : int 30 | 31 | verbose : bool 32 | """ 33 | start_t = time.time() 34 | n_rows, n_cols = X.shape 35 | # put the missing mask in column major order since it's accessed 36 | # one column at a time 37 | missing_mask_column_major = np.asarray(missing_mask, order="F") 38 | observed_mask_column_major = ~missing_mask_column_major 39 | X_column_major = X.copy(order="F") 40 | X_row_major, D, effective_infinity = \ 41 | knn_initialize(X, missing_mask) 42 | # get rid of infinities, replace them with a very large number 43 | D_sorted = np.argsort(D, axis=1) 44 | inv_D = 1.0 / D 45 | D_valid_mask = D < effective_infinity 46 | valid_distances_per_row = D_valid_mask.sum(axis=1) 47 | 48 | # trim the number of other rows we consider to exclude those 49 | # with infinite distances 50 | D_sorted = [ 51 | D_sorted[i, :count] 52 | for i, count in enumerate(valid_distances_per_row) 53 | ] 54 | 55 | dot = np.dot 56 | for i in range(n_rows): 57 | missing_row = missing_mask[i, :] 58 | missing_indices = np.where(missing_row)[0] 59 | row_weights = inv_D[i, :] 60 | if verbose and i % print_interval == 0: 61 | print( 62 | "Imputing row %d/%d with %d missing, elapsed time: %0.3f" % ( 63 | i + 1, 64 | n_rows, 65 | len(missing_indices), 66 | time.time() - start_t)) 67 | candidate_neighbor_indices = D_sorted[i] 68 | 69 | for j in missing_indices: 70 | observed = observed_mask_column_major[:, j] 71 | sorted_observed = observed[candidate_neighbor_indices] 72 | observed_neighbor_indices = candidate_neighbor_indices[sorted_observed] 73 | k_nearest_indices = observed_neighbor_indices[:k] 74 | weights = row_weights[k_nearest_indices] 75 | weight_sum = weights.sum() 76 | if weight_sum > 0: 77 | column = X_column_major[:, j] 78 | values = column[k_nearest_indices] 79 | X_row_major[i, j] = dot(values, weights) / weight_sum 80 | return X_row_major 81 | -------------------------------------------------------------------------------- /ycimpute/imputer/expectation_maximization.py: -------------------------------------------------------------------------------- 1 | 2 | from ..utils.tools import Solver 3 | 4 | import numpy as np 5 | import copy 6 | 7 | class EM(Solver): 8 | """ 9 | this algorithm only requires learning the Gaussian distribution parameters 'mu' and 'sigma' 10 | """ 11 | def __init__(self, 12 | max_iter=100, 13 | theta=1e-5, 14 | normalizer='min_max'): 15 | Solver.__init__(self, 16 | normalizer=normalizer) 17 | 18 | self.max_iter = max_iter 19 | self.theta = theta 20 | 21 | def _init_parameters(self, X): 22 | rows, cols = X.shape 23 | mu_init = np.nanmean(X, axis=0) 24 | sigma_init = np.zeros((cols, cols)) 25 | for i in range(cols): 26 | for j in range(i, cols): 27 | vec_col = X[:, [i, j]] 28 | vec_col = vec_col[~np.any(np.isnan(vec_col), axis=1), :].T 29 | if len(vec_col) > 0: 30 | cov = np.cov(vec_col) 31 | cov = cov[0, 1] 32 | sigma_init[i, j] = cov 33 | sigma_init[j, i] = cov 34 | 35 | else: 36 | sigma_init[i, j] = 1.0 37 | sigma_init[j, i] = 1.0 38 | 39 | return mu_init, sigma_init 40 | 41 | def _e_step(self, mu,sigma, X): 42 | samples,_ = X.shape 43 | for sample in range(samples): 44 | if np.any(np.isnan(X[sample,:])): 45 | loc_nan = np.isnan(X[sample,:]) 46 | new_mu = np.dot(sigma[loc_nan, :][:, ~loc_nan], 47 | np.dot(np.linalg.inv(sigma[~loc_nan, :][:, ~loc_nan]), 48 | (X[sample, ~loc_nan] - mu[~loc_nan])[:,np.newaxis])) 49 |
nan_count = np.sum(loc_nan) 50 | X[sample, loc_nan] = mu[loc_nan] + new_mu.reshape(1,nan_count) 51 | 52 | return X 53 | 54 | def _m_step(self,X): 55 | rows, cols = X.shape 56 | mu = np.mean(X, axis=0) 57 | sigma = np.cov(X.T) 58 | tmp_theta = -0.5 * rows * (cols * np.log(2 * np.pi) + 59 | np.log(np.linalg.det(sigma))) 60 | 61 | return mu, sigma,tmp_theta 62 | 63 | 64 | 65 | def solve(self, X, missing_mask): 66 | mu, sigma = self._init_parameters(X) 67 | complete_X,updated_X = None, None 68 | rows,_ = X.shape 69 | theta = -np.inf 70 | for iter in range(self.max_iter): 71 | updated_X = self._e_step(mu=mu, sigma=sigma, X=copy.copy(X)) 72 | mu, sigma, tmp_theta = self._m_step(updated_X) 73 | for i in range(rows): 74 | tmp_theta -= 0.5 * np.dot((updated_X[i, :] - mu), 75 | np.dot(np.linalg.inv(sigma), (updated_X[i, :] - mu)[:, np.newaxis])) 76 | if abs(tmp_theta-theta)<self.theta: 77 | break 78 | else: 79 | theta = tmp_theta 80 | complete_X = updated_X 81 | return complete_X -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/argpartition.py: -------------------------------------------------------------------------------- ...= effective_infinity: 73 | # if there aren't k rows with the feature of interest then 74 | # we need to filter out indices of points at infinite distance 75 | neighbor_indices = array([ 76 | neighbor_index 77 | for neighbor_index in neighbor_indices 78 | if d_copy[neighbor_index] < effective_infinity 79 | ]) 80 | n_current_neighbors = len(neighbor_indices) 81 | 82 | if n_current_neighbors > 0: 83 | neighbor_weights = inv_d[neighbor_indices] 84 | X_row_major[i, j] = ( 85 | dot(X[:, j][neighbor_indices], neighbor_weights) / 86 | neighbor_weights.sum() 87 | ) 88 | return X_row_major 89 | -------------------------------------------------------------------------------- /test/generate_data.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | import os 5 | 6 | 7 | def shulle_data(data): 8 | seed = 2323 # 1314(second val) #123456(first val) 9 | 10 | np.random.seed(seed) 11 | np.random.shuffle(data) 12 | 13 | return data 14 | 15 | def missing(m, n, rate): 16 | p_miss_vec = rate * np.ones((n, 1)) 17 | Missing = np.zeros((m, n)) 18 | 19 | for i in range(n): 20 | A = np.random.uniform(0., 1., size=[m, ]) 21 | B = A > p_miss_vec[i] 22 | Missing[:, i] = 1.
* B 23 | 24 | return Missing 25 | 26 | def sample_Z(m, n): 27 | return np.random.uniform(0., 0.01, size = [m, n]) 28 | 29 | 30 | def make_dataset(data_path, missing_rate, train_ratio=0.8, is_label_numerical=False): 31 | data = np.loadtxt(data_path, delimiter=',') 32 | 33 | data = shulle_data(data) 34 | 35 | label = data[:, 0] 36 | data = data[:, 1:] 37 | 38 | data_dim = data.shape[1] 39 | min_val = np.zeros(data_dim) 40 | max_val = np.zeros(data_dim) 41 | min_label = None 42 | max_label = None 43 | 44 | for i in range(data_dim): 45 | min_val[i] = np.min(data[:, i]) 46 | max_val[i] = np.max(data[:, i]) 47 | if max_val[i] == 0: 48 | max_val[i] = 0.1 49 | 50 | if is_label_numerical: 51 | min_label = np.min(label) 52 | max_label = np.max(label) 53 | 54 | label = (label - min_label) / (max_label - min_label) 55 | 56 | missing_mat = missing(data.shape[0], data.shape[1], 57 | missing_rate) 58 | 59 | train = data[:int(train_ratio * data.shape[0])] 60 | test = data[int(train_ratio * data.shape[0]):] 61 | 62 | train_label = label[:int(train_ratio * data.shape[0])] 63 | test_label = label[int(train_ratio * data.shape[0]):] 64 | 65 | train_missing = missing_mat[:int(train_ratio * data.shape[0])] 66 | test_missing = missing_mat[int(train_ratio * data.shape[0]):] 67 | 68 | train_noise = sample_Z(train.shape[0], train.shape[1]) 69 | test_noise = sample_Z(test.shape[0], test.shape[1]) 70 | 71 | info = {'train': train, 72 | 'test': test, 73 | 'train_missing': train_missing, 74 | 'test_missing': test_missing, 75 | 'train_noise': train_noise, 76 | 'test_noise': test_noise, 77 | 'min_val': min_val, 78 | 'max_val': max_val, 79 | 'train_label': train_label, 80 | 'test_label': test_label, 81 | 'max_label': max_label, 82 | 'min_label': min_label, 83 | 'missing_rate': missing_rate, 84 | 'train_rate': train_ratio 85 | } 86 | 87 | return info 88 | 89 | dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 90 | data_path = os.path.join(dir_path,'test_data/wave.csv') 91 | info_dict = make_dataset(data_path=data_path, 92 | missing_rate=0.2, 93 | is_label_numerical=False) 94 | 95 | 96 | missing_mask = info_dict['train_missing'][2000:2600,:] 97 | complete_data = info_dict['train'][2000:2600,:] 98 | 99 | missing_mask[:300] = True 100 | missing_data = complete_data.copy() 101 | missing_mask = missing_mask.astype(bool) 102 | missing_mask = ~missing_mask 103 | missing_data[missing_mask]=np.nan -------------------------------------------------------------------------------- /ycimpute/imputer/mida.py: -------------------------------------------------------------------------------- 1 | from ..utils.tools import Solver 2 | from ..nn.autoencoder import Autoencoder 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | import torch.utils.data 8 | 9 | class MIDA(Solver): 10 | def __init__( 11 | self, 12 | theta=5, 13 | epochs=300, 14 | use_cuda=False, 15 | batch_size=64, 16 | early_stop=1e-06, 17 | normalizer='min_max', 18 | verbose=True): 19 | 20 | Solver.__init__( 21 | self, 22 | normalizer=normalizer) 23 | 24 | self.theta = theta 25 | self.epochs = epochs 26 | self.use_cuda = use_cuda 27 | self.batch_size = batch_size 28 | self.verbose = verbose 29 | self.early_stop = early_stop 30 | 31 | self.device = torch.device("cuda:0" if self.use_cuda else "cpu") 32 | 33 | def training(self, training_data): 34 | n_features = training_data.shape[1] 35 | training_data = torch.from_numpy(training_data).float() 36 | 37 | train_loader = torch.utils.data.DataLoader(dataset=training_data, 38 | 
batch_size=self.batch_size, 39 | shuffle=True) 40 | 41 | model = Autoencoder(dim=n_features, 42 | theta=self.theta).to(self.device) 43 | loss = nn.MSELoss() 44 | optimizer = torch.optim.RMSprop(model.parameters(), lr=0.0001,momentum=0.8) 45 | 46 | cost_list = [] 47 | early_stop = False 48 | total_batch = len(training_data) // self.batch_size 49 | 50 | for epoch in range(self.epochs): 51 | for i, batch_data in enumerate(train_loader): 52 | batch_data = batch_data.to(self.device) 53 | reconst_data = model(batch_data) 54 | cost = loss(reconst_data, batch_data) 55 | 56 | optimizer.zero_grad() 57 | cost.backward() 58 | optimizer.step() 59 | if self.verbose: 60 | if (i + 1) % (total_batch // 2) == 0: 61 | print('Epoch [%d/%d], lter [%d/%d], Loss: %.6f' % 62 | (epoch + 1, self.epochs, i + 1, total_batch, cost.item())) 63 | 64 | # early stopping rule 1 : MSE < 1e-06 65 | if cost.item() < 1e-06: 66 | early_stop = True 67 | break 68 | 69 | cost_list.append(cost.item()) 70 | 71 | if early_stop: 72 | break 73 | return model 74 | 75 | 76 | def solve(self, X, missing_mask): 77 | complete_rows_index, missing_rows_index = self.detect_complete_part(missing_mask) 78 | if len(complete_rows_index)==0: 79 | raise ValueError('Cant find a completely part for training...') 80 | missing_data = X[missing_rows_index] 81 | training_data = X[complete_rows_index] 82 | 83 | model = self.training(training_data.copy()) 84 | model.eval() 85 | 86 | missing_data = torch.from_numpy(missing_data).float() 87 | filled_data = model(missing_data.to(self.device)) 88 | filled_data = filled_data.cpu().detach().numpy() 89 | tmp_mask = missing_mask[missing_rows_index] 90 | missing_data = missing_data.cpu().numpy() 91 | filled_data = missing_data * (1 - tmp_mask) + filled_data * (tmp_mask) 92 | 93 | X[missing_rows_index] = filled_data 94 | X[complete_rows_index] = training_data 95 | 96 | return X -------------------------------------------------------------------------------- /ycimpute/utils/shower/show.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | import numpy as np 4 | import copy 5 | import h5py 6 | ################################################## 7 | 8 | from ...imputer.mice import MICE 9 | from ...imputer.knnimput import KNN 10 | from ...imputer.iterforest import IterImput 11 | from ...imputer.simple import SimpleFill 12 | from ...imputer import EM 13 | 14 | from ...utils.tools import Solver 15 | from .. import evaluate 16 | from .. 
import config 17 | 18 | solver = Solver() 19 | 20 | def analysiser(missing_X, original_X): 21 | missing_X = np.asarray(missing_X) 22 | original_X = np.asarray(original_X) 23 | 24 | mask_all = solver.masker(missing_X)[config.all] 25 | missing_index = evaluate.get_missing_index(mask_all) 26 | original_arr = original_X[missing_index] 27 | 28 | ################################################## 29 | 30 | mice_X_filled = MICE().complete(copy.copy(missing_X)) 31 | mice_filled_arr = mice_X_filled[missing_index] 32 | rmse_mice_score = evaluate.RMSE(original_arr, mice_filled_arr) 33 | 34 | ######################################################### 35 | iterforest_X_filled = IterImput().complete(copy.copy(missing_X)) 36 | iterforest_filled_arr = iterforest_X_filled[missing_index] 37 | rmse_iterforest_score = evaluate.RMSE(original_arr, iterforest_filled_arr) 38 | 39 | 40 | ############################################################ 41 | knn_X_filled = KNN(k=3).complete(copy.copy(missing_X)) 42 | knn_filled_arr = knn_X_filled[missing_index] 43 | rmse_knn_score = evaluate.RMSE(original_arr, knn_filled_arr) 44 | 45 | ###################################################### 46 | mean_X_filled = SimpleFill(fill_method='mean').complete(copy.copy(missing_X)) 47 | mean_filled_arr = mean_X_filled[missing_index] 48 | rmse_mean_score = evaluate.RMSE(original_arr, mean_filled_arr) 49 | ################################################################# 50 | zero_X_filled = SimpleFill(fill_method='zero').complete(copy.copy(missing_X)) 51 | zero_filled_arr = zero_X_filled[missing_index] 52 | rmse_zero_score = evaluate.RMSE(original_arr, zero_filled_arr) 53 | 54 | ################################################ 55 | median_X_filled = SimpleFill(fill_method='median').complete(copy.copy(missing_X)) 56 | median_filled_arr = median_X_filled[missing_index] 57 | rmse_median_score = evaluate.RMSE(original_arr, median_filled_arr) 58 | ########################################################################## 59 | min_X_filled = SimpleFill(fill_method='min').complete(copy.copy(missing_X)) 60 | min_filled_arr = min_X_filled[missing_index] 61 | rmse_min_score = evaluate.RMSE(original_arr, min_filled_arr) 62 | 63 | ####################################################### 64 | em_X_filled = EM().complete(copy.copy(missing_X)) 65 | em_filled_arr = em_X_filled[missing_index] 66 | rmse_em_score = evaluate.RMSE(original_arr,em_filled_arr) 67 | ################################################ 68 | 69 | return {'rmse_mice_score':rmse_mice_score, 70 | 'rmse_iterforest_score':rmse_iterforest_score, 71 | 'rmse_knn_score':rmse_knn_score, 72 | 'rmse_mean_score':rmse_mean_score, 73 | 'rmse_zero_score':rmse_zero_score, 74 | 'rmse_median_score':rmse_median_score, 75 | 'rmse_min_score':rmse_min_score, 76 | 'rmse_em_score': rmse_em_score 77 | } 78 | 79 | 80 | def example(): 81 | from ...datasets import load_data 82 | boston_mis, boston_full = load_data.load_boston() 83 | print(analysiser(boston_mis, boston_full)) -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/normalized_distance.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from __future__ import absolute_import, print_function, division 4 | 5 | from six.moves import range 6 | import numpy as np 7 | 8 | def all_pairs_normalized_distances(X): 9 | """ 10 | We can't really compute distances over incomplete data since 11 | rows are missing different numbers of entries. 
12 | The next best thing is the mean squared difference between two vectors 13 | (a normalized distance), which gets computed only over the columns that 14 | two vectors have in common. If two vectors have no features in common 15 | then their distance is infinity. 16 | 17 | Parameters 18 | ---------- 19 | X : np.ndarray 20 | Data matrix of shape (n_samples, n_features) with missing entries 21 | marked using np.nan 22 | 23 | Returns a (n_samples, n_samples) matrix of pairwise normalized distances. 24 | """ 25 | n_rows, n_cols = X.shape 26 | 27 | # matrix of mean squared difference between samples 28 | D = np.ones((n_rows, n_rows), dtype="float32", order="C") * np.inf 29 | 30 | # we can cheaply determine the number of columns that two rows share 31 | # by taking the dot product between their finite masks 32 | observed_elements = np.isfinite(X).astype(int) 33 | n_shared_features_for_pairs_of_rows = np.dot( 34 | observed_elements, 35 | observed_elements.T) 36 | no_overlapping_features_rows = n_shared_features_for_pairs_of_rows == 0 37 | number_incomparable_rows = no_overlapping_features_rows.sum(axis=1) 38 | row_overlaps_every_other_row = (number_incomparable_rows == 0) 39 | row_overlaps_no_other_rows = number_incomparable_rows == n_rows 40 | valid_rows_mask = ~row_overlaps_no_other_rows 41 | valid_row_indices = np.where(valid_rows_mask)[0] 42 | 43 | # preallocate all the arrays that we would otherwise create in the 44 | # following loop and pass them as "out" parameters to NumPy ufuncs 45 | diffs = np.zeros_like(X) 46 | missing_differences = np.zeros_like(diffs, dtype=bool) 47 | valid_rows = np.zeros(n_rows, dtype=bool) 48 | ssd = np.zeros(n_rows, dtype=X.dtype) 49 | 50 | for i in valid_row_indices: 51 | x = X[i, :] 52 | np.subtract(X, x.reshape((1, n_cols)), out=diffs) 53 | np.isnan(diffs, out=missing_differences) 54 | 55 | # zero out all NaN's 56 | diffs[missing_differences] = 0 57 | 58 | # square each difference 59 | diffs **= 2 60 | 61 | observed_counts_per_row = n_shared_features_for_pairs_of_rows[i] 62 | 63 | if row_overlaps_every_other_row[i]: 64 | # add up all the non-missing squared differences 65 | diffs.sum(axis=1, out=D[i, :]) 66 | D[i, :] /= observed_counts_per_row 67 | else: 68 | np.logical_not(no_overlapping_features_rows[i], out=valid_rows) 69 | 70 | # add up all the non-missing squared differences 71 | diffs.sum(axis=1, out=ssd) 72 | ssd[valid_rows] /= observed_counts_per_row[valid_rows] 73 | D[i, valid_rows] = ssd[valid_rows] 74 | return D 75 | 76 | 77 | def all_pairs_normalized_distances_reference(X): 78 | """ 79 | Reference implementation of normalized all-pairs distance, used 80 | for testing the more efficient implementation above for equivalence. 81 | """ 82 | n_samples, n_cols = X.shape 83 | # matrix of mean squared difference between samples 84 | D = np.ones((n_samples, n_samples), dtype="float32") * np.inf 85 | for i in range(n_samples): 86 | diffs = X - X[i, :].reshape((1, n_cols)) 87 | missing_diffs = np.isnan(diffs) 88 | missing_counts_per_row = missing_diffs.sum(axis=1) 89 | valid_rows = missing_counts_per_row < n_cols 90 | D[i, valid_rows] = np.nanmean( 91 | diffs[valid_rows, :] ** 2, 92 | axis=1) 93 | return D 94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # ycimpute 3 | 4 | 【Notice!】 I've been very busy at work since I graduated from college, so this project will no longer be maintained.
I apologize for any inconvenience caused, and thank you for your support. 5 | 6 | # Updated 7 | - PyPI package updated 8 | - added a GAN-based algorithm 9 | 10 | ![AppVeyor](https://img.shields.io/appveyor/ci/gruntjs/grunt.svg) 11 | ![Hex.pm](https://img.shields.io/hexpm/l/plug.svg) 12 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/Django.svg) 13 | 14 | ## [Chinese documentation]( https://hcmy.gitbooks.io/ycimpute/content/)     [Documentation](https://hcmy.gitbooks.io/ycimpute-api/content/) 15 | # What is ycimpute? 16 | ycimpute is a high-level API library for imputing missing values. It is written in Python and integrates imputation methods based on machine learning and statistics. Some modules require [scikit-learn](http://scikit-learn.org/stable/) support. 17 | 18 | I wrote this library because I often encounter missing values while doing data mining. Most scenarios can be handled with the same set of imputation approaches, so I decided to collect them into one function library that is convenient to call. 19 | 20 | ## The methods implemented so far: 21 | 22 | For the details of each algorithm, please look up the API below: 23 | 24 | - simple imputation methods (mean, zero, maximum, minimum, etc.) 25 | - based on Random Forest (MissForest) 26 | - Multiple Imputation (MICE) 27 | - based on Expectation Maximization (EM) 28 | - based on KNN 29 | - based on denoising autoencoders: [MIDA: Multiple Imputation using Denoising Autoencoders](https://arxiv.org/abs/1705.02737) 30 | - based on generative adversarial nets: [GAIN: Missing Data Imputation using Generative Adversarial Nets](https://arxiv.org/abs/1806.02920) 31 | 32 | ### Suggestion: data loss mechanisms vary across scenarios, so the engineer has to choose an appropriate filling method based on the business. 33 | ## Missing values can be of three general types: 34 | 35 | >+ Missing Completely At Random (MCAR): 36 | When missing data are MCAR, the presence/absence of data is completely independent of observable variables and parameters of interest. In this case, the analysis performed on the data is unbiased. In practice, it is highly unlikely. 37 | >+ Missing At Random (MAR): 38 | When missing data is not random but can be fully explained by a variable for which there is complete information. An example is that males are less likely to fill in a depression survey, but this has nothing to do with their level of depression after accounting for maleness. This kind of missing data can induce a bias in your analysis, especially if it unbalances your data because of many missing values in a certain category. 39 | >+ Missing Not At Random (MNAR): 40 | When the missing values are neither MCAR nor MAR. In the previous example, that would be the case if people tended not to answer the survey depending on their depression level. 41 | Let's check out the performance of each imputation method on various data sets: 42 | 43 | ### the data sets include: [IRIS dataset](https://github.com/OpenIDEA-YunanUniversity/ycimpute/tree/master/ycimpute/datasets) [WINE dataset](https://github.com/OpenIDEA-YunanUniversity/ycimpute/tree/master/ycimpute/datasets) [Boston dataset](https://github.com/OpenIDEA-YunanUniversity/ycimpute/tree/master/ycimpute/datasets). 44 | 45 | ## These are complete data sets; I ran the experiments and evaluated the models after randomly deleting entries. About 10% of the data is missing, and each feature contains a different degree of data loss.
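A minimal sketch of how such a benchmark mask can be produced (illustrative only; `X_full` is a placeholder for any complete data matrix, not a name from this repo):

```python
import numpy as np

rng = np.random.RandomState(0)
X_full = rng.rand(506, 13)             # placeholder for a complete data set
X_missing = X_full.copy()
drop = rng.rand(*X_full.shape) < 0.1   # knock out roughly 10% of all entries
X_missing[drop] = np.nan               # missing entries are marked with np.nan
```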
46 | 47 | ## All of the data are continuous; the evaluation function I used was RMSE (root mean square error). The red line represents the average of all errors. (Note: the data has not been normalized, so the RMSE values look high.) 48 | ![WINE data set](https://github.com/HCMY/ycimpute/blob/master/img/WINE.svg) 49 | ![IRIS data set](https://github.com/HCMY/ycimpute/blob/master/img/IRIS.svg) 50 | ![Boston housing data set](https://github.com/HCMY/ycimpute/blob/master/img/BOSTON.svg) 51 | 52 | 53 | -------------------------------------------------------------------------------- /ycimpute/imputer/gain.py: -------------------------------------------------------------------------------- 1 | from ..utils.tools import generate_noise 2 | from ..utils.tools import Solver 3 | from ..nn.gainnets import NetD, NetG 4 | 5 | import torch 6 | import numpy as np 7 | import torch.nn as nn 8 | import torch.utils.data 9 | from torch.utils.data import Dataset, DataLoader 10 | 11 | 12 | def generate_hint(n_rows, n_cols, missing_rate): 13 | """ 14 | @n_rows: number of rows of the hint matrix to generate 15 | @n_cols: number of columns of the hint matrix to generate 16 | @missing_rate: probability of zeroing out each entry of the hint matrix 17 | """ 18 | 19 | random_data = np.random.uniform(0., 1., size=[n_rows, n_cols]) 20 | tmp = random_data > missing_rate 21 | missing_mat = 1. * tmp 22 | 23 | return missing_mat 24 | 25 | 26 | class SimpleDataLoader(Dataset): 27 | """Minimal Dataset pairing each training row with its missing-value mask.""" 28 | 29 | def __init__(self, specify_data, mask): 30 | """ 31 | @specify_data: data rows; @mask: mask of the same shape 32 | """ 33 | self.specify_data = specify_data 34 | self.mask = mask 35 | 36 | def __len__(self): 37 | return len(self.specify_data) 38 | 39 | def __getitem__(self, idx): 40 | data = self.specify_data[idx] 41 | mask = self.mask[idx] 42 | 43 | return data, mask 44 | 45 | class GAIN(Solver): 46 | def __init__(self, 47 | normalizer='min_max', 48 | epochs=10, 49 | use_cuda=False, 50 | batch_size=64, 51 | verbose=True, 52 | alpha=0.2, 53 | lr=0.0001, 54 | hint_rate=0.2): 55 | Solver.__init__(self, 56 | normalizer=normalizer) 57 | 58 | self.epochs = epochs 59 | self.lr = lr 60 | self.alpha = alpha 61 | self.use_cuda = use_cuda 62 | self.batch_size = batch_size 63 | self.verbose = verbose 64 | self.hint_rate = hint_rate 65 | self.device = torch.device("cuda:0" if self.use_cuda else "cpu") 66 | 67 | 68 | def training(self, training_data, train_mask): 69 | # ycimpute masks are True where a value is missing; GAIN expects 1 where observed 70 | train_mask = ~train_mask 71 | train_mask = train_mask.astype(int) 72 | 73 | _, n_cols = training_data.shape 74 | netD = NetD(feature_dim=n_cols).to(self.device) 75 | netG = NetG(feature_dim=n_cols).to(self.device) 76 | optimD = torch.optim.RMSprop(netD.parameters(), lr=self.lr) 77 | optimG = torch.optim.RMSprop(netG.parameters(), lr=self.lr) 78 | 79 | train_dset = SimpleDataLoader(training_data, train_mask) 80 | train_loader = DataLoader(train_dset, 81 | batch_size=self.batch_size, 82 | num_workers=1) 83 | bce_loss = torch.nn.BCEWithLogitsLoss(reduction="mean") 84 | mse_loss = torch.nn.MSELoss(reduction="mean") 85 | 86 | for epoch in range(self.epochs): 87 | for idx, (x, mask) in enumerate(train_loader): 88 | noise = generate_noise(x.shape[0], x.shape[1]) 89 | hint = generate_hint(x.shape[0], x.shape[1], self.hint_rate) 90 | 91 | x = torch.tensor(x).float().to(self.device) 92 | noise = torch.tensor(noise).float().to(self.device) 93 | mask = torch.tensor(mask).float().to(self.device) 94 | hint = torch.tensor(hint).float().to(self.device) 95 | 96 | hint = mask * hint + 0.5 * (1 - hint) 97 | 98 | # train D 99 | optimD.zero_grad() 100 | G_sample = netG(x, noise, mask) 101 | D_prob = netD(x, mask, G_sample, hint)
102 | D_loss = bce_loss(D_prob, mask) 103 | D_loss.backward() 104 | optimD.step() 105 | # train G 106 | optimG.zero_grad() 107 | G_sample = netG(x, noise, mask) 108 | 109 | D_prob = netD(x, mask, G_sample, hint) 110 | 111 | # do not detach D_prob here: the adversarial gradient has to flow back 112 | # into G; per the GAIN paper, G maximizes log D on the imputed entries 113 | G_loss = -((1 - mask) * (torch.sigmoid(D_prob) + 1e-8).log()).sum() / ((1 - mask).sum() + 0.001) 114 | G_mse_loss = mse_loss(mask * x, mask * G_sample) / (mask.sum() + 0.0001) 115 | G_loss = G_loss + self.alpha * G_mse_loss 116 | 117 | G_loss.backward() 118 | optimG.step() 119 | 120 | G_mse_train = mse_loss(mask * x, mask * G_sample) / mask.sum() 121 | if self.verbose: 122 | if epoch % 2 == 0: 123 | print('Iter:{}\tD_loss: {:.4f}\tG_loss: {:.4f}\tTrain MSE:{:.4f}'. \ 124 | format(epoch, D_loss, G_loss, np.sqrt(G_mse_train.data.cpu().numpy()))) 125 | 126 | return netG 127 | 128 | 129 | def solve(self, X, missing_mask): 130 | complete_rows_index, missing_rows_index = self.detect_complete_part(missing_mask) 131 | if len(complete_rows_index) == 0: 132 | raise ValueError("Can't find any completely observed rows for training...") 133 | model = self.training(training_data=X.copy(), train_mask=missing_mask.copy()) 134 | model.eval() 135 | 136 | missing_mask = ~missing_mask 137 | missing_mask = missing_mask.astype(int) 138 | 139 | noise = generate_noise(X.shape[0], X.shape[1]) 140 | noise = torch.tensor(noise).float().to(self.device) 141 | X = torch.tensor(X).float().to(self.device) 142 | mask = torch.tensor(missing_mask).float().to(self.device) 143 | 144 | filled_data = model(X, noise, mask) 145 | filled_data = filled_data.cpu().detach().numpy() 146 | 147 | X = X.cpu().detach().numpy() 148 | X[missing_rows_index] = filled_data[missing_rows_index] 149 | 150 | return X 151 | 152 | -------------------------------------------------------------------------------- /ycimpute/unsupervised/knn/optimistic.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from __future__ import absolute_import, print_function, division 4 | import time 5 | 6 | from six.moves import range 7 | import numpy as np 8 | 9 | from .common import knn_initialize 10 | 11 | def knn_impute_optimistic( 12 | X, 13 | missing_mask, 14 | k, 15 | verbose=False, 16 | print_interval=100): 17 | """ 18 | Fill in the given incomplete matrix using k-nearest neighbor imputation. 19 | 20 | This version assumes that most of the time the same neighbors will be 21 | used, so it first performs the weighted average of a row's k nearest 22 | neighbors and checks afterward whether it was valid (due to possible 23 | missing values). 24 | 25 | Has been observed to be a lot faster on an image matrix with 1/4 of 26 | its entries missing, 1000 rows and ~9000 columns. 27 | Parameters 28 | ---------- 29 | X : np.ndarray 30 | Matrix to fill of shape (n_samples, n_features) 31 | 32 | missing_mask : np.ndarray 33 | Boolean array of same shape as X 34 | 35 | k : int 36 | 37 | verbose : bool 38 | 39 | Modifies X by replacing its missing values with weighted averages of 40 | similar rows. Returns the modified X.
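Example (an illustrative sketch, not taken from this repo's tests;
any float matrix with np.nan marking the gaps will do):

    import numpy as np
    X = np.array([[1.0, np.nan, 3.0],
                  [2.0, 4.0, 2.9],
                  [1.5, 3.5, 3.1]])
    X_filled = knn_impute_optimistic(X, np.isnan(X), k=2)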
41 | """ 42 | start_t = time.time() 43 | n_rows, n_cols = X.shape 44 | X_row_major, D, _ = knn_initialize(X, missing_mask) 45 | D_sorted_indices = np.argsort(D, axis=1) 46 | X_column_major = X_row_major.copy(order="F") 47 | 48 | dot = np.dot 49 | 50 | # preallocate array to prevent repeated creation in the following loops 51 | neighbor_weights = np.ones(k, dtype=X.dtype) 52 | 53 | missing_mask_column_major = np.asarray(missing_mask, order="F") 54 | observed_mask_column_major = ~missing_mask_column_major 55 | 56 | for i in range(n_rows): 57 | missing_columns = np.where(missing_mask[i])[0] 58 | if verbose and i % print_interval == 0: 59 | print( 60 | "Imputing row %d/%d with %d missing, elapsed time: %0.3f" % ( 61 | i + 1, 62 | n_rows, 63 | len(missing_columns), 64 | time.time() - start_t)) 65 | n_missing_columns = len(missing_columns) 66 | if n_missing_columns == 0: 67 | continue 68 | 69 | row_distances = D[i, :] 70 | neighbor_indices = D_sorted_indices[i, :] 71 | X_missing_columns = X_column_major[:, missing_columns] 72 | 73 | # precompute these for the fast path where the k nearest neighbors 74 | # are not missing the feature value we're currently trying to impute 75 | k_nearest_indices = neighbor_indices[:k] 76 | np.divide(1.0, row_distances[k_nearest_indices], out=neighbor_weights) 77 | # optimistically impute all the columns from the k nearest neighbors 78 | # we'll have to back-track for some of the columns for which 79 | # one of the neighbors did not have a value 80 | X_knn = X_missing_columns[k_nearest_indices, :] 81 | weighted_average_of_neighboring_rows = dot( 82 | X_knn.T, 83 | neighbor_weights) 84 | sum_weights = neighbor_weights.sum() 85 | weighted_average_of_neighboring_rows /= sum_weights 86 | imputed_values = weighted_average_of_neighboring_rows 87 | 88 | observed_mask_missing_columns = observed_mask_column_major[:, missing_columns] 89 | observed_mask_missing_columns_sorted = observed_mask_missing_columns[ 90 | neighbor_indices, :] 91 | 92 | # We can determine the maximum number of other rows that must be 93 | # inspected across all features missing for this row by 94 | # looking at the column-wise running sums of the observed feature 95 | # matrix. 
96 | observed_cumulative_sum = observed_mask_missing_columns_sorted.cumsum(axis=0) 97 | sufficient_rows = (observed_cumulative_sum == k) 98 | n_rows_needed = sufficient_rows.argmax(axis=0) + 1 99 | max_rows_needed = n_rows_needed.max() 100 | 101 | if max_rows_needed == k: 102 | # if we never needed more than k rows then we're done after the 103 | # optimistic averaging above, so go on to the next sample 104 | X[i, missing_columns] = imputed_values 105 | continue 106 | 107 | # truncate all the sorted arrays to only include the necessary 108 | # number of rows (should significantly speed up the "slow" path) 109 | necessary_indices = neighbor_indices[:max_rows_needed] 110 | d_sorted = row_distances[necessary_indices] 111 | X_missing_columns_sorted = X_missing_columns[necessary_indices, :] 112 | observed_mask_missing_columns_sorted = observed_mask_missing_columns_sorted[ 113 | :max_rows_needed, :] 114 | 115 | for missing_column_idx in range(n_missing_columns): 116 | # since all the arrays we're looking into have already been 117 | # sliced out at the missing features, we need to address these 118 | # features from 0..n_missing using missing_idx rather than j 119 | if n_rows_needed[missing_column_idx] == k: 120 | assert np.isfinite(imputed_values[missing_column_idx]), \ 121 | "Expected finite imputed value #%d (column #%d for row %d)" % ( 122 | missing_column_idx, 123 | missing_columns[missing_column_idx], 124 | i) 125 | continue 126 | row_mask = observed_mask_missing_columns_sorted[:, missing_column_idx] 127 | sorted_column_values = X_missing_columns_sorted[:, missing_column_idx] 128 | neighbor_distances = d_sorted[row_mask][:k] 129 | 130 | # may not have enough values in a column for all k neighbors 131 | k_or_less = len(neighbor_distances) 132 | usable_weights = neighbor_weights[:k_or_less] 133 | np.divide( 134 | 1.0, 135 | neighbor_distances, out=usable_weights) 136 | neighbor_values = sorted_column_values[row_mask][:k_or_less] 137 | 138 | imputed_values[missing_column_idx] = ( 139 | dot(neighbor_values, usable_weights) / usable_weights.sum()) 140 | 141 | X[i, missing_columns] = imputed_values 142 | return X 143 | -------------------------------------------------------------------------------- /doc_eng.md: -------------------------------------------------------------------------------- 1 | 2 | ### Welcome to ycimpute! 3 | ![AppVeyor](https://img.shields.io/appveyor/ci/gruntjs/grunt.svg) 4 | ![Hex.pm](https://img.shields.io/hexpm/l/plug.svg) 5 | ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/Django.svg) 6 | 7 | ## ycimpute Overview 8 | #### ycimpute is a high-level API library for imputing missing values. It is written in Python and integrates imputation methods based on machine learning and statistics. Some modules require scikit-learn support. 9 | ##### I wrote this library because I often encounter missing values while doing data mining. Most scenarios can be handled with the same set of imputation approaches, so I decided to collect them into one function library that is convenient to call. 10 | ### Suggestion: data loss mechanisms vary across scenarios, so the engineer has to choose an appropriate filling method based on the business.
11 | 12 | ## Performance of various models 13 | 14 | ![UCI WINE data set](https://github.com/HCMY/ycimpute/blob/master/img/WINE.svg) 15 | ![IRIS data set](https://github.com/HCMY/ycimpute/blob/master/img/IRIS.svg) 16 | ![BOSTON housing data set](https://github.com/HCMY/ycimpute/blob/master/img/BOSTON.svg) 17 | 18 | # Install 19 | 20 | ### via pip 21 | 22 | pip install ycimpute 23 | 24 | ### via source 25 | 26 | 27 | ``` 28 | git clone https://github.com/HCMY/ycimpute.git 29 | cd ycimpute 30 | python setup.py install 31 | ``` 32 | 33 | ## API Reference 34 | 35 | ## Selecting supervised methods 36 | 37 | ### 1. Based on Random Forest 38 | 39 | Theory behind this method: [MissForest—non-parametric missing value imputation for mixed-type data](https://academic.oup.com/bioinformatics/article/28/1/112/219101) 40 | 41 | ### usage: 42 | 43 | #### Before running the example, you need to download the data files and copy them into the datasets directory of your installed package 44 | ( ``` your python path / site-packages / ycimpute / datasets / ``` ) 45 | #### Linux users can download the data with wget; the files are saved to the current working directory: 46 | ``` 47 | wget https://github.com/HCMY/ycimpute/raw/master/test_data/boston.hdf5 48 | wget https://github.com/HCMY/ycimpute/raw/master/test_data/iris.hdf5 49 | wget https://github.com/HCMY/ycimpute/raw/master/test_data/wine.hdf5 50 | ``` 51 | 52 | 53 | ```python 54 | import numpy as np 55 | from ycimpute.datasets.load_data import load_boston 56 | from ycimpute.imputer.iterforest import IterImput 57 | X_missing, X_original = load_boston()  # load the Boston housing data 58 | 59 | print(X_missing.shape) 60 | print("X missing\n\n", np.argwhere(np.isnan(X_missing))) 61 | X_filled = IterImput().complete(X_missing) 62 | print("X filled\n\n", np.argwhere(np.isnan(X_filled))) 63 | ``` 64 | 65 | (506, 13) 66 | X missing 67 | 68 | [[ 1 2] 69 | [ 1 4] 70 | [ 1 8] 71 | ..., 72 | [502 12] 73 | [504 3] 74 | [504 7]] 75 | X filled 76 | 77 | [] 78 | 79 | 80 | ### parameters: 81 | ### TODO 82 | 83 | ## Filling based on MICE 84 | Theory behind this method: [Multiple Imputation by Chained Equations](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3074241/) 85 | 86 | #### usage: 87 | 88 | 89 | ```python 90 | from ycimpute.imputer.mice import MICE 91 | print("X missing\n\n", np.argwhere(np.isnan(X_missing))) 92 | X_filled = MICE().complete(X_missing) 93 | print("X filled\n\n", np.argwhere(np.isnan(X_filled))) 94 | ``` 95 | 96 | X missing 97 | 98 | [[ 1 2] 99 | [ 1 4] 100 | [ 1 8] 101 | ..., 102 | [502 12] 103 | [504 3] 104 | [504 7]] 105 | X filled 106 | 107 | [] 108 | 109 | 110 | ### parameters: 111 | ### TODO 112 | 113 | ## Selecting unsupervised methods 114 | 115 | ### Based on KNN 116 | 117 | #### usage 118 | 119 | 120 | ```python 121 | from ycimpute.imputer.knnimput import KNN 122 | print("X missing\n\n", np.argwhere(np.isnan(X_missing))) 123 | X_filled = KNN(k=4).complete(X_missing) 124 | print("X filled\n\n", np.argwhere(np.isnan(X_filled))) 125 | ``` 126 | 127 | X missing 128 | 129 | [[ 1 2] 130 | [ 1 4] 131 | [ 1 8] 132 | ..., 133 | [502 12] 134 | [504 3] 135 | [504 7]] 136 | Imputing row 1/506 with 0 missing, elapsed time: 0.094 137 | Imputing row 101/506 with 2 missing, elapsed time: 0.096 138 | Imputing row 201/506 with 2 missing, elapsed time: 0.098 139 | Imputing row 301/506 with 3 missing, elapsed time: 0.100 140 | Imputing row 401/506 with 1 missing, elapsed time: 0.102 141 | Imputing row 501/506 with 1 missing, elapsed time: 0.104 142 | X filled 143 | 144 | [] 145 | 146 | 147 | ### parameters 148 |
parameter | function | value 149 | - | :-: | -: 150 | k | --- | int 151 | 152 | ## Visualizing the fill quality of each method, measured by RMSE 153 | 154 | 155 | ```python 156 | from ycimpute.utils.tools import Solver 157 | from ycimpute.utils import evaluate 158 | from ycimpute.datasets.load_data import load_boston 159 | solver = Solver() 160 | X_missing, X_original = load_boston() 161 | from ycimpute.imputer.mice import MICE 162 | 163 | X_filled = MICE().complete(X_missing) 164 | mask_all = solver.masker(X_missing)['all'] 165 | missing_index = evaluate.get_missing_index(mask_all) 166 | original_arr = X_original[missing_index] 167 | mice_filled_arr = X_filled[missing_index] 168 | rmse_mice_score = evaluate.RMSE(original_arr, mice_filled_arr) 169 | print(rmse_mice_score) 170 | ``` 171 | 172 | 29.1028614966 173 | 174 | 175 | ### You can look over the effect of all methods in one shot: 176 | Note: all models use default parameters, which should be tuned :) 177 | 178 | 179 | ```python 180 | from ycimpute.utils.test_evaluate import show 181 | result = show.analysiser(X_missing, X_original) 182 | import pandas as pd 183 | result = pd.DataFrame.from_dict(result, orient='index') 184 | print(result) 185 | ``` 186 | 187 | Imputing row 1/506 with 0 missing, elapsed time: 0.050 188 | Imputing row 101/506 with 2 missing, elapsed time: 0.052 189 | Imputing row 201/506 with 2 missing, elapsed time: 0.054 190 | Imputing row 301/506 with 3 missing, elapsed time: 0.056 191 | Imputing row 401/506 with 1 missing, elapsed time: 0.058 192 | Imputing row 501/506 with 1 missing, elapsed time: 0.060 193 | 0 194 | rmse_mice_score 28.971895 195 | rmse_iterforest_score 23.639840 196 | rmse_knn_score 40.944330 197 | rmse_mean_score 52.154860 198 | rmse_zero_score 159.534384 199 | rmse_median_score 57.616702 200 | rmse_min_score 127.874980 201 |
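### Other imputers follow the same interface
The EM- and GAN-based imputers expose the same `complete` method as the examples above. A minimal sketch (import paths follow this repository's source tree; not an output-verified example like the ones above):

```python
from ycimpute.imputer.expectation_maximization import EM
from ycimpute.imputer.gain import GAIN

X_filled_em = EM().complete(X_missing)
X_filled_gain = GAIN(epochs=10).complete(X_missing)
```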
-------------------------------------------------------------------------------- /ycimpute/imputer/mice.py: -------------------------------------------------------------------------------- 1 | 2 | from time import time 3 | import numpy as np 4 | from sklearn.linear_model import LinearRegression 5 | 6 | from ..utils.tools import Solver 7 | 8 | class MICE(Solver): 9 | """ 10 | Basic implementation of the MICE package from R. 11 | This version assumes all of the columns are ordinal 12 | and, by default, uses linear regression. 13 | 14 | Parameters 15 | ---------- 16 | visit_sequence : str 17 | Possible values: "monotone" (default), "roman", "arabic", 18 | "revmonotone". 19 | 20 | n_imputations : int 21 | Defaults to 100 22 | 23 | n_burn_in : int 24 | Defaults to 10 25 | 26 | impute_type : str 27 | "pmm" (default) is predictive mean matching. 28 | "col" means fill in with samples from the posterior predictive 29 | distribution (not implemented yet). 30 | 31 | n_pmm_neighbors : int 32 | Number of nearest neighbors for PMM, defaults to 5. 33 | 34 | model : predictor function 35 | A model that has fit and predict methods. 36 | Defaults to LinearRegression() from scikit-learn. 37 | 38 | n_nearest_columns : int 39 | Number of other columns to use to estimate the current column. 40 | Useful when the number of columns is huge. 41 | Default is to use all columns. 42 | 43 | init_fill_method : str 44 | Valid values: {"mean", "median", or "random"} 45 | (the latter meaning fill with random samples from the observed 46 | values of a column) 47 | 48 | min_value : float 49 | Minimum possible imputed value 50 | 51 | max_value : float 52 | Maximum possible imputed value 53 | 54 | verbose : boolean 55 | """ 56 | def __init__(self, 57 | visit_sequence='monotone', 58 | n_imputations=100, 59 | n_burn_in=10, 60 | n_pmm_neighbors=5, 61 | impute_type='pmm', 62 | model=LinearRegression(), 63 | n_nearest_columns=np.infty, 64 | init_fill_method="mean", 65 | min_value=None, 66 | max_value=None, 67 | verbose=False, 68 | normalizer='min_max'): 69 | 70 | Solver.__init__(self, 71 | normalizer=normalizer) 72 | 73 | self.visit_sequence = visit_sequence 74 | self.n_burn_in = n_burn_in 75 | self.n_pmm_neighbors = n_pmm_neighbors 76 | self.impute_type = impute_type 77 | self.model = model 78 | self.n_nearest_columns = n_nearest_columns 79 | self.verbose = verbose 80 | self.fill_method = init_fill_method 81 | self.min_value = min_value 82 | self.max_value = max_value 83 | self.n_imputations = n_imputations 84 | 85 | def _imputation_round(self, X_filled, visit_indices, missing_mask): 86 | for col in visit_indices: 87 | x_obs, y_obs, x_mis = self.split(X_filled, col, missing_mask) 88 | model = self.model 89 | model.fit(x_obs, y_obs) 90 | 91 | if self.impute_type == 'pmm': 92 | col_preds_missing = model.predict(x_mis) 93 | col_preds_observed = model.predict(x_obs) 94 | D = np.abs(col_preds_missing[:, np.newaxis] - col_preds_observed) 95 | k = np.minimum(self.n_pmm_neighbors, len(col_preds_observed) - 1) 96 | k_nearest_indices = np.argpartition(D, k, 1)[:, :k] 97 | imputed_indices = np.array([ 98 | np.random.choice(neighbor_index) 99 | for neighbor_index in k_nearest_indices]) 100 | imputed_values = y_obs[imputed_indices] 101 | elif self.impute_type == 'col': 102 | # sampling from the posterior predictive distribution is not implemented yet 103 | raise NotImplementedError("impute_type='col' is not implemented") 104 | 105 | X_filled[missing_mask[:, col], col] = imputed_values 106 | return X_filled 107 | 108 | def clip(self, X, **kwargs): 109 | """ 110 | Clip values to fall within any global or column-wise min/max constraints 111 | """ 112 | if self.min_value is not None: 113 | X[X < self.min_value] = self.min_value 114 | if self.max_value is not None: 115 | X[X > self.max_value] = self.max_value 116 | return X 117 | 118 | def get_visit_indices(self, missing_mask): 119 | """ 120 | Decide in what order we will update the columns, i.e. how to sort them. 121 | As an homage to the MICE package, we have 4 options for how to order 122 | the updates.
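The options (mirroring the branches below): "roman" visits columns
left to right, "arabic" right to left, "monotone" puts the columns
with the most missing values first, and "revmonotone" the columns
with the fewest missing values first.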
123 | """ 124 | n_rows, n_cols = missing_mask.shape 125 | if self.visit_sequence == 'roman': 126 | return np.arange(n_cols) 127 | elif self.visit_sequence == 'arabic': 128 | return np.arange(n_cols - 1, -1, -1)  # same as np.arange(n_cols)[::-1] 129 | elif self.visit_sequence == 'monotone': 130 | return np.argsort(missing_mask.sum(0))[::-1] 131 | elif self.visit_sequence == 'revmonotone': 132 | return np.argsort(missing_mask.sum(0)) 133 | else: 134 | raise ValueError("Invalid choice for visit order: %s" % self.visit_sequence) 135 | 136 | 137 | def solve(self, X, missing_mask): 138 | if self.verbose: 139 | print("[MICE] Completing matrix with shape %s" % (X.shape,)) 140 | start_t = time() 141 | 142 | X_filled = np.array(X.copy()) 143 | # honor the requested visit_sequence when ordering the column updates 144 | visit_idx = self.get_visit_indices(missing_mask) 145 | total_rounds = self.n_burn_in + self.n_imputations 146 | 147 | results_list = [] 148 | 149 | for m in range(total_rounds): 150 | if self.verbose: 151 | print( 152 | "[MICE] Starting imputation round %d/%d, elapsed time %0.3f" % ( 153 | m + 1, 154 | total_rounds, 155 | time() - start_t)) 156 | X_filled = self._imputation_round(X_filled, visit_idx, missing_mask) 157 | 158 | if m >= self.n_burn_in: 159 | results_list.append(X_filled[missing_mask]) 160 | 161 | imputed_arrays = np.asarray(results_list) 162 | 163 | # average the imputed values for each missing entry across rounds 164 | average_imputed_values = imputed_arrays.mean(axis=0) 165 | 166 | X[missing_mask] = average_imputed_values 167 | 168 | return X 169 | -------------------------------------------------------------------------------- /ycimpute/esemble/random_forest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..tree.tree import ClassifyTree, RegressionTree 3 | from abc import ABCMeta 4 | from scipy.stats import mode 5 | 6 | 7 | 8 | class RandomForest(metaclass=ABCMeta): 9 | """ 10 | Attributes 11 | ---------- 12 | num_trees : the number of trees to be made in the forest 13 | max_depth : the maximum depth that each tree is allowed to grow 14 | min_size : the minimum number of data observations needed in each split 15 | sample_percentage : size of data to be sampled per tree 16 | 17 | Note 18 | ---- 19 | This class is not to be instantiated.
It is simply a base class for the 20 | classification and regression forest classes. 21 | """ 22 | 23 | def __init__(self, 24 | num_trees, 25 | seed, 26 | max_depth, 27 | min_size, 28 | sample_percentage): 29 | """ 30 | Initializes the random forest 31 | 32 | Parameters 33 | ---------- 34 | num_trees : the number of trees to be made in the forest 35 | seed : the seed from which the random sample choices will be made 36 | max_depth : the maximum depth that each tree is allowed to grow 37 | min_size : the minimum number of data observations needed in each split 38 | sample_percentage : size of data to be sampled per tree 39 | """ 40 | 41 | self.num_trees = num_trees 42 | self.max_depth = max_depth 43 | self.min_size = min_size 44 | self.sample_percentage = sample_percentage 45 | np.random.seed(seed) 46 | 47 | def fit(self, X, y): 48 | """ 49 | Grows a forest of decision trees based off the num_trees 50 | attribute 51 | 52 | Parameters 53 | ---------- 54 | X : N x D matrix of real or ordinal values 55 | y : size N vector consisting of either real values or labels for the corresponding 56 | index in X 57 | """ 58 | 59 | data = np.column_stack((X, y)) 60 | self.forest = np.empty(shape=self.num_trees, dtype='object') 61 | sample_size = int(X.shape[0] * self.sample_percentage) 62 | 63 | for i in range(self.num_trees): 64 | sample = data[np.random.choice(data.shape[0], sample_size, replace=True)] 65 | 66 | # split the bootstrap sample (not the full data) into features and target 67 | sampled_X = sample[:, :sample.shape[1] - 1] 68 | sampled_y = sample[:, sample.shape[1] - 1] 69 | 70 | if isinstance(self, RegressionForest): 71 | tree = RegressionTree( 72 | max_depth=self.max_depth, 73 | min_size=self.min_size, 74 | in_forest=True) 75 | else: 76 | tree = ClassifyTree( 77 | max_depth=self.max_depth, 78 | min_size=self.min_size, 79 | in_forest=True) 80 | 81 | tree.fit(sampled_X, sampled_y) 82 | self.forest[i] = tree 83 | 84 | def predict(self, X): 85 | """ 86 | Predicts the output (y) of a given matrix X 87 | 88 | Parameters 89 | ---------- 90 | X : numerical or ordinal matrix of values corresponding to some output 91 | 92 | Returns 93 | ------- 94 | The predicted values corresponding to the inputs 95 | """ 96 | 97 | votes = np.zeros(shape=(self.num_trees, X.shape[0])) 98 | for i, tree in enumerate(self.forest): 99 | votes[i] = tree.predict(X) 100 | 101 | if isinstance(self, RegressionForest): 102 | predictions = votes.mean(axis=0) 103 | else: 104 | predictions = np.squeeze(mode(votes, axis=0)[0]) 105 | 106 | return predictions 107 | 108 | 109 | class RegressionForest(RandomForest): 110 | """ 111 | Attributes 112 | ---------- 113 | num_trees : the number of trees to be made in the forest 114 | max_depth : the maximum depth that each tree is allowed to grow 115 | min_size : the minimum number of data observations needed in each split 116 | sample_percentage : size of data to be sampled per tree 117 | """ 118 | 119 | def __init__(self, 120 | num_trees=10, 121 | seed=0, 122 | max_depth=None, 123 | min_size=1, 124 | sample_percentage=1): 125 | """ 126 | Initializes Regression Forest 127 | 128 | Parameters 129 | ---------- 130 | num_trees : the number of trees to be made in the forest 131 | seed : the seed from which the random sample choices will be made 132 | max_depth : the maximum depth that each tree is allowed to grow 133 | min_size : the minimum number of data observations needed in each split 134 | sample_percentage : size of data to be sampled per tree
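Example (an illustrative sketch; the data names are placeholders,
not fixtures from this repo):

    forest = RegressionForest(num_trees=20, max_depth=5)
    forest.fit(X_train, y_train)
    predictions = forest.predict(X_test)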
135 | """ 136 | 137 | self.num_trees = num_trees 138 | self.max_depth = max_depth 139 | self.min_size = min_size 140 | self.sample_percentage = sample_percentage 141 | super(RegressionForest, self).__init__( 142 | num_trees=num_trees, 143 | seed=seed, 144 | max_depth=max_depth, 145 | min_size=min_size, 146 | sample_percentage=sample_percentage 147 | ) 148 | 149 | 150 | class ClassificationForest(RandomForest): 151 | """ 152 | Attributes 153 | ---------- 154 | num_trees : the number of trees to be made in the forest 155 | max_depth : the maximum depth that each tree is allowed to grow 156 | min_size : the minimum number of data observations needed in each split 157 | sample_percentage : size of data to be sampled per tree 158 | """ 159 | 160 | def __init__(self, 161 | num_trees=10, 162 | seed=0, 163 | max_depth=None, 164 | min_size=1, 165 | sample_percentage=1): 166 | """ 167 | Initializes Classification Forest 168 | 169 | Parameters 170 | ---------- 171 | num_trees : the number of trees to be made in the forest 172 | seed : the seed from which the random sample choices will be made 173 | max_depth : the maximum depth that each tree is allowed to grow 174 | min_size : the minimum number of data observations needed in each split 175 | sample_percentage : size of data to be sampled per tree 176 | """ 177 | 178 | self.num_trees = num_trees 179 | self.max_depth = max_depth 180 | self.min_size = min_size 181 | self.sample_percentage = sample_percentage 182 | super(ClassificationForest, self).__init__( 183 | num_trees=num_trees, 184 | seed=seed, 185 | max_depth=max_depth, 186 | min_size=min_size, 187 | sample_percentage=sample_percentage 188 | ) 189 | -------------------------------------------------------------------------------- /ycimpute/imputer/iterforest.py: -------------------------------------------------------------------------------- 1 | from sklearn.ensemble import RandomForestRegressor 2 | from sklearn.ensemble import RandomForestClassifier 3 | from sklearn.utils import check_array 4 | import numpy as np 5 | 6 | from ..utils.tools import Solver 7 | 8 | class MissForest(Solver): 9 | def __init__( 10 | self, 11 | n_estimators=300, 12 | max_depth=None, 13 | min_samples_split=2, 14 | min_samples_leaf=1, 15 | max_features='auto', 16 | max_samples=None, 17 | normalizer='min_max'): 18 | """ 19 | Parameters 20 | ---------- 21 | n_estimators: integer, optional (default=300) 22 | max_depth: integer or None, optional (default=None) 23 | The maximum depth of the tree. 24 | If None, then nodes are expanded until all leaves are pure 25 | or until all leaves contain less than min_samples_split samples. 26 | min_samples_split: int, float, optional (default=2) 27 | The minimum number of samples required to split an internal node 28 | min_samples_leaf: int, float, optional (default=1) 29 | The minimum number of samples required to be at a leaf node. 30 | A split point at any depth will only be considered if it leaves 31 | at least min_samples_leaf training samples in each of the left and right branches. 32 | This may have the effect of smoothing the model, especially in regression. 33 | max_features: int, float, string or None, optional (default="auto") 34 | The number of features to consider when looking for the best split. 35 | If int, then consider max_features features at each split.
36 | If float, then max_features is a fraction and int(max_features * n_features) features are considered at each split. 37 | If "auto", then max_features=n_features. 38 | If "sqrt", then max_features=sqrt(n_features). 39 | If "log2", then max_features=log2(n_features). 40 | If None, then max_features=n_features. 41 | max_samples: int or float, default=None 42 | If bootstrap is True, the number of samples to draw from X to train each base estimator. 43 | If None (default), then draw X.shape[0] samples. 44 | If int, then draw max_samples samples. 45 | If float, then draw max_samples * X.shape[0] samples. Thus, max_samples should be in the interval (0, 1). 46 | """ 47 | self.coltype_dict = None 48 | self.mask_memo_dict = None 49 | self.sorted_col = None 50 | self.stop = False 51 | self.rf_reg = RandomForestRegressor(n_estimators=n_estimators, 52 | max_depth=max_depth, 53 | min_samples_leaf=min_samples_leaf, 54 | max_features=max_features, 55 | min_samples_split=min_samples_split) 56 | self.rf_cla = RandomForestClassifier(n_estimators=n_estimators, 57 | max_depth=max_depth, 58 | min_samples_leaf=min_samples_leaf, 59 | max_features=max_features, 60 | min_samples_split=min_samples_split) 61 | self.imp_continuous_index = None 62 | self.imp_categorical_index = None 63 | self.normalizer = normalizer 64 | 65 | Solver.__init__(self, 66 | normalizer=normalizer) 67 | 68 | def solve(self, X, missing_mask): 69 | X = check_array(X, force_all_finite=False) 70 | self.sorted_col = self.sort_col(missing_mask) 71 | self.coltype_dict = self._judge_type(X) 72 | 73 | self.imp_continuous_index, self.imp_categorical_index = \ 74 | self.get_type_index(missing_mask, self.coltype_dict) 75 | 76 | differ_categorical = float('inf') 77 | differ_continuous = float('inf') 78 | 79 | init_fill = X 80 | # imputed values from the previous round, in column-visiting order; 81 | # starting from zeros so the first round never triggers the stop rule 82 | x_old_imp = np.zeros(int(missing_mask.sum())) 83 | 84 | while self.stop is False: 85 | 86 | differ_categorical_old = differ_categorical 87 | differ_continuous_old = differ_continuous 88 | 89 | x_new_imp = [] 90 | 91 | for col in self.sorted_col: 92 | tmp = [] 93 | if self.coltype_dict[col] == 'categorical': 94 | model = self.rf_cla 95 | else: 96 | model = self.rf_reg 97 | 98 | x_obs, y_obs, x_mis = self.split(init_fill, col, missing_mask) 99 | model.fit(x_obs, y_obs) 100 | y_mis = model.predict(x_mis) 101 | for ele in y_mis: 102 | tmp.append(ele) 103 | x_new_imp.append(ele) 104 | init_fill[:, col][missing_mask[:, col]] = tmp 105 | x_new_imp = np.asarray(x_new_imp) 106 | 107 | differ_continuous, differ_categorical = self._lose_func(x_new_imp, x_old_imp) 108 | x_old_imp = x_new_imp  # keep this round's imputations for the next comparison 109 | if differ_continuous >= differ_continuous_old and differ_categorical >= differ_categorical_old: 110 | self.stop = True 111 | return init_fill 112 | 113 | def _lose_func(self, imp_new, imp_old): 114 | """ 115 | Evaluation method; the mathematical concept is described at 'https://www.stu-zhouyc.com/iterForest/metrics' 116 | :param imp_new: the freshly imputed values for the originally missing entries 117 | :param imp_old: the imputed values from the previous round; 118 | the two rounds are compared to decide convergence.
119 | :return: 120 | """ 121 | 122 | continuous_imp_new = imp_new[self.imp_continuous_index] 123 | continuous_imp_old = imp_old[self.imp_continuous_index] 124 | categorical_imp_new = imp_new[self.imp_categorical_index] 125 | categorical_imp_old = imp_old[self.imp_categorical_index] 126 | 127 | try: 128 | continuous_div = continuous_imp_new - continuous_imp_old 129 | continuous_div = continuous_div.dot(continuous_div) 130 | continuous_sum = continuous_imp_new.dot(continuous_imp_new) 131 | 132 | categorical_count = np.sum(categorical_imp_new == categorical_imp_old) 133 | categorical_var_len = len(categorical_imp_new) 134 | 135 | except Exception: 136 | categorical_var_len = 0.01 137 | categorical_count = 0 138 | 139 | continuous_div = 0 140 | continuous_sum = 0.001 141 | 142 | if categorical_var_len == 0: 143 | categorical_differ = 0 144 | else: 145 | categorical_differ = categorical_count / categorical_var_len 146 | 147 | if continuous_sum == 0: 148 | continuous_differ = 0 149 | else: 150 | continuous_differ = continuous_div / continuous_sum 151 | return continuous_differ, categorical_differ -------------------------------------------------------------------------------- /ycimpute/tree/tree.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import numpy.linalg as la 4 | import scipy.stats as stats 5 | from abc import ABCMeta 6 | 7 | class DecisionTree(metaclass=ABCMeta): 8 | """ 9 | A CART-style decision tree. 10 | """ 11 | def __init__(self, 12 | lose_func=None, 13 | max_depth=None, 14 | min_sample_split=5, 15 | min_cost=None, 16 | is_forest=False 17 | ): 18 | self.max_depth = max_depth 19 | self.min_sample_split = min_sample_split 20 | self.min_cost = min_cost 21 | self.is_forest = is_forest 22 | self.lose_func = lose_func 23 | self.num_samples = None 24 | 25 | if isinstance(self, RegressionTree): 26 | self.lose_func = self._mse 27 | elif isinstance(self, ClassifyTree): 28 | self.lose_func = self._gini_index 29 | 30 | def _mse(self, y): 31 | """ 32 | MSE (mean squared error) 33 | :param y: ndarray, a vector-like array 34 | :return: the MSE value of y, float 35 | """ 36 | if y.size == 0: 37 | return 0 38 | c_m = np.mean(y) 39 | diff = np.abs(c_m - y) 40 | mse = np.square(diff).sum() 41 | return mse 42 | 43 | def _gini_index(self, pure_y): 44 | """ 45 | Gini index 46 | :param pure_y: ndarray, vector-like 47 | :return: float 48 | """ 49 | labels = np.unique(pure_y) 50 | dist = np.empty(labels.shape) 51 | # compare against the actual label values, not their positions 52 | for idx, label in enumerate(labels): 53 | dist[idx] = np.sum(pure_y == label) / pure_y.shape[0] 54 | sub_feature_gini = 1.0 - np.sum(np.square(dist)) 55 | return abs(pure_y.shape[0] / self.num_samples) * sub_feature_gini 56 | 57 | def _entropy(self): 58 | """ 59 | A CART tree does not need entropy; it is only required by ID3 or C4.5. 60 | :return: None 61 | """ 62 | pass 63 | 64 | def cost_reduction(self, data_left, data_right): 65 | y_total = np.hstack((data_left[1], data_right[1])) 66 | total_norm = la.norm(y_total) 67 | left_norm = la.norm(data_left[1]) 68 | right_norm = la.norm(data_right[1]) 69 | 70 | total_cost = self.lose_func(y_total) 71 | normalized_left = (left_norm / total_norm) * self.lose_func(data_left[1]) 72 | normalized_right = (right_norm / total_norm) * self.lose_func(data_right[1]) 73 | 74 | return total_cost - (normalized_left + normalized_right) 75 | 76 | def choose_best_feature(self, X, y, node): 77 | split_threshold = None 78 | split_feature = None 79 | min_gini_index = None 80 | 81 | real_features = range(X.shape[1]) 82 |
self.num_samples = X.shape[0] 83 | if self.is_forest: 84 | if isinstance(self, RegressionTree): 85 | features = np.random.choice(real_features, size=int(X.shape[1] / 3)) 86 | else: 87 | features = np.random.choice(real_features, size=int(np.sqrt(X.shape[1]))) 88 | else: 89 | features = real_features 90 | 91 | for feature in features: 92 | for sub_feature in np.unique(X[:, feature]): 93 | left = y[X[:, feature] == sub_feature] 94 | right = y[X[:, feature] != sub_feature] 95 | gini_index = self.lose_func(left) + self.lose_func(right) 96 | if min_gini_index is None or gini_index < min_gini_index: 97 | min_gini_index = gini_index 98 | split_feature = feature 99 | split_threshold = sub_feature 100 | ... 109 | ... node.depth > self.max_depth: 110 | return True 111 | if not isinstance(self, ClassifyTree) and \ 112 | self.cost_reduction(left_data, right_data) < self.min_cost: ... -------------------------------------------------------------------------------- /img/WINE.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/img/WINE.svg -------------------------------------------------------------------------------- /img/IRIS.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenIDEA-YunanUniversity/ycimpute/HEAD/img/IRIS.svg
--------------------------------------------------------------------------------