├── requirements.txt ├── kmeans.cfg ├── standardCanopy.py ├── .gitignore ├── config_loader.py ├── README.md ├── MyPreprocessing.py ├── Validation.py ├── MyFuzzyCmeans.py ├── MainLauncher.py ├── MyKmeans.py ├── MyKmedoids.py ├── MyCanopy.py └── datasets ├── ad_blob_0.csv ├── ad_blob_2.csv ├── ad_blob_4.csv ├── wine.arff └── pima-diabetes.arff /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn>=0.20.0 2 | numpy 3 | pandas 4 | configparser 5 | scipy -------------------------------------------------------------------------------- /kmeans.cfg: -------------------------------------------------------------------------------- 1 | [data] 2 | datadir = datasets 3 | # Choose any of the datasets. We used: pen-based.arff, pima-diabetes.arff, wine.arff, ad_blob0.csv, ad_blob2.csv, ad_blob4.csv 4 | dataset = wine.arff 5 | # 1: Density Canopy-Kmeans (MyCanopyKMeans) 6 | # 2: StandardCanopyKMeans 7 | # 3: MyKMeans 8 | # 4: SklearnKMeans 9 | # 5: MyKMeans++ 10 | # 6: SklearnKMeans++ 11 | # 7: MyKMedoids 12 | # 8: MyFuzzyCMeans 13 | algorithm = 1-2-3-4-5-6-7-8 14 | 15 | [clustering] 16 | normalized = true 17 | canopyT1 = 3 18 | canopyT2 = 1 19 | remove_outliers = true 20 | k = 3 21 | tol = 0.001 22 | max_rep = 100 23 | fuzzy_m = 2 24 | 25 | # kmeans_init_type = 26 | kmeans_init_type = canopy 27 | 28 | # run the algorithms or silhouette 29 | run = algorithms -------------------------------------------------------------------------------- /standardCanopy.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics.pairwise import pairwise_distances 2 | import numpy as np 3 | # source: https://gist.github.com/gdbassett/528d816d035f2deaaca1 4 | # T1 = Distance to centroid point to not include in other clusters 5 | # T2 = Distance to centroid point to include in cluster 6 | # T1 > T2 for overlapping clusters 7 | # T1 < T2 will have points which reside in no clusters 8 | # T1 
def myStandardCanopy(X, T1, T2, distance_metric='euclidean'):
    """
    Build canopies over the rows of X using two distance thresholds.

    A pairwise distance matrix is computed once. Candidate rows are then
    popped one at a time from a set; each popped row becomes the centre of
    a new canopy.

    Parameters:
        X: 2-D data (anything accepted by ``pairwise_distances`` that also
           exposes ``.shape``); one row per point.
        T1: rows closer than T1 to a centre are removed from the pool of
            future centre candidates.
        T2: rows closer than T2 to a centre are listed as canopy members.
        distance_metric: metric name forwarded to ``pairwise_distances``.

    Returns:
        dict mapping a running canopy number to
        ``{"c": <row index of centre>, "points": [<member row indices>]}``.
    """
    dist_matrix = pairwise_distances(X, metric=distance_metric)
    remaining = set(range(X.shape[0]))
    canopies = {}
    while remaining:
        # set.pop() picks an arbitrary remaining candidate as the next centre
        centre = remaining.pop()
        centre_dists = dist_matrix[centre]
        members = np.where(centre_dists < T2)[0]
        canopies[len(canopies)] = {"c": centre, "points": list(members)}
        # everything within T1 of this centre can no longer become a centre
        covered = np.where(centre_dists < T1)[0]
        remaining = remaining - set(covered)
    print('Standard canopy found %d clusters' % len(canopies))
    return canopies
def load(config_file, *args):
    """
    Read the configuration file and check that the required options exist.

    Parameters:
        config_file: path of the INI-style configuration file.
        *args: accepted for backward compatibility only; ignored.

    Returns:
        The populated ``configparser.ConfigParser``.

    Exits (via ``sys.exit``):
        1 if the file cannot be read or does not exist,
        2 if a required section or option is missing.
    """
    required_config = {
        'data': ['datadir',
                 'dataset',
                 'algorithm'],
        'clustering': ['k',
                       'tol',
                       'max_rep'],
    }
    try:
        # Load configuration from file
        config = configparser.ConfigParser()
        read_files = config.read(config_file)

        # check the config file exist and can be read
        if len(read_files) != 1:
            # Report the path we were actually given. The previous code
            # formatted ``args.config``, which raised AttributeError when
            # no extra positional argument was supplied.
            print("Configuration file '{0}' cannot be read or does not exist. Stopping.".format(config_file))
            sys.exit(1)

        # Force load of all required fields to avoid errors in runtime
        for section, options in required_config.items():
            for option in options:
                try:
                    config.get(section, option)
                except configparser.NoSectionError:
                    print("Section '{0}' not present in config file. Stopping.".format(section))
                    sys.exit(2)
                except configparser.NoOptionError:
                    print("Option '{0}' not present in section {1} in config file. Stopping.".format(option, section))
                    sys.exit(2)
        return config

    except IOError as e:
        traceback.print_exc(file=sys.stdout)
        sys.exit(e)
10 | 11 | Concerning the configuration file in the *data* part: 12 | * datadir: the directory which contains the datasets 13 | * dataset: the file name of the dataset, including the .arff or .csv extension; the file must be located inside the *datadir* directory 14 | * algorithm: the algorithm or the algorithms you want to run separated by a dash (-) with no spaces, e.g. 1-2-3. Each algorithm corresponds to a number 15 | * 1: Density Canopy-Kmeans (MyCanopyKMeans) [from the aforementioned paper] 16 | * 2: StandardCanopyKMeans 17 | * 3: MyKMeans 18 | * 4: SklearnKMeans 19 | * 5: MyKMeans++ 20 | * 6: SklearnKMeans++ 21 | * 7: MyKMedoids 22 | * 8: MyFuzzyCMeans 23 | 24 | Concerning the configuration file in the *clustering* part: 25 | * normalized: boolean, to define if preprocessing should normalize the data or not (true/false) 26 | * canopyT1: a float, indicating the T1 parameter of the standard canopy algorithm 27 | * canopyT2: a float, indicating the T2 parameter of the standard canopy algorithm 28 | * remove_outliers: boolean, to define if density canopy should remove the outliers or not (true/false) 29 | * k: the number of clusters 30 | * tol: the tolerance for the convergence 31 | * max_rep: the maximum number of repetitions 32 | * kmeans_init_type: the type of initializing the centroids. The possible values are: 33 | * random: for getting random numbers following the uniform distribution 34 | * kmeans++: for applying KMeans++ algorithm for the initial centroids 35 | * canopy: for specifying that there are some centroids defined 36 | * run: the way you want to run the algorithms. 
class MyPreprocessing:
    """
    Split a raw table into labels and a fully numeric feature frame.

    After ``fit``:
        labels_: integer codes (``pd.factorize``) of the last column.
        new_df: numeric features (standardised when ``normalized`` is true)
                followed by categorical features, integer-encoded and
                min-max scaled.
    """

    def __init__(self, normalized=True):
        # widen pandas' console output so result tables are not truncated
        pd.set_option('display.max_rows', 500)
        pd.set_option('display.max_columns', 500)
        pd.set_option('display.width', 1000)
        self.normalized = normalized
        print('Normalized', normalized)

    def fit(self, data):
        """
        Preprocess ``data`` and store the result in ``labels_`` / ``new_df``.

        Parameters:
            data: anything ``pd.DataFrame`` accepts; the LAST column is
                  treated as the class label and removed from the features.
        """
        df = pd.DataFrame(data)
        # b'?' marks a missing value (bytes, as in the commented loadarff
        # example); np.nan is used because the np.NaN alias was removed in
        # NumPy 2.0
        df = df.replace(b'?', np.nan)

        # get label
        labels = df.iloc[:, -1]
        self.labels_ = pd.factorize(labels)[0]
        # drop the label by position so duplicated column names cannot
        # remove more than one column
        df = df.iloc[:, :-1]

        # normalize numerical data
        df_num = df.select_dtypes(exclude='object')
        if df_num.size > 0:
            if self.normalized:
                scaler = preprocessing.StandardScaler()
                scaled = scaler.fit_transform(df_num.values.astype(float))
                df_normalized = pd.DataFrame(scaled, columns=df_num.columns)
            else:
                df_normalized = df_num
        else:
            df_normalized = pd.DataFrame()

        df_obj = df.select_dtypes(include='object')
        if df_obj.size > 0:
            df_encoded = df_obj.apply(lambda x: pd.factorize(x)[0])
            # factorize encodes NaN as -1; shift those columns by one so
            # NaN becomes 0. Only categorical columns are considered: the
            # previous code also selected numeric columns containing NaN,
            # which are absent from df_encoded and caused a KeyError.
            obj_nan_cols = [col for col in df_obj.columns
                            if df_obj[col].isna().any()]
            if obj_nan_cols:
                df_encoded.loc[:, obj_nan_cols] += 1

            min_max_scaler = preprocessing.MinMaxScaler()
            scaled = min_max_scaler.fit_transform(df_encoded.values.astype(float))
            df_encoded = pd.DataFrame(scaled, columns=df_encoded.columns)
            df_encoded = df_encoded.astype('float')
        else:
            df_encoded = pd.DataFrame()

        # concatenate only the non-empty parts (numeric first, then encoded)
        parts = [part for part in (df_normalized, df_encoded) if part.size > 0]
        if parts:
            self.new_df = pd.concat(parts, axis=1, sort=False)
        else:
            self.new_df = pd.DataFrame()
algo, config_file, k_max = 15): 29 | """ 30 | Models: 31 | '1': 'KMeans', 32 | '2': 'KMeans++', 33 | '3': 'KMedoids', 34 | '4': 'FuzzyCMeans', 35 | '5': 'AggloSingle', 36 | '6': 'AggloAverage', 37 | '7': 'AggloComplete' 38 | 39 | """ 40 | from sklearn.cluster import KMeans 41 | from config_loader import load, clf_names 42 | from sklearn.metrics import silhouette_score 43 | from MyKmeans import MyKmeans 44 | from MyKmedoids import MyKmedoids 45 | from MyFuzzyCmeans import MyFuzzyCmeans 46 | from sklearn.cluster import AgglomerativeClustering 47 | 48 | config = load(config_file) 49 | tol = float(config.get('clustering', 'tol')) 50 | max_rep = int(config.get('clustering', 'max_rep')) 51 | fuzzy_m = int(config.get('clustering', 'fuzzy_m')) 52 | kmeans_init_type = config.get('clustering', 'kmeans_init_type') 53 | x = [1] 54 | sil = [0] 55 | for k in range(2, k_max + 1): 56 | clf_options = { 57 | '1': MyKmeans(k, tol, max_rep), 58 | #'1': KMeans(n_clusters=k), 59 | '2': MyKmeans(k, tol, max_rep, kmeans_init_type), 60 | '3': MyKmedoids(k, tol, max_rep), 61 | '4': MyFuzzyCmeans(k, tol, max_rep, fuzzy_m), 62 | '5': AgglomerativeClustering(n_clusters=k, linkage='single'), 63 | '6': AgglomerativeClustering(n_clusters=k, linkage='average'), 64 | '7': AgglomerativeClustering(n_clusters=k, linkage='complete') 65 | } 66 | 67 | clf = clf_options.get(str(algo)) 68 | clf.fit(df) 69 | pred = clf.labels_ 70 | x += [k] 71 | sil += [silhouette_score(df, pred, metric='euclidean')] 72 | 73 | clf_name = clf_names.get(str(algo)) 74 | plt.figure() 75 | plt.plot(x, sil, color='green', marker='o') 76 | plt.title('Silhouette Score ' + str(clf_name)) 77 | plt.xlabel('Number of Clusters') 78 | plt.ylabel('Average Silhouette Score') 79 | plt.ylim((0, 1)) 80 | plt.xlim((1, k_max+1)) 81 | 82 | return plt 83 | ## 84 | 85 | #best_k(2) -------------------------------------------------------------------------------- /MyFuzzyCmeans.py: 
class MyFuzzyCmeans:
    """
    Fuzzy C-means clustering.

    Attributes set by ``fit``:
        universe_matrix: (k, n) membership matrix; each column sums to 1.
        v_centres: (k, d) cluster centres.
        labels_: for every point, the index of the cluster with the
                 highest membership.
        inertia_: sum of squared Euclidean distances of the points to the
                  centre of their ``labels_`` cluster.

    Note: ``max_rep`` is decremented by ``fit``; callers read the remaining
    value afterwards to derive the number of iterations performed.
    """

    def __init__(self, k=2, tol=0.001, max_rep=100, m=2.0):
        self.k = k
        self.tol = tol
        self.max_rep = max_rep
        # Fuzzy parameter
        self.m = m
        self.name = 'FuzzyCMeans'

    def init_centers(self, data, init_type):
        """
        Return a random (k, n) membership matrix whose columns sum to 1.

        Raises:
            ValueError: for any ``init_type`` other than 'random'
                        (previously this silently returned None).
        """
        if init_type != 'random':
            raise ValueError("Unsupported init_type '%s'; only 'random' is available" % init_type)
        values = np.random.rand(self.k, data.shape[0])
        return values / values.sum(axis=0)

    def new_universe_matrix(self, universe_matrix, data):
        """
        Recompute ``self.universe_matrix`` from the current ``v_centres``.

        ``universe_matrix`` is accepted for interface compatibility; the
        new memberships depend only on the point-to-centre distances.

        Raises:
            ValueError: if the fuzzy parameter m is not greater than 1.
        """
        if self.m <= 1:
            raise ValueError('Fuzzy parameter m must be > 1, got %s' % self.m)
        power_num = 2.0 / (self.m - 1)
        # dists: k rows with the (L1) distance between each data point and the i-th centre
        dists = np.array([np.linalg.norm(data - v_centre, ord=1, axis=1)
                          for v_centre in self.v_centres])
        # A point lying exactly on a centre has distance 0, which used to
        # give 0/0 -> NaN memberships. Clamping to machine epsilon makes
        # that point belong (almost) entirely to the coinciding centre.
        dists = np.maximum(dists, np.finfo(float).eps)
        # ratios[c, j, i] = dist(point i, centre c) / dist(point i, centre j)
        ratios = dists[:, np.newaxis, :] / dists[np.newaxis, :, :]
        self.universe_matrix = 1.0 / (ratios ** power_num).sum(axis=1)

    def v_centres_calc(self, data):
        """Set ``v_centres`` to the membership**m weighted means of the data."""
        universe_matrix_m = self.universe_matrix ** self.m
        v_centres_num = np.matmul(universe_matrix_m, data)
        # make the den a column vector with reshape
        v_centres_den = universe_matrix_m.sum(axis=1).reshape(-1, 1)

        self.v_centres = v_centres_num / v_centres_den

    def find_mindist(self, data, seed):
        """Euclidean distance of every row of ``data`` to centre ``seed``."""
        return distance_metric(data, self.v_centres[seed])

    def fit(self, dt):
        """
        Cluster ``dt`` (DataFrame or 2-D numpy array).

        Iterates membership and centre updates until no centre moves more
        than ``tol`` (L1 norm) or ``max_rep`` is used up.

        Raises:
            Exception: if ``dt`` is neither a DataFrame nor a numpy array.
        """
        if isinstance(dt, pd.DataFrame):
            data = dt.values
        elif isinstance(dt, np.ndarray):
            data = dt
        else:
            raise Exception('dt should be a DataFrame or a numpy array')

        # start from random memberships and derive the matching centres.
        # Done unconditionally so a second fit() never reuses stale centres
        # and so the centres exist even when max_rep is already 0.
        self.universe_matrix = self.init_centers(data, 'random')
        self.v_centres_calc(data)

        converge = False
        while self.max_rep > 0 and not converge:
            v_centres_old = self.v_centres.copy()
            self.new_universe_matrix(self.universe_matrix, data)
            self.v_centres_calc(data)

            # converged when every centre moved at most tol (L1 norm)
            shifts = np.linalg.norm(self.v_centres - v_centres_old, ord=1, axis=1)
            converge = bool((shifts <= self.tol).all())

            self.max_rep -= 1

        self.labels_ = self.universe_matrix.argmax(axis=0)
        print('Remaining repetitions: %s' % (self.max_rep))

        self.inertia_ = 0
        for seed in range(len(self.v_centres)):
            members = data[np.where(self.labels_ == seed)]
            self.inertia_ += (self.find_mindist(members, seed) ** 2).sum()


def distance_metric(a, b, dist='Euclidean'):
    """
    Euclidean distance between every row of ``a`` and the vector ``b``.

    Parameters:
        a: 2-D numpy array, one point per row.
        b: 1-D numpy array (a centre) with as many entries as ``a`` has columns.
        dist: kept for interface compatibility; only 'Euclidean' is implemented.

    Returns:
        1-D array with one distance per row of ``a``.

    Raises:
        ValueError: if the dimensions do not match (previously the function
                    silently returned None in that case).
    """
    if a.shape[1] != b.shape[0]:
        raise ValueError('Dimension mismatch: data has %d columns, centre has %d values'
                         % (a.shape[1], b.shape[0]))
    distance = ((a - b) ** 2).sum(axis=1)
    return distance ** 0.5
[12,13], [13,14],[11,15]]) 118 | clf.fit(data) 119 | print(clf.universe_matrix) 120 | print(clf.labels_) 121 | ''' 122 | -------------------------------------------------------------------------------- /MainLauncher.py: -------------------------------------------------------------------------------- 1 | ## 2 | from sklearn.cluster import KMeans 3 | from MyKmeans import MyKmeans 4 | from MyKmedoids import MyKmedoids 5 | from MyFuzzyCmeans import MyFuzzyCmeans 6 | from MyPreprocessing import MyPreprocessing 7 | from sklearn.cluster import AgglomerativeClustering 8 | from MyCanopy import MyCanopy 9 | from standardCanopy import myStandardCanopy 10 | from scipy.io.arff import loadarff 11 | import pandas as pd 12 | from config_loader import load, clf_names 13 | from sklearn.datasets import make_moons, make_blobs 14 | import argparse 15 | from os import path 16 | import sys 17 | from time import time 18 | import numpy as np 19 | import pandas as pd 20 | 21 | ## 22 | from Validation import validation_metrics 23 | from Validation import best_k 24 | 25 | ## 26 | if __name__ == '__main__': 27 | ## 28 | 29 | # Loads config 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument( 32 | "-c", "--config", default="kmeans.cfg", 33 | help="specify the location of the clustering config file" 34 | ) 35 | args, _ = parser.parse_known_args() 36 | 37 | config_file = args.config 38 | config = load(config_file) 39 | 40 | ## 41 | datadir = config.get('data', 'datadir') 42 | dataset = config.get('data', 'dataset') 43 | path = path.join(datadir, dataset) 44 | try: 45 | if dataset.split('.')[-1] == 'arff': 46 | data, meta = loadarff(path) 47 | else: 48 | # first row is used as header 49 | data = pd.read_csv(path) 50 | except FileNotFoundError: 51 | print("Dataset '%s' cannot be found in the path %s" %(dataset, path)) 52 | sys.exit(1) 53 | 54 | ## 55 | k = int(config.get('clustering', 'k')) 56 | tol = float(config.get('clustering', 'tol')) 57 | max_rep = int(config.get('clustering', 
'max_rep')) 58 | fuzzy_m = int(config.get('clustering', 'fuzzy_m')) 59 | kmeans_init_type = config.get('clustering', 'kmeans_init_type') 60 | run = config.get('clustering', 'run') 61 | 62 | ## Preprocessing 63 | normalized = config.getboolean('clustering', 'normalized') 64 | preprocess = MyPreprocessing(normalized=normalized) 65 | preprocess.fit(data) 66 | df = preprocess.new_df 67 | labels = preprocess.labels_ 68 | 69 | # MyCanopy 70 | remove_outliers = config.getboolean('clustering', 'remove_outliers') 71 | mycanopy = MyCanopy(remove_outliers=remove_outliers) 72 | mycanopy.fit(df) 73 | #centers = list(mycanopy.centroids.values()) 74 | #print('Canopy centers', centers) 75 | 76 | # StandardCanopy 77 | t1 = config.getfloat('clustering', 'canopyT1') 78 | t2 = config.getfloat('clustering', 'canopyT2') 79 | standard_canopy = myStandardCanopy(df, t1, t2) 80 | centroids = {} 81 | for i in standard_canopy.keys(): 82 | centroid_info = standard_canopy[i] 83 | centroid = df.iloc[centroid_info['c'],:] 84 | centroids[i] = centroid.values 85 | 86 | clf_options = { 87 | '1': MyKmeans(k, tol, max_rep, 'canopy', mycanopy.centroids), 88 | #'1': KMeans(n_clusters=len(centers), tol=tol, max_iter=max_rep, init=np.array(centers)), 89 | '2': MyKmeans(k, tol, max_rep, 'canopy', centroids), 90 | '3': MyKmeans(k, tol, max_rep), 91 | '4': KMeans(n_clusters=k, tol=tol, max_iter=max_rep, init='random'), 92 | '5': MyKmeans(k, tol, max_rep, 'kmeans++'), 93 | '6': KMeans(n_clusters=k, tol=tol, max_iter=max_rep, init='k-means++'), 94 | '7': MyKmedoids(k, tol, max_rep), 95 | '8': MyFuzzyCmeans(k, tol, max_rep, fuzzy_m), 96 | } 97 | 98 | algos = config.get('data', 'algorithm').split('-') 99 | values = pd.DataFrame() 100 | for algo in algos: 101 | #print(df.values) 102 | #print(df.dtypes) 103 | 104 | ## 105 | 106 | clf = clf_options.get(str(algo)) 107 | 108 | clf_name = clf_names.get(str(algo)) 109 | if not clf: 110 | print("Not available algorithm defined in config file. 
class MyKmeans:
    """
    K-means clustering with random, k-means++ or externally supplied
    ('canopy') initial centroids.

    Attributes set by ``fit``:
        centroids: dict {cluster id: centre vector}.
        clusters: dict {cluster id: np.where(...) tuple of member rows}.
        labels_: cluster id of every row.
        inertia_: sum of squared Euclidean distances to the own centroid.

    Notes:
        * ``max_rep`` is decremented by ``fit``; callers read the remaining
          value afterwards to derive the number of iterations performed.
        * Cluster ids are used as row numbers of the distance matrix, so
          the keys of ``centroids`` are assumed to be 0..k-1 in order.
          NOTE(review): confirm the dict supplied for 'canopy' honours this.
    """

    def __init__(self, k=2, tol=0.01, max_rep=100,
                 init_type='random', init_centers=None):
        self.k = k
        self.tol = tol
        self.max_rep = max_rep
        self.centroids = {}
        self.clusters = {}
        self.init_type = init_type
        if init_type == 'random':
            self.name = 'KMeans'
        elif init_type == 'kmeans++':
            self.name = 'KMeans++'
        elif init_type == 'canopy':
            if init_centers is None:
                raise ValueError("init_type 'canopy' requires init_centers")
            # copy: fit() rebinds entries of self.centroids and must not
            # modify the dictionary owned by the caller
            self.centroids = dict(init_centers)
            self.k = len(init_centers)
            self.name = 'CanopyKmeans'
        else:
            self.name = 'NotSpecified'

    def init_centroids(self, data, init_type):
        """
        Choose ``k`` rows of ``data`` as initial centroids.

        Raises:
            Exception: if k is not smaller than the number of rows.
            ValueError: if ``init_type`` is neither 'random' nor 'kmeans++'.
        """
        if not self.k < len(data):
            raise Exception('# of desired clusters should be < total data points')

        if init_type == 'random':
            self.centroids = {}
            seeds = np.random.choice(len(data), self.k, replace=False)
            for index in range(self.k):
                self.centroids[index] = data[seeds[index], :]

        elif init_type == 'kmeans++':
            len_data = data.shape[0]
            first_seed = np.random.choice(len_data, 1, replace=False)[0]
            self.centroids = {0: data[first_seed, :]}

            for centroid_index in range(1, self.k):
                # squared distance of every point to each chosen centroid
                sq_dists = np.array([self.find_mindist(data, seed)
                                     for seed in self.centroids]) ** 2
                # k-means++ weights each point by the squared distance to
                # its NEAREST chosen centroid (the previous code summed the
                # distances to all of them). Already chosen points have
                # weight 0 and cannot be picked again.
                nearest = sq_dists.min(axis=0)
                total = nearest.sum()
                # all points coincide with a centroid: fall back to uniform
                probs = nearest / total if total > 0 else None
                new_seed = np.random.choice(len_data, 1, replace=False, p=probs)[0]
                self.centroids[centroid_index] = data[new_seed, :]

        else:
            raise ValueError("Cannot initialise centroids for init_type '%s'" % init_type)

    def find_mindist(self, data, seed):
        """Euclidean distance of every row of ``data`` to centroid ``seed``."""
        return distance_metric(data, self.centroids[seed])

    def handle_empty_cluster(self, dist2centroids, data, seed, emptySeeds):
        """
        Move the centroid of empty cluster ``seed`` onto the data point with
        the largest summed distance to all centroids not listed in
        ``emptySeeds``, and return the distances of all points to it.
        """
        # choose non empty seeds from distance matrix
        nonEmpty_dist2centroids = np.delete(dist2centroids, emptySeeds, axis=0)
        dat_point_maxDist = nonEmpty_dist2centroids.sum(axis=0).argmax()
        self.centroids[seed] = data[dat_point_maxDist, :]
        return np.array(self.find_mindist(data, seed))

    def _assign_labels(self, dist2centroids):
        """Set ``labels_`` and ``clusters`` from a (k, n) distance matrix."""
        self.labels_ = dist2centroids.argmin(axis=0)
        for seed in self.centroids:
            self.clusters[seed] = np.where(self.labels_ == seed)

    def fit(self, dt):
        """
        Cluster ``dt`` (DataFrame or 2-D numpy array).

        Repeats assignment and centroid update until every centroid moves
        less than ``tol`` (L2 norm) or ``max_rep`` is used up.

        Raises:
            Exception: if ``dt`` is neither a DataFrame nor a numpy array.
        """
        if isinstance(dt, pd.DataFrame):
            data = dt.values
        elif isinstance(dt, np.ndarray):
            data = dt
        else:
            raise Exception('dt should be a DataFrame or a numpy array')

        if not len(self.centroids) == self.k:
            print('No init centers', self.name)
            self.init_centroids(data, self.init_type)

        self.clusters = {}
        converge = False
        while self.max_rep > 0 and not converge:
            # dist2centroids has k rows which correspond to the dist from each centroid
            dist2centroids = np.array([self.find_mindist(data, seed)
                                       for seed in self.centroids])
            self._assign_labels(dist2centroids)

            emptySeeds = []
            for seed in self.centroids:
                if self.clusters[seed][0].size == 0:
                    print("Cluster %s with centroid %s is empty!"
                          % (seed, self.centroids[seed]))
                    emptySeeds.append(seed)

            # check for empty clusters
            if emptySeeds:
                # 'pending' shrinks as clusters are repaired. The list being
                # iterated is left untouched; removing from it inside the
                # loop used to skip every second empty cluster.
                pending = list(emptySeeds)
                for seed in emptySeeds:
                    dist2centroids[seed] = self.handle_empty_cluster(
                        dist2centroids, data, seed, pending)
                    pending.remove(seed)

                # find new clusters after fixing empty ones
                self._assign_labels(dist2centroids)

            prev_centroids = self.centroids.copy()
            for seed in self.clusters:
                members = self.clusters[seed]
                # a cluster that is still empty keeps its centroid; the
                # mean of zero rows would turn it into NaN
                if members[0].size > 0:
                    self.centroids[seed] = data[members].mean(axis=0)

            converge = all(
                np.linalg.norm(prev_centroids[seed] - self.centroids[seed], ord=2) < self.tol
                for seed in self.clusters)

            self.max_rep -= 1
        print('Remaining repetitions: %s' % (self.max_rep))

        if not self.clusters:
            # no iteration ran (max_rep exhausted): still provide labels
            self._assign_labels(np.array([self.find_mindist(data, seed)
                                          for seed in self.centroids]))

        self.inertia_ = 0
        for seed in self.centroids:
            self.inertia_ += (self.find_mindist(data[self.clusters[seed]], seed) ** 2).sum()


def distance_metric(a, b, dist='Euclidean'):
    """
    Euclidean distance between every row of ``a`` and the vector ``b``.

    Parameters:
        a: 2-D numpy array, one point per row.
        b: 1-D numpy array (a centroid) with as many entries as ``a`` has columns.
        dist: kept for interface compatibility; only 'Euclidean' is implemented.

    Returns:
        1-D array with one distance per row of ``a``.

    Raises:
        ValueError: if the dimensions do not match (previously the function
                    silently returned None in that case).
    """
    if a.shape[1] != b.shape[0]:
        raise ValueError('Dimension mismatch: data has %d columns, centroid has %d values'
                         % (a.shape[1], b.shape[0]))
    distance = ((a - b) ** 2).sum(axis=1)
    return distance ** 0.5
class MyKmedoids:
    """K-medoids clustering driven by a full pairwise distance matrix.

    After ``fit`` the instance exposes:
      * ``seeds``    - row indices (into the fitted data) of the medoids
      * ``medoids``  - dict ``cluster index -> medoid row``
      * ``clusters`` - dict ``cluster index -> array of member row indices``
      * ``labels_``  - cluster index assigned to every row
      * ``inertia_`` - sum of squared distances of the members to their medoid

    NOTE: ``fit`` consumes ``self.max_rep`` (it is decremented once per
    iteration), so an instance has to be re-created, or ``max_rep`` reset,
    before it is fitted again.
    """

    def __init__(self, k=2, tol=0.01, max_rep=100):
        self.k = k
        self.tol = tol
        self.max_rep = max_rep
        self.name = 'KMedoids'

    def init_medoids(self, data, init_type):
        """Return ``self.k`` distinct row indices of ``data`` to start from.

        Only ``init_type == 'random'`` is implemented.

        Raises:
            ValueError: for an unknown ``init_type`` (previously ``None`` was
                returned silently and ``fit`` failed later), or when ``k`` is
                not smaller than the number of rows.
        """
        if init_type != 'random':
            raise ValueError(
                "Unsupported init_type %r; only 'random' is implemented"
                % (init_type,))
        if self.k >= len(data):
            raise ValueError('# of desired clusters should be < total data points')
        return np.random.choice(len(data), self.k, replace=False)

    def find_mindist(self, data, seed):
        """Return the distance of every row of ``data`` to medoid ``seed``."""
        return distance_metric(data, self.medoids[seed])

    @staticmethod
    def _select_medoid(distances, cluster, fallback):
        """Return the member of ``cluster`` with the smallest summed distance
        to the other members, or ``fallback`` when the cluster is empty.

        ``distances`` is the full pairwise distance matrix and ``cluster`` an
        array of row indices into it. Ties resolve to the first member.
        """
        if cluster.size == 0:
            # Nothing was assigned to this medoid (e.g. duplicated points):
            # keep the current medoid instead of indexing the data with None.
            return fallback
        within = distances[np.ix_(cluster, cluster)].sum(axis=1)
        return cluster[within.argmin()]

    def fit(self, dt):
        """Cluster ``dt`` (a DataFrame or a 2-D numpy array) into ``k`` groups.

        Raises:
            Exception: when ``dt`` is neither a DataFrame nor a numpy array.
            ValueError: when ``k`` is not smaller than the number of rows.
        """
        if isinstance(dt, pd.DataFrame):
            data = dt.values
        elif isinstance(dt, np.ndarray):
            data = dt
        else:
            raise Exception('dt should be a DataFrame or a numpy array')

        # Random, distinct row indices act as the initial medoids. The medoid
        # rows must come from these very indices, otherwise the first
        # convergence check compares against unrelated points.
        self.seeds = self.init_medoids(data, 'random')
        self.clusters = {}
        self.medoids = {index: data[seed]
                        for index, seed in enumerate(self.seeds)}

        print('Calculate distance matrix')
        start = time()
        distances = pairwise_distances(data)
        print('Duration for distance matrix: %s' %(time()-start))

        converge = False
        while self.max_rep > 0 and not converge:
            print(self.max_rep)
            # One row per medoid: distance from that medoid to every point.
            dist2medoids = distances[self.seeds, :]
            self.labels_ = dist2medoids.argmin(axis=0)

            for seed_index in range(self.k):
                self.clusters[seed_index] = np.where(
                    self.labels_ == seed_index)[0]

            prev_medoids = self.medoids
            self.seeds = np.array([
                self._select_medoid(distances,
                                    self.clusters[seed_index],
                                    self.seeds[seed_index])
                for seed_index in range(self.k)
            ])
            self.medoids = {index: data[seed]
                            for index, seed in enumerate(self.seeds)}

            # Converged once no medoid moved by more than ``tol``.
            converge = all(
                np.linalg.norm(prev_medoids[seed_index] - self.medoids[seed_index],
                               ord=2) <= self.tol
                for seed_index in range(self.k)
            )

            self.max_rep -= 1

        print('Remaining repetitions: %s' %(self.max_rep))

        self.inertia_ = 0
        for seed in self.medoids:
            self.inertia_ += np.array([self.find_mindist(data[self.clusters[seed]], seed)**2]).sum()
        return self
def custom_distance_matrix(data):
    """Return the lower-triangular Euclidean distances of ``data`` as a dict.

    ``data`` must be a 2-D array. The result maps ``(row, col)`` with
    ``col <= row`` to the Euclidean norm of ``data[row] - data[col]``; the
    diagonal entries are included and the upper triangle is not stored.
    """
    rows, _ = data.shape
    return {
        (rowid, colid): np.linalg.norm(data[rowid, :] - data[colid, :])
        for rowid in range(rows)
        for colid in range(rowid + 1)
    }


def distance_metric(a, b, dist='Euclidean'):
    """Return the Euclidean distance of every row of ``a`` to the vector ``b``.

    Args:
        a: 2-D matrix of points, one per row.
        b: 1-D vector (e.g. a centroid/medoid) with as many entries as ``a``
            has columns.
        dist: name of the metric. Only the Euclidean distance is implemented
            and the argument is currently not inspected.

    Returns:
        One distance per row of ``a``.

    Raises:
        ValueError: when the number of columns of ``a`` differs from the
            length of ``b`` (previously ``None`` was returned silently, which
            only failed later in the caller).
    """
    if a.shape[1] != b.shape[0]:
        raise ValueError(
            'a has %d columns but b has %d entries' % (a.shape[1], b.shape[0]))
    # Numerical values are assumed to be normalised by the caller.
    return ((a - b) ** 2).sum(axis=1) ** 0.5
class MyCanopy(BaseEstimator, TransformerMixin):
    """Pick cluster-centre candidates from a data set.

    ``fit`` stores the chosen points in ``self.centroids`` (a dict keyed by a
    running integer index). Candidates are selected with per-point density
    (``p``), intra-neighbourhood distance (``a``), separation (``s``) and the
    combined weight (``w``) computed by the helper methods below.

    NOTE(review): presumably this is the "Density Canopy" initialisation
    referenced in the project configuration -- confirm against the paper used.
    """

    def __init__(self, remove_outliers=True):
        # When true, fit() drops points flagged by its outlier test.
        self.remove_outliers = remove_outliers

    def calc_meanDist(self, data, dists=None):
        """Return ``(dists, meanDist)`` for ``data``.

        ``dists`` is computed as the pairwise Euclidean distance matrix when it
        is not supplied. ``meanDist`` is twice the sum of the strictly lower
        triangle of ``dists`` divided by ``n*(n-1)`` with ``n = data.shape[0]``.

        NOTE: the divisor ``n*(n-1)`` is zero when ``data`` has fewer than two
        rows.
        """
        n = data.shape[0]
        if dists is None:
            dists = pairwise_distances(data, metric='euclidean')
        #if dists.shape[0] == dists.shape[1]:
        # boolean mask: row index > column index, i.e. strictly lower triangle
        triang_dists = dists[np.arange(dists.shape[0])[:,None] > np.arange(dists.shape[1])].sum()
        #else:
        #    ####### here!
        #    triang_dists = dists.sum()
        meanDist = 2*triang_dists/(n*(n-1))
        return dists, meanDist

    def p_density(self, dp, dists_i, meanDist):
        """Count the entries of ``dists_i`` that are strictly below ``meanDist``.

        ``dp`` is accepted but not used.
        """
        f = np.where((dists_i - meanDist)<0, 1, 0)
        p_i = f.sum()
        return p_i

    def a_density(self, meanDist, dists_i, p_i):
        """Return ``2*d/(p_i*(p_i-1))`` where ``d`` sums the entries of
        ``dists_i`` below ``meanDist``; returns 0 when ``p_i`` equals 1.
        """
        cluster_dists = dists_i[dists_i < meanDist]
        d = cluster_dists.sum()
        if p_i-1 == 0:
            # avoid dividing by zero for a point that only has itself nearby
            return 0
        else:
            return 2*d/(p_i*(p_i-1))

    def s_distance(self, p, p_i, dists_i):
        """Return the smallest entry of ``dists_i`` among the positions whose
        value in ``p`` exceeds ``p_i``; when there is no such position, return
        the largest entry of ``dists_i``.
        """
        dist_i_less_j = dists_i[p-p_i>0]
        if dist_i_less_j.size > 0:
            return dist_i_less_j.min()
        else:
            return dists_i.max()

    def w_weight(self, p_i, a_i, s_i):
        """Return ``p_i*s_i/a_i``, or 0 when ``a_i`` is 0."""
        if a_i == 0:
            return 0
        else:
            return p_i*s_i/a_i

    def removeData(self, meanDist, dists, data, ind, centroids_dists=np.array([])):
        """Drop every point closer than ``meanDist`` to point ``ind``.

        The same boolean filter (``dists[ind, :] >= meanDist``) is applied to
        the rows and columns of ``dists``, to the rows of ``data`` and to every
        row of ``centroids_dists``.

        Returns:
            ``(new_dists, new_data, new_centroids_dists, dist_filter)``.

        NOTE: the default for ``centroids_dists`` is a shared array object; it
        is only read here, never modified.
        """
        # in a row of dists var we have all the distances of a data point to the rest
        dists_i = dists[ind, :]
        dist_filter = dists_i>=meanDist
        new_dists = dists[dist_filter, :]
        #if dists.shape[0] == dists.shape[1]:
        new_dists = new_dists[:, dist_filter]
        new_data = data[dist_filter, :]

        # dists from centroids
        new_centroids_dists = []
        # NOTE: the loop variable re-uses (shadows) the ``ind`` parameter
        for ind in range(centroids_dists.shape[0]):
            centroid_dists = centroids_dists[ind]
            new_centroids_dists.append(centroid_dists[dist_filter])
        new_centroids_dists = np.array(new_centroids_dists)
        return new_dists, new_data, new_centroids_dists, dist_filter

    def fit(self, dt):
        """Select centre candidates from ``dt`` and store them in
        ``self.centroids``.

        ``dt`` may be a DataFrame, a numpy array or a list. The method prints
        progress information and has no return statement.

        Raises:
            Exception: when ``dt`` is none of the accepted types.
        """
        if isinstance(dt, pd.DataFrame):
            data = dt.values
        elif isinstance(dt, np.ndarray):
            data = dt
        elif isinstance(dt, list):
            data = np.array(dt)
        else:
            raise Exception('dt should be a DataFrame or a numpy array')
        self.centroids = {}
        # one row per chosen centre: its distances to the points still in play
        centroids_dists = np.array([])
        # density of every chosen centre; only appended to in this method
        p_centroid = np.array([])

        # c1: the first centre is the point with the highest density
        p = np.array([[]])
        ############
        dists, meanDist = self.calc_meanDist(data)
        for ind in range(data.shape[0]):
            p = np.append(p, self.p_density(ind, dists[ind,:], meanDist))

        max_p_sample_ind = p.argmax()
        centroid = data[max_p_sample_ind, :]

        # centroid
        centroid_index = 0
        self.centroids[centroid_index] = centroid
        centroids_dists = np.concatenate([centroids_dists, dists[max_p_sample_ind, :]],
                                         axis=0).reshape(1,-1)
        p_centroid = np.append(p_centroid, p[max_p_sample_ind])
        '''
        print('MD', meanDist)
        print('1 data',data.shape)
        print('1 dists', dists.shape)
        print('1 p', len(p))
        '''
        # remove the neighbourhood of the first centre from the working set
        dists, data, centroids_dists, dist_filter = self.removeData(meanDist, dists, data, max_p_sample_ind, centroids_dists=centroids_dists)
        p = p[dist_filter]
        '''
        print('2 data', data.shape)
        print('2 dists', dists.shape)
        print('cd', centroids_dists)
        print('2 p',len(p))
        '''
        ############

        # c2: the second centre is the remaining point with the highest weight
        # (p is recomputed from scratch on the reduced data)
        p = np.array([])
        a = np.array([])
        s = np.array([])
        w = np.array([])

        ############

        _, meanDist = self.calc_meanDist(data, dists=dists)
        for ind in range(data.shape[0]):
            p_i = self.p_density(ind, dists[ind,:], meanDist)
            p = np.append(p, p_i)
            #p_i = p[ind]
            a = np.append(a, self.a_density(meanDist, dists[ind,:], p_i))

        # s needs the complete p vector, hence the second pass
        for ind in range(data.shape[0]):
            s_i = self.s_distance(p, p[ind], dists[ind,:])
            s = np.append(s, s_i)
            w = np.append(w, self.w_weight(p[ind], a[ind], s_i))

        max_w_sample_ind = w.argmax()
        centroid = data[max_w_sample_ind, :]

        # centroid
        centroid_index += 1
        self.centroids[centroid_index] = centroid
        centroids_dists = np.concatenate([centroids_dists, [dists[max_w_sample_ind, :]]],
                                         axis=0)
        p_centroid = np.append(p_centroid, p[max_w_sample_ind])
        dists, data, centroids_dists, dist_filter = self.removeData(meanDist, dists, data, max_w_sample_ind, centroids_dists=centroids_dists)
        # p_prev / s_prev hold the previous round's values for the outlier test
        p_prev = p[dist_filter]
        s_prev = s[dist_filter]
        p = p[dist_filter]
        a = a[dist_filter]
        '''
        print('3 data', data.shape)
        print('3 dist', dists.shape)
        print('3 p',len(p))
        print('3 cdists', centroids_dists.shape)
        '''
        ############

        #p_prev = np.zeros((len(data))) #p
        #s_prev = np.ones((len(data))) * 999 #s
        #print(self.centroids)
        #print(centroids_dists)

        # remaining centres: repeat until at most one point is left
        c_remove = 0
        while data.shape[0] > 1:
            print(data.shape, dists.shape)
            w = np.array([])
            #p_new = np.array([])
            #a = np.array([])
            #s = np.array([])

            ind = 0
            #for ind in range(data.shape[0]):
            _, meanDist = self.calc_meanDist(data, dists=dists)
            if meanDist == 0:

                break
            # manual index loop: ``ind`` only advances when the point is kept,
            # because a deletion shifts the following points down by one
            while ind < data.shape[0]:
                p_i = self.p_density(ind, dists[ind,:], meanDist)
                #p_new = np.append(p, p_i)
                a_i = self.a_density(meanDist, dists[ind, :], p_i)
                #a = np.append(a, self.a_density(meanDist, dists[ind, :], p_i))

                #ind = 0
                #toRemove = False
                #print(p_prev.shape, p.shape, data.shape)

                #s_i = self.s_distance(p, p[ind], dists[ind,:])
                s_centroid = []
                w_i = 1

                # the weight of a point is the product of its weights with
                # respect to every centre chosen so far
                for centroid_dists in centroids_dists:
                    s_i = centroid_dists[ind]
                    #s_i = self.s_distance(p, p[ind], centroid_dists)
                    #s_centroid.append(s_i)
                    s_centroid.append(s_i)

                    w_i *= self.w_weight(p_i, a_i, s_i)
                #print('e',ind, data)
                #if w.shape[0] == data.shape[0]:
                #    w[ind] *= w_i
                #else:

                # remove outliers
                if p_prev[ind] > p_i and s_prev[ind] < min(s_centroid) and self.remove_outliers:
                    c_remove += 1
                    #toRemove = True
                    # drop the point from every per-point structure
                    data = np.delete(data, ind, axis=0)
                    dists = np.delete(dists, ind, axis=0)
                    dists = np.delete(dists, ind, axis=1)
                    p = np.delete(p, ind)
                    a = np.delete(a, ind)
                    #s = np.delete(s, ind)
                    p_prev = np.delete(p_prev, ind)
                    s_prev = np.delete(s_prev, ind)
                    #w = np.delete(w, ind)
                    centroids_dists = np.delete(centroids_dists, ind, axis=1)
                    _, meanDist = self.calc_meanDist(data, dists=dists)
                    #break
                else:
                    p_prev[ind] = p_i
                    s_prev[ind] = min(s_centroid)
                    w = np.append(w, w_i)
                    ind += 1
                #if p_i == 0:
                #    print('end', dists.shape)
                #    break
            if w.size > 0:
                # the surviving point with the largest weight is the next centre
                max_w_sample_ind = w.argmax()
                centroid = data[max_w_sample_ind, :]
                centroids_dists = np.concatenate([centroids_dists, [dists[max_w_sample_ind, :]]], axis=0)
                p_centroid = np.append(p_centroid, p[max_w_sample_ind])

                centroid_index += 1
                self.centroids[centroid_index] = centroid

                dists, data, centroids_dists, dist_filter = self.removeData(meanDist, dists, data, max_w_sample_ind, centroids_dists=centroids_dists)
                p_prev = p_prev[dist_filter]
                s_prev = s_prev[dist_filter]
                p = p[dist_filter]
                a = a[dist_filter]

            #print(f'{4+ind} data', data.shape)
            #print(f'{4+ind} dist', dists.shape)
            #print(f'{4+ind} p',len(p), len(p_prev))
            #print(f'{4+ind} cdists', centroids_dists.shape)

        print('Canopy found %d centers' %(len(self.centroids)))
        print('Removed %d data points' %(c_remove))
272 | y = iris.target 273 | #plt.scatter(data[:,0], data[:,1], s=100) 274 | #plt.show() 275 | df = pd.DataFrame(data) 276 | canopy = MyCanopy() 277 | canopy.fit(data) 278 | 279 | color = ['g','c','y'] 280 | print(clf.clusters) 281 | for centroid in clf.centroids: 282 | plt.scatter(clf.centroids[centroid][0], clf.centroids[centroid][1], marker='o', color=color[centroid]) 283 | plt.scatter(data[clf.clusters[centroid][0],0], data[clf.clusters[centroid][0],1], marker='+', color=color[centroid]) 284 | plt.show() 285 | 286 | #clf.predict(pd.DataFrame([[1,5]])) 287 | ''' -------------------------------------------------------------------------------- /datasets/ad_blob_0.csv: -------------------------------------------------------------------------------- 1 | 0,1,2 2 | 0.871145055113999,0.2851786491648387,2.0 3 | 0.2695663564984365,0.9831118870172885,2.0 4 | 4.979218556628409,7.675877589142628,0.0 5 | 6.993417613273221,7.634750119951152,1.0 6 | -1.4970003983424156,0.047815736985920454,2.0 7 | 8.60642885291708,5.7466770470252255,1.0 8 | 4.005172577250398,10.085119133692416,0.0 9 | 7.939336599527487,5.140998012764404,1.0 10 | 3.02663983170569,9.228654599103436,0.0 11 | 1.9584459426696519,10.491973477049905,0.0 12 | 2.8428412701295294,12.285560340862567,0.0 13 | 7.855462647061621,4.947220449079385,1.0 14 | 4.058696580980758,6.5676378782794975,0.0 15 | 1.6223450753980035,8.876175079951123,0.0 16 | 0.5842225507416248,0.8292571277149824,2.0 17 | 2.6923215476575963,9.015649844202509,0.0 18 | -0.8811272524003788,1.6320726233540994,2.0 19 | 4.28222553812764,6.266266314124508,1.0 20 | 3.8910175124956856,10.960110549898495,0.0 21 | 4.625686829174031,12.79282650281413,0.0 22 | 5.483294556984731,8.276119586571594,0.0 23 | 3.503472620974611,10.113219349320968,0.0 24 | 8.782619018240677,5.765998769717158,1.0 25 | 1.3916943298881204,1.228490804062192,2.0 26 | 3.0502746492549315,8.808548433743775,0.0 27 | 2.3548926712972893,3.7222981343488772,2.0 28 | 
0.695029408571866,2.6930320421006604,2.0 29 | 6.092608393263842,9.125165755498557,1.0 30 | 7.301569258843679,4.640703284741619,1.0 31 | 2.41112954403534,8.283427636651004,0.0 32 | 2.2595967007775926,8.657696469695003,0.0 33 | 2.1390624413819563,10.23085168696394,0.0 34 | 2.805690129240734,0.27581946449872574,2.0 35 | 4.066541804668931,7.638534896031179,0.0 36 | -0.7126001888734634,3.6102566045288134,2.0 37 | 4.0865910903798195,11.467857195522804,0.0 38 | 2.695669329392393,0.17669721709558828,2.0 39 | 7.687389274777135,5.226170078913672,1.0 40 | 1.3996408644925284,0.007081719732965341,2.0 41 | 7.83257338177979,8.800841100794194,1.0 42 | 9.220306141893985,4.925080643542195,1.0 43 | 4.131726774557771,9.39547419044991,0.0 44 | 0.8287770683156248,0.9111079216325951,2.0 45 | 7.1481296502351235,7.843309309930559,1.0 46 | 3.69333353391576,7.754126007669862,0.0 47 | 4.287494596736246,11.814197913798115,0.0 48 | 1.667535760253454,0.8434590332143515,2.0 49 | 6.591894096370395,6.10939605104985,1.0 50 | 2.0665903975371682,0.9421297541783253,2.0 51 | 9.69896464233208,4.129861593730093,1.0 52 | 7.43364625540464,4.970842274702198,1.0 53 | 1.548227442449439,3.7798614116970652,2.0 54 | 2.1100837735696376,0.15012552387099354,2.0 55 | 5.962242255585899,8.72973773668869,0.0 56 | 3.329132180520105,1.66122292547719,2.0 57 | 8.092510947573123,11.765682077952448,1.0 58 | 2.1518646179918104,0.928468482131632,2.0 59 | 5.190465382340108,10.126314454503909,0.0 60 | 7.4902154359909225,6.979780853751936,1.0 61 | 3.0544430320339435,11.092826403427534,0.0 62 | 1.9136082415872142,2.716242994192477,2.0 63 | 10.51948948009845,3.0584531426866133,1.0 64 | 8.669339231263926,6.44753412328528,1.0 65 | 6.178507617107221,3.2393075705138914,2.0 66 | 1.1867401817057548,3.0173016297935593,2.0 67 | 5.972317411091514,6.724463599230124,1.0 68 | 6.242273086235397,5.6664141144025955,1.0 69 | 2.6276087215577,-0.12701793439477727,2.0 70 | -0.9948872546083669,1.4766236550170984,2.0 71 | 
2.1519456178844676,10.217531710051933,0.0 72 | 9.278157628845545,6.018090604419505,1.0 73 | 3.147823134698747,-1.0781640262726448,2.0 74 | 0.5105981424354868,1.8809150694633594,2.0 75 | 4.455257625333942,9.397899695113852,0.0 76 | 3.1572389587753884,7.311870641900983,0.0 77 | 1.9658716430941234,1.484588039188321,2.0 78 | -1.5409767456354497,1.4262651440928438,2.0 79 | 6.181240425783503,6.212175521684678,1.0 80 | 2.8444431535954173,9.069602439409245,0.0 81 | 9.08909959919598,5.282821363813309,1.0 82 | 2.226154507971989,9.97851406299207,0.0 83 | 3.637386006103124,11.012442410937197,0.0 84 | 2.3115621858609363,0.09361233616419984,2.0 85 | 4.2422963355789705,10.9704607547827,0.0 86 | 8.745575175393126,5.121229358476762,1.0 87 | 2.813724572522078,-0.1346150786244007,2.0 88 | 3.311859496914076,1.9415764683138461,2.0 89 | 6.54406295519076,10.217892445466841,0.0 90 | 9.968120778535695,6.5940574084118,1.0 91 | 1.6788971365379886,8.100405504226476,0.0 92 | -0.9439215172576925,2.374985491931929,2.0 93 | 3.6916621298086976,11.85410854782017,0.0 94 | 2.278656143043953,2.060438361292449,2.0 95 | 5.118504365026736,10.000269728588687,0.0 96 | 2.950760882823067,10.277044213769194,0.0 97 | 0.7297124464035921,-0.23687163552124635,2.0 98 | -0.041244239649526504,1.3463759758300862,2.0 99 | 4.13522537984626,10.679877371765127,0.0 100 | 0.4152976696105899,-1.1473779476347519,2.0 101 | 2.1803387592599166,4.375138922199126,2.0 102 | 6.607521451372616,5.006590993109798,1.0 103 | 4.28208222899605,10.348319853651512,0.0 104 | 7.007756042578138,5.2470834399821165,1.0 105 | 4.931949109038195,8.143061881907052,0.0 106 | 2.486574903639667,9.043324500322338,0.0 107 | 4.108344595822676,6.637222697112464,0.0 108 | 4.260828622826317,6.86258283105506,0.0 109 | 4.12114046399244,10.026815378344624,0.0 110 | 10.549713104381386,4.835563497638292,1.0 111 | -0.5376649563043465,2.4043990583978827,2.0 112 | 8.34932970367582,3.5675110351858885,1.0 113 | 2.5705212499692696,9.024050789790648,0.0 114 | 
2.5326607841338435,8.754507498722356,0.0 115 | 3.8466934955055105,7.370020784778975,0.0 116 | 3.725155351366724,7.9205766706653105,0.0 117 | 7.618528961474256,5.086259526232174,1.0 118 | 0.00431717293468048,1.2744371862366142,2.0 119 | 4.750237624477735,8.017393403218485,1.0 120 | -1.2460014771144186,1.0326749773000634,2.0 121 | 5.810913345864497,4.1653019227887675,1.0 122 | 8.09895918947581,8.285693211474232,1.0 123 | 7.656078140829676,6.0054734431430585,1.0 124 | 5.849592654877774,7.404366469910739,0.0 125 | 2.425021849008015,2.0268204351773305,2.0 126 | 2.0586574223937526,0.43721539852719604,2.0 127 | 1.3683100172032154,0.12663454246138772,2.0 128 | 2.481436454489503,2.696206768432984,2.0 129 | 2.7300181880146868,10.424657497360462,0.0 130 | 3.1164963208112137,0.7949206050799059,2.0 131 | 0.764799976865704,0.3706359549685103,2.0 132 | 5.36997805323654,11.087846142151516,0.0 133 | 0.8672443655250626,9.467372250925335,0.0 134 | 3.8830423532768785,6.525789692197821,0.0 135 | 1.1580244397040755,8.663711770237702,0.0 136 | 1.527618202922484,8.427376751507097,0.0 137 | 8.077420336584737,7.285217633225548,1.0 138 | 6.941087191024311,4.114910069023091,1.0 139 | 1.1553740014843084,0.09179962968856548,2.0 140 | 3.7530713734373164,9.15526236403644,0.0 141 | 0.7893860284193248,-0.028875079471401,2.0 142 | 3.0411896095711968,10.320983129478108,0.0 143 | 3.377319014469319,8.376538817562926,0.0 144 | 4.651308291747346,4.192956467026833,2.0 145 | 3.875971790830882,9.05863203840036,0.0 146 | 5.04088446918303,5.260233732670989,1.0 147 | 6.246983854224098,7.00598146537238,1.0 148 | 8.539232596372491,6.931028104855784,1.0 149 | 4.1604373874686535,10.747917937653197,0.0 150 | 9.236436761796687,5.099227758716621,1.0 151 | 2.7253641061058884,9.855523609840667,0.0 152 | 2.3833650751918087,7.388687512096224,0.0 153 | 4.507274103798949,1.6128405313196195,2.0 154 | 7.741427219716599,5.052535562239475,1.0 155 | -0.7409848516790685,3.4764604362097904,2.0 156 | 
4.189581604070489,9.898725972368995,0.0 157 | 5.5194948075303865,5.484832988208943,1.0 158 | 7.292169714125466,5.553596883590158,1.0 159 | 0.764434683008574,0.6962178423950065,2.0 160 | 2.518075092590426,-0.9323348900414132,2.0 161 | 2.419615034171925,9.737730723017453,0.0 162 | -0.18421646766099142,10.739996820661997,0.0 163 | 0.2467590243472293,-0.5142543930844774,2.0 164 | 3.8869143677081492,1.7334571548013145,2.0 165 | 3.3620072876702825,0.9478321438296994,2.0 166 | 3.8327142661426246,7.792687617353226,0.0 167 | 2.8668734595719036,1.0709099051102648,2.0 168 | 6.613381959686566,6.33465974800682,1.0 169 | 3.8357465033851645,13.20200623282709,0.0 170 | 4.528313536898971,9.952620073948939,0.0 171 | 4.965677014528129,7.660846589448228,0.0 172 | 5.459635410246156,10.635042613129322,0.0 173 | 2.3662648371229205,11.83204467162547,0.0 174 | 6.060356654842886,5.0874958738043325,1.0 175 | 3.0756287603730934,10.79174125558437,0.0 176 | 5.8476764415421245,6.679740053365272,1.0 177 | 7.356704679502465,6.733582278838541,1.0 178 | 8.628420373195068,6.2615978505778935,1.0 179 | 3.3484159386166907,13.587396813983588,0.0 180 | 10.503173713633,7.535282732797086,1.0 181 | 8.256119143692283,6.929103105866786,1.0 182 | 1.690071085633715,1.3264293502739073,2.0 183 | 2.928827101685851,9.67352694866396,0.0 184 | 0.5662507660055951,2.4158432062517656,2.0 185 | 9.768556374011503,3.841372775029417,1.0 186 | 8.176275184153802,7.689933302241265,1.0 187 | -2.1472803457665686,0.36460232015631133,2.0 188 | 7.806188946846714,5.791370260418839,1.0 189 | 5.943874341855956,9.168478613369357,0.0 190 | 2.4580384080104416,4.450003379968932,1.0 191 | 6.659872688068575,6.182695707899503,1.0 192 | 1.4661677585146073,2.992658684113884,2.0 193 | 4.003109722781303,-0.5102069839603105,2.0 194 | 6.081693622925283,5.504506079490876,1.0 195 | -0.8494830759619936,1.8651406571628613,2.0 196 | 6.224389470538346,6.311272726343328,1.0 197 | 5.339589607583088,8.733772990751897,1.0 198 | 
7.291415106260017,4.482790795013653,1.0 199 | 10.535855552102039,6.937463375447383,1.0 200 | 2.283378683132643,10.68776996971284,0.0 201 | 1.8329857870121078,1.9322760828125305,2.0 202 | 4.8531010584667404,9.764195485884116,0.0 203 | 3.5719287648912643,9.055487520715227,0.0 204 | 6.0866088247644035,6.352115659208234,1.0 205 | 6.435892282697734,7.2609879875019026,1.0 206 | 4.9641899220649215,11.541503106955396,0.0 207 | 7.786300766511058,8.199619167394694,1.0 208 | 0.0815973349210497,2.316014976638793,2.0 209 | 0.5903270780605754,-0.06237680205956608,2.0 210 | 7.465433365603128,6.879320380125736,1.0 211 | 4.287855226045076,8.539463432191475,0.0 212 | 3.894878236105087,8.751929582924863,0.0 213 | 0.11280121355420847,2.589022393359685,2.0 214 | 9.496654829806612,7.425491081098176,1.0 215 | 5.428613486611483,7.36337776255253,1.0 216 | 1.6329688463416057,0.3135200287454698,2.0 217 | 3.813721982568407,1.6710873739916892,2.0 218 | 2.5093592035837924,4.96598449006862,2.0 219 | 2.425794595195175,1.2553771242975373,2.0 220 | 1.889975122156502,7.5264581444727465,0.0 221 | 1.147608858696899,-1.8929365437413506,2.0 222 | 2.804294776199599,-1.7567577601498015,2.0 223 | 4.13409583164934,5.197952309449224,1.0 224 | 0.4256602864979102,-0.5734353610344847,2.0 225 | -0.39651784633291465,2.564454026607084,2.0 226 | 3.7649040262904854,11.687444179835136,0.0 227 | 6.131158310465,5.814480179770017,1.0 228 | 0.8937465143101978,2.1258959429293047,2.0 229 | 4.0908988570332125,2.8824048387862025,2.0 230 | 6.847035551153533,7.125038672710267,1.0 231 | 3.6297486243524686,10.018871026324126,0.0 232 | 4.97849142846536,12.352332538080082,0.0 233 | 2.9494527257218275,4.424070164067222,2.0 234 | 3.2313294136834707,8.303727160266732,0.0 235 | -0.21470136457429745,-1.4989030632781246,2.0 236 | 7.388297177969771,5.009184320561641,1.0 237 | 8.342868875056496,5.521184707080182,1.0 238 | 0.5824322427211264,1.6310432103366475,2.0 239 | 7.288587027167829,6.162575916933539,1.0 240 | 
4.02763797467629,1.186391144027459,2.0 241 | 1.4610667064533902,-0.25657909628165854,2.0 242 | 1.9948486897689846,4.6730464013301845,2.0 243 | 7.804017258621185,4.745738426641882,1.0 244 | 9.057105728364153,7.17407888291477,1.0 245 | 4.6839022101211345,8.221406729474737,0.0 246 | 3.8448725203288507,2.3683102688890143,2.0 247 | 4.9282979299225005,5.0875223075397065,1.0 248 | 8.35515540568072,5.384754134141612,1.0 249 | 7.327804967691326,6.057055732617479,1.0 250 | 1.949770157006588,0.2034702657054137,2.0 251 | 8.140585489869107,5.6832958633195245,1.0 252 | 2.1169052144813283,0.6539674232882956,2.0 253 | 7.156799195428764,6.589152425118778,1.0 254 | 0.6715955180660614,0.2639590488423025,2.0 255 | 10.791927268124313,3.185687053082744,1.0 256 | 10.604643817940559,4.774137413937639,1.0 257 | 2.0505774650706092,1.2312944101487302,2.0 258 | 5.929543710746926,5.897296807877666,1.0 259 | 1.7531221151259793,9.802434917902847,0.0 260 | -0.657692964245397,3.2755762681724203,2.0 261 | 4.185009898421646,8.435615937059609,0.0 262 | 3.4102070104848483,10.578143805237298,0.0 263 | 5.291900472217551,10.904063242773459,0.0 264 | 1.6762388658957925,0.268018901369631,2.0 265 | 1.4194060418744257,9.609987526308203,0.0 266 | 8.558063941596096,7.206849295971324,1.0 267 | 6.327259720961469,7.265234844164702,1.0 268 | 1.7406298519922134,2.33160345445015,2.0 269 | 7.424642545599079,5.408614446677726,1.0 270 | 8.440879825798943,6.902140240120564,1.0 271 | 3.4568597413019413,9.959464077599579,0.0 272 | 6.114220411734712,10.658295157828524,0.0 273 | 7.466453565937298,4.827070166187166,1.0 274 | 6.612041619429901,7.620010737421415,1.0 275 | 8.09249231892704,6.757263768338679,1.0 276 | 1.709184862568204,2.68702588841971,2.0 277 | 3.069625618246026,0.6946073990773034,2.0 278 | 1.1560761527575285,2.63625858705597,2.0 279 | 2.0858987274645826,7.712833127978154,0.0 280 | 6.5746800402894205,9.769009783346918,0.0 281 | 2.4752106113710175,7.234872227070364,0.0 282 | 
6.1607015963076925,5.631356931860353,1.0 283 | 7.9856685603333935,7.148535922114371,1.0 284 | 9.481849351713224,3.8327916152012067,1.0 285 | 5.147812905868065,3.8753891804055334,1.0 286 | 2.0679310156659865,0.9370133325108245,2.0 287 | 7.4163594467572445,4.369967675076406,1.0 288 | 2.5282503288020868,3.6128925396605496,2.0 289 | 8.750942063353854,6.963671718929063,1.0 290 | 7.092261775560676,6.869060651697231,1.0 291 | 2.136284577833326,1.5109030812208868,2.0 292 | 4.231527142565818,8.929519643474686,0.0 293 | 6.644841210895185,6.920859740491614,1.0 294 | 9.06468504634649,6.001934433499747,1.0 295 | 3.415893356717357,10.04281192136678,0.0 296 | 1.2017643341247506,0.19859971029962975,2.0 297 | 2.0189108223401706,10.070690091617669,0.0 298 | 6.076446901730942,5.146313281674912,1.0 299 | 1.3341763366317836,9.784093851897618,0.0 300 | 1.5878139732086927,4.07460117177495,2.0 301 | 2.8645952845828555,3.5934019915694533,2.0 302 | -------------------------------------------------------------------------------- /datasets/ad_blob_2.csv: -------------------------------------------------------------------------------- 1 | 0,1,2 2 | 2.35742085648901,0.3869305246688759,2.0 3 | 0.682645423646413,2.2169364118694332,2.0 4 | 9.870506083238904,11.61718795314443,0.0 5 | 12.643182778625011,18.331713213631772,1.0 6 | -3.009477951907129,0.13404798415084865,2.0 7 | 10.80670731502262,15.863979878752993,1.0 8 | 4.476477337770952,16.061126330842015,0.0 9 | 18.04090585620022,10.477672208783918,1.0 10 | 4.1411489544304745,12.975816887427916,0.0 11 | 3.288427434796028,13.722854476083764,0.0 12 | 7.882400054648043,12.392226232265488,0.0 13 | 19.303756835844325,5.84547876949216,1.0 14 | 6.14360225375113,11.250264004605434,0.0 15 | 3.902444968202455,15.182490006938314,0.0 16 | 1.4173426203408552,1.6690119192756416,2.0 17 | 6.5204898403021705,11.25280963938161,0.0 18 | -1.211391361657189,2.9835942309307453,2.0 19 | 7.459001152016251,12.600773203184485,1.0 20 | 
11.32561434026187,25.21982676098695,0.0 21 | 6.7865666927670745,14.527198467706878,0.0 22 | 14.05116555240308,18.965022276060953,0.0 23 | 4.4855807358439534,17.24211085174071,0.0 24 | 13.310970037489046,12.447365448165154,1.0 25 | 2.670379167102548,1.897427567970309,2.0 26 | 4.91421610971541,26.234823564655883,0.0 27 | 5.98047256877765,7.663815577354276,2.0 28 | 1.3145924879095792,6.532878469758462,2.0 29 | 12.320526046387213,24.493322040432183,1.0 30 | 14.57757077551308,12.385064080214622,1.0 31 | 3.9645929128876576,18.504176248898787,0.0 32 | 5.165152761631926,10.22049061018837,0.0 33 | 4.455761457879376,13.859846057450767,0.0 34 | 3.9558332444882023,0.27987501219948085,2.0 35 | 8.437507303534016,12.455788050919518,0.0 36 | -1.3810444286857482,4.011981076818874,2.0 37 | 6.391583979562982,32.147795661248416,0.0 38 | 7.964960077666886,0.27435906210133465,2.0 39 | 17.215868350434768,5.368354149562069,1.0 40 | 2.9866294684745602,0.013001528907974718,2.0 41 | 9.207617258666083,14.94433125017299,1.0 42 | 16.476707420838622,6.3828095204094195,1.0 43 | 9.843599274001065,11.277370629046494,0.0 44 | 1.7077098675390596,2.148777641166758,2.0 45 | 10.760537891510147,20.242734445882146,1.0 46 | 6.578527364034455,15.621894739288384,0.0 47 | 11.441089628660919,30.075681596238773,0.0 48 | 3.405428201637314,1.0694182702188486,2.0 49 | 16.765376190918523,11.229913479475607,1.0 50 | 3.8654168655214107,1.6595570862293205,2.0 51 | 25.753718875465864,9.983902218430181,1.0 52 | 8.071729434096,8.90469896491836,1.0 53 | 1.6807119806660293,6.9160866283053695,2.0 54 | 6.210997359790726,0.4357188614853173,2.0 55 | 8.412253332372915,15.205506280806441,0.0 56 | 9.379404113672646,2.255524257615275,2.0 57 | 19.57978931304489,14.409803266003603,1.0 58 | 3.082512812541256,1.6290846986548675,2.0 59 | 8.04593676046453,22.063957758615786,0.0 60 | 14.62572262413109,20.688843389618587,1.0 61 | 7.407020215495074,14.5297414188311,0.0 62 | 4.331498588113457,4.966769911614661,2.0 63 | 
13.63325321711088,5.52246276916882,1.0 64 | 16.461654742470472,11.904528610764974,1.0 65 | 13.21943189476762,4.907939791065978,2.0 66 | 2.2761460104919937,7.646719905254874,2.0 67 | 16.990796850434666,19.522780294663626,1.0 68 | 9.1405586758899,6.742850086459994,1.0 69 | 5.2677339140434345,-0.23282531634502393,2.0 70 | -1.7735374622104487,3.856318203642547,2.0 71 | 5.925090081517135,27.615713498967946,0.0 72 | 15.900782623255445,12.53539572270317,1.0 73 | 3.4097839401410255,-1.1790059418455416,2.0 74 | 1.4075582882285573,3.5865864973045576,2.0 75 | 6.1877206949080295,10.521622621995965,0.0 76 | 7.036977369179325,19.947067769000235,0.0 77 | 3.885228252953902,3.198726676736351,2.0 78 | -3.3392171202087955,4.189445373491705,2.0 79 | 15.523709941302137,14.391460015876966,1.0 80 | 7.400350398162921,11.323554677392465,0.0 81 | 25.325792861326804,8.65096878129254,1.0 82 | 4.56630592225771,23.28084439767175,0.0 83 | 9.800307874964606,28.95619667121201,0.0 84 | 2.3515885771644327,0.13145822843653135,2.0 85 | 12.475907395621642,27.077929647271958,0.0 86 | 22.311460515223317,8.924891927757628,1.0 87 | 7.728291830942027,-0.21337420312176866,2.0 88 | 8.520950916812762,5.123085306641668,2.0 89 | 6.731597648835892,24.63284746276912,0.0 90 | 12.87323046647778,6.9103337586324525,1.0 91 | 1.9571716414477724,16.648573300543358,0.0 92 | -2.5667178567112865,6.932263393738797,2.0 93 | 4.700604369432348,20.16153942710921,0.0 94 | 5.278216649521822,3.835600657341021,2.0 95 | 14.355732014894613,21.679215710855633,0.0 96 | 4.48489973173273,14.269528774651782,0.0 97 | 2.18100444010114,-0.6283116133246254,2.0 98 | -0.07114936170127097,2.542035690676223,2.0 99 | 11.709485687797493,12.19287277813406,0.0 100 | 1.2171346717178833,-1.6556086472441018,2.0 101 | 4.43542163741839,6.095635311934952,2.0 102 | 6.827921572017575,12.86927356539015,1.0 103 | 7.5954020727535045,25.77468380776417,0.0 104 | 13.33380731452861,9.720460687954823,1.0 105 | 8.782530161806314,17.555376395007436,0.0 106 | 
5.189189597316817,26.48974224456129,0.0 107 | 10.735326826389937,10.066163485992577,0.0 108 | 4.850617368603936,17.104459402948898,0.0 109 | 9.520659989244098,16.850523907037633,0.0 110 | 20.259167815288407,8.613455011862424,1.0 111 | -0.7829197863699144,4.4783222209589395,2.0 112 | 19.96515228986472,6.028513599653504,1.0 113 | 6.774785331549304,26.89212306761953,0.0 114 | 5.991585020926765,21.761084836530046,0.0 115 | 4.344479895290906,17.303388570753892,0.0 116 | 7.017369590641248,9.552075179619855,0.0 117 | 20.39730713344138,13.112358450924255,1.0 118 | 0.005803803968163775,1.314322699975062,2.0 119 | 7.487079081677362,13.933379619945628,1.0 120 | -2.1232093028948453,1.2702901323624904,2.0 121 | 9.857979481963877,7.362700168695309,1.0 122 | 14.72253836475372,10.804313983628848,1.0 123 | 11.988166840985528,7.882232490588019,1.0 124 | 17.02951433312786,15.195610738427291,0.0 125 | 3.3421065388087934,5.977285252424174,2.0 126 | 5.060903747901779,1.240466405191158,2.0 127 | 3.9073330929351293,0.3651556116713879,2.0 128 | 7.369754743111555,4.453843580538899,2.0 129 | 7.17299141456029,25.576966901138658,0.0 130 | 3.643822992932525,1.1346534123313494,2.0 131 | 2.0682280937600193,0.4391082589345746,2.0 132 | 9.864714207452707,30.194364852784602,0.0 133 | 2.2193909672048955,21.859480246530133,0.0 134 | 4.331124064990922,11.099476242572688,0.0 135 | 3.2359597150627244,25.579497922210678,0.0 136 | 1.8056363314960784,21.846214272367405,0.0 137 | 9.959635053282812,11.263945640473565,1.0 138 | 7.632679232307588,7.437237836495083,1.0 139 | 3.3825906498132645,0.21247092409533713,2.0 140 | 4.617823834286381,19.73114729257876,0.0 141 | 1.4709877164264569,-0.05361147991213337,2.0 142 | 5.532281992114845,27.091992093555096,0.0 143 | 9.489282080518336,11.574553884544974,0.0 144 | 5.846678650790187,6.524984858759207,2.0 145 | 6.597896659314002,23.911499193226476,0.0 146 | 13.540369488148768,11.218643897434852,1.0 147 | 10.985100347507426,17.634052428655043,1.0 148 | 
23.108597755368557,18.361640168157187,1.0 149 | 4.870633542582827,28.20133390292966,0.0 150 | 22.824644887162727,14.636521188131045,1.0 151 | 3.9303419980014263,24.697988480146954,0.0 152 | 5.563695488495141,11.841644803771285,0.0 153 | 8.955119698456958,2.8266492843268503,2.0 154 | 21.683509142108665,9.974818266578868,1.0 155 | -2.15191511503465,10.265831167941531,2.0 156 | 6.999738923431347,19.690457619417575,0.0 157 | 10.92973488052169,7.059300152154967,1.0 158 | 13.370333239980031,11.800620204890656,1.0 159 | 1.4674879815758666,1.6051003116521132,2.0 160 | 5.155507483902211,-2.336306011848186,2.0 161 | 4.930491049694881,18.956209694739684,0.0 162 | -0.19465666591577868,24.97739555290864,0.0 163 | 0.684450430993136,-1.2797239875016104,2.0 164 | 8.476104022936635,4.027250043283601,2.0 165 | 8.20046984142084,2.6388875717080404,2.0 166 | 9.378815620076448,10.251054727088073,0.0 167 | 3.6447272316392403,2.6156398108462184,2.0 168 | 14.641753293464213,18.745877695287177,1.0 169 | 6.9457864493705,28.521737751957158,0.0 170 | 5.331492591176113,22.333409541383386,0.0 171 | 5.59259833450303,15.220571123518091,0.0 172 | 6.8315546103173785,16.637605106107053,0.0 173 | 6.098200598963373,18.288478054317615,0.0 174 | 6.09860303436927,7.054407418085322,1.0 175 | 8.911352517802603,12.554039512042335,0.0 176 | 13.505882114675842,13.935170910239085,1.0 177 | 18.340128370697062,15.513162689261854,1.0 178 | 24.912388411945628,7.1065110250954335,1.0 179 | 6.019705095771107,33.20382225761187,0.0 180 | 13.679521868743546,8.236471018916617,1.0 181 | 11.904094535782074,20.002121912331013,1.0 182 | 4.888253250736263,2.717826471203795,2.0 183 | 8.778180511562411,12.661022617225056,0.0 184 | 1.6515225376231055,5.880985764746761,2.0 185 | 14.312041050494926,11.280318290813565,1.0 186 | 12.895350990118516,14.687418338862944,1.0 187 | -4.707791602096854,0.5905647927990378,2.0 188 | 22.161679016233848,13.53603254576408,1.0 189 | 16.63466676505407,23.976942231427003,0.0 190 | 
7.151990112601724,5.1519979945918575,1.0 191 | 8.582650670446256,6.73094748375319,1.0 192 | 3.482878545244601,8.880580885491547,2.0 193 | 4.672831078531463,-1.3891973869052738,2.0 194 | 17.25531827056268,9.796122018945125,1.0 195 | -1.857753641445904,2.106915093344132,2.0 196 | 17.784584388428463,12.562338030614633,1.0 197 | 8.192411809364168,14.718706659201317,1.0 198 | 20.974146966839232,10.473213491250393,1.0 199 | 27.12202288055628,7.042785220572197,1.0 200 | 4.344792255795637,27.89068730257145,0.0 201 | 2.154341527835431,4.449923866881565,2.0 202 | 10.360908968328864,10.938040639435282,0.0 203 | 4.713762278042496,26.750072507109152,0.0 204 | 10.324015229327962,15.797272240964862,1.0 205 | 17.020285892636227,16.353085380707324,1.0 206 | 11.170758155462554,23.900493091543765,0.0 207 | 19.671944055026145,18.25823697604197,1.0 208 | 0.1423600654480666,5.8623809411107,2.0 209 | 1.6919355615646108,-0.11663711741066196,2.0 210 | 7.827628526874229,10.059958637310997,1.0 211 | 7.924057350394377,12.711126254375749,0.0 212 | 5.894621994504026,23.748154258180968,0.0 213 | 0.32874514925486376,5.50072674948352,2.0 214 | 17.3147039748259,7.904383931198388,1.0 215 | 5.698004320962074,17.060989660637247,1.0 216 | 4.399974905352165,0.8065054075219374,2.0 217 | 10.33256321061833,2.9247426813525728,2.0 218 | 5.9002443403554015,13.3087594016391,2.0 219 | 4.347336759761318,2.4208636380529773,2.0 220 | 3.3627998176952385,19.836852042180027,0.0 221 | 3.3262051056537967,-2.643034694364525,2.0 222 | 6.044697126052644,-3.745674935619564,2.0 223 | 6.953294761530268,9.206352443492317,1.0 224 | 0.9219660306649365,-1.4356277150490597,2.0 225 | -0.5002854285473014,3.006882402890411,2.0 226 | 5.119633628990278,32.52315714523718,0.0 227 | 9.6608567734058,15.402583030910908,1.0 228 | 1.6250215469460272,3.673799764226448,2.0 229 | 5.890349898740709,3.7597440409190757,2.0 230 | 13.949655750193969,15.122003551466047,1.0 231 | 8.811832179083012,16.179973224527032,0.0 232 | 
8.607341530962,34.47626125923539,0.0 233 | 2.9553671981842555,12.40275028495425,2.0 234 | 6.477146485378579,21.41298877519376,0.0 235 | -0.39643138413098555,-2.3956813370585905,2.0 236 | 11.483729966668704,9.931600222215316,1.0 237 | 14.059023742798729,9.635637463014032,1.0 238 | 1.4109608475135702,1.8122135443304792,2.0 239 | 19.03478400068319,12.129373264838504,1.0 240 | 4.844939194923223,1.292229965950852,2.0 241 | 1.820044414511318,-0.5528837743900505,2.0 242 | 2.5345987179263076,8.292382973930152,2.0 243 | 23.04727250591153,10.219773761424808,1.0 244 | 11.483984230644449,16.990184858760617,1.0 245 | 9.200841577169374,22.1335091120144,0.0 246 | 5.243809464979968,2.995620583324741,2.0 247 | 10.428882044918716,14.078907855061455,1.0 248 | 10.042335645556816,11.774733102484106,1.0 249 | 13.924645003961876,6.5514805774143,1.0 250 | 5.291726091090104,0.4222621956395757,2.0 251 | 9.93461276904081,12.611299864961254,1.0 252 | 4.1585598590810395,1.62144566553365,2.0 253 | 7.705920007986146,11.800801231993859,1.0 254 | 1.1834679437445161,0.2939661972032236,2.0 255 | 27.7999707219423,7.4748187755579005,1.0 256 | 14.04547380394763,12.367189258565343,1.0 257 | 6.06366024561836,2.988327679882684,2.0 258 | 8.332837973398966,16.010813523231036,1.0 259 | 3.4386217085656874,23.329910211097648,0.0 260 | -0.8053065015967066,6.083468757249708,2.0 261 | 6.618937644829613,16.36166971901921,0.0 262 | 7.145590435377864,21.159516428197893,0.0 263 | 6.666739755548575,11.919185224196877,0.0 264 | 3.6432879877894524,0.3688472404621039,2.0 265 | 3.5663221684184014,10.103145561807725,0.0 266 | 16.067412979423487,7.546042129239142,1.0 267 | 13.073209567092007,12.772161078668404,1.0 268 | 2.8552925281499393,2.4233499300514314,2.0 269 | 18.821130839620096,13.913635990012075,1.0 270 | 12.516820154011185,9.223281961639298,1.0 271 | 5.671467700778641,10.2979439251582,0.0 272 | 13.91236530775403,13.667442609143976,0.0 273 | 19.588380231040325,6.643618548793073,1.0 274 | 
13.462572965494925,22.535504810640372,1.0 275 | 11.424322838309955,11.54316960550614,1.0 276 | 3.744661828368261,4.38731916914791,2.0 277 | 4.231289219648982,2.0403196247656066,2.0 278 | 1.2366631096770981,4.467131548693298,2.0 279 | 4.745659500452144,19.310642255877056,0.0 280 | 8.966928239833006,14.271308872120386,0.0 281 | 6.268202466330587,21.03456590555668,0.0 282 | 7.480997505074459,9.120094370487232,1.0 283 | 18.235972965033778,12.044713173586157,1.0 284 | 14.038653840233989,4.696245589102828,1.0 285 | 14.298011711450444,7.215189260024673,1.0 286 | 4.067408441214198,1.8234219229828312,2.0 287 | 10.261322894404627,8.070590368130325,1.0 288 | 6.382900176965783,6.420690534936611,2.0 289 | 24.625036169895857,11.4241356640923,1.0 290 | 7.473853786767158,19.268307860902134,1.0 291 | 5.233364062401854,1.5697731562547015,2.0 292 | 7.963644205756706,13.919714878949176,0.0 293 | 14.449355361774115,16.469983178841808,1.0 294 | 10.084917724438633,7.720932121646331,1.0 295 | 9.07766828314098,28.482416071774328,0.0 296 | 1.985593636760938,0.5915300013809834,2.0 297 | 3.058536558786453,11.472770966765207,0.0 298 | 6.712606525056687,11.563726700241974,1.0 299 | 3.1289034343851583,27.306634646349494,0.0 300 | 4.106945809238594,6.244062324047144,2.0 301 | 3.400831391656404,5.89578632139443,2.0 302 | -------------------------------------------------------------------------------- /datasets/ad_blob_4.csv: -------------------------------------------------------------------------------- 1 | 0,1,2 2 | 1.9314659422733451,1.2726553433388674,2.0 3 | 1.053108719194825,4.533008075845624,2.0 4 | 8.775048330340333,24.669792236447194,0.0 5 | 12.239516621751054,25.083784552638782,1.0 6 | -5.815254450306158,0.19565293249910765,2.0 7 | 37.63380644672942,9.013626934201405,1.0 8 | 7.127254399049706,23.31308360131381,0.0 9 | 37.891772926127395,24.653709324696457,1.0 10 | 9.334966808924058,33.88588679633668,0.0 11 | 2.0419409971785303,50.53335923733684,0.0 12 | 
8.194599921626576,16.688410149780484,0.0 13 | 23.02229316007707,10.443125462787577,1.0 14 | 19.624436335138917,14.961609375436282,0.0 15 | 8.068759201767136,18.692890356929002,0.0 16 | 1.1668997565308212,0.8518955590027799,2.0 17 | 10.67796486998665,14.578470078344164,0.0 18 | -1.7005384493841582,8.144244317288626,2.0 19 | 13.065500288071242,18.852324703302372,1.0 20 | 12.641603713306445,23.846493163241085,0.0 21 | 17.294695117393537,55.4010169593406,0.0 22 | 23.58193676227873,22.721692239813137,0.0 23 | 7.556289455168832,16.112688092102264,0.0 24 | 26.5006407422796,5.848084444348119,1.0 25 | 1.6070391734251541,3.3008937146081205,2.0 26 | 10.055501099367776,24.621523983136232,0.0 27 | 6.366523185951002,15.79069377712934,2.0 28 | 1.2885291390439741,11.844904825584674,2.0 29 | 18.359367149890524,37.320326589807095,1.0 30 | 18.519486646588792,19.73426420961013,1.0 31 | 5.450666423478139,26.257632677536158,0.0 32 | 2.93078799458119,36.30919746175303,0.0 33 | 9.58770768618579,47.056624998340816,0.0 34 | 12.78168401000123,0.5033378771474103,2.0 35 | 5.352684449142848,12.142916696254966,0.0 36 | -0.9072992405163486,16.20729748288087,2.0 37 | 9.045453998527101,38.977514143315226,0.0 38 | 6.533144074343247,0.7026757554171108,2.0 39 | 34.24595791722155,25.188044606230363,1.0 40 | 2.758030774145578,0.03388090947459192,2.0 41 | 38.75505931970827,22.851470647224694,1.0 42 | 43.336926808880584,22.384267124325063,1.0 43 | 12.432289017921688,13.732353790184405,0.0 44 | 1.5539482020951465,3.9894658494814568,2.0 45 | 15.377231757350948,35.66615797024149,1.0 46 | 9.663036068445656,38.6415993738336,0.0 47 | 7.1988018672983785,25.860398573977683,0.0 48 | 5.286380702968518,1.9719409339010254,2.0 49 | 25.08050748295078,27.31654028533625,1.0 50 | 8.35954530159766,2.1893129290549,2.0 51 | 39.77810811641932,16.03490779280186,1.0 52 | 12.797661631324154,19.854445623453085,1.0 53 | 2.802451764421667,14.269744316359695,2.0 54 | 7.090391857669061,0.6984960819048333,2.0 55 | 
22.713728052386454,21.934975420085138,0.0 56 | 7.032410426812842,4.399536141550346,2.0 57 | 12.828521185863012,56.09721802654372,1.0 58 | 6.416428664865229,3.169351831777474,2.0 59 | 10.129729301520447,12.959785886256054,0.0 60 | 33.41396167457126,21.89909658624835,1.0 61 | 12.677808381173303,22.204374220762126,0.0 62 | 8.230541741500158,11.337608232423978,2.0 63 | 29.418083419647143,3.8154961907904026,1.0 64 | 10.856937876418991,12.126131984859416,1.0 65 | 23.846743534426295,4.569275463874536,2.0 66 | 4.975629298656839,15.042994186006128,2.0 67 | 6.760788821132792,18.95864083471824,1.0 68 | 19.128318331289538,16.17842318474844,1.0 69 | 3.659638405771404,-0.6336660048552648,2.0 70 | -4.122396196695774,5.5861630906536925,2.0 71 | 10.533525555571842,34.82265185159663,0.0 72 | 21.41980906782169,15.882031283229903,1.0 73 | 7.3962217945584126,-3.6777437856497555,2.0 74 | 1.0302573764573364,4.312103152806804,2.0 75 | 10.322237087014901,21.40383744147057,0.0 76 | 7.114345819925571,33.69610301602689,0.0 77 | 2.1310279872212963,4.63037702440975,2.0 78 | -5.685499349081771,6.861293651239036,2.0 79 | 24.516016909558843,29.66430333225131,1.0 80 | 13.33316698352154,40.13272595463323,0.0 81 | 26.90125046240518,11.987543100114202,1.0 82 | 10.408397330027173,13.832741235405173,0.0 83 | 17.93987206716612,12.14888496031022,0.0 84 | 3.9628425520276367,0.27535745213996,2.0 85 | 16.84980681166741,37.65857866999981,0.0 86 | 20.056944590869865,24.84150699020496,1.0 87 | 8.532819208739218,-0.5348226674442351,2.0 88 | 8.361529247443105,6.237155345111103,2.0 89 | 12.57493719527958,30.767975570840058,0.0 90 | 39.10741781037888,12.859598194681421,1.0 91 | 4.178791432567943,32.97633817053439,0.0 92 | -4.020131178059955,3.09066633835895,2.0 93 | 10.170544344109567,53.38835083295407,0.0 94 | 8.464868027911333,8.140162085842679,2.0 95 | 24.99040945229244,22.94754516864781,0.0 96 | 8.36342381968811,30.463132645272182,0.0 97 | 2.1477826226101095,-1.0185900515733481,2.0 98 | 
-0.09280854310591609,3.095841826122059,2.0 99 | 4.397445945496594,43.6580419273224,0.0 100 | 0.5039260973387232,-4.9756927090735745,2.0 101 | 4.012554625390242,20.99518067384861,2.0 102 | 7.701522827615259,9.737500415617069,1.0 103 | 4.320915161750209,42.75008260650542,0.0 104 | 32.662351477797124,16.430406516262543,1.0 105 | 12.611861779656595,9.272784133748795,0.0 106 | 8.141990930464084,10.279752173939364,0.0 107 | 10.978965885292315,9.241077173674096,0.0 108 | 17.326467065767527,34.13632503896363,0.0 109 | 8.258863519126201,16.24265008772222,0.0 110 | 18.900794140502526,18.690525028224847,1.0 111 | -2.090983936997006,10.953265508559483,2.0 112 | 31.022310774728787,6.920422619790084,1.0 113 | 5.744318006308341,18.96168746556802,0.0 114 | 5.076091874839017,42.64491983976718,0.0 115 | 13.317853055106067,32.83052231759537,0.0 116 | 11.387056944765298,35.56372625071779,0.0 117 | 35.51269867765038,23.32160786907766,1.0 118 | 0.013217366817343484,5.8932801923893114,2.0 119 | 14.911805629828532,31.178976082935325,1.0 120 | -3.0785906037523896,3.1596761846324286,2.0 121 | 28.220455336074792,15.069726043684161,1.0 122 | 12.108743256365655,12.96544573306912,1.0 123 | 34.57038882419698,14.15799178421294,1.0 124 | 26.5076598639188,8.912954218461046,0.0 125 | 5.2153062070804985,3.7232865309449714,2.0 126 | 9.249118663897917,0.8710817912010691,2.0 127 | 4.100574054539139,0.5913299335739262,2.0 128 | 10.383103353283643,2.7073967068453104,2.0 129 | 5.278108141880406,42.332247361772765,0.0 130 | 3.897394491483289,1.5191402430523457,2.0 131 | 3.195202722136994,1.325822646863251,2.0 132 | 23.156429807843065,43.31245907124763,0.0 133 | 3.4550973061197956,11.736190613374497,0.0 134 | 17.178569776413052,14.499700796793189,0.0 135 | 4.304505259076297,35.59594335791081,0.0 136 | 5.302856406368749,13.879623511279469,0.0 137 | 23.459913875477092,20.532677173720945,1.0 138 | 21.02011907883152,9.880492460822348,1.0 139 | 1.3013255767214094,0.43522305471163125,2.0 140 | 
4.4125084665849,28.336735958490763,0.0 141 | 1.7416504780691824,-0.04530268581675412,2.0 142 | 3.2296512005930293,39.84407448554511,0.0 143 | 4.269608658499723,32.16159817858114,0.0 144 | 22.104252881088144,11.644906329278374,2.0 145 | 13.744592832393222,44.36991533870447,0.0 146 | 8.68297439466981,12.667563395706303,1.0 147 | 27.85756119370193,20.344560475399053,1.0 148 | 13.790650498492944,19.335314093346767,1.0 149 | 19.04734350896193,30.599847254931255,0.0 150 | 40.22805763852242,22.443661214176423,1.0 151 | 10.316701572014999,15.519091529038022,0.0 152 | 8.935057968084344,13.07656015629373,0.0 153 | 6.649385333498417,2.642192650796094,2.0 154 | 31.45747888456177,5.456802874677318,1.0 155 | -3.662732226570486,7.679066249199942,2.0 156 | 13.953334465624398,31.713617488723052,0.0 157 | 23.355413694457944,16.822447157982456,1.0 158 | 27.03044910893088,8.228021363567848,1.0 159 | 1.278253637032419,2.959063644935747,2.0 160 | 5.916555987507281,-2.704837833843858,2.0 161 | 4.564941611067467,27.016827936031422,0.0 162 | -0.8318053313005337,34.07495246818361,0.0 163 | 0.3862110127226491,-1.020482441483724,2.0 164 | 4.276071139862178,3.14301422387999,2.0 165 | 10.326730150815465,2.856799414577341,2.0 166 | 16.204547672185672,13.455212142307932,0.0 167 | 9.271626719294902,4.648351976673214,2.0 168 | 13.125215103659816,10.255767916617646,1.0 169 | 16.94172586074665,26.620353166124218,0.0 170 | 5.9427061238037915,18.45856319902358,0.0 171 | 6.803192848248808,31.086925278170604,0.0 172 | 5.672567422703105,28.44901932449797,0.0 173 | 7.244101292212072,32.78444307427095,0.0 174 | 8.504390897674865,12.919292126726015,1.0 175 | 12.909471240571506,48.51992801805725,0.0 176 | 25.149724443822468,30.90953514889486,1.0 177 | 10.684090846122558,26.02344243791139,1.0 178 | 14.245691555676498,14.36081781654029,1.0 179 | 3.5002186884542636,29.237387706556458,0.0 180 | 48.508506159829665,25.840584725612228,1.0 181 | 39.31136937002854,26.087956532946738,1.0 182 | 
3.3577550908889435,4.460565544561302,2.0 183 | 14.440294208924826,15.990370443011239,0.0 184 | 2.462480267340748,10.746572814891437,2.0 185 | 15.79012094354455,11.302875213968775,1.0 186 | 39.008992523852456,36.5589817178721,1.0 187 | -10.66445996297626,1.266909856963442,2.0 188 | 19.54607305974625,21.486504022990545,1.0 189 | 11.645644425382578,41.73728626933862,0.0 190 | 10.663317257802557,12.59421871645935,1.0 191 | 24.78508125739887,23.250633809166658,1.0 192 | 2.4612438516945288,10.157729305107537,2.0 193 | 7.137975600170278,-2.1056678139927056,2.0 194 | 13.921414615101629,22.096648780939912,1.0 195 | -2.161104899837214,7.276212001692229,2.0 196 | 25.38253179612084,29.34153104466755,1.0 197 | 12.303986275743465,38.55374337596214,1.0 198 | 15.061301971283957,14.477113948385275,1.0 199 | 47.46796863826209,19.469153345794844,1.0 200 | 9.846950654474913,17.26873617920174,0.0 201 | 2.6113237328865173,9.443642298983136,2.0 202 | 6.4440804878287805,16.875260328896736,0.0 203 | 15.476644719148435,34.794167671604576,0.0 204 | 7.44241538392597,14.064333965524117,1.0 205 | 9.276012069335374,18.501791527093637,1.0 206 | 20.442649089939984,40.73753234174437,0.0 207 | 14.06901791565741,18.201367679647625,1.0 208 | 0.22642786984039395,6.7019131268541114,2.0 209 | 2.9065752293808282,-0.20773318928582388,2.0 210 | 21.205673447994066,24.77675179589374,1.0 211 | 4.527826390687936,8.701154965064758,0.0 212 | 13.158929270666729,16.640261716475404,0.0 213 | 0.5424347685002031,3.050410223934355,2.0 214 | 36.539970281801544,31.3037372620842,1.0 215 | 24.37116538971467,11.247408867205364,1.0 216 | 3.0756420387414476,1.4201245049520654,2.0 217 | 12.769654248636378,4.29542151628055,2.0 218 | 6.345575863656329,11.32781532552087,2.0 219 | 6.04054944758111,3.842520857554923,2.0 220 | 4.288453373530611,29.511055174222165,0.0 221 | 1.1626061983657894,-5.271828558078601,2.0 222 | 7.0899047001334665,-8.27730811318331,2.0 223 | 17.489628609262066,16.457011864910502,1.0 224 | 
2.1251856309703383,-2.468442170234326,2.0 225 | -1.4099630504410463,8.044693637964132,2.0 226 | 11.892941840916762,12.574824064417681,0.0 227 | 28.61734236038386,20.936960893226694,1.0 228 | 4.2600185818388265,3.6449448090566063,2.0 229 | 10.971056754372537,5.77393803002165,2.0 230 | 32.07094522287228,7.18977570537807,1.0 231 | 13.792909075596285,27.507218415658663,0.0 232 | 7.357798066126902,13.618742645954002,0.0 233 | 7.176003436861019,16.188753052210803,2.0 234 | 3.8668678884318433,13.858797699025327,0.0 235 | -0.7855710266210646,-1.519483344113403,2.0 236 | 16.028502134453436,15.85986523270047,1.0 237 | 35.82531715205891,11.594331603457897,1.0 238 | 1.028534869063111,3.8552751364949343,2.0 239 | 10.79601923376999,12.979247608390294,1.0 240 | 6.664721538108447,4.218907937772264,2.0 241 | 4.049685755359983,-0.6814211220526756,2.0 242 | 9.512703697359052,22.265959917290537,2.0 243 | 19.986893348563164,10.299248296023094,1.0 244 | 40.38989426779686,12.166149037273609,1.0 245 | 13.308915798290434,24.444165333523365,0.0 246 | 17.596832301493144,4.574340433276806,2.0 247 | 23.328398356396107,7.601671403844724,1.0 248 | 26.6297218478627,21.287924597518536,1.0 249 | 16.830634791609132,27.149250607250778,1.0 250 | 3.2851010742860067,0.32291127741552955,2.0 251 | 35.31425485896504,19.722605772089633,1.0 252 | 9.669408659746454,1.21015892104425,2.0 253 | 31.000020335947504,29.390106226068035,1.0 254 | 1.313035668244587,1.0334810432454382,2.0 255 | 48.33096155784799,12.70136999699293,1.0 256 | 43.77708311750213,5.785244479893182,1.0 257 | 6.592400333602623,1.4358583081640774,2.0 258 | 6.433959651908813,22.520209183204535,1.0 259 | 5.084148648967018,14.872274720862222,0.0 260 | -0.9964831775873004,13.883728459896673,2.0 261 | 17.611966663832565,19.078776076039905,0.0 262 | 11.670484542632986,37.01589690678957,0.0 263 | 9.092193575601637,22.26557388045417,0.0 264 | 6.825748980327916,1.3348871561579339,2.0 265 | 3.8828916607910933,18.96008510601513,0.0 266 | 
15.132017200240803,25.240010780647907,1.0 267 | 18.456770459990782,30.66393779812354,1.0 268 | 4.971048264368607,11.645083799824056,2.0 269 | 20.09604876352165,26.355314542651605,1.0 270 | 16.008653865915637,30.88678068673777,1.0 271 | 12.95727512941837,38.10576320268165,0.0 272 | 28.991024355537455,47.552662713374126,0.0 273 | 31.917916932645973,10.419415462393463,1.0 274 | 25.697230821768073,31.055226938740283,1.0 275 | 24.25034126589981,29.770876204675908,1.0 276 | 6.337421213709919,2.8375789435768644,2.0 277 | 12.57132847686961,2.061208197622599,2.0 278 | 2.5948389119133584,6.976009620074358,2.0 279 | 6.668142325321018,22.35837220037453,0.0 280 | 29.160365645019304,38.95268221892587,0.0 281 | 9.661361428436907,16.887167499658375,0.0 282 | 12.555141668490164,7.999464523078232,1.0 283 | 33.87091369821666,17.398012910290795,1.0 284 | 40.16553364270774,16.896690701298024,1.0 285 | 14.39839277621361,5.6428252702550505,1.0 286 | 7.717430402521697,3.6414759002529795,2.0 287 | 16.163099246264757,19.355964047131998,1.0 288 | 8.80694150390206,11.317091270473789,2.0 289 | 30.96877447225679,27.49726658474705,1.0 290 | 7.120547526222228,8.389323734066744,1.0 291 | 7.733406271677986,5.671795488042103,2.0 292 | 16.83508866936726,35.60442142052285,0.0 293 | 19.85866428311684,14.949488790510227,1.0 294 | 15.214068705397576,8.002575047556402,1.0 295 | 5.2993749169778654,14.618060797205088,0.0 296 | 4.225677560995273,0.5565992575349481,2.0 297 | 4.727524794001402,37.41050024409685,0.0 298 | 10.952130874806748,10.667573300821086,1.0 299 | 1.3630060220961675,13.296942162635144,0.0 300 | 5.256958549799029,11.375262204200169,2.0 301 | 2.989814472196076,10.132384067344898,2.0 302 | -------------------------------------------------------------------------------- /datasets/wine.arff: -------------------------------------------------------------------------------- 1 | % 2 | %1. Title of Database: Wine recognition data 3 | %2. Sources: 4 | % (a) Forina, M. 
et al, PARVUS - An Extendible Package for Data 5 | % Exploration, Classification and Correlation. Institute of Pharmaceutical 6 | % and Food Analysis and Technologies, Via Brigata Salerno, 7 | % 16147 Genoa, Italy. 8 | % 9 | % (b) Stefan Aeberhard, email: stefan@coral.cs.jcu.edu.au 10 | % (c) July 1991 11 | %3. Past Usage: 12 | % 13 | % (1) 14 | % S. Aeberhard, D. Coomans and O. de Vel, 15 | % Comparison of Classifiers in High Dimensional Settings, 16 | % Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of 17 | % Mathematics and Statistics, James Cook University of North Queensland. 18 | % (Also submitted to Technometrics). 19 | % 20 | % The data was used with many others for comparing various 21 | % classifiers. The classes are separable, though only RDA 22 | % has achieved 100% correct classification. 23 | % (RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) 24 | % (All results using the leave-one-out technique) 25 | % 26 | % In a classification context, this is a well posed problem 27 | % with "well behaved" class structures. A good data set 28 | % for first testing of a new classifier, but not very 29 | % challenging. 30 | % 31 | % (2) 32 | % S. Aeberhard, D. Coomans and O. de Vel, 33 | % "THE CLASSIFICATION PERFORMANCE OF RDA" 34 | % Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of 35 | % Mathematics and Statistics, James Cook University of North Queensland. 36 | % (Also submitted to Journal of Chemometrics). 37 | % 38 | % Here, the data was used to illustrate the superior performance of 39 | % the use of a new appreciation function with RDA. 40 | % 41 | %4. Relevant Information: 42 | % 43 | % -- These data are the results of a chemical analysis of 44 | % wines grown in the same region in Italy but derived from three 45 | % different cultivars. 46 | % The analysis determined the quantities of 13 constituents 47 | % found in each of the three types of wines. 
48 | % 49 | % -- I think that the initial data set had around 30 variables, but 50 | % for some reason I only have the 13 dimensional version. 51 | % I had a list of what the 30 or so variables were, but a.) 52 | % I lost it, and b.), I would not know which 13 variables 53 | % are included in the set. 54 | % 55 | %5. Number of Instances 56 | % 57 | % class 1 59 58 | % class 2 71 59 | % class 3 48 60 | % 61 | %6. Number of Attributes 62 | %% 63 | % 13 64 | % 65 | %7. For Each Attribute: 66 | % 67 | % All attributes are continuous 68 | % 69 | % No statistics available, but suggest to standardise 70 | % variables for certain uses (e.g. for us with classifiers 71 | % which are NOT scale invariant) 72 | % 73 | % NOTE: 1st attribute is class identifier (1-3) 74 | % 75 | %8. Missing Attribute Values: 76 | % 77 | % None 78 | % 79 | %9. Class Distribution: number of instances per class 80 | % 81 | % class 1 59 82 | % class 2 71 83 | % class 3 48 84 | @relation wine 85 | 86 | @attribute a1 real 87 | @attribute a2 real 88 | @attribute a3 real 89 | @attribute a4 real 90 | @attribute a5 real 91 | @attribute a6 real 92 | @attribute a7 real 93 | @attribute a8 real 94 | @attribute a9 real 95 | @attribute a10 real 96 | @attribute a11 real 97 | @attribute a12 real 98 | @attribute a13 real 99 | @attribute class {1,2,3} 100 | 101 | @data 102 | 14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065,1 103 | 13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050,1 104 | 13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185,1 105 | 14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480,1 106 | 13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735,1 107 | 14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450,1 108 | 14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290,1 109 | 14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295,1 110 | 14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045,1 111 | 
13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045,1 112 | 14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510,1 113 | 14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280,1 114 | 13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320,1 115 | 14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150,1 116 | 14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547,1 117 | 13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310,1 118 | 14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280,1 119 | 13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130,1 120 | 14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680,1 121 | 13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845,1 122 | 14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780,1 123 | 12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770,1 124 | 13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035,1 125 | 12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015,1 126 | 13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845,1 127 | 13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830,1 128 | 13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195,1 129 | 13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285,1 130 | 13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915,1 131 | 14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035,1 132 | 13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285,1 133 | 13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515,1 134 | 13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990,1 135 | 13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235,1 136 | 13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095,1 137 | 13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920,1 138 | 13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880,1 139 | 13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105,1 140 | 
13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020,1 141 | 14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760,1 142 | 13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795,1 143 | 13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035,1 144 | 13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095,1 145 | 13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680,1 146 | 13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885,1 147 | 14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080,1 148 | 14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065,1 149 | 13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985,1 150 | 14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060,1 151 | 13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260,1 152 | 13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150,1 153 | 13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265,1 154 | 13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190,1 155 | 13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375,1 156 | 13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060,1 157 | 13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120,1 158 | 14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970,1 159 | 13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270,1 160 | 13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285,1 161 | 12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520,2 162 | 12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680,2 163 | 12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450,2 164 | 13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630,2 165 | 12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420,2 166 | 12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355,2 167 | 12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678,2 168 | 13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502,2 169 | 
12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510,2 170 | 13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750,2 171 | 12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718,2 172 | 12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870,2 173 | 13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410,2 174 | 13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472,2 175 | 12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985,2 176 | 11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886,2 177 | 11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428,2 178 | 13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392,2 179 | 11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500,2 180 | 12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750,2 181 | 12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463,2 182 | 12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278,2 183 | 12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714,2 184 | 12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630,2 185 | 13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515,2 186 | 11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520,2 187 | 12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450,2 188 | 12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495,2 189 | 11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562,2 190 | 11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680,2 191 | 12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625,2 192 | 12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480,2 193 | 12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450,2 194 | 12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495,2 195 | 12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290,2 196 | 11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345,2 197 | 12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937,2 198 | 11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625,2 199 | 
12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428,2 200 | 12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660,2 201 | 12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406,2 202 | 12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710,2 203 | 12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562,2 204 | 12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438,2 205 | 11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415,2 206 | 12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672,2 207 | 12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315,2 208 | 12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510,2 209 | 12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488,2 210 | 12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312,2 211 | 11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680,2 212 | 11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562,2 213 | 12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325,2 214 | 11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607,2 215 | 11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434,2 216 | 12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385,2 217 | 11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407,2 218 | 11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495,2 219 | 12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345,2 220 | 12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372,2 221 | 12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564,2 222 | 11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625,2 223 | 11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465,2 224 | 12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365,2 225 | 13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380,2 226 | 11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380,2 227 | 12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378,2 228 | 12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352,2 229 | 
11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466,2 230 | 12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342,2 231 | 12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580,2 232 | 12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630,3 233 | 12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530,3 234 | 12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560,3 235 | 12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600,3 236 | 12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650,3 237 | 12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695,3 238 | 12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720,3 239 | 12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515,3 240 | 13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580,3 241 | 12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590,3 242 | 12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600,3 243 | 13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780,3 244 | 13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520,3 245 | 13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550,3 246 | 12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855,3 247 | 13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830,3 248 | 13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415,3 249 | 12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625,3 250 | 13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650,3 251 | 13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550,3 252 | 13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500,3 253 | 12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480,3 254 | 13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425,3 255 | 13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675,3 256 | 12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640,3 257 | 13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725,3 258 | 13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480,3 259 | 12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880,3 
260 | 14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660,3 261 | 13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620,3 262 | 12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520,3 263 | 13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680,3 264 | 12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570,3 265 | 12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675,3 266 | 13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615,3 267 | 13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520,3 268 | 13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695,3 269 | 12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685,3 270 | 13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750,3 271 | 13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630,3 272 | 12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510,3 273 | 12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470,3 274 | 14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660,3 275 | 13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740,3 276 | 13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750,3 277 | 13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835,3 278 | 13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840,3 279 | 14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560,3 280 | -------------------------------------------------------------------------------- /datasets/pima-diabetes.arff: -------------------------------------------------------------------------------- 1 | % 1. Title: Pima Indians Diabetes Database 2 | % 3 | % 2. Sources: 4 | % (a) Original owners: National Institute of Diabetes and Digestive and 5 | % Kidney Diseases 6 | % (b) Donor of database: Vincent Sigillito (vgs@aplcen.apl.jhu.edu) 7 | % Research Center, RMI Group Leader 8 | % Applied Physics Laboratory 9 | % The Johns Hopkins University 10 | % Johns Hopkins Road 11 | % Laurel, MD 20707 12 | % (301) 953-6231 13 | % (c) Date received: 9 May 1990 14 | % 15 | % 3. 
Past Usage: 16 | % 1. Smith,~J.~W., Everhart,~J.~E., Dickson,~W.~C., Knowler,~W.~C., \& 17 | % Johannes,~R.~S. (1988). Using the ADAP learning algorithm to forecast 18 | % the onset of diabetes mellitus. In {\it Proceedings of the Symposium 19 | % on Computer Applications and Medical Care} (pp. 261--265). IEEE 20 | % Computer Society Press. 21 | % 22 | % The diagnostic, binary-valued variable investigated is whether the 23 | % patient shows signs of diabetes according to World Health Organization 24 | % criteria (i.e., if the 2 hour post-load plasma glucose was at least 25 | % 200 mg/dl at any survey examination or if found during routine medical 26 | % care). The population lives near Phoenix, Arizona, USA. 27 | % 28 | % Results: Their ADAP algorithm makes a real-valued prediction between 29 | % 0 and 1. This was transformed into a binary decision using a cutoff of 30 | % 0.448. Using 576 training instances, the sensitivity and specificity 31 | % of their algorithm was 76% on the remaining 192 instances. 32 | % 33 | % 4. Relevant Information: 34 | % Several constraints were placed on the selection of these instances from 35 | % a larger database. In particular, all patients here are females at 36 | % least 21 years old of Pima Indian heritage. ADAP is an adaptive learning 37 | % routine that generates and executes digital analogs of perceptron-like 38 | % devices. It is a unique algorithm; see the paper for details. 39 | % 40 | % 5. Number of Instances: 768 41 | % 42 | % 6. Number of Attributes: 8 plus class 43 | % 44 | % 7. For Each Attribute: (all numeric-valued) 45 | % 1. Number of times pregnant 46 | % 2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test 47 | % 3. Diastolic blood pressure (mm Hg) 48 | % 4. Triceps skin fold thickness (mm) 49 | % 5. 2-Hour serum insulin (mu U/ml) 50 | % 6. Body mass index (weight in kg/(height in m)^2) 51 | % 7. Diabetes pedigree function 52 | % 8. Age (years) 53 | % 9. 
Class variable (0 or 1) 54 | % 55 | % 8. Missing Attribute Values: None 56 | % 57 | % 9. Class Distribution: (class value 1 is interpreted as "tested positive for 58 | % diabetes") 59 | % 60 | % Class Value Number of instances 61 | % 0 500 62 | % 1 268 63 | % 64 | % 10. Brief statistical analysis: 65 | % 66 | % Attribute number: Mean: Standard Deviation: 67 | % 1. 3.8 3.4 68 | % 2. 120.9 32.0 69 | % 3. 69.1 19.4 70 | % 4. 20.5 16.0 71 | % 5. 79.8 115.2 72 | % 6. 32.0 7.9 73 | % 7. 0.5 0.3 74 | % 8. 33.2 11.8 75 | % 76 | % 77 | % 78 | % 79 | % 80 | % 81 | % Relabeled values in attribute 'class' 82 | % From: 0 To: tested_negative 83 | % From: 1 To: tested_positive 84 | % 85 | @relation pima_diabetes 86 | @attribute 'preg' real 87 | @attribute 'plas' real 88 | @attribute 'pres' real 89 | @attribute 'skin' real 90 | @attribute 'insu' real 91 | @attribute 'mass' real 92 | @attribute 'pedi' real 93 | @attribute 'age' real 94 | @attribute 'class' {tested_negative,tested_positive} 95 | @data 96 | 6,148,72,35,0,33.6,0.627,50,tested_positive 97 | 1,85,66,29,0,26.6,0.351,31,tested_negative 98 | 8,183,64,0,0,23.3,0.672,32,tested_positive 99 | 1,89,66,23,94,28.1,0.167,21,tested_negative 100 | 0,137,40,35,168,43.1,2.288,33,tested_positive 101 | 5,116,74,0,0,25.6,0.201,30,tested_negative 102 | 3,78,50,32,88,31,0.248,26,tested_positive 103 | 10,115,0,0,0,35.3,0.134,29,tested_negative 104 | 2,197,70,45,543,30.5,0.158,53,tested_positive 105 | 8,125,96,0,0,0,0.232,54,tested_positive 106 | 4,110,92,0,0,37.6,0.191,30,tested_negative 107 | 10,168,74,0,0,38,0.537,34,tested_positive 108 | 10,139,80,0,0,27.1,1.441,57,tested_negative 109 | 1,189,60,23,846,30.1,0.398,59,tested_positive 110 | 5,166,72,19,175,25.8,0.587,51,tested_positive 111 | 7,100,0,0,0,30,0.484,32,tested_positive 112 | 0,118,84,47,230,45.8,0.551,31,tested_positive 113 | 7,107,74,0,0,29.6,0.254,31,tested_positive 114 | 1,103,30,38,83,43.3,0.183,33,tested_negative 115 | 1,115,70,30,96,34.6,0.529,32,tested_positive 116 | 
3,126,88,41,235,39.3,0.704,27,tested_negative 117 | 8,99,84,0,0,35.4,0.388,50,tested_negative 118 | 7,196,90,0,0,39.8,0.451,41,tested_positive 119 | 9,119,80,35,0,29,0.263,29,tested_positive 120 | 11,143,94,33,146,36.6,0.254,51,tested_positive 121 | 10,125,70,26,115,31.1,0.205,41,tested_positive 122 | 7,147,76,0,0,39.4,0.257,43,tested_positive 123 | 1,97,66,15,140,23.2,0.487,22,tested_negative 124 | 13,145,82,19,110,22.2,0.245,57,tested_negative 125 | 5,117,92,0,0,34.1,0.337,38,tested_negative 126 | 5,109,75,26,0,36,0.546,60,tested_negative 127 | 3,158,76,36,245,31.6,0.851,28,tested_positive 128 | 3,88,58,11,54,24.8,0.267,22,tested_negative 129 | 6,92,92,0,0,19.9,0.188,28,tested_negative 130 | 10,122,78,31,0,27.6,0.512,45,tested_negative 131 | 4,103,60,33,192,24,0.966,33,tested_negative 132 | 11,138,76,0,0,33.2,0.42,35,tested_negative 133 | 9,102,76,37,0,32.9,0.665,46,tested_positive 134 | 2,90,68,42,0,38.2,0.503,27,tested_positive 135 | 4,111,72,47,207,37.1,1.39,56,tested_positive 136 | 3,180,64,25,70,34,0.271,26,tested_negative 137 | 7,133,84,0,0,40.2,0.696,37,tested_negative 138 | 7,106,92,18,0,22.7,0.235,48,tested_negative 139 | 9,171,110,24,240,45.4,0.721,54,tested_positive 140 | 7,159,64,0,0,27.4,0.294,40,tested_negative 141 | 0,180,66,39,0,42,1.893,25,tested_positive 142 | 1,146,56,0,0,29.7,0.564,29,tested_negative 143 | 2,71,70,27,0,28,0.586,22,tested_negative 144 | 7,103,66,32,0,39.1,0.344,31,tested_positive 145 | 7,105,0,0,0,0,0.305,24,tested_negative 146 | 1,103,80,11,82,19.4,0.491,22,tested_negative 147 | 1,101,50,15,36,24.2,0.526,26,tested_negative 148 | 5,88,66,21,23,24.4,0.342,30,tested_negative 149 | 8,176,90,34,300,33.7,0.467,58,tested_positive 150 | 7,150,66,42,342,34.7,0.718,42,tested_negative 151 | 1,73,50,10,0,23,0.248,21,tested_negative 152 | 7,187,68,39,304,37.7,0.254,41,tested_positive 153 | 0,100,88,60,110,46.8,0.962,31,tested_negative 154 | 0,146,82,0,0,40.5,1.781,44,tested_negative 155 | 0,105,64,41,142,41.5,0.173,22,tested_negative 156 | 
2,84,0,0,0,0,0.304,21,tested_negative 157 | 8,133,72,0,0,32.9,0.27,39,tested_positive 158 | 5,44,62,0,0,25,0.587,36,tested_negative 159 | 2,141,58,34,128,25.4,0.699,24,tested_negative 160 | 7,114,66,0,0,32.8,0.258,42,tested_positive 161 | 5,99,74,27,0,29,0.203,32,tested_negative 162 | 0,109,88,30,0,32.5,0.855,38,tested_positive 163 | 2,109,92,0,0,42.7,0.845,54,tested_negative 164 | 1,95,66,13,38,19.6,0.334,25,tested_negative 165 | 4,146,85,27,100,28.9,0.189,27,tested_negative 166 | 2,100,66,20,90,32.9,0.867,28,tested_positive 167 | 5,139,64,35,140,28.6,0.411,26,tested_negative 168 | 13,126,90,0,0,43.4,0.583,42,tested_positive 169 | 4,129,86,20,270,35.1,0.231,23,tested_negative 170 | 1,79,75,30,0,32,0.396,22,tested_negative 171 | 1,0,48,20,0,24.7,0.14,22,tested_negative 172 | 7,62,78,0,0,32.6,0.391,41,tested_negative 173 | 5,95,72,33,0,37.7,0.37,27,tested_negative 174 | 0,131,0,0,0,43.2,0.27,26,tested_positive 175 | 2,112,66,22,0,25,0.307,24,tested_negative 176 | 3,113,44,13,0,22.4,0.14,22,tested_negative 177 | 2,74,0,0,0,0,0.102,22,tested_negative 178 | 7,83,78,26,71,29.3,0.767,36,tested_negative 179 | 0,101,65,28,0,24.6,0.237,22,tested_negative 180 | 5,137,108,0,0,48.8,0.227,37,tested_positive 181 | 2,110,74,29,125,32.4,0.698,27,tested_negative 182 | 13,106,72,54,0,36.6,0.178,45,tested_negative 183 | 2,100,68,25,71,38.5,0.324,26,tested_negative 184 | 15,136,70,32,110,37.1,0.153,43,tested_positive 185 | 1,107,68,19,0,26.5,0.165,24,tested_negative 186 | 1,80,55,0,0,19.1,0.258,21,tested_negative 187 | 4,123,80,15,176,32,0.443,34,tested_negative 188 | 7,81,78,40,48,46.7,0.261,42,tested_negative 189 | 4,134,72,0,0,23.8,0.277,60,tested_positive 190 | 2,142,82,18,64,24.7,0.761,21,tested_negative 191 | 6,144,72,27,228,33.9,0.255,40,tested_negative 192 | 2,92,62,28,0,31.6,0.13,24,tested_negative 193 | 1,71,48,18,76,20.4,0.323,22,tested_negative 194 | 6,93,50,30,64,28.7,0.356,23,tested_negative 195 | 1,122,90,51,220,49.7,0.325,31,tested_positive 196 | 
1,163,72,0,0,39,1.222,33,tested_positive 197 | 1,151,60,0,0,26.1,0.179,22,tested_negative 198 | 0,125,96,0,0,22.5,0.262,21,tested_negative 199 | 1,81,72,18,40,26.6,0.283,24,tested_negative 200 | 2,85,65,0,0,39.6,0.93,27,tested_negative 201 | 1,126,56,29,152,28.7,0.801,21,tested_negative 202 | 1,96,122,0,0,22.4,0.207,27,tested_negative 203 | 4,144,58,28,140,29.5,0.287,37,tested_negative 204 | 3,83,58,31,18,34.3,0.336,25,tested_negative 205 | 0,95,85,25,36,37.4,0.247,24,tested_positive 206 | 3,171,72,33,135,33.3,0.199,24,tested_positive 207 | 8,155,62,26,495,34,0.543,46,tested_positive 208 | 1,89,76,34,37,31.2,0.192,23,tested_negative 209 | 4,76,62,0,0,34,0.391,25,tested_negative 210 | 7,160,54,32,175,30.5,0.588,39,tested_positive 211 | 4,146,92,0,0,31.2,0.539,61,tested_positive 212 | 5,124,74,0,0,34,0.22,38,tested_positive 213 | 5,78,48,0,0,33.7,0.654,25,tested_negative 214 | 4,97,60,23,0,28.2,0.443,22,tested_negative 215 | 4,99,76,15,51,23.2,0.223,21,tested_negative 216 | 0,162,76,56,100,53.2,0.759,25,tested_positive 217 | 6,111,64,39,0,34.2,0.26,24,tested_negative 218 | 2,107,74,30,100,33.6,0.404,23,tested_negative 219 | 5,132,80,0,0,26.8,0.186,69,tested_negative 220 | 0,113,76,0,0,33.3,0.278,23,tested_positive 221 | 1,88,30,42,99,55,0.496,26,tested_positive 222 | 3,120,70,30,135,42.9,0.452,30,tested_negative 223 | 1,118,58,36,94,33.3,0.261,23,tested_negative 224 | 1,117,88,24,145,34.5,0.403,40,tested_positive 225 | 0,105,84,0,0,27.9,0.741,62,tested_positive 226 | 4,173,70,14,168,29.7,0.361,33,tested_positive 227 | 9,122,56,0,0,33.3,1.114,33,tested_positive 228 | 3,170,64,37,225,34.5,0.356,30,tested_positive 229 | 8,84,74,31,0,38.3,0.457,39,tested_negative 230 | 2,96,68,13,49,21.1,0.647,26,tested_negative 231 | 2,125,60,20,140,33.8,0.088,31,tested_negative 232 | 0,100,70,26,50,30.8,0.597,21,tested_negative 233 | 0,93,60,25,92,28.7,0.532,22,tested_negative 234 | 0,129,80,0,0,31.2,0.703,29,tested_negative 235 | 5,105,72,29,325,36.9,0.159,28,tested_negative 236 | 
3,128,78,0,0,21.1,0.268,55,tested_negative 237 | 5,106,82,30,0,39.5,0.286,38,tested_negative 238 | 2,108,52,26,63,32.5,0.318,22,tested_negative 239 | 10,108,66,0,0,32.4,0.272,42,tested_positive 240 | 4,154,62,31,284,32.8,0.237,23,tested_negative 241 | 0,102,75,23,0,0,0.572,21,tested_negative 242 | 9,57,80,37,0,32.8,0.096,41,tested_negative 243 | 2,106,64,35,119,30.5,1.4,34,tested_negative 244 | 5,147,78,0,0,33.7,0.218,65,tested_negative 245 | 2,90,70,17,0,27.3,0.085,22,tested_negative 246 | 1,136,74,50,204,37.4,0.399,24,tested_negative 247 | 4,114,65,0,0,21.9,0.432,37,tested_negative 248 | 9,156,86,28,155,34.3,1.189,42,tested_positive 249 | 1,153,82,42,485,40.6,0.687,23,tested_negative 250 | 8,188,78,0,0,47.9,0.137,43,tested_positive 251 | 7,152,88,44,0,50,0.337,36,tested_positive 252 | 2,99,52,15,94,24.6,0.637,21,tested_negative 253 | 1,109,56,21,135,25.2,0.833,23,tested_negative 254 | 2,88,74,19,53,29,0.229,22,tested_negative 255 | 17,163,72,41,114,40.9,0.817,47,tested_positive 256 | 4,151,90,38,0,29.7,0.294,36,tested_negative 257 | 7,102,74,40,105,37.2,0.204,45,tested_negative 258 | 0,114,80,34,285,44.2,0.167,27,tested_negative 259 | 2,100,64,23,0,29.7,0.368,21,tested_negative 260 | 0,131,88,0,0,31.6,0.743,32,tested_positive 261 | 6,104,74,18,156,29.9,0.722,41,tested_positive 262 | 3,148,66,25,0,32.5,0.256,22,tested_negative 263 | 4,120,68,0,0,29.6,0.709,34,tested_negative 264 | 4,110,66,0,0,31.9,0.471,29,tested_negative 265 | 3,111,90,12,78,28.4,0.495,29,tested_negative 266 | 6,102,82,0,0,30.8,0.18,36,tested_positive 267 | 6,134,70,23,130,35.4,0.542,29,tested_positive 268 | 2,87,0,23,0,28.9,0.773,25,tested_negative 269 | 1,79,60,42,48,43.5,0.678,23,tested_negative 270 | 2,75,64,24,55,29.7,0.37,33,tested_negative 271 | 8,179,72,42,130,32.7,0.719,36,tested_positive 272 | 6,85,78,0,0,31.2,0.382,42,tested_negative 273 | 0,129,110,46,130,67.1,0.319,26,tested_positive 274 | 5,143,78,0,0,45,0.19,47,tested_negative 275 | 5,130,82,0,0,39.1,0.956,37,tested_positive 276 | 
6,87,80,0,0,23.2,0.084,32,tested_negative 277 | 0,119,64,18,92,34.9,0.725,23,tested_negative 278 | 1,0,74,20,23,27.7,0.299,21,tested_negative 279 | 5,73,60,0,0,26.8,0.268,27,tested_negative 280 | 4,141,74,0,0,27.6,0.244,40,tested_negative 281 | 7,194,68,28,0,35.9,0.745,41,tested_positive 282 | 8,181,68,36,495,30.1,0.615,60,tested_positive 283 | 1,128,98,41,58,32,1.321,33,tested_positive 284 | 8,109,76,39,114,27.9,0.64,31,tested_positive 285 | 5,139,80,35,160,31.6,0.361,25,tested_positive 286 | 3,111,62,0,0,22.6,0.142,21,tested_negative 287 | 9,123,70,44,94,33.1,0.374,40,tested_negative 288 | 7,159,66,0,0,30.4,0.383,36,tested_positive 289 | 11,135,0,0,0,52.3,0.578,40,tested_positive 290 | 8,85,55,20,0,24.4,0.136,42,tested_negative 291 | 5,158,84,41,210,39.4,0.395,29,tested_positive 292 | 1,105,58,0,0,24.3,0.187,21,tested_negative 293 | 3,107,62,13,48,22.9,0.678,23,tested_positive 294 | 4,109,64,44,99,34.8,0.905,26,tested_positive 295 | 4,148,60,27,318,30.9,0.15,29,tested_positive 296 | 0,113,80,16,0,31,0.874,21,tested_negative 297 | 1,138,82,0,0,40.1,0.236,28,tested_negative 298 | 0,108,68,20,0,27.3,0.787,32,tested_negative 299 | 2,99,70,16,44,20.4,0.235,27,tested_negative 300 | 6,103,72,32,190,37.7,0.324,55,tested_negative 301 | 5,111,72,28,0,23.9,0.407,27,tested_negative 302 | 8,196,76,29,280,37.5,0.605,57,tested_positive 303 | 5,162,104,0,0,37.7,0.151,52,tested_positive 304 | 1,96,64,27,87,33.2,0.289,21,tested_negative 305 | 7,184,84,33,0,35.5,0.355,41,tested_positive 306 | 2,81,60,22,0,27.7,0.29,25,tested_negative 307 | 0,147,85,54,0,42.8,0.375,24,tested_negative 308 | 7,179,95,31,0,34.2,0.164,60,tested_negative 309 | 0,140,65,26,130,42.6,0.431,24,tested_positive 310 | 9,112,82,32,175,34.2,0.26,36,tested_positive 311 | 12,151,70,40,271,41.8,0.742,38,tested_positive 312 | 5,109,62,41,129,35.8,0.514,25,tested_positive 313 | 6,125,68,30,120,30,0.464,32,tested_negative 314 | 5,85,74,22,0,29,1.224,32,tested_positive 315 | 5,112,66,0,0,37.8,0.261,41,tested_positive 
316 | 0,177,60,29,478,34.6,1.072,21,tested_positive 317 | 2,158,90,0,0,31.6,0.805,66,tested_positive 318 | 7,119,0,0,0,25.2,0.209,37,tested_negative 319 | 7,142,60,33,190,28.8,0.687,61,tested_negative 320 | 1,100,66,15,56,23.6,0.666,26,tested_negative 321 | 1,87,78,27,32,34.6,0.101,22,tested_negative 322 | 0,101,76,0,0,35.7,0.198,26,tested_negative 323 | 3,162,52,38,0,37.2,0.652,24,tested_positive 324 | 4,197,70,39,744,36.7,2.329,31,tested_negative 325 | 0,117,80,31,53,45.2,0.089,24,tested_negative 326 | 4,142,86,0,0,44,0.645,22,tested_positive 327 | 6,134,80,37,370,46.2,0.238,46,tested_positive 328 | 1,79,80,25,37,25.4,0.583,22,tested_negative 329 | 4,122,68,0,0,35,0.394,29,tested_negative 330 | 3,74,68,28,45,29.7,0.293,23,tested_negative 331 | 4,171,72,0,0,43.6,0.479,26,tested_positive 332 | 7,181,84,21,192,35.9,0.586,51,tested_positive 333 | 0,179,90,27,0,44.1,0.686,23,tested_positive 334 | 9,164,84,21,0,30.8,0.831,32,tested_positive 335 | 0,104,76,0,0,18.4,0.582,27,tested_negative 336 | 1,91,64,24,0,29.2,0.192,21,tested_negative 337 | 4,91,70,32,88,33.1,0.446,22,tested_negative 338 | 3,139,54,0,0,25.6,0.402,22,tested_positive 339 | 6,119,50,22,176,27.1,1.318,33,tested_positive 340 | 2,146,76,35,194,38.2,0.329,29,tested_negative 341 | 9,184,85,15,0,30,1.213,49,tested_positive 342 | 10,122,68,0,0,31.2,0.258,41,tested_negative 343 | 0,165,90,33,680,52.3,0.427,23,tested_negative 344 | 9,124,70,33,402,35.4,0.282,34,tested_negative 345 | 1,111,86,19,0,30.1,0.143,23,tested_negative 346 | 9,106,52,0,0,31.2,0.38,42,tested_negative 347 | 2,129,84,0,0,28,0.284,27,tested_negative 348 | 2,90,80,14,55,24.4,0.249,24,tested_negative 349 | 0,86,68,32,0,35.8,0.238,25,tested_negative 350 | 12,92,62,7,258,27.6,0.926,44,tested_positive 351 | 1,113,64,35,0,33.6,0.543,21,tested_positive 352 | 3,111,56,39,0,30.1,0.557,30,tested_negative 353 | 2,114,68,22,0,28.7,0.092,25,tested_negative 354 | 1,193,50,16,375,25.9,0.655,24,tested_negative 355 | 
11,155,76,28,150,33.3,1.353,51,tested_positive 356 | 3,191,68,15,130,30.9,0.299,34,tested_negative 357 | 3,141,0,0,0,30,0.761,27,tested_positive 358 | 4,95,70,32,0,32.1,0.612,24,tested_negative 359 | 3,142,80,15,0,32.4,0.2,63,tested_negative 360 | 4,123,62,0,0,32,0.226,35,tested_positive 361 | 5,96,74,18,67,33.6,0.997,43,tested_negative 362 | 0,138,0,0,0,36.3,0.933,25,tested_positive 363 | 2,128,64,42,0,40,1.101,24,tested_negative 364 | 0,102,52,0,0,25.1,0.078,21,tested_negative 365 | 2,146,0,0,0,27.5,0.24,28,tested_positive 366 | 10,101,86,37,0,45.6,1.136,38,tested_positive 367 | 2,108,62,32,56,25.2,0.128,21,tested_negative 368 | 3,122,78,0,0,23,0.254,40,tested_negative 369 | 1,71,78,50,45,33.2,0.422,21,tested_negative 370 | 13,106,70,0,0,34.2,0.251,52,tested_negative 371 | 2,100,70,52,57,40.5,0.677,25,tested_negative 372 | 7,106,60,24,0,26.5,0.296,29,tested_positive 373 | 0,104,64,23,116,27.8,0.454,23,tested_negative 374 | 5,114,74,0,0,24.9,0.744,57,tested_negative 375 | 2,108,62,10,278,25.3,0.881,22,tested_negative 376 | 0,146,70,0,0,37.9,0.334,28,tested_positive 377 | 10,129,76,28,122,35.9,0.28,39,tested_negative 378 | 7,133,88,15,155,32.4,0.262,37,tested_negative 379 | 7,161,86,0,0,30.4,0.165,47,tested_positive 380 | 2,108,80,0,0,27,0.259,52,tested_positive 381 | 7,136,74,26,135,26,0.647,51,tested_negative 382 | 5,155,84,44,545,38.7,0.619,34,tested_negative 383 | 1,119,86,39,220,45.6,0.808,29,tested_positive 384 | 4,96,56,17,49,20.8,0.34,26,tested_negative 385 | 5,108,72,43,75,36.1,0.263,33,tested_negative 386 | 0,78,88,29,40,36.9,0.434,21,tested_negative 387 | 0,107,62,30,74,36.6,0.757,25,tested_positive 388 | 2,128,78,37,182,43.3,1.224,31,tested_positive 389 | 1,128,48,45,194,40.5,0.613,24,tested_positive 390 | 0,161,50,0,0,21.9,0.254,65,tested_negative 391 | 6,151,62,31,120,35.5,0.692,28,tested_negative 392 | 2,146,70,38,360,28,0.337,29,tested_positive 393 | 0,126,84,29,215,30.7,0.52,24,tested_negative 394 | 14,100,78,25,184,36.6,0.412,46,tested_positive 
395 | 8,112,72,0,0,23.6,0.84,58,tested_negative 396 | 0,167,0,0,0,32.3,0.839,30,tested_positive 397 | 2,144,58,33,135,31.6,0.422,25,tested_positive 398 | 5,77,82,41,42,35.8,0.156,35,tested_negative 399 | 5,115,98,0,0,52.9,0.209,28,tested_positive 400 | 3,150,76,0,0,21,0.207,37,tested_negative 401 | 2,120,76,37,105,39.7,0.215,29,tested_negative 402 | 10,161,68,23,132,25.5,0.326,47,tested_positive 403 | 0,137,68,14,148,24.8,0.143,21,tested_negative 404 | 0,128,68,19,180,30.5,1.391,25,tested_positive 405 | 2,124,68,28,205,32.9,0.875,30,tested_positive 406 | 6,80,66,30,0,26.2,0.313,41,tested_negative 407 | 0,106,70,37,148,39.4,0.605,22,tested_negative 408 | 2,155,74,17,96,26.6,0.433,27,tested_positive 409 | 3,113,50,10,85,29.5,0.626,25,tested_negative 410 | 7,109,80,31,0,35.9,1.127,43,tested_positive 411 | 2,112,68,22,94,34.1,0.315,26,tested_negative 412 | 3,99,80,11,64,19.3,0.284,30,tested_negative 413 | 3,182,74,0,0,30.5,0.345,29,tested_positive 414 | 3,115,66,39,140,38.1,0.15,28,tested_negative 415 | 6,194,78,0,0,23.5,0.129,59,tested_positive 416 | 4,129,60,12,231,27.5,0.527,31,tested_negative 417 | 3,112,74,30,0,31.6,0.197,25,tested_positive 418 | 0,124,70,20,0,27.4,0.254,36,tested_positive 419 | 13,152,90,33,29,26.8,0.731,43,tested_positive 420 | 2,112,75,32,0,35.7,0.148,21,tested_negative 421 | 1,157,72,21,168,25.6,0.123,24,tested_negative 422 | 1,122,64,32,156,35.1,0.692,30,tested_positive 423 | 10,179,70,0,0,35.1,0.2,37,tested_negative 424 | 2,102,86,36,120,45.5,0.127,23,tested_positive 425 | 6,105,70,32,68,30.8,0.122,37,tested_negative 426 | 8,118,72,19,0,23.1,1.476,46,tested_negative 427 | 2,87,58,16,52,32.7,0.166,25,tested_negative 428 | 1,180,0,0,0,43.3,0.282,41,tested_positive 429 | 12,106,80,0,0,23.6,0.137,44,tested_negative 430 | 1,95,60,18,58,23.9,0.26,22,tested_negative 431 | 0,165,76,43,255,47.9,0.259,26,tested_negative 432 | 0,117,0,0,0,33.8,0.932,44,tested_negative 433 | 5,115,76,0,0,31.2,0.343,44,tested_positive 434 | 
9,152,78,34,171,34.2,0.893,33,tested_positive 435 | 7,178,84,0,0,39.9,0.331,41,tested_positive 436 | 1,130,70,13,105,25.9,0.472,22,tested_negative 437 | 1,95,74,21,73,25.9,0.673,36,tested_negative 438 | 1,0,68,35,0,32,0.389,22,tested_negative 439 | 5,122,86,0,0,34.7,0.29,33,tested_negative 440 | 8,95,72,0,0,36.8,0.485,57,tested_negative 441 | 8,126,88,36,108,38.5,0.349,49,tested_negative 442 | 1,139,46,19,83,28.7,0.654,22,tested_negative 443 | 3,116,0,0,0,23.5,0.187,23,tested_negative 444 | 3,99,62,19,74,21.8,0.279,26,tested_negative 445 | 5,0,80,32,0,41,0.346,37,tested_positive 446 | 4,92,80,0,0,42.2,0.237,29,tested_negative 447 | 4,137,84,0,0,31.2,0.252,30,tested_negative 448 | 3,61,82,28,0,34.4,0.243,46,tested_negative 449 | 1,90,62,12,43,27.2,0.58,24,tested_negative 450 | 3,90,78,0,0,42.7,0.559,21,tested_negative 451 | 9,165,88,0,0,30.4,0.302,49,tested_positive 452 | 1,125,50,40,167,33.3,0.962,28,tested_positive 453 | 13,129,0,30,0,39.9,0.569,44,tested_positive 454 | 12,88,74,40,54,35.3,0.378,48,tested_negative 455 | 1,196,76,36,249,36.5,0.875,29,tested_positive 456 | 5,189,64,33,325,31.2,0.583,29,tested_positive 457 | 5,158,70,0,0,29.8,0.207,63,tested_negative 458 | 5,103,108,37,0,39.2,0.305,65,tested_negative 459 | 4,146,78,0,0,38.5,0.52,67,tested_positive 460 | 4,147,74,25,293,34.9,0.385,30,tested_negative 461 | 5,99,54,28,83,34,0.499,30,tested_negative 462 | 6,124,72,0,0,27.6,0.368,29,tested_positive 463 | 0,101,64,17,0,21,0.252,21,tested_negative 464 | 3,81,86,16,66,27.5,0.306,22,tested_negative 465 | 1,133,102,28,140,32.8,0.234,45,tested_positive 466 | 3,173,82,48,465,38.4,2.137,25,tested_positive 467 | 0,118,64,23,89,0,1.731,21,tested_negative 468 | 0,84,64,22,66,35.8,0.545,21,tested_negative 469 | 2,105,58,40,94,34.9,0.225,25,tested_negative 470 | 2,122,52,43,158,36.2,0.816,28,tested_negative 471 | 12,140,82,43,325,39.2,0.528,58,tested_positive 472 | 0,98,82,15,84,25.2,0.299,22,tested_negative 473 | 1,87,60,37,75,37.2,0.509,22,tested_negative 474 | 
4,156,75,0,0,48.3,0.238,32,tested_positive 475 | 0,93,100,39,72,43.4,1.021,35,tested_negative 476 | 1,107,72,30,82,30.8,0.821,24,tested_negative 477 | 0,105,68,22,0,20,0.236,22,tested_negative 478 | 1,109,60,8,182,25.4,0.947,21,tested_negative 479 | 1,90,62,18,59,25.1,1.268,25,tested_negative 480 | 1,125,70,24,110,24.3,0.221,25,tested_negative 481 | 1,119,54,13,50,22.3,0.205,24,tested_negative 482 | 5,116,74,29,0,32.3,0.66,35,tested_positive 483 | 8,105,100,36,0,43.3,0.239,45,tested_positive 484 | 5,144,82,26,285,32,0.452,58,tested_positive 485 | 3,100,68,23,81,31.6,0.949,28,tested_negative 486 | 1,100,66,29,196,32,0.444,42,tested_negative 487 | 5,166,76,0,0,45.7,0.34,27,tested_positive 488 | 1,131,64,14,415,23.7,0.389,21,tested_negative 489 | 4,116,72,12,87,22.1,0.463,37,tested_negative 490 | 4,158,78,0,0,32.9,0.803,31,tested_positive 491 | 2,127,58,24,275,27.7,1.6,25,tested_negative 492 | 3,96,56,34,115,24.7,0.944,39,tested_negative 493 | 0,131,66,40,0,34.3,0.196,22,tested_positive 494 | 3,82,70,0,0,21.1,0.389,25,tested_negative 495 | 3,193,70,31,0,34.9,0.241,25,tested_positive 496 | 4,95,64,0,0,32,0.161,31,tested_positive 497 | 6,137,61,0,0,24.2,0.151,55,tested_negative 498 | 5,136,84,41,88,35,0.286,35,tested_positive 499 | 9,72,78,25,0,31.6,0.28,38,tested_negative 500 | 5,168,64,0,0,32.9,0.135,41,tested_positive 501 | 2,123,48,32,165,42.1,0.52,26,tested_negative 502 | 4,115,72,0,0,28.9,0.376,46,tested_positive 503 | 0,101,62,0,0,21.9,0.336,25,tested_negative 504 | 8,197,74,0,0,25.9,1.191,39,tested_positive 505 | 1,172,68,49,579,42.4,0.702,28,tested_positive 506 | 6,102,90,39,0,35.7,0.674,28,tested_negative 507 | 1,112,72,30,176,34.4,0.528,25,tested_negative 508 | 1,143,84,23,310,42.4,1.076,22,tested_negative 509 | 1,143,74,22,61,26.2,0.256,21,tested_negative 510 | 0,138,60,35,167,34.6,0.534,21,tested_positive 511 | 3,173,84,33,474,35.7,0.258,22,tested_positive 512 | 1,97,68,21,0,27.2,1.095,22,tested_negative 513 | 4,144,82,32,0,38.5,0.554,37,tested_positive 514 
| 1,83,68,0,0,18.2,0.624,27,tested_negative 515 | 3,129,64,29,115,26.4,0.219,28,tested_positive 516 | 1,119,88,41,170,45.3,0.507,26,tested_negative 517 | 2,94,68,18,76,26,0.561,21,tested_negative 518 | 0,102,64,46,78,40.6,0.496,21,tested_negative 519 | 2,115,64,22,0,30.8,0.421,21,tested_negative 520 | 8,151,78,32,210,42.9,0.516,36,tested_positive 521 | 4,184,78,39,277,37,0.264,31,tested_positive 522 | 0,94,0,0,0,0,0.256,25,tested_negative 523 | 1,181,64,30,180,34.1,0.328,38,tested_positive 524 | 0,135,94,46,145,40.6,0.284,26,tested_negative 525 | 1,95,82,25,180,35,0.233,43,tested_positive 526 | 2,99,0,0,0,22.2,0.108,23,tested_negative 527 | 3,89,74,16,85,30.4,0.551,38,tested_negative 528 | 1,80,74,11,60,30,0.527,22,tested_negative 529 | 2,139,75,0,0,25.6,0.167,29,tested_negative 530 | 1,90,68,8,0,24.5,1.138,36,tested_negative 531 | 0,141,0,0,0,42.4,0.205,29,tested_positive 532 | 12,140,85,33,0,37.4,0.244,41,tested_negative 533 | 5,147,75,0,0,29.9,0.434,28,tested_negative 534 | 1,97,70,15,0,18.2,0.147,21,tested_negative 535 | 6,107,88,0,0,36.8,0.727,31,tested_negative 536 | 0,189,104,25,0,34.3,0.435,41,tested_positive 537 | 2,83,66,23,50,32.2,0.497,22,tested_negative 538 | 4,117,64,27,120,33.2,0.23,24,tested_negative 539 | 8,108,70,0,0,30.5,0.955,33,tested_positive 540 | 4,117,62,12,0,29.7,0.38,30,tested_positive 541 | 0,180,78,63,14,59.4,2.42,25,tested_positive 542 | 1,100,72,12,70,25.3,0.658,28,tested_negative 543 | 0,95,80,45,92,36.5,0.33,26,tested_negative 544 | 0,104,64,37,64,33.6,0.51,22,tested_positive 545 | 0,120,74,18,63,30.5,0.285,26,tested_negative 546 | 1,82,64,13,95,21.2,0.415,23,tested_negative 547 | 2,134,70,0,0,28.9,0.542,23,tested_positive 548 | 0,91,68,32,210,39.9,0.381,25,tested_negative 549 | 2,119,0,0,0,19.6,0.832,72,tested_negative 550 | 2,100,54,28,105,37.8,0.498,24,tested_negative 551 | 14,175,62,30,0,33.6,0.212,38,tested_positive 552 | 1,135,54,0,0,26.7,0.687,62,tested_negative 553 | 5,86,68,28,71,30.2,0.364,24,tested_negative 554 | 
10,148,84,48,237,37.6,1.001,51,tested_positive 555 | 9,134,74,33,60,25.9,0.46,81,tested_negative 556 | 9,120,72,22,56,20.8,0.733,48,tested_negative 557 | 1,71,62,0,0,21.8,0.416,26,tested_negative 558 | 8,74,70,40,49,35.3,0.705,39,tested_negative 559 | 5,88,78,30,0,27.6,0.258,37,tested_negative 560 | 10,115,98,0,0,24,1.022,34,tested_negative 561 | 0,124,56,13,105,21.8,0.452,21,tested_negative 562 | 0,74,52,10,36,27.8,0.269,22,tested_negative 563 | 0,97,64,36,100,36.8,0.6,25,tested_negative 564 | 8,120,0,0,0,30,0.183,38,tested_positive 565 | 6,154,78,41,140,46.1,0.571,27,tested_negative 566 | 1,144,82,40,0,41.3,0.607,28,tested_negative 567 | 0,137,70,38,0,33.2,0.17,22,tested_negative 568 | 0,119,66,27,0,38.8,0.259,22,tested_negative 569 | 7,136,90,0,0,29.9,0.21,50,tested_negative 570 | 4,114,64,0,0,28.9,0.126,24,tested_negative 571 | 0,137,84,27,0,27.3,0.231,59,tested_negative 572 | 2,105,80,45,191,33.7,0.711,29,tested_positive 573 | 7,114,76,17,110,23.8,0.466,31,tested_negative 574 | 8,126,74,38,75,25.9,0.162,39,tested_negative 575 | 4,132,86,31,0,28,0.419,63,tested_negative 576 | 3,158,70,30,328,35.5,0.344,35,tested_positive 577 | 0,123,88,37,0,35.2,0.197,29,tested_negative 578 | 4,85,58,22,49,27.8,0.306,28,tested_negative 579 | 0,84,82,31,125,38.2,0.233,23,tested_negative 580 | 0,145,0,0,0,44.2,0.63,31,tested_positive 581 | 0,135,68,42,250,42.3,0.365,24,tested_positive 582 | 1,139,62,41,480,40.7,0.536,21,tested_negative 583 | 0,173,78,32,265,46.5,1.159,58,tested_negative 584 | 4,99,72,17,0,25.6,0.294,28,tested_negative 585 | 8,194,80,0,0,26.1,0.551,67,tested_negative 586 | 2,83,65,28,66,36.8,0.629,24,tested_negative 587 | 2,89,90,30,0,33.5,0.292,42,tested_negative 588 | 4,99,68,38,0,32.8,0.145,33,tested_negative 589 | 4,125,70,18,122,28.9,1.144,45,tested_positive 590 | 3,80,0,0,0,0,0.174,22,tested_negative 591 | 6,166,74,0,0,26.6,0.304,66,tested_negative 592 | 5,110,68,0,0,26,0.292,30,tested_negative 593 | 2,81,72,15,76,30.1,0.547,25,tested_negative 594 | 
7,195,70,33,145,25.1,0.163,55,tested_positive 595 | 6,154,74,32,193,29.3,0.839,39,tested_negative 596 | 2,117,90,19,71,25.2,0.313,21,tested_negative 597 | 3,84,72,32,0,37.2,0.267,28,tested_negative 598 | 6,0,68,41,0,39,0.727,41,tested_positive 599 | 7,94,64,25,79,33.3,0.738,41,tested_negative 600 | 3,96,78,39,0,37.3,0.238,40,tested_negative 601 | 10,75,82,0,0,33.3,0.263,38,tested_negative 602 | 0,180,90,26,90,36.5,0.314,35,tested_positive 603 | 1,130,60,23,170,28.6,0.692,21,tested_negative 604 | 2,84,50,23,76,30.4,0.968,21,tested_negative 605 | 8,120,78,0,0,25,0.409,64,tested_negative 606 | 12,84,72,31,0,29.7,0.297,46,tested_positive 607 | 0,139,62,17,210,22.1,0.207,21,tested_negative 608 | 9,91,68,0,0,24.2,0.2,58,tested_negative 609 | 2,91,62,0,0,27.3,0.525,22,tested_negative 610 | 3,99,54,19,86,25.6,0.154,24,tested_negative 611 | 3,163,70,18,105,31.6,0.268,28,tested_positive 612 | 9,145,88,34,165,30.3,0.771,53,tested_positive 613 | 7,125,86,0,0,37.6,0.304,51,tested_negative 614 | 13,76,60,0,0,32.8,0.18,41,tested_negative 615 | 6,129,90,7,326,19.6,0.582,60,tested_negative 616 | 2,68,70,32,66,25,0.187,25,tested_negative 617 | 3,124,80,33,130,33.2,0.305,26,tested_negative 618 | 6,114,0,0,0,0,0.189,26,tested_negative 619 | 9,130,70,0,0,34.2,0.652,45,tested_positive 620 | 3,125,58,0,0,31.6,0.151,24,tested_negative 621 | 3,87,60,18,0,21.8,0.444,21,tested_negative 622 | 1,97,64,19,82,18.2,0.299,21,tested_negative 623 | 3,116,74,15,105,26.3,0.107,24,tested_negative 624 | 0,117,66,31,188,30.8,0.493,22,tested_negative 625 | 0,111,65,0,0,24.6,0.66,31,tested_negative 626 | 2,122,60,18,106,29.8,0.717,22,tested_negative 627 | 0,107,76,0,0,45.3,0.686,24,tested_negative 628 | 1,86,66,52,65,41.3,0.917,29,tested_negative 629 | 6,91,0,0,0,29.8,0.501,31,tested_negative 630 | 1,77,56,30,56,33.3,1.251,24,tested_negative 631 | 4,132,0,0,0,32.9,0.302,23,tested_positive 632 | 0,105,90,0,0,29.6,0.197,46,tested_negative 633 | 0,57,60,0,0,21.7,0.735,67,tested_negative 634 | 
0,127,80,37,210,36.3,0.804,23,tested_negative 635 | 3,129,92,49,155,36.4,0.968,32,tested_positive 636 | 8,100,74,40,215,39.4,0.661,43,tested_positive 637 | 3,128,72,25,190,32.4,0.549,27,tested_positive 638 | 10,90,85,32,0,34.9,0.825,56,tested_positive 639 | 4,84,90,23,56,39.5,0.159,25,tested_negative 640 | 1,88,78,29,76,32,0.365,29,tested_negative 641 | 8,186,90,35,225,34.5,0.423,37,tested_positive 642 | 5,187,76,27,207,43.6,1.034,53,tested_positive 643 | 4,131,68,21,166,33.1,0.16,28,tested_negative 644 | 1,164,82,43,67,32.8,0.341,50,tested_negative 645 | 4,189,110,31,0,28.5,0.68,37,tested_negative 646 | 1,116,70,28,0,27.4,0.204,21,tested_negative 647 | 3,84,68,30,106,31.9,0.591,25,tested_negative 648 | 6,114,88,0,0,27.8,0.247,66,tested_negative 649 | 1,88,62,24,44,29.9,0.422,23,tested_negative 650 | 1,84,64,23,115,36.9,0.471,28,tested_negative 651 | 7,124,70,33,215,25.5,0.161,37,tested_negative 652 | 1,97,70,40,0,38.1,0.218,30,tested_negative 653 | 8,110,76,0,0,27.8,0.237,58,tested_negative 654 | 11,103,68,40,0,46.2,0.126,42,tested_negative 655 | 11,85,74,0,0,30.1,0.3,35,tested_negative 656 | 6,125,76,0,0,33.8,0.121,54,tested_positive 657 | 0,198,66,32,274,41.3,0.502,28,tested_positive 658 | 1,87,68,34,77,37.6,0.401,24,tested_negative 659 | 6,99,60,19,54,26.9,0.497,32,tested_negative 660 | 0,91,80,0,0,32.4,0.601,27,tested_negative 661 | 2,95,54,14,88,26.1,0.748,22,tested_negative 662 | 1,99,72,30,18,38.6,0.412,21,tested_negative 663 | 6,92,62,32,126,32,0.085,46,tested_negative 664 | 4,154,72,29,126,31.3,0.338,37,tested_negative 665 | 0,121,66,30,165,34.3,0.203,33,tested_positive 666 | 3,78,70,0,0,32.5,0.27,39,tested_negative 667 | 2,130,96,0,0,22.6,0.268,21,tested_negative 668 | 3,111,58,31,44,29.5,0.43,22,tested_negative 669 | 2,98,60,17,120,34.7,0.198,22,tested_negative 670 | 1,143,86,30,330,30.1,0.892,23,tested_negative 671 | 1,119,44,47,63,35.5,0.28,25,tested_negative 672 | 6,108,44,20,130,24,0.813,35,tested_negative 673 | 
2,118,80,0,0,42.9,0.693,21,tested_positive 674 | 10,133,68,0,0,27,0.245,36,tested_negative 675 | 2,197,70,99,0,34.7,0.575,62,tested_positive 676 | 0,151,90,46,0,42.1,0.371,21,tested_positive 677 | 6,109,60,27,0,25,0.206,27,tested_negative 678 | 12,121,78,17,0,26.5,0.259,62,tested_negative 679 | 8,100,76,0,0,38.7,0.19,42,tested_negative 680 | 8,124,76,24,600,28.7,0.687,52,tested_positive 681 | 1,93,56,11,0,22.5,0.417,22,tested_negative 682 | 8,143,66,0,0,34.9,0.129,41,tested_positive 683 | 6,103,66,0,0,24.3,0.249,29,tested_negative 684 | 3,176,86,27,156,33.3,1.154,52,tested_positive 685 | 0,73,0,0,0,21.1,0.342,25,tested_negative 686 | 11,111,84,40,0,46.8,0.925,45,tested_positive 687 | 2,112,78,50,140,39.4,0.175,24,tested_negative 688 | 3,132,80,0,0,34.4,0.402,44,tested_positive 689 | 2,82,52,22,115,28.5,1.699,25,tested_negative 690 | 6,123,72,45,230,33.6,0.733,34,tested_negative 691 | 0,188,82,14,185,32,0.682,22,tested_positive 692 | 0,67,76,0,0,45.3,0.194,46,tested_negative 693 | 1,89,24,19,25,27.8,0.559,21,tested_negative 694 | 1,173,74,0,0,36.8,0.088,38,tested_positive 695 | 1,109,38,18,120,23.1,0.407,26,tested_negative 696 | 1,108,88,19,0,27.1,0.4,24,tested_negative 697 | 6,96,0,0,0,23.7,0.19,28,tested_negative 698 | 1,124,74,36,0,27.8,0.1,30,tested_negative 699 | 7,150,78,29,126,35.2,0.692,54,tested_positive 700 | 4,183,0,0,0,28.4,0.212,36,tested_positive 701 | 1,124,60,32,0,35.8,0.514,21,tested_negative 702 | 1,181,78,42,293,40,1.258,22,tested_positive 703 | 1,92,62,25,41,19.5,0.482,25,tested_negative 704 | 0,152,82,39,272,41.5,0.27,27,tested_negative 705 | 1,111,62,13,182,24,0.138,23,tested_negative 706 | 3,106,54,21,158,30.9,0.292,24,tested_negative 707 | 3,174,58,22,194,32.9,0.593,36,tested_positive 708 | 7,168,88,42,321,38.2,0.787,40,tested_positive 709 | 6,105,80,28,0,32.5,0.878,26,tested_negative 710 | 11,138,74,26,144,36.1,0.557,50,tested_positive 711 | 3,106,72,0,0,25.8,0.207,27,tested_negative 712 | 6,117,96,0,0,28.7,0.157,30,tested_negative 713 | 
2,68,62,13,15,20.1,0.257,23,tested_negative 714 | 9,112,82,24,0,28.2,1.282,50,tested_positive 715 | 0,119,0,0,0,32.4,0.141,24,tested_positive 716 | 2,112,86,42,160,38.4,0.246,28,tested_negative 717 | 2,92,76,20,0,24.2,1.698,28,tested_negative 718 | 6,183,94,0,0,40.8,1.461,45,tested_negative 719 | 0,94,70,27,115,43.5,0.347,21,tested_negative 720 | 2,108,64,0,0,30.8,0.158,21,tested_negative 721 | 4,90,88,47,54,37.7,0.362,29,tested_negative 722 | 0,125,68,0,0,24.7,0.206,21,tested_negative 723 | 0,132,78,0,0,32.4,0.393,21,tested_negative 724 | 5,128,80,0,0,34.6,0.144,45,tested_negative 725 | 4,94,65,22,0,24.7,0.148,21,tested_negative 726 | 7,114,64,0,0,27.4,0.732,34,tested_positive 727 | 0,102,78,40,90,34.5,0.238,24,tested_negative 728 | 2,111,60,0,0,26.2,0.343,23,tested_negative 729 | 1,128,82,17,183,27.5,0.115,22,tested_negative 730 | 10,92,62,0,0,25.9,0.167,31,tested_negative 731 | 13,104,72,0,0,31.2,0.465,38,tested_positive 732 | 5,104,74,0,0,28.8,0.153,48,tested_negative 733 | 2,94,76,18,66,31.6,0.649,23,tested_negative 734 | 7,97,76,32,91,40.9,0.871,32,tested_positive 735 | 1,100,74,12,46,19.5,0.149,28,tested_negative 736 | 0,102,86,17,105,29.3,0.695,27,tested_negative 737 | 4,128,70,0,0,34.3,0.303,24,tested_negative 738 | 6,147,80,0,0,29.5,0.178,50,tested_positive 739 | 4,90,0,0,0,28,0.61,31,tested_negative 740 | 3,103,72,30,152,27.6,0.73,27,tested_negative 741 | 2,157,74,35,440,39.4,0.134,30,tested_negative 742 | 1,167,74,17,144,23.4,0.447,33,tested_positive 743 | 0,179,50,36,159,37.8,0.455,22,tested_positive 744 | 11,136,84,35,130,28.3,0.26,42,tested_positive 745 | 0,107,60,25,0,26.4,0.133,23,tested_negative 746 | 1,91,54,25,100,25.2,0.234,23,tested_negative 747 | 1,117,60,23,106,33.8,0.466,27,tested_negative 748 | 5,123,74,40,77,34.1,0.269,28,tested_negative 749 | 2,120,54,0,0,26.8,0.455,27,tested_negative 750 | 1,106,70,28,135,34.2,0.142,22,tested_negative 751 | 2,155,52,27,540,38.7,0.24,25,tested_positive 752 | 2,101,58,35,90,21.8,0.155,22,tested_negative 
753 | 1,120,80,48,200,38.9,1.162,41,tested_negative 754 | 11,127,106,0,0,39,0.19,51,tested_negative 755 | 3,80,82,31,70,34.2,1.292,27,tested_positive 756 | 10,162,84,0,0,27.7,0.182,54,tested_negative 757 | 1,199,76,43,0,42.9,1.394,22,tested_positive 758 | 8,167,106,46,231,37.6,0.165,43,tested_positive 759 | 9,145,80,46,130,37.9,0.637,40,tested_positive 760 | 6,115,60,39,0,33.7,0.245,40,tested_positive 761 | 1,112,80,45,132,34.8,0.217,24,tested_negative 762 | 4,145,82,18,0,32.5,0.235,70,tested_positive 763 | 10,111,70,27,0,27.5,0.141,40,tested_positive 764 | 6,98,58,33,190,34,0.43,43,tested_negative 765 | 9,154,78,30,100,30.9,0.164,45,tested_negative 766 | 6,165,68,26,168,33.6,0.631,49,tested_negative 767 | 1,99,58,10,0,25.4,0.551,21,tested_negative 768 | 10,68,106,23,49,35.5,0.285,47,tested_negative 769 | 3,123,100,35,240,57.3,0.88,22,tested_negative 770 | 8,91,82,0,0,35.6,0.587,68,tested_negative 771 | 6,195,70,0,0,30.9,0.328,31,tested_positive 772 | 9,156,86,0,0,24.8,0.23,53,tested_positive 773 | 0,93,60,0,0,35.3,0.263,25,tested_negative 774 | 3,121,52,0,0,36,0.127,25,tested_positive 775 | 2,101,58,17,265,24.2,0.614,23,tested_negative 776 | 2,56,56,28,45,24.2,0.332,22,tested_negative 777 | 0,162,76,36,0,49.6,0.364,26,tested_positive 778 | 0,95,64,39,105,44.6,0.366,22,tested_negative 779 | 4,125,80,0,0,32.3,0.536,27,tested_positive 780 | 5,136,82,0,0,0,0.64,69,tested_negative 781 | 2,129,74,26,205,33.2,0.591,25,tested_negative 782 | 3,130,64,0,0,23.1,0.314,22,tested_negative 783 | 1,107,50,19,0,28.3,0.181,29,tested_negative 784 | 1,140,74,26,180,24.1,0.828,23,tested_negative 785 | 1,144,82,46,180,46.1,0.335,46,tested_positive 786 | 8,107,80,0,0,24.6,0.856,34,tested_negative 787 | 13,158,114,0,0,42.3,0.257,44,tested_positive 788 | 2,121,70,32,95,39.1,0.886,23,tested_negative 789 | 7,129,68,49,125,38.5,0.439,43,tested_positive 790 | 2,90,60,0,0,23.5,0.191,25,tested_negative 791 | 7,142,90,24,480,30.4,0.128,43,tested_positive 792 | 
3,169,74,19,125,29.9,0.268,31,tested_positive 793 | 0,99,0,0,0,25,0.253,22,tested_negative 794 | 4,127,88,11,155,34.5,0.598,28,tested_negative 795 | 4,118,70,0,0,44.5,0.904,26,tested_negative 796 | 2,122,76,27,200,35.9,0.483,26,tested_negative 797 | 6,125,78,31,0,27.6,0.565,49,tested_positive 798 | 1,168,88,29,0,35,0.905,52,tested_positive 799 | 2,129,0,0,0,38.5,0.304,41,tested_negative 800 | 4,110,76,20,100,28.4,0.118,27,tested_negative 801 | 6,80,80,36,0,39.8,0.177,28,tested_negative 802 | 10,115,0,0,0,0,0.261,30,tested_positive 803 | 2,127,46,21,335,34.4,0.176,22,tested_negative 804 | 9,164,78,0,0,32.8,0.148,45,tested_positive 805 | 2,93,64,32,160,38,0.674,23,tested_positive 806 | 3,158,64,13,387,31.2,0.295,24,tested_negative 807 | 5,126,78,27,22,29.6,0.439,40,tested_negative 808 | 10,129,62,36,0,41.2,0.441,38,tested_positive 809 | 0,134,58,20,291,26.4,0.352,21,tested_negative 810 | 3,102,74,0,0,29.5,0.121,32,tested_negative 811 | 7,187,50,33,392,33.9,0.826,34,tested_positive 812 | 3,173,78,39,185,33.8,0.97,31,tested_positive 813 | 10,94,72,18,0,23.1,0.595,56,tested_negative 814 | 1,108,60,46,178,35.5,0.415,24,tested_negative 815 | 5,97,76,27,0,35.6,0.378,52,tested_positive 816 | 4,83,86,19,0,29.3,0.317,34,tested_negative 817 | 1,114,66,36,200,38.1,0.289,21,tested_negative 818 | 1,149,68,29,127,29.3,0.349,42,tested_positive 819 | 5,117,86,30,105,39.1,0.251,42,tested_negative 820 | 1,111,94,0,0,32.8,0.265,45,tested_negative 821 | 4,112,78,40,0,39.4,0.236,38,tested_negative 822 | 1,116,78,29,180,36.1,0.496,25,tested_negative 823 | 0,141,84,26,0,32.4,0.433,22,tested_negative 824 | 2,175,88,0,0,22.9,0.326,22,tested_negative 825 | 2,92,52,0,0,30.1,0.141,22,tested_negative 826 | 3,130,78,23,79,28.4,0.323,34,tested_positive 827 | 8,120,86,0,0,28.4,0.259,22,tested_positive 828 | 2,174,88,37,120,44.5,0.646,24,tested_positive 829 | 2,106,56,27,165,29,0.426,22,tested_negative 830 | 2,105,75,0,0,23.3,0.56,53,tested_negative 831 | 4,95,60,32,0,35.4,0.284,28,tested_negative 
832 | 0,126,86,27,120,27.4,0.515,21,tested_negative 833 | 8,65,72,23,0,32,0.6,42,tested_negative 834 | 2,99,60,17,160,36.6,0.453,21,tested_negative 835 | 1,102,74,0,0,39.5,0.293,42,tested_positive 836 | 11,120,80,37,150,42.3,0.785,48,tested_positive 837 | 3,102,44,20,94,30.8,0.4,26,tested_negative 838 | 1,109,58,18,116,28.5,0.219,22,tested_negative 839 | 9,140,94,0,0,32.7,0.734,45,tested_positive 840 | 13,153,88,37,140,40.6,1.174,39,tested_negative 841 | 12,100,84,33,105,30,0.488,46,tested_negative 842 | 1,147,94,41,0,49.3,0.358,27,tested_positive 843 | 1,81,74,41,57,46.3,1.096,32,tested_negative 844 | 3,187,70,22,200,36.4,0.408,36,tested_positive 845 | 6,162,62,0,0,24.3,0.178,50,tested_positive 846 | 4,136,70,0,0,31.2,1.182,22,tested_positive 847 | 1,121,78,39,74,39,0.261,28,tested_negative 848 | 3,108,62,24,0,26,0.223,25,tested_negative 849 | 0,181,88,44,510,43.3,0.222,26,tested_positive 850 | 8,154,78,32,0,32.4,0.443,45,tested_positive 851 | 1,128,88,39,110,36.5,1.057,37,tested_positive 852 | 7,137,90,41,0,32,0.391,39,tested_negative 853 | 0,123,72,0,0,36.3,0.258,52,tested_positive 854 | 1,106,76,0,0,37.5,0.197,26,tested_negative 855 | 6,190,92,0,0,35.5,0.278,66,tested_positive 856 | 2,88,58,26,16,28.4,0.766,22,tested_negative 857 | 9,170,74,31,0,44,0.403,43,tested_positive 858 | 9,89,62,0,0,22.5,0.142,33,tested_negative 859 | 10,101,76,48,180,32.9,0.171,63,tested_negative 860 | 2,122,70,27,0,36.8,0.34,27,tested_negative 861 | 5,121,72,23,112,26.2,0.245,30,tested_negative 862 | 1,126,60,0,0,30.1,0.349,47,tested_positive 863 | 1,93,70,31,0,30.4,0.315,23,tested_negative 864 | --------------------------------------------------------------------------------