├── CDEC ├── archs │ ├── cancer.json │ └── genome.json ├── customlayers.py ├── genome │ └── last_numeric.csv ├── keras_unpooling.py ├── main.py ├── misc.py ├── network.py ├── plots │ └── genome │ │ ├── autoencoder.png │ │ ├── clustered_kld.png │ │ ├── clustered_km.png │ │ └── raw.png └── self.trainAutoencoder.png ├── DEC_GenotypeClustering_Keras ├── DEC_Genotype_Clustering.py ├── LSTM_EthnicityPrediction.py └── genome.csv ├── PopulationClustering_v2 ├── output_1.txt ├── pom.xml ├── results │ └── train.csv │ │ ├── DEC_Genotype_Clustering.py │ │ ├── LSTM_EthnicityPrediction.py │ │ ├── genome.csv │ │ └── part-00000-2c4830b2-4c39-48fc-909d-4868a1164190-c000.csv ├── src │ └── main │ │ └── scala │ │ └── org │ │ └── fit │ │ └── genomics │ │ ├── PopGenomicsClassificationSpark.scala │ │ ├── PopStratClassification.scala │ │ ├── PopStratClustering.scala │ │ └── featureExtractor.scala └── target │ ├── classes │ └── META-INF │ │ ├── MANIFEST.MF │ │ └── maven │ │ └── com.deri.sels │ │ └── PopulationClustering_v2 │ │ ├── pom.properties │ │ └── pom.xml │ └── maven-archiver │ └── pom.properties └── README.md /CDEC/archs/cancer.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "c-3-32_p_c-3-64_p_fc-32", 4 | "batch_size": 50, 5 | "layers": [ 6 | { 7 | "type": "Input", 8 | "output_shape": [ 9 | 1, 10 | 127, 11 | 127 12 | ] 13 | }, 14 | { 15 | "type": "Conv2D", 16 | "num_filters": 32, 17 | "filter_size": [ 18 | 3, 19 | 3 20 | ], 21 | "non_linearity": "rectify", 22 | "conv_mode": "same" 23 | }, 24 | { 25 | "type": "MaxPool2D", 26 | "filter_size": [ 27 | 2, 28 | 2 29 | ] 30 | }, 31 | { 32 | "type": "Conv2D", 33 | "num_filters": 64, 34 | "filter_size": [ 35 | 3, 36 | 3 37 | ], 38 | "non_linearity": "rectify", 39 | "conv_mode": "same" 40 | }, 41 | { 42 | "type": "MaxPool2D", 43 | "filter_size": [ 44 | 2, 45 | 2 46 | ] 47 | }, 48 | { 49 | "type": "Dense", 50 | "num_units": 3136, 51 | "non_linearity": "rectify" 52 | }, 53 | { 54 | "type": "Dense", 55 | "num_units": 32, 56 | "non_linearity": "rectify" 57 | } 58 | ] 59 | }, 60 | { 61 | "name": "c-5-6_p_c-5-16_p_c-4-120", 62 | "use_batch_norm": 1, 63 | "batch_size": 100, 64 | "layers": [ 65 | { 66 | "type": "Input", 67 | "output_shape": [ 68 | 1, 69 | 127, 70 | 127 71 | ] 72 | }, 73 | { 74 | "type": "Conv2D", 75 | "num_filters": 50, 76 | "filter_size": [ 77 | 4, 78 | 4 79 | ], 80 | "non_linearity": "rectify" 81 | }, 82 | { 83 | "type": "MaxPool2D*", 84 | "filter_size": [ 85 | 2, 86 | 2 87 | ] 88 | }, 89 | { 90 | "type": "Conv2D", 91 | "num_filters": 50, 92 | "filter_size": [ 93 | 3, 94 | 3 95 | ], 96 | "non_linearity": "rectify" 97 | }, 98 | { 99 | "type": "MaxPool2D*", 100 | "filter_size": [ 101 | 2, 102 | 2 103 | ] 104 | }, 105 | { 106 | "type": "Conv2D", 107 | "num_filters": 120, 108 | "filter_size": [ 109 | 2, 110 | 2 111 | ], 112 | "non_linearity": "linear" 113 | } 114 | ] 115 | } 116 | 117 | ] 118 | -------------------------------------------------------------------------------- /CDEC/archs/genome.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "c-3-32_p_c-3-64_p_fc-32", 4 | "batch_size": 32, 5 | "layers": [ 6 | { 7 | "type": "Input", 8 | "output_shape": [ 9 | 1, 10 | 67, 11 | 67 12 | ] 13 | }, 14 | { 15 | "type": "Conv2D", 16 | "num_filters": 32, 17 | "filter_size": [ 18 | 3, 19 | 3 20 | ], 21 | "non_linearity": "rectify", 22 | "conv_mode": "same" 23 | }, 24 | { 25 | "type": "MaxPool2D", 26 | "filter_size": [ 27 | 2, 28 | 2 29 | ] 30 | }, 
31 | { 32 | "type": "Conv2D", 33 | "num_filters": 64, 34 | "filter_size": [ 35 | 3, 36 | 3 37 | ], 38 | "non_linearity": "rectify", 39 | "conv_mode": "same" 40 | }, 41 | { 42 | "type": "MaxPool2D", 43 | "filter_size": [ 44 | 2, 45 | 2 46 | ] 47 | }, 48 | { 49 | "type": "Dense", 50 | "num_units": 3136, 51 | "non_linearity": "rectify" 52 | }, 53 | { 54 | "type": "Dense", 55 | "num_units": 32, 56 | "non_linearity": "rectify" 57 | } 58 | ] 59 | }, 60 | { 61 | "name": "c-5-6_p_c-5-16_p_c-4-120", 62 | "use_batch_norm": 1, 63 | "batch_size": 32, 64 | "layers": [ 65 | { 66 | "type": "Input", 67 | "output_shape": [ 68 | 1, 69 | 67, 70 | 67 71 | ] 72 | }, 73 | { 74 | "type": "Conv2D", 75 | "num_filters": 50, 76 | "filter_size": [ 77 | 5, 78 | 5 79 | ], 80 | "non_linearity": "rectify" 81 | }, 82 | { 83 | "type": "MaxPool2D*", 84 | "filter_size": [ 85 | 2, 86 | 2 87 | ] 88 | }, 89 | { 90 | "type": "Conv2D", 91 | "num_filters": 50, 92 | "filter_size": [ 93 | 5, 94 | 5 95 | ], 96 | "non_linearity": "rectify" 97 | }, 98 | { 99 | "type": "MaxPool2D*", 100 | "filter_size": [ 101 | 2, 102 | 2 103 | ] 104 | }, 105 | { 106 | "type": "Conv2D", 107 | "num_filters": 32, 108 | "filter_size": [ 109 | 2, 110 | 2 111 | ], 112 | "non_linearity": "linear" 113 | } 114 | ] 115 | } 116 | ] 117 | -------------------------------------------------------------------------------- /CDEC/customlayers.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Jul 25, 2017 3 | ''' 4 | 5 | #from lasagne import layers 6 | from keras.models import Sequential 7 | from keras import backend as K 8 | from keras import layers 9 | from keras.engine.topology import Layer 10 | class Unpool2DLayer(layers.Layer): 11 | """ 12 | This layer performs unpooling over the last two dimensions 13 | of a 4D tensor. 14 | Layer borrowed from: https://swarbrickjones.wordpress.com/2015/04/29/convolutional-autoencoders-in-pythontheanolasagne/ 15 | """ 16 | 17 | def __init__(self, incoming, ds, **kwargs): 18 | self.ds = ds 19 | super(Unpool2DLayer, self).__init__(**kwargs) 20 | 21 | def compute_output_shape(self, input_shape): 22 | output_shape = list(input_shape) 23 | output_shape[1] = input_shape[1] * self.ds[0] 24 | output_shape[2] = input_shape[2] * self.ds[1] 25 | return tuple(output_shape) 26 | 27 | def call(self,incoming,**kwargs): 28 | ''' 29 | Just repeats the input element the upscaled image 30 | ''' 31 | repaxis2 = K.repeat_elements(incoming,self.ds[0], axis=1) 32 | Unpool_layer = K.repeat_elements(repaxis2,self.ds[1], axis=2) 33 | return Unpool_layer 34 | 35 | 36 | class ClusteringLayer(layers.Layer): 37 | ''' 38 | This layer gives soft assignments for the clusters based on distance from k-means based 39 | cluster centers. The weights of the layers are the cluster centers so that they can be learnt 40 | while optimizing for loss 41 | ''' 42 | def __init__(self,num_of_clusters, num_samples,latent_space_dim,**kwargs): 43 | self.num_of_clusters = num_of_clusters 44 | #self.alpha = alpha 45 | #self.cluster_centers = cluster_centers 46 | self.num_samples = num_samples 47 | self.latent_space_dim = latent_space_dim 48 | #self.intial_clusters = intial_clusters 49 | super(ClusteringLayer, self).__init__(**kwargs) 50 | def build(self,intial_clusters_shape): 51 | # Create a trainable weight variable for this layer. 
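        # W holds one row per cluster center, shape (num_of_clusters,
        # latent_space_dim). The glorot_uniform values are effectively a
        # placeholder: as in the DEC script elsewhere in this repo, the centers
        # are normally seeded from k-means via
        # set_weights([kmeans.cluster_centers_]) before the clustering loss is
        # optimized, so gradient descent only has to refine them.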
52 |         self.W = self.add_weight(name='W',
53 |                                  shape=intial_clusters_shape,
54 |                                  initializer='glorot_uniform',
55 |                                  trainable=True)
56 |         super(ClusteringLayer, self).build(intial_clusters_shape)  # Be sure to call this at the end
57 | 
58 |     def call(self, incoming, **kwargs):
59 |         return getSoftAssignments(incoming, self.W, self.num_of_clusters, self.num_samples, self.latent_space_dim)
60 | 
61 |     def compute_output_shape(self, input_shape):
62 |         return (input_shape[0], self.num_of_clusters)
63 | 
64 |     def get_config(self):
65 |         # Serialize the constructor arguments rather than the weight tensor:
66 |         # returning self.W here would break saving/loading of the layer.
67 |         config = {'num_of_clusters': self.num_of_clusters,
68 |                   'num_samples': self.num_samples,
69 |                   'latent_space_dim': self.latent_space_dim}
70 |         base_config = super(ClusteringLayer, self).get_config()
71 |         return dict(list(base_config.items()) + list(config.items()))
72 | 
73 | 
74 | def getSoftAssignments(latent_space, cluster_centers, num_clusters, num_samples, latent_space_dim):
75 |     '''
76 |     Returns cluster membership distribution for each sample
77 |     :param latent_space: latent space representation of inputs
78 |     :param cluster_centers: the coordinates of cluster centers in latent space
79 |     :param num_clusters: total number of clusters
80 |     :param latent_space_dim: dimensionality of latent space
81 |     :param num_samples: total number of input samples
82 |     :return: soft assignment based on the equation qij = (1+|zi - uj|^2)^(-1)/sum_j'((1+|zi - uj'|^2)^(-1))
83 |     '''
84 |     z_expanded = K.reshape(latent_space, shape=(num_samples, 1, latent_space_dim))
85 |     z_expanded = K.tile(z_expanded, (1, num_clusters, 1))
86 |     u_expanded = K.tile(K.expand_dims(cluster_centers, 0), [num_samples, 1, 1])  # e.g. [1, 10, 120] after expand_dims, [100, 10, 120] after tile
87 |     distances_from_cluster_centers = K.sqrt(K.sum((z_expanded - u_expanded) ** 2, axis=2))
88 |     qij_numerator = 1 + distances_from_cluster_centers ** 2
89 |     qij_numerator = 1 / qij_numerator
90 |     normalizer_q = K.sum(qij_numerator, axis=1)
91 |     normalizer_q = K.reshape(normalizer_q, (num_samples, 1))
92 |     return qij_numerator / normalizer_q
93 | 
94 | 
--------------------------------------------------------------------------------
/CDEC/keras_unpooling.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from keras.layers.convolutional import UpSampling2D
3 | from keras.layers.convolutional import MaxPooling2D
4 | 
5 | 
6 | class MaxPoolingMask2D(MaxPooling2D):
7 |     def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
8 |                  dim_ordering='default', **kwargs):
9 |         super(MaxPoolingMask2D, self).__init__(pool_size, strides, border_mode,
10 |                                                dim_ordering, **kwargs)
11 | 
12 |     def _pooling_function(self, inputs, pool_size, strides,
13 |                           border_mode, dim_ordering):
14 |         # The "pooling" output here is the boolean index mask (True where the
15 |         # input equals the max of its pooling window), not the pooled values.
16 |         pooled = K.pool2d(inputs, pool_size, strides, border_mode,
17 |                           dim_ordering, pool_mode='max')
18 |         upsampled = UpSampling2D(size=pool_size)(pooled)
19 |         indexMask = K.tf.equal(inputs, upsampled)
20 |         assert indexMask.get_shape().as_list() == inputs.get_shape().as_list()
21 |         return indexMask
22 | 
23 |     def get_output_shape_for(self, input_shape):
24 |         return input_shape
25 | 
26 | 
27 | def unpooling(inputs):
28 |     '''
29 |     do unpooling with indices, move this to separate layer if it works
30 |     1. do naive upsampling (repeat elements)
31 |     2. keep only values in mask (stored indices) and set the rest to zeros
32 |     '''
33 |     x = inputs[0]
34 |     mask = inputs[1]
35 |     mask_shape = mask.get_shape().as_list()
36 |     x_shape = x.get_shape().as_list()
37 |     # integer division: UpSampling2D needs int scale factors ('/' gives floats on Python 3)
38 |     pool_size = (mask_shape[1] // x_shape[1], mask_shape[2] // x_shape[2])
39 |     on_success = UpSampling2D(size=pool_size)(x)
40 |     on_fail = K.zeros_like(on_success)
41 |     return K.tf.where(mask, on_success, on_fail)
42 | 
43 | 
44 | def unpooling_output_shape(input_shape):
45 |     return input_shape[1]
46 | 
--------------------------------------------------------------------------------
/CDEC/main.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Jul 9, 2017
3 | '''
4 | import numpy
5 | import json
6 | from misc import DatasetHelper, evaluateKMeans, visualizeData
7 | from network import DCJC, rootLogger
8 | from copy import deepcopy
9 | import argparse
10 | 
11 | import tensorflow as tf
12 | import keras.backend as K
13 | K.set_image_dim_ordering('tf')
14 | 
15 | def testOnlyClusterInitialization(dataset_name, arch, epochs):
16 |     '''
17 |     Trains an autoencoder defined by architecture arch on the specified dataset
18 |     :param dataset_name: Name of the dataset with which the network will be trained [MNIST, COIL20]
19 |     :param arch: Architecture of the network as a dictionary. Specification for architecture can be found in readme.md
20 |     :param epochs: Number of train epochs
21 |     :return: None - (side effect) saves the latent space and params of trained network in an appropriate location in saved_params folder
22 |     '''
23 |     arch_copy = deepcopy(arch)
24 |     rootLogger.info("Loading dataset")
25 |     dataset = DatasetHelper(dataset_name)
26 |     dataset.loadDataset()
27 |     rootLogger.info("Done loading dataset")
28 |     rootLogger.info("Creating network")
29 |     dcjc = DCJC(arch_copy)
30 |     rootLogger.info("Done creating network")
31 |     rootLogger.info("Starting training")
32 |     dcjc.pretrainWithData(dataset, epochs, False)
33 | 
34 | 
35 | def testOnlyClusterImprovement(dataset_name, arch, epochs, method):
36 |     '''
37 |     Uses an initialized autoencoder and trains it along with clustering loss. Assumes that pretrained autoencoder params
38 |     are available, i.e. testOnlyClusterInitialization has been run already with the given params
39 |     :param dataset_name: Name of the dataset with which the network will be trained [MNIST, COIL20]
40 |     :param arch: Architecture of the network as a dictionary.
Specification for architecture can be found in readme.md 41 | :param epochs: Number of train epochs 42 | :param method: Can be KM or KLD - depending on whether the clustering loss is KLDivergence loss between the current KMeans distribution(Q) and a more desired one(Q^2), or if the clustering loss is just the Kmeans loss 43 | :return: None - (side effect) saves latent space and params of the trained network 44 | ''' 45 | arch_copy = deepcopy(arch) 46 | rootLogger.info("Loading dataset") 47 | dataset = DatasetHelper(dataset_name) 48 | dataset.loadDataset() 49 | rootLogger.info("Done loading dataset") 50 | rootLogger.info("Creating network") 51 | dcjc = DCJC(arch_copy) 52 | rootLogger.info("Starting cluster improvement") 53 | if method == 'KM': 54 | dcjc.doClusteringWithKMeansLoss(dataset, epochs) 55 | elif method == 'KLD': 56 | dcjc.doClusteringWithKLdivLoss(dataset, True, epochs) 57 | 58 | 59 | def testKMeans(dataset_name, archs): 60 | ''' 61 | Performs kMeans clustering, and report metrics on the output latent space produced by the networks defined in archs, 62 | with given dataset. Assumes that testOnlyClusterInitialization and testOnlyClusterImprovement have been run before 63 | this for the specified archs/datasets, as the results saved by them are used for clustering 64 | :param dataset_name: Name of dataset [MNIST, COIL20] 65 | :param archs: Architectures as a dictionary 66 | :return: None - reports the accuracy and nmi clustering metrics 67 | ''' 68 | rootLogger.info('Initial Cluster Quality Comparison') 69 | rootLogger.info(80 * '_') 70 | rootLogger.info('%-50s %8s %8s' % ('method', 'ACC', 'NMI')) 71 | rootLogger.info(80 * '_') 72 | dataset = DatasetHelper(dataset_name) 73 | dataset.loadDataset() 74 | rootLogger.info(evaluateKMeans(dataset.input_flat, dataset.labels, dataset.getClusterCount(), 'image')[0]) 75 | for arch in archs: 76 | Z = numpy.load('saved_params/' + dataset.name + '/z_' + arch['name'] + '.npy') 77 | rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), arch['name'])[0]) 78 | Z = numpy.load('saved_params/' + dataset.name + '/pc_z_' + arch['name'] + '.npy') 79 | rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), arch['name'])[0]) 80 | Z = numpy.load('saved_params/' + dataset.name + '/pc_km_z_' + arch['name'] + '.npy') 81 | rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), arch['name'])[0]) 82 | rootLogger.info(80 * '_') 83 | 84 | 85 | def visualizeLatentSpace(dataset_name, arch): 86 | ''' 87 | Plots and saves graphs for visualized images space, autoencoder latent space, and the final clustering latent space 88 | :param dataset_name: Name of dataset [MNIST, COIL20] 89 | :param arch: Architectures as a dictionary 90 | :return: None - (side effect) saved graphs in plots/ folder 91 | ''' 92 | rootLogger.info("Loading dataset") 93 | dataset = DatasetHelper(dataset_name) 94 | dataset.loadDataset() 95 | rootLogger.info("Done loading dataset") 96 | # We consider only the first 5000 point or less for better visualization 97 | max_points = min(dataset.input_flat.shape[0], 5000) 98 | # Image space 99 | visualizeData(dataset.input_flat[0:max_points], dataset.labels[0:max_points], dataset.getClusterCount(), "plots/%s/raw.png" % dataset.name) 100 | # Latent space - autoencoder 101 | Z = numpy.load('saved_params/' + dataset.name + '/z_' + arch['name'] + '.npy') 102 | visualizeData(Z[0:max_points], dataset.labels[0:max_points], dataset.getClusterCount(), "plots/%s/autoencoder.png" % dataset.name) 103 | # 
Latent space - kl div clustering network 104 | Z = numpy.load('saved_params/' + dataset.name + '/pc_z_' + arch['name'] + '.npy') 105 | visualizeData(Z[0:max_points], dataset.labels[0:max_points], dataset.getClusterCount(), "plots/%s/clustered_kld.png" % dataset.name) 106 | # Latent space - kmeans clustering network 107 | Z = numpy.load('saved_params/' + dataset.name + '/pc_km_z_' + arch['name'] + '.npy') 108 | visualizeData(Z[0:max_points], dataset.labels[0:max_points], dataset.getClusterCount(), "plots/%s/clustered_km.png" % dataset.name) 109 | 110 | 111 | if __name__ == '__main__': 112 | ''' 113 | usage: main.py [-h] -d DATASET -a ARCHITECTURE [--pretrain PRETRAIN] 114 | [--cluster CLUSTER] [--metrics METRICS] [--visualize VISUALIZE] 115 | 116 | required arguments: 117 | -d DATASET, --dataset DATASET 118 | Dataset on which autoencoder is trained [MNIST,COIL20] 119 | -a ARCHITECTURE, --architecture ARCHITECTURE 120 | Index of architecture of autoencoder in the json file 121 | (archs/) 122 | 123 | optional arguments: 124 | -h, --help show this help message and exit 125 | --pretrain PRETRAIN Pretrain the autoencoder for specified #epochs 126 | specified by architecture on specified dataset 127 | --cluster CLUSTER Refine the autoencoder for specified #epochs with 128 | clustering loss, assumes that pretraining results are 129 | available 130 | --metrics METRICS Report k-means clustering metrics on the clustered 131 | latent space, assumes pretrain and cluster based 132 | training have been performed 133 | --visualize VISUALIZE 134 | Visualize the image space and latent space, assumes 135 | pretraining and cluster based training have been 136 | performed 137 | ''' 138 | # Load architectures from the json files 139 | mnist_archs = [] 140 | coil_archs = [] 141 | cancer_archs = [] 142 | with open("archs/coil.json") as archs_file: 143 | coil_archs = json.load(archs_file) 144 | with open("archs/mnist.json") as archs_file: 145 | mnist_archs = json.load(archs_file) 146 | with open("archs/cancer.json") as archs_file: 147 | cancer_archs = json.load(archs_file) 148 | 149 | # Argument parsing 150 | parser = argparse.ArgumentParser() 151 | requiredArgs = parser.add_argument_group('required arguments') 152 | requiredArgs.add_argument("-d", "--dataset", help="Dataset on which autoencoder is trained [MNIST,COIL20]", required=True) 153 | requiredArgs.add_argument("-a", "--architecture", type=int, help="Index of architecture of autoencoder in the json file (archs/)", required=True) 154 | requiredArgs.add_argument("-m", "--method", help="type of loss KLD or KM") 155 | parser.add_argument("--pretrain", type=int, help="Pretrain the autoencoder for specified #epochs specified by architecture on specified dataset") 156 | parser.add_argument("--cluster", type=int, help="Refine the autoencoder for specified #epochs with clustering loss, assumes that pretraining results are available") 157 | parser.add_argument("--metrics", action='store_true', help="Report k-means clustering metrics on the clustered latent space, assumes pretrain and cluster based training have been performed") 158 | parser.add_argument("--visualize", action='store_true', help="Visualize the image space and latent space, assumes pretraining and cluster based training have been performed") 159 | args = parser.parse_args() 160 | 161 | # Train/Visualize as per the arguments 162 | dataset_name = args.dataset 163 | loss = args.method 164 | arch_index = args.architecture 165 | 166 | if dataset_name == 'MNIST': 167 | archs = mnist_archs 168 | elif 
dataset_name == 'COIL20':
169 |     archs = coil_archs
170 | elif dataset_name == 'cancer':
171 |     archs = cancer_archs
172 | 
173 | if args.pretrain:
174 |     testOnlyClusterInitialization(dataset_name, archs[arch_index], args.pretrain)
175 | if args.cluster and loss in ('KLD', 'KM'):
176 |     testOnlyClusterImprovement(dataset_name, archs[arch_index], args.cluster, loss)
177 | elif args.cluster:
178 |     print("Please specify the type of loss, KLD or KM, with -m")
179 | 
180 | 
181 | if args.metrics:
182 |     testKMeans(dataset_name, [archs[arch_index]])
183 | if args.visualize:
184 |     visualizeLatentSpace(dataset_name, archs[arch_index])
185 | 
--------------------------------------------------------------------------------
/CDEC/misc.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Jul 11, 2017
3 | '''
4 | 
5 | import _pickle as cPickle
6 | import gzip
7 | import os
8 | 
9 | import numpy as np
10 | from PIL import Image
11 | import matplotlib
12 | 
13 | # For plotting graphs via ssh with no display
14 | # Ref: https://stackoverflow.com/questions/2801882/generating-a-png-with-matplotlib-when-display-is-undefined
15 | matplotlib.use('Agg')
16 | 
17 | from matplotlib import pyplot as plt
18 | from numpy import float32
19 | from sklearn import metrics
20 | from sklearn.cluster import KMeans
21 | from sklearn import manifold
22 | from scipy.optimize import linear_sum_assignment  # Hungarian algorithm for cluster_acc below
23 | from sklearn import preprocessing
24 | from keras.preprocessing.image import load_img
25 | from skimage import transform
26 | 
27 | 
28 | # misc.py bundles the dataset helpers (MNIST, STL, COIL20, cancer), a
29 | # minibatch iterator, k-means evaluation metrics and a t-SNE visualizer.
30 | 
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
47 | import tensorflow as tf
48 | import keras.backend as K
49 | K.set_image_dim_ordering('tf')
50 | 
51 | 
52 | class DatasetHelper(object):
53 |     '''
54 |     Utility class for handling different datasets
55 |     '''
56 | 
57 |     def __init__(self, name):
58 |         '''
59 |         A dataset instance keeps dataset name, the input set, the flat version of input set
60 |         and the cluster labels
61 |         '''
62 |         self.name = name
63 |         if name == 'MNIST':
64 |             self.dataset = MNISTDataset()
65 |         elif name == 'STL':
66 |             self.dataset = STLDataset()
67 |         elif name == 'COIL20':
68 |             self.dataset = COIL20Dataset()
69 |         elif name == 'cancer':  # added by Sher
70 |             self.dataset = CANCERDataset()
71 | 
72 |     def loadDataset(self):
73 |         '''
74 |         Load the appropriate dataset based on the dataset name
75 |         '''
76 |         self.input, self.labels, self.input_flat = self.dataset.loadDataset()
77 | 
78 |     def getClusterCount(self):
79 |         '''
80 |         Number of clusters in the dataset - e.g 10 for mnist, 20 for coil20
81 |         '''
82 |         return self.dataset.cluster_count
83 | 
84 |     def iterate_minibatches(self, set_type, batch_size, targets=None,
shuffle=False): 85 | ''' 86 | Utility method for getting batches out of a dataset 87 | :param set_type: IMAGE - suitable input for CNNs or FLAT - suitable for DNN 88 | :param batch_size: Size of minibatches 89 | :param targets: None if the output should be same as inputs (autoencoders), otherwise takes a target array from which batches can be extracted. Must have the same order as the dataset, e.g, dataset inputs nth sample has output at target's nth element 90 | :param shuffle: If the dataset needs to be shuffled or not 91 | :return: generates a batches of size batch_size from the dataset, each batch is the pair (input, output) 92 | ''' 93 | inputs = None 94 | if set_type == 'IMAGE': 95 | inputs = self.input 96 | if targets is None: 97 | targets = self.input 98 | elif set_type == 'FLAT': 99 | inputs = self.input_flat 100 | if targets is None: 101 | targets = self.input_flat 102 | assert len(inputs) == len(targets) 103 | if shuffle: 104 | indices = np.arange(len(inputs)) 105 | np.random.shuffle(indices) 106 | for start_idx in range(0, len(inputs) - batch_size + 1, batch_size): 107 | if shuffle: 108 | excerpt = indices[start_idx:start_idx + batch_size] 109 | else: 110 | excerpt = slice(start_idx, start_idx + batch_size) 111 | yield inputs[excerpt], targets[excerpt] 112 | 113 | 114 | class MNISTDataset(object): 115 | ''' 116 | Class for reading and preparing MNIST dataset 117 | ''' 118 | 119 | def __init__(self): 120 | self.cluster_count = 10 121 | 122 | def loadDataset(self): 123 | f = gzip.open('mnist/mnist.pkl.gz', 'rb') 124 | train_set, _, test_set = cPickle.load(f,encoding='latin1') 125 | train_input, train_input_flat, train_labels = self.prepareDatasetForAutoencoder(train_set[0], train_set[1]) 126 | test_input, test_input_flat, test_labels = self.prepareDatasetForAutoencoder(test_set[0], test_set[1]) 127 | f.close() 128 | # combine test and train samples 129 | return [np.concatenate((train_input, test_input)), np.concatenate((train_labels, test_labels)), 130 | np.concatenate((train_input_flat, test_input_flat))] 131 | 132 | def prepareDatasetForAutoencoder(self, inputs, targets): 133 | ''' 134 | Returns the image, flat and labels as a tuple 135 | ''' 136 | X = inputs 137 | X = X.reshape((-1,28, 28,1)) 138 | return (X, X.reshape((-1, 28 * 28)), targets) 139 | 140 | 141 | class CANCERDataset1(object): 142 | ''' 143 | Class for reading and preparing MNIST dataset 144 | ''' 145 | 146 | def __init__(self): 147 | self.cluster_count = 5 148 | 149 | def loadDataset(self): 150 | import pandas as pd 151 | import pandas as pd 152 | 153 | trainDF = pd.read_csv('cancer/TCGA_train.csv') 154 | train_labels = trainDF[trainDF.columns[-1]] 155 | train_labels = np.asarray(train_labels) 156 | 157 | train_features = trainDF.drop(trainDF.columns[-1],axis=1) 158 | train_features = train_features.as_matrix().astype(np.float32) 159 | train_features = np.asarray([[train_features[row][col] for col in range(1,16130)] for row in range(599)]) 160 | train_features = np.asarray(train_features) 161 | 162 | testDF = pd.read_csv('cancer/TCGA_test.csv') 163 | test_labels = testDF[testDF.columns[-1]] 164 | test_labels = np.asarray(test_labels) 165 | 166 | test_features = testDF.drop(testDF.columns[-1],axis=1) 167 | test_features = test_features.as_matrix().astype(np.float32) 168 | test_features = np.asarray([[test_features[row][col] for col in range(1,16130)] for row in range(200)]) 169 | test_features = np.asarray(test_features) 170 | 171 | train_input, train_input_flat, train_labels = 
self.prepareDatasetForAutoencoder(train_features, train_labels) 172 | test_input, test_input_flat, test_labels = self.prepareDatasetForAutoencoder(test_features, test_labels) 173 | 174 | # combine test and train samples 175 | return [np.concatenate((train_input, test_input)), np.concatenate((train_labels, test_labels)), 176 | np.concatenate((train_input_flat, test_input_flat))] 177 | 178 | def prepareDatasetForAutoencoder(self, inputs, targets): 179 | ''' 180 | Returns the image, flat and labels as a tuple 181 | ''' 182 | X = inputs 183 | X = X.reshape((-1, 127, 127, 1)) 184 | return (X, X.reshape((-1, 127 * 127)), targets) 185 | 186 | class CANCERDataset(object): 187 | ''' 188 | Class for reading and preparing CANCER dataset 189 | ''' 190 | def __init__(self): 191 | self.cluster_count = 4 192 | 193 | def loadDataset(self): 194 | root ='/home/rkarim/Training_data/' 195 | features = [] 196 | features_flat = [] 197 | for rootName,dirName,fileNames in os.walk(root): 198 | if(not rootName == root): 199 | for fileName in fileNames: 200 | imgGray = load_img(rootName+'/'+fileName,color_mode='grayscale') 201 | transformed=transform.resize(np.array(imgGray),(512,512)) 202 | features += [transformed.reshape((transformed.shape[0],transformed.shape[1]))] 203 | features_flat+=[transformed.reshape((transformed.shape[0]*transformed.shape[1]*1))] 204 | features=np.stack(features) 205 | features_flat = np.stack(features_flat) 206 | labels= features 207 | return [np.concatenate((features, features),axis=0), np.concatenate((labels, labels),axis=0), 208 | np.concatenate((features_flat,features_flat),axis=0)] 209 | 210 | def loadDataset1(self): 211 | import pandas as pd 212 | import pandas as pd 213 | 214 | df = pd.read_csv('cancer/TCGA_train.csv') 215 | print(len(df.columns)) 216 | 217 | labels = df[df.columns[-1]] 218 | features = df.drop(df.columns[-1],axis=1) 219 | features = features.as_matrix().astype(np.float32) 220 | features = np.asarray([[features[row][col] for col in range(1,16130)] for row in range(599)]) 221 | print("Is there any NaN value?") 222 | print(np.count_nonzero(np.isnan(features))) 223 | 224 | min_max_scaler = preprocessing.MinMaxScaler() 225 | train_input = min_max_scaler.fit_transform(features) 226 | print(np.isfinite(train_input)) 227 | 228 | train_input_flat = train_input 229 | train_input = train_input.reshape((-1, 127, 127, 1)) 230 | train_input_flat = np.reshape(train_input, (-1, 127 * 127)) 231 | train_labels = np.asarray(labels) 232 | 233 | df2 = pd.read_csv('cancer/TCGA_test.csv') 234 | labels2 = df2[df.columns[-1]] 235 | features2 = df2.drop(df2.columns[-1],axis=1) 236 | 237 | features2 = features2.as_matrix().astype(np.float32) 238 | features2 = np.asarray([[features2[row][col] for col in range(1,16130)] for row in range(200)]) 239 | 240 | test_input = np.asarray(features2) 241 | print(np.isfinite(test_input)) 242 | 243 | test_input = min_max_scaler.fit_transform(test_input) 244 | test_input_flat = test_input 245 | test_input = test_input.reshape((-1, 127, 127, 1)) 246 | test_input_flat = np.reshape(test_input, (-1, 127 * 127)) 247 | test_labels = np.asarray(labels2) 248 | 249 | # combine test and train samples 250 | return [np.concatenate((train_input, test_input)), np.concatenate((train_labels, test_labels)), 251 | np.concatenate((train_input_flat, test_input_flat))] 252 | 253 | 254 | class STLDataset(object): 255 | ''' 256 | Class for preparing and reading the STL dataset 257 | ''' 258 | 259 | def __init__(self): 260 | self.cluster_count = 10 261 | 262 | def 
loadDataset(self):
263 |         train_x = np.fromfile('stl/train_X.bin', dtype=np.uint8)
264 |         train_y = np.fromfile('stl/train_y.bin', dtype=np.uint8)
265 |         test_x = np.fromfile('stl/test_X.bin', dtype=np.uint8)
266 |         test_y = np.fromfile('stl/test_y.bin', dtype=np.uint8)
267 |         train_input = np.reshape(train_x, (-1, 3, 96, 96))
268 |         train_labels = train_y
269 |         train_input_flat = np.reshape(train_x, (-1, 1, 3 * 96 * 96))
270 |         test_input = np.reshape(test_x, (-1, 3, 96, 96))
271 |         test_labels = test_y
272 |         test_input_flat = np.reshape(test_x, (-1, 1, 3 * 96 * 96))
273 |         return [np.concatenate((train_input, test_input)), np.concatenate((train_labels, test_labels)),
274 |                 np.concatenate((train_input_flat, test_input_flat))]
275 | 
276 | 
277 | class COIL20Dataset(object):
278 |     '''
279 |     Class for reading and preparing the COIL20Dataset
280 |     '''
281 | 
282 |     def __init__(self):
283 |         self.cluster_count = 20
284 | 
285 |     def loadDataset(self):
286 |         train_x = np.load('coil/coil_X.npy').astype(np.float32) / 256.0
287 |         train_y = np.load('coil/coil_y.npy')
288 |         train_x_flat = np.reshape(train_x, (-1, 128 * 128))
289 |         return [train_x, train_y, train_x_flat]
290 | 
291 | 
292 | def rescaleReshapeAndSaveImage(image_sample, out_filename):
293 |     '''
294 |     For saving the reconstructed output as an image
295 |     :param image_sample: output of the autoencoder
296 |     :param out_filename: filename for the saved image
297 |     :return: None (side effect) Image saved
298 |     '''
299 |     image_sample = ((image_sample - np.amin(image_sample)) / (np.amax(image_sample) - np.amin(image_sample))) * 255
300 |     image_sample = np.rint(image_sample).astype(int)
301 |     image_sample = np.clip(image_sample, a_min=0, a_max=255).astype('uint8')
302 |     img = Image.fromarray(image_sample, 'L')
303 |     img.save(out_filename)
304 | 
305 | 
306 | def cluster_acc(y_true, y_pred):
307 |     '''
308 |     Uses the Hungarian algorithm to find the best permutation mapping and then calculates the accuracy wrt
309 |     this mapping and the true labels. Implementation inspired by https://github.com/piiswrong/dec,
310 |     since scikit does not implement this metric
311 |     :param y_true: True cluster labels
312 |     :param y_pred: Predicted cluster labels
313 |     :return: accuracy score for the clustering
314 |     '''
315 |     D = int(max(y_pred.max(), y_true.max()) + 1)
316 |     w = np.zeros((D, D), dtype=np.int32)
317 |     for i in range(y_pred.size):
318 |         idx1 = int(y_pred[i])
319 |         idx2 = int(y_true[i])
320 |         w[idx1, idx2] += 1
321 |     row_ind, col_ind = linear_sum_assignment(w.max() - w)
322 |     return w[row_ind, col_ind].sum() * 1.0 / y_pred.size
323 | 
324 | 
325 | def getClusterMetricString(method_name, labels_true, labels_pred):
326 |     '''
327 |     Creates a formatted string containing the method name and acc, nmi metrics - can be used for printing
328 |     :param method_name: Name of the clustering method (just for printing)
329 |     :param labels_true: True label for each sample
330 |     :param labels_pred: Predicted label for each sample
331 |     :return: Formatted string containing metrics and method name
332 |     '''
333 |     acc = cluster_acc(labels_true, labels_pred)
334 |     nmi = metrics.normalized_mutual_info_score(labels_true, labels_pred)
335 |     return '%-50s %8.3f %8.3f' % (method_name, acc, nmi)
336 | 
337 | 
338 | def evaluateKMeans(data, labels, nclusters, method_name):
339 |     '''
340 |     Clusters data with kmeans algorithm and then returns the string containing method name and metrics, and also the evaluated cluster centers
341 |     :param data: Points that need to be clustered as a numpy array
342 |     :param labels: True
labels for the given points 343 | :param nclusters: Total number of clusters 344 | :param method_name: Name of the method from which the clustering space originates (only used for printing) 345 | :return: Formatted string containing metrics and method name, cluster centers 346 | ''' 347 | kmeans = KMeans(n_clusters=nclusters, n_init=5) 348 | kmeans.fit(data) 349 | return getClusterMetricString(method_name, labels, kmeans.labels_), kmeans.cluster_centers_ 350 | 351 | 352 | def visualizeData(Z, labels, num_clusters, title): 353 | ''' 354 | TSNE visualization of the points in latent space Z 355 | :param Z: Numpy array containing points in latent space in which clustering was performed 356 | :param labels: True labels - used for coloring points 357 | :param num_clusters: Total number of clusters 358 | :param title: filename where the plot should be saved 359 | :return: None - (side effect) saves clustering visualization plot in specified location 360 | ''' 361 | labels = labels.astype(int) 362 | tsne = manifold.TSNE(n_components=2, init='pca', random_state=0) 363 | Z_tsne = tsne.fit_transform(Z) 364 | fig = plt.figure() 365 | plt.scatter(Z_tsne[:, 0], Z_tsne[:, 1], s=2, c=labels, cmap=plt.cm.get_cmap("jet", num_clusters)) 366 | plt.colorbar(ticks=range(num_clusters)) 367 | fig.savefig(title, dpi=fig.dpi) 368 | -------------------------------------------------------------------------------- /CDEC/plots/genome/autoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rezacsedu/Convolutional-embedded-networks/f52c2a3816acbf05be28a52fe93140fe31495eb0/CDEC/plots/genome/autoencoder.png -------------------------------------------------------------------------------- /CDEC/plots/genome/clustered_kld.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rezacsedu/Convolutional-embedded-networks/f52c2a3816acbf05be28a52fe93140fe31495eb0/CDEC/plots/genome/clustered_kld.png -------------------------------------------------------------------------------- /CDEC/plots/genome/clustered_km.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rezacsedu/Convolutional-embedded-networks/f52c2a3816acbf05be28a52fe93140fe31495eb0/CDEC/plots/genome/clustered_km.png -------------------------------------------------------------------------------- /CDEC/plots/genome/raw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rezacsedu/Convolutional-embedded-networks/f52c2a3816acbf05be28a52fe93140fe31495eb0/CDEC/plots/genome/raw.png -------------------------------------------------------------------------------- /CDEC/self.trainAutoencoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rezacsedu/Convolutional-embedded-networks/f52c2a3816acbf05be28a52fe93140fe31495eb0/CDEC/self.trainAutoencoder.png -------------------------------------------------------------------------------- /DEC_GenotypeClustering_Keras/DEC_Genotype_Clustering.py: -------------------------------------------------------------------------------- 1 | from keras.datasets import mnist 2 | import numpy as np 3 | import pandas as pd 4 | np.random.seed(10) 5 | 6 | from time import time 7 | import numpy as np 8 | import keras.backend as K 9 | from keras.engine.topology import Layer, InputSpec 10 | from keras.layers import 
Dense, Input 11 | from keras.models import Model 12 | from keras.optimizers import RMSprop 13 | from keras import callbacks 14 | from keras.initializers import VarianceScaling 15 | from sklearn.cluster import KMeans 16 | from sklearn import metrics 17 | from sklearn.metrics.cluster import normalized_mutual_info_score 18 | from sklearn.metrics.cluster import adjusted_rand_score 19 | from sklearn.metrics import accuracy_score 20 | from sklearn import manifold 21 | import keras.layers.normalization as bn 22 | 23 | df1 = pd.read_csv('/home/asif/genome.csv', header=None) 24 | print(df1.head()) 25 | 26 | label = df1[0] 27 | print(label.head()) 28 | 29 | from sklearn import preprocessing 30 | le = preprocessing.LabelEncoder() 31 | lbl = le.fit(label) 32 | labelss = lbl.transform(label) 33 | labelDF = pd.DataFrame(labelss) 34 | 35 | #labelArr = 36 | print(labelDF.head()) 37 | 38 | feature = df1.drop(0, axis=1) 39 | print(feature.head()) 40 | 41 | from sklearn.preprocessing import MinMaxScaler 42 | scaler = MinMaxScaler() 43 | x1 = feature.iloc[:,1:] 44 | df_scaled = pd.DataFrame(scaler.fit_transform(x1), columns=x1.columns) 45 | df_scaled.head() 46 | 47 | y = labelss 48 | x = df_scaled.values 49 | 50 | print(y.shape) 51 | print(x.shape) 52 | 53 | print(np.isnan(np.min(x))) 54 | 55 | #y.shape 56 | #x.shape 57 | #print(x) 58 | #print(y) 59 | 60 | n_clusters = len(np.unique(y)) 61 | print(n_clusters) 62 | 63 | kmeans = KMeans(n_clusters=n_clusters, n_init=5) 64 | y_pred_kmeans = kmeans.fit_predict(x) 65 | 66 | print(accuracy_score(y, y_pred_kmeans)) 67 | 68 | dims = [x.shape[-1], 16, 16, 32, 5] 69 | init = VarianceScaling(scale=1. / 3., mode='fan_in', distribution='uniform') 70 | pretrain_optimizer = RMSprop(lr=0.001, rho=0.01, epsilon=None, decay=0.0) 71 | pretrain_epochs = 100 72 | batch_size = 32 73 | save_dir = 'result/' 74 | 75 | def autoencoder(dims, act='relu', init='glorot_uniform'): 76 | """ 77 | Fully connected auto-encoder model, symmetric. 78 | Arguments: 79 | dims: list of number of units in each layer of encoder. dims[0] is input dim, dims[-1] is units in hidden layer. 80 | The decoder is symmetric with encoder. 
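        (With the dims list used below, [x.shape[-1], 16, 16, 32, 5], this gives four
        Dense layers encoding down to a 5-unit embedding and four mirrored Dense
        layers decoding back to the input width.)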
So number of layers of the auto-encoder is 2*len(dims)-1
81 |         act: activation, not applied to Input, Hidden and Output layers
82 |     return:
83 |         (ae_model, encoder_model), Model of autoencoder and model of encoder
84 |     """
85 |     n_stacks = len(dims) - 1
86 |     # input
87 |     input_img = Input(shape=(dims[0],), name='input')
88 |     x = input_img
89 |     # internal layers in encoder
90 |     for i in range(n_stacks-1):
91 |         x = Dense(dims[i + 1], activation=act, kernel_initializer=init, name='encoder_%d' % i)(x)
92 |         #bn.BatchNormalization(momentum=0.9, epsilon=1e-06, weights=None)
93 | 
94 |     # hidden layer
95 |     encoded = Dense(dims[-1], kernel_initializer=init, name='encoder_%d' % (n_stacks - 1))(x)  # hidden layer, features are extracted from here
96 |     bn.BatchNormalization(momentum=0.9, epsilon=1e-06, weights=None)  # NB: instantiated but never wired into the graph, so it has no effect
97 | 
98 |     x = encoded
99 |     # internal layers in decoder
100 |     for i in range(n_stacks-1, 0, -1):
101 |         x = Dense(dims[i], activation=act, kernel_initializer=init, name='decoder_%d' % i)(x)
102 | 
103 |     # output
104 |     x = Dense(dims[0], kernel_initializer=init, name='decoder_0')(x)
105 |     bn.BatchNormalization(momentum=0.9, epsilon=1e-06, weights=None)  # NB: likewise inert
106 | 
107 |     decoded = x
108 |     return Model(inputs=input_img, outputs=decoded, name='AE'), Model(inputs=input_img, outputs=encoded, name='encoder')
109 | 
110 | autoencoder, encoder = autoencoder(dims, init=init)
111 | autoencoder.compile(optimizer=pretrain_optimizer, loss='mse')
112 | autoencoder.fit(x, x, batch_size=batch_size, epochs=pretrain_epochs)  #, callbacks=cb)
113 | autoencoder.save_weights(save_dir + '/ThesisDEC_weights.h5')
114 | 
115 | autoencoder.load_weights(save_dir + '/ThesisDEC_weights.h5')
116 | 
117 | 
118 | 
119 | class ClusteringLayer(Layer):
120 |     """
121 |     Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
122 |     sample belonging to each cluster. The probability is calculated with student's t-distribution.
123 | 
124 |     # Example
125 |     ```
126 |     model.add(ClusteringLayer(n_clusters=10))
127 |     ```
128 |     # Arguments
129 |         n_clusters: number of clusters.
130 |         weights: list of Numpy array with shape `(n_clusters, n_features)` which represents the initial cluster centers.
131 |         alpha: degrees of freedom parameter in Student's t-distribution. Default to 1.0.
132 |     # Input shape
133 |         2D tensor with shape: `(n_samples, n_features)`.
134 |     # Output shape
135 |         2D tensor with shape: `(n_samples, n_clusters)`.
136 |     """
137 | 
138 |     def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
139 |         if 'input_shape' not in kwargs and 'input_dim' in kwargs:
140 |             kwargs['input_shape'] = (kwargs.pop('input_dim'),)
141 |         super(ClusteringLayer, self).__init__(**kwargs)
142 |         self.n_clusters = n_clusters
143 |         self.alpha = alpha
144 |         self.initial_weights = weights
145 |         self.input_spec = InputSpec(ndim=2)
146 | 
147 |     def build(self, input_shape):
148 |         assert len(input_shape) == 2
149 |         input_dim = input_shape[1]
150 |         self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
151 |         self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
152 |         if self.initial_weights is not None:
153 |             self.set_weights(self.initial_weights)
154 |             del self.initial_weights
155 |         self.built = True
156 | 
157 |     def call(self, inputs, **kwargs):
158 |         """ student t-distribution, as same as used in t-SNE algorithm.
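        (With the default alpha=1 this kernel reduces to the heavy-tailed
        Cauchy distribution.)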
159 | Measure the similarity between embedded point z_i and centroid µ_j. 160 | q_ij = 1/(1+dist(x_i, µ_j)^2), then normalize it. 161 | q_ij can be interpreted as the probability of assigning sample i to cluster j. 162 | (i.e., a soft assignment) 163 | Arguments: 164 | inputs: the variable containing data, shape=(n_samples, n_features) 165 | Return: 166 | q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters) 167 | """ 168 | q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha)) 169 | q **= (self.alpha + 1.0) / 2.0 170 | q = K.transpose(K.transpose(q) / K.sum(q, axis=1)) # Make sure each sample's 10 values add up to 1. 171 | return q 172 | 173 | def compute_output_shape(self, input_shape): 174 | assert input_shape and len(input_shape) == 2 175 | return input_shape[0], self.n_clusters 176 | 177 | def get_config(self): 178 | config = {'n_clusters': self.n_clusters} 179 | base_config = super(ClusteringLayer, self).get_config() 180 | return dict(list(base_config.items()) + list(config.items())) 181 | 182 | clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output) 183 | model = Model(inputs=encoder.input, outputs=clustering_layer) 184 | model.compile(optimizer=RMSprop(lr=0.01, rho=0.9, epsilon=None, decay=0.0), loss='kld') 185 | 186 | kmeans = KMeans(n_clusters=n_clusters, n_init=n_clusters) 187 | y_pred = kmeans.fit_predict(encoder.predict(x)) 188 | 189 | y_pred_last = np.copy(y_pred) 190 | 191 | model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_]) 192 | 193 | # computing an auxiliary target distribution 194 | def target_distribution(q): 195 | weight = q ** 2 / q.sum(0) 196 | return (weight.T / weight.sum(1)).T 197 | 198 | loss = 0 199 | index = 0 200 | maxiter = 20000 201 | update_interval = 500 202 | index_array = np.arange(x.shape[0]) 203 | 204 | tol = 0.001 # tolerance threshold to stop training 205 | 206 | for ite in range(int(maxiter)): 207 | if ite % update_interval == 0: 208 | q = model.predict(x, verbose=0) 209 | p = target_distribution(q) # update the auxiliary target distribution p 210 | 211 | # evaluate the clustering performance 212 | y_pred = q.argmax(1) 213 | if y is not None: 214 | acc = np.round(accuracy_score(y, y_pred), 5) 215 | nmi = np.round(normalized_mutual_info_score(y, y_pred), 5) 216 | ari = np.round(adjusted_rand_score(y, y_pred), 5) 217 | loss = np.round(loss, 5) 218 | print('Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' % (ite, acc, nmi, ari), ' ; loss=', loss) 219 | 220 | # check stop criterion - model convergence 221 | delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0] 222 | y_pred_last = np.copy(y_pred) 223 | if ite > 0 and delta_label < tol: 224 | print('delta_label ', delta_label, '< tol ', tol) 225 | print('Reached tolerance threshold. Stopping training.') 226 | break 227 | idx = index_array[index * batch_size: min((index+1) * batch_size, x.shape[0])] 228 | model.train_on_batch(x=x[idx], y=p[idx]) 229 | index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0 230 | 231 | model.save_weights(save_dir + '/Thesis_DEC_model_final.h5') 232 | model.load_weights(save_dir + '/Thesis_DEC_model_final.h5') 233 | 234 | # Eval. 
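# q holds the soft assignments produced by the clustering layer; sharpening
# them as p_ij = (q_ij**2 / f_j) / sum_j'(q_ij'**2 / f_j'), with f_j = sum_i q_ij,
# is exactly what target_distribution() above computes: confident assignments
# get emphasized while each cluster is normalized by its soft frequency. The
# hard label per sample is then the argmax over the columns of q.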
235 | q = model.predict(x, verbose=0) 236 | p = target_distribution(q) # update the auxiliary target distribution p 237 | 238 | # evaluate the clustering performance 239 | y_pred = q.argmax(1) 240 | if y is not None: 241 | acc = np.round(accuracy_score(y, y_pred), 5) 242 | nmi = np.round(normalized_mutual_info_score(y, y_pred), 5) 243 | ari = np.round(adjusted_rand_score(y, y_pred), 5) 244 | loss = np.round(loss, 5) 245 | print('Acc = %.5f, nmi = %.5f, ari = %.5f' % (acc, nmi, ari), ' ; loss=', loss) 246 | 247 | import seaborn as sns 248 | import sklearn.metrics 249 | import matplotlib.pyplot as plt 250 | sns.set(font_scale=1.5) 251 | confusion_matrix = sklearn.metrics.confusion_matrix(y, y_pred) 252 | 253 | plt.figure(figsize=(12, 11)) 254 | sns.heatmap(confusion_matrix, annot=True, fmt="d", annot_kws={"size": 15}); 255 | plt.title("Confusion matrix", fontsize=25) 256 | plt.ylabel('True label', fontsize=25) 257 | plt.xlabel('Clustering label', fontsize=25) 258 | plt.show() 259 | 260 | def visualizeData(Z, labels, num_clusters, title): 261 | ''' 262 | TSNE visualization of the points in latent space Z 263 | :param Z: Numpy array containing points in latent space in which clustering was performed 264 | :param labels: True labels - used for coloring points 265 | :param num_clusters: Total number of clusters 266 | :param title: filename where the plot should be saved 267 | :return: None - (side effect) saves clustering visualization plot in specified location 268 | ''' 269 | labels = labels.astype(int) 270 | tsne = manifold.TSNE(n_components=2, init='pca', random_state=0) 271 | Z_tsne = tsne.fit_transform(Z) 272 | fig = plt.figure() 273 | plt.scatter(Z_tsne[:, 0], Z_tsne[:, 1], s=2, c=labels, cmap=plt.cm.get_cmap("jet", num_clusters)) 274 | plt.colorbar(ticks=range(num_clusters)) 275 | fig.savefig(title, dpi=fig.dpi) 276 | 277 | from sklearn.manifold import TSNE 278 | import seaborn as sn 279 | import matplotlib.pyplot as plt 280 | 281 | data_1000 = x[0:1000,:] 282 | labels_1000 = y[0:1000] 283 | 284 | model = TSNE(n_components = 2, random_state = 0) 285 | 286 | tsne_data = model.fit_transform(x) 287 | #y_pred 288 | 289 | tsne_data = np.vstack((tsne_data.T, y)).T 290 | tsne_df = pd.DataFrame(data= tsne_data, columns= ("Dim_1","Dim_2","label")) 291 | 292 | sn.FacetGrid(tsne_df, hue= "label", size = 6).map(plt.scatter, 'Dim_1', 'Dim_2').add_legend() 293 | plt.show() 294 | 295 | visualizeData(x, y, n_clusters, "t_SNE_graph_original.png") 296 | visualizeData(x, y_pred, n_clusters, "t_SNE_graph_predicted.png") 297 | -------------------------------------------------------------------------------- /DEC_GenotypeClustering_Keras/LSTM_EthnicityPrediction.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import glob 4 | import numpy as np 5 | import sys 6 | from time import time 7 | 8 | from sklearn.preprocessing import LabelEncoder 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.metrics import precision_recall_fscore_support 11 | from sklearn import metrics 12 | 13 | from keras.models import Sequential 14 | from keras.layers import LSTM, Dense, Dropout, Activation, Flatten 15 | from keras.callbacks import TensorBoard 16 | from keras.optimizers import RMSprop 17 | from keras.regularizers import l2 18 | from keras.callbacks import EarlyStopping 19 | from sklearn.metrics import precision_recall_fscore_support, roc_auc_score 20 | from keras.utils import np_utils 21 | 22 | from keras import backend as K 
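# 'tf' dimension ordering corresponds to channels_last; this setter exists only
# in older Keras releases (newer versions use K.set_image_data_format instead).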
23 | K.set_image_dim_ordering('tf')
24 | import matplotlib.pyplot as plt
25 | import itertools
26 | 
27 | import numpy as np
28 | import pandas as pd
29 | np.random.seed(10)
30 | 
31 | from time import time
32 | import numpy as np
33 | import keras.backend as K
34 | from keras.engine.topology import Layer, InputSpec
35 | from keras.layers import Dense, Input
36 | from keras.models import Model
37 | from keras.optimizers import RMSprop
38 | from keras import callbacks
39 | from keras.initializers import VarianceScaling
40 | from sklearn.cluster import KMeans
41 | from sklearn import metrics
42 | from sklearn.metrics.cluster import normalized_mutual_info_score
43 | from sklearn.metrics.cluster import adjusted_rand_score
44 | from sklearn.metrics import accuracy_score
45 | from sklearn import manifold
46 | import keras.layers.normalization as bn
47 | 
48 | from sklearn.metrics import confusion_matrix
49 | 
50 | df1 = pd.read_csv('/home/asif/genome.csv', header=None)
51 | print(df1.head())
52 | 
53 | label = df1[0]
54 | print(label.head())
55 | 
56 | from sklearn import preprocessing
57 | le = preprocessing.LabelEncoder()
58 | lbl = le.fit(label)
59 | labelss = lbl.transform(label)
60 | labelDF = pd.DataFrame(labelss)
61 | 
62 | #labelArr =
63 | print(labelDF.head())
64 | 
65 | feature = df1.drop(0, axis=1)
66 | print(feature.head())
67 | 
68 | from sklearn.preprocessing import MinMaxScaler
69 | scaler = MinMaxScaler()
70 | x1 = feature.iloc[:,1:]
71 | df_scaled = pd.DataFrame(scaler.fit_transform(x1), columns=x1.columns)
72 | df_scaled.head()
73 | 
74 | y = labelss
75 | x = df_scaled.values
76 | 
77 | features = x
78 | labels = y
79 | 
80 | def prepare_test_train_valid():
81 |     # Hold out 25% of the data, then split that holdout into test and validation
82 |     train_x, test_x, train_y, test_y = train_test_split(features, labels, test_size=0.25, random_state=100)
83 |     test_x, valid_x, test_y, valid_y = train_test_split(test_x, test_y, test_size=0.50, random_state=100)  # split the held-out 25% so neither half overlaps the training set
84 | 
85 |     return train_x, test_x, train_y, test_y, valid_x, valid_y
86 | 
87 | def one_hot_encode(labels):
88 |     n_labels = len(labels)
89 |     n_unique_labels = len(np.unique(labels))
90 |     one_hot_encode = np.zeros((n_labels, n_unique_labels))
91 |     one_hot_encode[np.arange(n_labels), labels] = 1
92 |     return one_hot_encode
93 | 
94 | labels = one_hot_encode(labels)
95 | 
96 | # Extract feature
97 | train_x, test_x, train_y, test_y, valid_x, valid_y = prepare_test_train_valid()
98 | 
99 | print('X_train shape:', train_x.shape)
100 | print('Y_train shape:', train_y.shape)
101 | 
102 | num_classes = 5
103 | data_dim = 52
104 | timesteps = 1
105 | 
106 | train_x = np.reshape(train_x, (train_x.shape[0], 1, train_x.shape[1]))
107 | test_x = np.reshape(test_x, (test_x.shape[0], 1, test_x.shape[1]))
108 | valid_x = np.reshape(valid_x, (valid_x.shape[0], 1, valid_x.shape[1]))
109 | 
110 | def plot_confusion_matrix(cm, classes,
111 |                           normalize=False,
112 |                           title='Confusion matrix',
113 |                           cmap=plt.cm.Blues):
114 |     """
115 |     This function prints and plots the confusion matrix.
116 |     Normalization can be applied by setting `normalize=True`.
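    A minimal call, mirroring how this script uses it further below
    (normalize=True shown here just to exercise the normalized branch):

        cnf_matrix = confusion_matrix(y_true, y_pred)
        plt.figure()
        plot_confusion_matrix(cnf_matrix, classes=["FIN", "GBR", "ASW", "CHB", "CLM"],
                              normalize=True)
        plt.show()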
117 | """ 118 | if normalize: 119 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 120 | print("Normalized confusion matrix") 121 | else: 122 | print('Confusion matrix, without normalization') 123 | 124 | print(cm) 125 | 126 | plt.imshow(cm, interpolation='nearest', cmap=cmap) 127 | plt.title(title) 128 | plt.colorbar() 129 | tick_marks = np.arange(len(classes)) 130 | plt.xticks(tick_marks, classes, rotation=45) 131 | plt.yticks(tick_marks, classes) 132 | 133 | fmt = '.2f' if normalize else 'd' 134 | thresh = cm.max() / 2. 135 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 136 | plt.text(j, i, format(cm[i, j], fmt), 137 | horizontalalignment="center", 138 | color="white" if cm[i, j] > thresh else "black") 139 | 140 | plt.tight_layout() 141 | plt.ylabel('True label') 142 | plt.xlabel('Predicted label') 143 | 144 | def build_LSTM(): #OK 145 | # expected input data shape: (batch_size, timesteps, data_dim) 146 | model = Sequential() 147 | model.add(LSTM(32, return_sequences=True, input_shape=(timesteps, data_dim))) 148 | 149 | model.add(LSTM(24, return_sequences=True)) 150 | 151 | #model.add(Dropout(0.2)) 152 | model.add(LSTM(16, return_sequences=True)) 153 | model.add(Dropout(0.2)) 154 | 155 | # apply softmax to output 156 | model.add(Flatten()) 157 | model.add(Dense(num_classes, activation='softmax')) 158 | return model 159 | 160 | def model_train_evaluate(model, number_epoch): 161 | sgd = RMSprop(lr=0.001, rho=0.01, epsilon=None, decay=0.0) 162 | 163 | # a stopping function should the validation loss stop improving 164 | earlystop = EarlyStopping(monitor='val_loss', patience=1, verbose=0, mode='auto') 165 | 166 | #if model in ['RNN']: 167 | rnn_model = build_LSTM() #OK 168 | rnn_model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=sgd) 169 | tensorboardRNN = TensorBoard(log_dir="RNN_logs/{}".format(time())) 170 | rnn_model.fit(train_x, train_y, validation_data=(valid_x, valid_y), callbacks=[tensorboardRNN], batch_size=128, epochs=int(number_epoch)) 171 | print(rnn_model.summary()) 172 | 173 | y_prob = rnn_model.predict(test_x) 174 | y_pred = y_prob.argmax(axis=-1) 175 | y_true = np.argmax(test_y, 1) 176 | 177 | roc = roc_auc_score(test_y, y_prob) 178 | print ("ROC:", round(roc,3)) 179 | 180 | # evaluate the model 181 | score, accuracy = rnn_model.evaluate(test_x, test_y, batch_size=32) 182 | print("\nAccuracy = {:.2f}".format(accuracy)) 183 | 184 | # the F-score gives a similiar value to the accuracy score, but useful for cross-checking 185 | p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro') 186 | print ("F-Score:", round(f,2)) 187 | print ("Precision:", round(p,2)) 188 | print ("Recall:", round(r,2)) 189 | print ("F-Score:", round(f,2)) 190 | 191 | # Compute confusion matrix 192 | cnf_matrix = confusion_matrix(y_true, y_pred) 193 | np.set_printoptions(precision=2) 194 | 195 | class_names = ["FIN", "GBR", "ASW", "CHB", "CLM"] 196 | 197 | # Plot non-normalized confusion matrix 198 | plt.figure() 199 | plot_confusion_matrix(cnf_matrix, classes=class_names, title='Confusion matrix: true vs predicted label') 200 | plt.show() 201 | 202 | model = build_LSTM() 203 | model_train_evaluate(model, 1000) 204 | import gc; gc.collect() 205 | -------------------------------------------------------------------------------- /DEC_GenotypeClustering_Keras/genome.csv: -------------------------------------------------------------------------------- 1 | 
FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 2 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0 3 | CHB,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 4 | CHB,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 5 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 6 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 7 | CHB,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 8 | ASW,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0 9 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1 10 | ASW,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0 11 | ASW,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0 12 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0 13 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 14 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 15 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 16 | CHB,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 17 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 18 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 19 | GBR,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 20 | GBR,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0 21 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 22 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 23 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 24 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 25 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 26 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 27 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 28 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 29 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 30 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 31 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 32 
| CLM,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 33 | CHB,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 34 | CLM,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 35 | CLM,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 36 | FIN,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 37 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1 38 | FIN,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 39 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 40 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0 41 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 42 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 43 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 44 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 45 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 46 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 47 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 48 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 49 | ASW,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0 50 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 51 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 52 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0 53 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 54 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 55 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 56 | CLM,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 57 | CHB,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 58 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 59 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 60 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 61 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0 62 | 
GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 63 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0 64 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 65 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0 66 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 67 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 68 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0 69 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 70 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 71 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 72 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 73 | CHB,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 74 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 75 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 76 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0 77 | ASW,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 78 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0 79 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 80 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0 81 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 82 | CHB,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 83 | CLM,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0 84 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 85 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0 86 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 87 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 88 | FIN,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 89 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 90 | ASW,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0 91 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0 92 | 
CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 93 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 94 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0 95 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 96 | FIN,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 97 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0 98 | FIN,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 99 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0 100 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 101 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 102 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 103 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 104 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 105 | FIN,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 106 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 107 | GBR,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0 108 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1 109 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 110 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 111 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 112 | FIN,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 113 | CHB,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 114 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 115 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 116 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 117 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0 118 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0 119 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 120 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 121 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 122 | 
CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 123 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 124 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 125 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 126 | CLM,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 127 | ASW,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 128 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 129 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 130 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 131 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 132 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1 133 | CLM,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 134 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 135 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0 136 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 137 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 138 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 139 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1 140 | ASW,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1 141 | FIN,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 142 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 143 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 144 | CLM,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 145 | FIN,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 146 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 147 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 148 | CLM,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1 149 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 150 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 151 | ASW,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0 152 | 
CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 153 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 154 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 155 | GBR,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 156 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0 157 | CLM,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1 158 | CHB,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 159 | ASW,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 160 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 161 | CLM,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 162 | CLM,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0 163 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 164 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0 165 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 166 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 167 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 168 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 169 | CLM,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 170 | ASW,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0 171 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 172 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 173 | ASW,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 174 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0 175 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 176 | CLM,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 177 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 178 | CHB,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 179 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 180 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 181 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 182 | 
CHB,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 183 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 184 | ASW,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 185 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 186 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 187 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 188 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1 189 | GBR,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0 190 | FIN,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 191 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 192 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0 193 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0 194 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 195 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 196 | CLM,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 197 | CHB,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 198 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0 199 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 200 | -------------------------------------------------------------------------------- /PopulationClustering_v2/output_1.txt: -------------------------------------------------------------------------------- 1 | Found 199 samples -------------------------------------------------------------------------------- /PopulationClustering_v2/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.deri.sels 5 | PopulationClustering_v2 6 | 0.1-SNAPSHOT 7 | 8 | 2.2.1 9 | 2.11.8 10 | 3.16.0.2 11 | 2.2.6 12 | 0.23.0 13 | 14 | 15 | 16 | scala-tools.org 17 | Scala-tools Maven2 Repository 18 | http://scala-tools.org/repo-releases 19 | 20 | 21 | 22 | 23 | org.bdgenomics.adam 24 | adam-core_2.11 25 | 0.23.0 26 | 27 | 28 | 29 | ai.h2o 30 | sparkling-water-core_2.11 31 | 2.2.6 32 | 33 | 34 | ai.h2o 35 | sparkling-water-examples_2.11 36 | 2.2.6 37 | 38 | 39 | org.apache.directory.studio 40 | org.apache.commons.io 41 | 2.4 42 | 43 | 44 | org.apache.spark 45 | spark-core_2.11 46 | ${spark.version} 47 | 48 | 49 | 50 | ai.h2o 51 | h2o-core 52 | ${h2o.version} 53 | 54 | 55 | ai.h2o 56 | h2o-scala_2.11 57 | ${h2o.version} 58 | 59 | 60 | ai.h2o 61 | h2o-algos 62 | ${h2o.version} 63 | 64 | 65 | ai.h2o 66 | h2o-app 67 | ${h2o.version} 68 | 69 | 70 | ai.h2o 71 | h2o-persist-hdfs 72 | ${h2o.version} 73 | 74 | 75 | scala-library 76 | org.scala-lang 77 | ${scala.version} 78 | 79 | 80 | ai.h2o 81 | google-analytics-java 82 | 
1.1.2-H2O-CUSTOM 83 | 84 | 85 | joda-time 86 | joda-time 87 | 2.9.9 88 | 89 | 90 | 91 | 92 | snapshots-repo 93 | https://oss.sonatype.org/content/repositories/snapshots 94 | 95 | false 96 | 97 | 98 | true 99 | daily 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | org.apache.maven.plugins 108 | maven-eclipse-plugin 109 | 2.9 110 | 111 | true 112 | false 113 | 114 | 115 | 116 | 117 | org.apache.maven.plugins 118 | maven-compiler-plugin 119 | 3.5.1 120 | 121 | ${jdk.version} 122 | ${jdk.version} 123 | 124 | 125 | 126 | maven-shade-plugin 127 | 2.4.3 128 | 129 | 130 | package 131 | 132 | shade 133 | 134 | 135 | false 136 | 137 | 138 | 139 | *:* 140 | 141 | META-INF/*.SF 142 | META-INF/*.DSA 143 | META-INF/*.RSA 144 | 145 | 146 | 147 | 148 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | org.apache.maven.plugins 158 | maven-assembly-plugin 159 | 2.4.1 160 | 161 | 162 | 163 | jar-with-dependencies 164 | 165 | 166 | 167 | 168 | org.fit.genomics.PopStratClassification 169 | 170 | 171 | 172 | 173 | oozie.launcher.mapreduce.job.user.classpath.first 174 | true 175 | 176 | 177 | 178 | 179 | 180 | make-assembly 181 | 182 | package 183 | 184 | single 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | -------------------------------------------------------------------------------- /PopulationClustering_v2/results/train.csv/DEC_Genotype_Clustering.py: -------------------------------------------------------------------------------- 1 | from keras.datasets import mnist 2 | import numpy as np 3 | import pandas as pd 4 | np.random.seed(10) 5 | 6 | from time import time 7 | import numpy as np 8 | import keras.backend as K 9 | from keras.engine.topology import Layer, InputSpec 10 | from keras.layers import Dense, Input 11 | from keras.models import Model 12 | from keras.optimizers import RMSprop 13 | from keras import callbacks 14 | from keras.initializers import VarianceScaling 15 | from sklearn.cluster import KMeans 16 | from sklearn import metrics 17 | from sklearn.metrics.cluster import normalized_mutual_info_score 18 | from sklearn.metrics.cluster import adjusted_rand_score 19 | from sklearn.metrics import accuracy_score 20 | from sklearn import manifold 21 | import keras.layers.normalization as bn 22 | 23 | df1 = pd.read_csv('/home/asif/genome.csv', header=None) 24 | print(df1.head()) 25 | 26 | label = df1[0] 27 | print(label.head()) 28 | 29 | from sklearn import preprocessing 30 | le = preprocessing.LabelEncoder() 31 | lbl = le.fit(label) 32 | labelss = lbl.transform(label) 33 | labelDF = pd.DataFrame(labelss) 34 | 35 | #labelArr = 36 | print(labelDF.head()) 37 | 38 | feature = df1.drop(0, axis=1) 39 | print(feature.head()) 40 | 41 | from sklearn.preprocessing import MinMaxScaler 42 | scaler = MinMaxScaler() 43 | x1 = feature.iloc[:,1:] 44 | df_scaled = pd.DataFrame(scaler.fit_transform(x1), columns=x1.columns) 45 | df_scaled.head() 46 | 47 | y = labelss 48 | x = df_scaled.values 49 | 50 | print(y.shape) 51 | print(x.shape) 52 | 53 | print(np.isnan(np.min(x))) 54 | 55 | #y.shape 56 | #x.shape 57 | #print(x) 58 | #print(y) 59 | 60 | n_clusters = len(np.unique(y)) 61 | print(n_clusters) 62 | 63 | kmeans = KMeans(n_clusters=n_clusters, n_init=5) 64 | y_pred_kmeans = kmeans.fit_predict(x) 65 | 66 | print(accuracy_score(y, y_pred_kmeans)) 67 | 68 | dims = [x.shape[-1], 16, 16, 32, 5] 69 | init = VarianceScaling(scale=1. 
/ 3., mode='fan_in', distribution='uniform') 70 | pretrain_optimizer = RMSprop(lr=0.001, rho=0.01, epsilon=None, decay=0.0) 71 | pretrain_epochs = 100 72 | batch_size = 32 73 | save_dir = 'result/' 74 | 75 | def autoencoder(dims, act='relu', init='glorot_uniform'): 76 | """ 77 | Fully connected auto-encoder model, symmetric. 78 | Arguments: 79 | dims: list of number of units in each layer of encoder. dims[0] is input dim, dims[-1] is units in hidden layer. 80 | The decoder is symmetric with encoder. So number of layers of the auto-encoder is 2*len(dims)-1 81 | act: activation, not applied to Input, Hidden and Output layers 82 | return: 83 | (ae_model, encoder_model), Model of autoencoder and model of encoder 84 | """ 85 | n_stacks = len(dims) - 1 86 | # input 87 | input_img = Input(shape=(dims[0],), name='input') 88 | x = input_img 89 | # internal layers in encoder 90 | for i in range(n_stacks-1): 91 | x = Dense(dims[i + 1], activation=act, kernel_initializer=init, name='encoder_%d' % i)(x) 92 | #bn.BatchNormalization(momentum=0.9, epsilon=1e-06, weights=None) 93 | 94 | # hidden layer 95 | encoded = Dense(dims[-1], kernel_initializer=init, name='encoder_%d' % (n_stacks - 1))(x) # hidden layer, features are extracted from here 96 | #bn.BatchNormalization(momentum=0.9, epsilon=1e-06, weights=None) # no-op as written: the layer is constructed but never applied to a tensor 97 | 98 | x = encoded 99 | # internal layers in decoder 100 | for i in range(n_stacks-1, 0, -1): 101 | x = Dense(dims[i], activation=act, kernel_initializer=init, name='decoder_%d' % i)(x) 102 | 103 | # output 104 | x = Dense(dims[0], kernel_initializer=init, name='decoder_0')(x) 105 | #bn.BatchNormalization(momentum=0.9, epsilon=1e-06, weights=None) # no-op as written: the layer is constructed but never applied to a tensor 106 | 107 | decoded = x 108 | return Model(inputs=input_img, outputs=decoded, name='AE'), Model(inputs=input_img, outputs=encoded, name='encoder') 109 | 110 | ae, encoder = autoencoder(dims, init=init) # distinct name, so the autoencoder() function is not shadowed 111 | ae.compile(optimizer=pretrain_optimizer, loss='mse') 112 | ae.fit(x, x, batch_size=batch_size, epochs=pretrain_epochs) #, callbacks=cb) 113 | ae.save_weights(save_dir + '/ThesisDEC_weights.h5') 114 | 115 | 116 | ae.load_weights(save_dir + '/ThesisDEC_weights.h5') 117 | 118 | 119 | class ClusteringLayer(Layer): 120 | """ 121 | Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the 122 | sample belonging to each cluster. The probability is calculated with Student's t-distribution. 123 | 124 | # Example 125 | ``` 126 | model.add(ClusteringLayer(n_clusters=10)) 127 | ``` 128 | # Arguments 129 | n_clusters: number of clusters. 130 | weights: list of Numpy array with shape `(n_clusters, n_features)` which represents the initial cluster centers. 131 | alpha: degrees of freedom parameter in Student's t-distribution. Default to 1.0. 132 | # Input shape 133 | 2D tensor with shape: `(n_samples, n_features)`. 134 | # Output shape 135 | 2D tensor with shape: `(n_samples, n_clusters)`.
136 | """ 137 | 138 | def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs): 139 | if 'input_shape' not in kwargs and 'input_dim' in kwargs: 140 | kwargs['input_shape'] = (kwargs.pop('input_dim'),) 141 | super(ClusteringLayer, self).__init__(**kwargs) 142 | self.n_clusters = n_clusters 143 | self.alpha = alpha 144 | self.initial_weights = weights 145 | self.input_spec = InputSpec(ndim=2) 146 | 147 | def build(self, input_shape): 148 | assert len(input_shape) == 2 149 | input_dim = input_shape[1] 150 | self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim)) 151 | self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters') 152 | if self.initial_weights is not None: 153 | self.set_weights(self.initial_weights) 154 | del self.initial_weights 155 | self.built = True 156 | 157 | def call(self, inputs, **kwargs): 158 | """ Student's t-distribution, the same as used in the t-SNE algorithm. 159 | Measure the similarity between embedded point z_i and centroid µ_j. 160 | q_ij = 1/(1+dist(x_i, µ_j)^2), then normalize it. 161 | q_ij can be interpreted as the probability of assigning sample i to cluster j. 162 | (i.e., a soft assignment) 163 | Arguments: 164 | inputs: the variable containing data, shape=(n_samples, n_features) 165 | Return: 166 | q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters) 167 | """ 168 | q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha)) 169 | q **= (self.alpha + 1.0) / 2.0 170 | q = K.transpose(K.transpose(q) / K.sum(q, axis=1)) # normalize so that each sample's soft assignments add up to 1 171 | return q 172 | 173 | def compute_output_shape(self, input_shape): 174 | assert input_shape and len(input_shape) == 2 175 | return input_shape[0], self.n_clusters 176 | 177 | def get_config(self): 178 | config = {'n_clusters': self.n_clusters} 179 | base_config = super(ClusteringLayer, self).get_config() 180 | return dict(list(base_config.items()) + list(config.items())) 181 | 182 | clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output) 183 | model = Model(inputs=encoder.input, outputs=clustering_layer) 184 | model.compile(optimizer=RMSprop(lr=0.01, rho=0.9, epsilon=None, decay=0.0), loss='kld') 185 | 186 | kmeans = KMeans(n_clusters=n_clusters, n_init=n_clusters) 187 | y_pred = kmeans.fit_predict(encoder.predict(x)) 188 | 189 | y_pred_last = np.copy(y_pred) 190 | 191 | model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_]) 192 | 193 | # computing an auxiliary target distribution 194 | def target_distribution(q): 195 | weight = q ** 2 / q.sum(0) 196 | return (weight.T / weight.sum(1)).T 197 | 198 | loss = 0 199 | index = 0 200 | maxiter = 20000 201 | update_interval = 500 202 | index_array = np.arange(x.shape[0]) 203 | 204 | tol = 0.001 # tolerance threshold to stop training 205 | 206 | for ite in range(int(maxiter)): 207 | if ite % update_interval == 0: 208 | q = model.predict(x, verbose=0) 209 | p = target_distribution(q) # update the auxiliary target distribution p 210 | 211 | # evaluate the clustering performance 212 | y_pred = q.argmax(1) 213 | if y is not None: 214 | acc = np.round(accuracy_score(y, y_pred), 5) 215 | nmi = np.round(normalized_mutual_info_score(y, y_pred), 5) 216 | ari = np.round(adjusted_rand_score(y, y_pred), 5) 217 | loss = np.round(loss, 5) 218 | print('Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' % (ite, acc, nmi, ari), ' ; loss=', loss) 219 | 220 | # check stop
criterion - model convergence 221 | delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0] 222 | y_pred_last = np.copy(y_pred) 223 | if ite > 0 and delta_label < tol: 224 | print('delta_label ', delta_label, '< tol ', tol) 225 | print('Reached tolerance threshold. Stopping training.') 226 | break 227 | idx = index_array[index * batch_size: min((index+1) * batch_size, x.shape[0])] 228 | model.train_on_batch(x=x[idx], y=p[idx]) 229 | index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0 230 | 231 | model.save_weights(save_dir + '/Thesis_DEC_model_final.h5') 232 | model.load_weights(save_dir + '/Thesis_DEC_model_final.h5') 233 | 234 | # Eval. 235 | q = model.predict(x, verbose=0) 236 | p = target_distribution(q) # update the auxiliary target distribution p 237 | 238 | # evaluate the clustering performance 239 | y_pred = q.argmax(1) 240 | if y is not None: 241 | acc = np.round(accuracy_score(y, y_pred), 5) 242 | nmi = np.round(normalized_mutual_info_score(y, y_pred), 5) 243 | ari = np.round(adjusted_rand_score(y, y_pred), 5) 244 | loss = np.round(loss, 5) 245 | print('Acc = %.5f, nmi = %.5f, ari = %.5f' % (acc, nmi, ari), ' ; loss=', loss) 246 | 247 | import seaborn as sns 248 | import sklearn.metrics 249 | import matplotlib.pyplot as plt 250 | sns.set(font_scale=1.5) 251 | conf_mat = sklearn.metrics.confusion_matrix(y, y_pred) # distinct name, so sklearn's confusion_matrix function is not shadowed 252 | 253 | plt.figure(figsize=(12, 11)) 254 | sns.heatmap(conf_mat, annot=True, fmt="d", annot_kws={"size": 15}) 255 | plt.title("Confusion matrix", fontsize=25) 256 | plt.ylabel('True label', fontsize=25) 257 | plt.xlabel('Clustering label', fontsize=25) 258 | plt.show() 259 | 260 | def visualizeData(Z, labels, num_clusters, title): 261 | ''' 262 | TSNE visualization of the points in latent space Z 263 | :param Z: Numpy array containing points in latent space in which clustering was performed 264 | :param labels: True labels - used for coloring points 265 | :param num_clusters: Total number of clusters 266 | :param title: filename where the plot should be saved 267 | :return: None - (side effect) saves clustering visualization plot in specified location 268 | ''' 269 | labels = labels.astype(int) 270 | tsne = manifold.TSNE(n_components=2, init='pca', random_state=0) 271 | Z_tsne = tsne.fit_transform(Z) 272 | fig = plt.figure() 273 | plt.scatter(Z_tsne[:, 0], Z_tsne[:, 1], s=2, c=labels, cmap=plt.cm.get_cmap("jet", num_clusters)) 274 | plt.colorbar(ticks=range(num_clusters)) 275 | fig.savefig(title, dpi=fig.dpi) 276 | 277 | from sklearn.manifold import TSNE 278 | import seaborn as sn 279 | import matplotlib.pyplot as plt 280 | 281 | data_1000 = x[0:1000,:] 282 | labels_1000 = y[0:1000] 283 | 284 | tsne_model = TSNE(n_components = 2, random_state = 0) # distinct name, so the trained Keras model is not overwritten 285 | 286 | tsne_data = tsne_model.fit_transform(x) 287 | #y_pred 288 | 289 | tsne_data = np.vstack((tsne_data.T, y)).T 290 | tsne_df = pd.DataFrame(data= tsne_data, columns= ("Dim_1","Dim_2","label")) 291 | 292 | sn.FacetGrid(tsne_df, hue= "label", size = 6).map(plt.scatter, 'Dim_1', 'Dim_2').add_legend() 293 | plt.show() 294 | 295 | visualizeData(x, y, n_clusters, "t_SNE_graph_original.png") 296 | visualizeData(x, y_pred, n_clusters, "t_SNE_graph_predicted.png") 297 | -------------------------------------------------------------------------------- /PopulationClustering_v2/results/train.csv/LSTM_EthnicityPrediction.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import glob 4 | import numpy as np 5
| import sys 6 | from time import time 7 | 8 | from sklearn.preprocessing import LabelEncoder 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.metrics import precision_recall_fscore_support 11 | from sklearn import metrics 12 | 13 | from keras.models import Sequential 14 | from keras.layers import LSTM, Dense, Dropout, Activation, Flatten 15 | from keras.callbacks import TensorBoard 16 | from keras.optimizers import RMSprop 17 | from keras.regularizers import l2 18 | from keras.callbacks import EarlyStopping 19 | from sklearn.metrics import precision_recall_fscore_support, roc_auc_score 20 | from keras.utils import np_utils 21 | 22 | from keras import backend as K 23 | K.set_image_data_format('channels_last') # TensorFlow dimension ordering 24 | import matplotlib.pyplot as plt 25 | import itertools 26 | 27 | 28 | 29 | np.random.seed(10) 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | from sklearn.metrics import confusion_matrix 49 | 50 | df1 = pd.read_csv('/home/asif/genome.csv', header=None) 51 | print(df1.head()) 52 | 53 | label = df1[0] 54 | print(label.head()) 55 | 56 | from sklearn import preprocessing 57 | le = preprocessing.LabelEncoder() 58 | lbl = le.fit(label) 59 | labelss = lbl.transform(label) 60 | labelDF = pd.DataFrame(labelss) 61 | 62 | #labelArr = 63 | print(labelDF.head()) 64 | 65 | feature = df1.drop(0, axis=1) 66 | print(feature.head()) 67 | 68 | from sklearn.preprocessing import MinMaxScaler 69 | scaler = MinMaxScaler() 70 | x1 = feature.iloc[:,1:] 71 | df_scaled = pd.DataFrame(scaler.fit_transform(x1), columns=x1.columns) 72 | df_scaled.head() 73 | 74 | y = labelss 75 | x = df_scaled.values 76 | 77 | features = x 78 | labels = y 79 | 80 | def prepare_test_train_valid(): 81 | # Train/test split; the held-out 25% is then halved into test and validation sets 82 | train_x, test_x, train_y, test_y = train_test_split(features, labels, test_size=0.25, random_state=100) 83 | test_x, valid_x, test_y, valid_y = train_test_split(test_x, test_y, test_size=0.50, random_state=100) 84 | 85 | return train_x, test_x, train_y, test_y, valid_x, valid_y 86 | 87 | def one_hot_encode(labels): 88 | n_labels = len(labels) 89 | n_unique_labels = len(np.unique(labels)) 90 | encoded = np.zeros((n_labels,n_unique_labels)) 91 | encoded[np.arange(n_labels), labels] = 1 92 | return encoded 93 | 94 | labels = one_hot_encode(labels) 95 | 96 | # Extract features and split the data 97 | train_x, test_x, train_y, test_y, valid_x, valid_y = prepare_test_train_valid() 98 | 99 | print('X_train shape:', train_x.shape) 100 | print('Y_train shape:', train_y.shape) 101 | 102 | num_classes = 5 103 | data_dim = 52 104 | timesteps = 1 105 | 106 | train_x = np.reshape(train_x,(train_x.shape[0], 1, train_x.shape[1])) 107 | test_x = np.reshape(test_x,(test_x.shape[0], 1, test_x.shape[1])) 108 | valid_x = np.reshape(valid_x,(valid_x.shape[0], 1, valid_x.shape[1])) 109 | 110 | def plot_confusion_matrix(cm, classes, 111 |
normalize=False, 112 | title='Confusion matrix', 113 | cmap=plt.cm.Blues): 114 | """ 115 | This function prints and plots the confusion matrix. 116 | Normalization can be applied by setting `normalize=True`. 117 | """ 118 | if normalize: 119 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 120 | print("Normalized confusion matrix") 121 | else: 122 | print('Confusion matrix, without normalization') 123 | 124 | print(cm) 125 | 126 | plt.imshow(cm, interpolation='nearest', cmap=cmap) 127 | plt.title(title) 128 | plt.colorbar() 129 | tick_marks = np.arange(len(classes)) 130 | plt.xticks(tick_marks, classes, rotation=45) 131 | plt.yticks(tick_marks, classes) 132 | 133 | fmt = '.2f' if normalize else 'd' 134 | thresh = cm.max() / 2. 135 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 136 | plt.text(j, i, format(cm[i, j], fmt), 137 | horizontalalignment="center", 138 | color="white" if cm[i, j] > thresh else "black") 139 | 140 | plt.tight_layout() 141 | plt.ylabel('True label') 142 | plt.xlabel('Predicted label') 143 | 144 | def build_LSTM(): 145 | # expected input data shape: (batch_size, timesteps, data_dim) 146 | model = Sequential() 147 | model.add(LSTM(32, return_sequences=True, input_shape=(timesteps, data_dim))) 148 | 149 | model.add(LSTM(24, return_sequences=True)) 150 | 151 | #model.add(Dropout(0.2)) 152 | model.add(LSTM(16, return_sequences=True)) 153 | model.add(Dropout(0.2)) 154 | 155 | # apply softmax to output 156 | model.add(Flatten()) 157 | model.add(Dense(num_classes, activation='softmax')) 158 | return model 159 | 160 | def model_train_evaluate(model, number_epoch): 161 | optimizer = RMSprop(lr=0.001, rho=0.01, epsilon=None, decay=0.0) 162 | 163 | # a stopping function should the validation loss stop improving 164 | earlystop = EarlyStopping(monitor='val_loss', patience=1, verbose=0, mode='auto') 165 | 166 | 167 | rnn_model = model # train the model passed in rather than building a second one 168 | rnn_model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer) 169 | tensorboardRNN = TensorBoard(log_dir="RNN_logs/{}".format(time())) 170 | rnn_model.fit(train_x, train_y, validation_data=(valid_x, valid_y), callbacks=[tensorboardRNN, earlystop], batch_size=128, epochs=int(number_epoch)) 171 | print(rnn_model.summary()) 172 | 173 | y_prob = rnn_model.predict(test_x) 174 | y_pred = y_prob.argmax(axis=-1) 175 | y_true = np.argmax(test_y, 1) 176 | 177 | roc = roc_auc_score(test_y, y_prob) 178 | print ("ROC:", round(roc,3)) 179 | 180 | # evaluate the model 181 | score, accuracy = rnn_model.evaluate(test_x, test_y, batch_size=32) 182 | print("\nAccuracy = {:.2f}".format(accuracy)) 183 | 184 | # the F-score gives a similar value to the accuracy score, but is useful for cross-checking 185 | p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro') 186 | print ("F-Score:", round(f,2)) 187 | print ("Precision:", round(p,2)) 188 | print ("Recall:", round(r,2)) 189 | 190 | 191 | # Compute confusion matrix 192 | cnf_matrix = confusion_matrix(y_true, y_pred) 193 | np.set_printoptions(precision=2) 194 | 195 | class_names = list(le.classes_) # alphabetical order used by the LabelEncoder: ASW, CHB, CLM, FIN, GBR 196 | 197 | # Plot non-normalized confusion matrix 198 | plt.figure() 199 | plot_confusion_matrix(cnf_matrix, classes=class_names, title='Confusion matrix: true vs predicted label') 200 | plt.show() 201 | 202 | model = build_LSTM() 203 | model_train_evaluate(model, 1000) 204 | import gc; gc.collect() 205 |
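The two formulas at the heart of DEC_Genotype_Clustering.py, the Student's t soft assignment computed in ClusteringLayer.call and the auxiliary target built by target_distribution, are easier to sanity-check in plain NumPy than inside the Keras graph. The sketch below is illustrative only: z and mu are made-up stand-ins for the encoder output and the k-means cluster centres, not values produced by the scripts above.

import numpy as np

# z: latent points (n_samples, latent_dim); mu: cluster centres (n_clusters, latent_dim)
z = np.array([[0.0, 0.1], [1.0, 0.9], [0.9, 1.1]])
mu = np.array([[0.0, 0.0], [1.0, 1.0]])
alpha = 1.0

# Soft assignment q_ij, the Student's t kernel from ClusteringLayer.call
dist_sq = ((z[:, None, :] - mu[None, :, :]) ** 2).sum(axis=2)  # squared distances, (n_samples, n_clusters)
q = (1.0 + dist_sq / alpha) ** (-(alpha + 1.0) / 2.0)
q = q / q.sum(axis=1, keepdims=True)                           # each row sums to 1

# Auxiliary target p_ij from target_distribution: squaring sharpens confident
# assignments, and dividing by the soft cluster size q.sum(0) discourages
# one large cluster from absorbing everything
weight = q ** 2 / q.sum(axis=0)
p = (weight.T / weight.sum(axis=1)).T

print(q.round(3))
print(p.round(3))  # the training loop minimizes KL(p || q) in mini-batches

With alpha = 1 the kernel reduces to q_ij proportional to 1/(1 + ||z_i - mu_j||^2), exactly the expression in ClusteringLayer.call, so a few hand-picked points like these can be used to verify the layer's output.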
-------------------------------------------------------------------------------- /PopulationClustering_v2/results/train.csv/genome.csv: -------------------------------------------------------------------------------- 1 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 2 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0 3 | CHB,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 4 | CHB,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 5 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 6 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 7 | CHB,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 8 | ASW,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0 9 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1 10 | ASW,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0 11 | ASW,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0 12 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0 13 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 14 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 15 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 16 | CHB,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 17 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 18 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 19 | GBR,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 20 | GBR,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0 21 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 22 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 23 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 24 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 25 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 26 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 27 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 28 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 29 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 30 | 
GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 31 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 32 | CLM,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 33 | CHB,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 34 | CLM,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 35 | CLM,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 36 | FIN,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 37 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1 38 | FIN,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 39 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 40 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0 41 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 42 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 43 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 44 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 45 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 46 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 47 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 48 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 49 | ASW,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0 50 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 51 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 52 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0 53 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 54 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 55 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 56 | CLM,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 57 | CHB,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 58 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 59 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 60 | 
GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 61 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0 62 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 63 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0 64 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 65 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0 66 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 67 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 68 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0 69 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 70 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 71 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 72 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 73 | CHB,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 74 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 75 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 76 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0 77 | ASW,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 78 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0 79 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 80 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0 81 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 82 | CHB,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 83 | CLM,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0 84 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 85 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0 86 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 87 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 88 | FIN,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 89 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 90 | 
ASW,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0 91 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0 92 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 93 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 94 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0 95 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 96 | FIN,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 97 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0 98 | FIN,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 99 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0 100 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 101 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 102 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 103 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 104 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 105 | FIN,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 106 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 107 | GBR,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0 108 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1 109 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 110 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 111 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 112 | FIN,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 113 | CHB,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 114 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 115 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 116 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 117 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0 118 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0 119 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 120 | 
CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 121 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 122 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 123 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 124 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 125 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 126 | CLM,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 127 | ASW,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 128 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 129 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 130 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 131 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 132 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1 133 | CLM,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 134 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 135 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0 136 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 137 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 138 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 139 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1 140 | ASW,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1 141 | FIN,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 142 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 143 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 144 | CLM,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 145 | FIN,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 146 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 147 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 148 | CLM,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1 149 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 150 | 
ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 151 | ASW,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0 152 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 153 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 154 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 155 | GBR,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 156 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0 157 | CLM,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1 158 | CHB,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 159 | ASW,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 160 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 161 | CLM,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 162 | CLM,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0 163 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 164 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0 165 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 166 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 167 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 168 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 169 | CLM,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 170 | ASW,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0 171 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 172 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 173 | ASW,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 174 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0 175 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 176 | CLM,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 177 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 178 | CHB,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 179 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 180 | 
GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 181 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 182 | CHB,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 183 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 184 | ASW,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 185 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 186 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 187 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 188 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1 189 | GBR,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0 190 | FIN,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 191 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 192 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0 193 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0 194 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 195 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 196 | CLM,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 197 | CHB,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 198 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0 199 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 200 | -------------------------------------------------------------------------------- /PopulationClustering_v2/results/train.csv/part-00000-2c4830b2-4c39-48fc-909d-4868a1164190-c000.csv: -------------------------------------------------------------------------------- 1 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 2 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0 3 | CHB,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 4 | CHB,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 5 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 6 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 7 | CHB,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 8 | ASW,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0 9 | 
GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1 10 | ASW,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0 11 | ASW,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0 12 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0 13 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 14 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 15 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 16 | CHB,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 17 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 18 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 19 | GBR,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 20 | GBR,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0 21 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 22 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 23 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 24 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 25 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 26 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 27 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 28 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 29 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 30 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 31 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 32 | CLM,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 33 | CHB,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 34 | CLM,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 35 | CLM,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 36 | FIN,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 37 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1 38 | FIN,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 39 | 
FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 40 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0 41 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 42 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 43 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 44 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 45 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 46 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 47 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 48 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 49 | ASW,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0 50 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 51 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 52 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0 53 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 54 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 55 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 56 | CLM,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 57 | CHB,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 58 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 59 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 60 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 61 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0 62 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 63 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0 64 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 65 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0 66 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 67 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 68 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0 69 | 
ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 70 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 71 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 72 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 73 | CHB,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 74 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 75 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 76 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0 77 | ASW,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 78 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0 79 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 80 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0 81 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 82 | CHB,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 83 | CLM,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0 84 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 85 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0 86 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 87 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 88 | FIN,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 89 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 90 | ASW,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0 91 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0 92 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 93 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 94 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0 95 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 96 | FIN,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 97 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0 98 | FIN,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 99 | 
CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0 100 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 101 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 102 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 103 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 104 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 105 | FIN,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 106 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 107 | GBR,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0 108 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1 109 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 110 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 111 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 112 | FIN,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 113 | CHB,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 114 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 115 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 116 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 117 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0 118 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0 119 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 120 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 121 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 122 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 123 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 124 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 125 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 126 | CLM,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 127 | ASW,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 128 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 129 | 
FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 130 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 131 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 132 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1 133 | CLM,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 134 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 135 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0 136 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 137 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 138 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 139 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1 140 | ASW,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1 141 | FIN,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 142 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0 143 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 144 | CLM,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 145 | FIN,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 146 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 147 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 148 | CLM,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1 149 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 150 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0 151 | ASW,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0 152 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 153 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 154 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 155 | GBR,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 156 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0 157 | CLM,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1 158 | CHB,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 159 | 
ASW,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 160 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 161 | CLM,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 162 | CLM,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0 163 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 164 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0 165 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 166 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 167 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 168 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 169 | CLM,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 170 | ASW,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0 171 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 172 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 173 | ASW,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 174 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0 175 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 176 | CLM,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 177 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 178 | CHB,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 179 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 180 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0 181 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 182 | CHB,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 183 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 184 | ASW,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 185 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 186 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 187 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 188 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1 189 | 
GBR,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0 190 | FIN,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 191 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 192 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0 193 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0 194 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 195 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 196 | CLM,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 197 | CHB,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 198 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0 199 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0 200 | -------------------------------------------------------------------------------- /PopulationClustering_v2/src/main/scala/org/fit/genomics/PopGenomicsClassificationSpark.scala: -------------------------------------------------------------------------------- 1 | package org.fit.genomics 2 | 3 | import hex.FrameSplitter 4 | import org.apache.spark.SparkContext 5 | import org.apache.spark.h2o.H2OContext 6 | import org.bdgenomics.adam.rdd.ADAMContext._ 7 | import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele } 8 | import water.{ Job, Key } 9 | import water.fvec.Frame 10 | 11 | import org.apache.spark.h2o._ 12 | import java.io.File 13 | import java.io._ 14 | import scala.collection.JavaConverters._ 15 | import scala.collection.immutable.Range.inclusive 16 | import scala.io.Source 17 | 18 | import org.apache.spark.rdd.RDD 19 | import org.apache.spark.sql._ 20 | import org.apache.spark.sql.types.{ IntegerType, StringType, StructField, StructType } 21 | import org.apache.spark.ml.feature.{ VectorAssembler, Normalizer } 22 | import org.apache.spark.ml.Pipeline 23 | import org.apache.spark.ml.feature.VectorIndexer 24 | import org.apache.spark.ml.feature.StringIndexer 25 | import org.apache.spark.ml.feature.PCA 26 | import org.apache.spark.ml.{ Pipeline } 27 | import org.apache.spark.ml.classification.{ RandomForestClassifier, RandomForestClassificationModel } 28 | import org.apache.spark.ml.evaluation.{ MulticlassClassificationEvaluator } 29 | import org.apache.spark.ml.tuning.{ ParamGridBuilder, CrossValidator } 30 | 31 | object PopGenomicsClassificationSpark { 32 | def main(args: Array[String]): Unit = { 33 | val genotypeFile = "C:/Users/admin-karim/Downloads/genotypes.vcf" 34 | val panelFile = "C:/Users/admin-karim/Downloads/genotypes.panel" 35 | 36 | val spark:SparkSession = SparkSession 37 | .builder() 38 | .appName("PopStrat") 39 | .master("local[*]") 40 | .config("spark.sql.warehouse.dir", "C:/Exp/") 41 | .getOrCreate() 42 | 43 | val sc: SparkContext = spark.sparkContext 44 | 45 | // Create a set of the populations that we want to predict 46 | // Then create a map of sample ID -> population so that we can filter out the samples we're not interested in 47 | //val populations = Set("GBR", "ASW", "FIN", "CHB", "CLM") 48 | val 
populations = Set("FIN", "GBR", "ASW", "CHB", "CLM") 49 | 50 | def extract(file: String, 51 | filter: (String, String) => Boolean): Map[String, String] = { 52 | Source 53 | .fromFile(file) 54 | .getLines() 55 | .map(line => { 56 | val tokens = line.split(Array('\t', ' ')).toList 57 | tokens(0) -> tokens(1) 58 | }) 59 | .toMap 60 | .filter(tuple => filter(tuple._1, tuple._2)) 61 | } 62 | 63 | val panel: Map[String, String] = extract( 64 | panelFile, 65 | (sampleID: String, pop: String) => populations.contains(pop)) 66 | 67 | // Load the ADAM genotypes from the parquet file(s) 68 | // Next, filter the genotypes so that we're left with only those in the populations we're interested in 69 | val allGenotypes: RDD[Genotype] = sc.loadGenotypes(genotypeFile).rdd 70 | //allGenotypes.adamParquetSave("output") 71 | val genotypes: RDD[Genotype] = allGenotypes.filter(genotype => { 72 | panel.contains(genotype.getSampleId) 73 | }) 74 | 75 | // Convert the Genotype objects to our own SampleVariant objects to try and conserve memory 76 | case class SampleVariant(sampleId: String, 77 | variantId: Int, 78 | alternateCount: Int) 79 | 80 | def variantId(genotype: Genotype): String = { 81 | val name = genotype.getVariant.getContigName 82 | val start = genotype.getVariant.getStart 83 | val end = genotype.getVariant.getEnd 84 | s"$name:$start:$end" 85 | } 86 | 87 | def alternateCount(genotype: Genotype): Int = { 88 | genotype.getAlleles.asScala.count(_ != GenotypeAllele.REF) 89 | } 90 | 91 | def toVariant(genotype: Genotype): SampleVariant = { 92 | // Intern sample IDs as they will be repeated a lot 93 | new SampleVariant(genotype.getSampleId.intern(), 94 | variantId(genotype).hashCode(), 95 | alternateCount(genotype)) 96 | } 97 | 98 | val variantsRDD: RDD[SampleVariant] = genotypes.map(toVariant) 99 | //println(s"Variant RDD: " + variantsRDD.first()) 100 | 101 | // Group the variants by sample ID so we can process the variants sample-by-sample 102 | // Then get the total number of samples. This will be used to find variants that are missing for some samples. 103 | // Group the variants by variant ID and filter out those variants that are missing from some samples 104 | val variantsBySampleId: RDD[(String, Iterable[SampleVariant])] = 105 | variantsRDD.groupBy(_.sampleId) 106 | val sampleCount: Long = variantsBySampleId.count() 107 | println("Found " + sampleCount + " samples") 108 | 109 | val variantsByVariantId: RDD[(Int, Iterable[SampleVariant])] = 110 | variantsRDD.groupBy(_.variantId).filter { 111 | case (_, sampleVariants) => sampleVariants.size == sampleCount 112 | } 113 | 114 | // Make a map of variant ID -> count of samples with an alternate count of greater than zero 115 | // then filter out those variants that are not in our desired frequency range. The objective here is simply to 116 | // reduce the number of dimensions in the data set to make it easier to train the model. 117 | // The specified range is fairly arbitrary and was chosen based on the fact that it includes a reasonable 118 | // number of variants, but not too many. 
119 |     val variantFrequencies: collection.Map[Int, Int] = variantsByVariantId
120 |       .map {
121 |         case (variantId, sampleVariants) =>
122 |           (variantId, sampleVariants.count(_.alternateCount > 0))
123 |       }
124 |       .collectAsMap()
125 |
126 |     val permittedRange = inclusive(11, 11)
127 |     val filteredVariantsBySampleId: RDD[(String, Iterable[SampleVariant])] =
128 |       variantsBySampleId.map {
129 |         case (sampleId, sampleVariants) =>
130 |           val filteredSampleVariants = sampleVariants.filter(
131 |             variant =>
132 |               permittedRange.contains(
133 |                 variantFrequencies.getOrElse(variant.variantId, -1)))
134 |           (sampleId, filteredSampleVariants)
135 |       }
136 |
137 |     //println(s"Filtered Variant RDD: " + filteredVariantsBySampleId.first())
138 |
139 |     // Sort the variants for each sample ID. Each sample should now have the same number of sorted variants.
140 |     // All items in the RDD should now have the same variants in the same order so we can just use the first
141 |     // one to construct our header
142 |     // Next construct the rows of our SchemaRDD from the variants
143 |     val sortedVariantsBySampleId: RDD[(String, Array[SampleVariant])] =
144 |       filteredVariantsBySampleId.map {
145 |         case (sampleId, variants) =>
146 |           (sampleId, variants.toArray.sortBy(_.variantId))
147 |       }
148 |
149 |     println(s"Sorted by Sample ID RDD: " + sortedVariantsBySampleId.first())
150 |
151 |     val header = StructType(
152 |       Seq(StructField("Region", StringType)) ++
153 |         sortedVariantsBySampleId
154 |           .first()
155 |           ._2
156 |           .map(variant => {
157 |             StructField(variant.variantId.toString, IntegerType)
158 |           }))
159 |
160 |     val rowRDD: RDD[Row] = sortedVariantsBySampleId.map {
161 |       case (sampleId, sortedVariants) =>
162 |         val region: Array[String] = Array(panel.getOrElse(sampleId, "Unknown"))
163 |         val alternateCounts: Array[Int] = sortedVariants.map(_.alternateCount)
164 |         Row.fromSeq(region ++ alternateCounts)
165 |     }
166 |
167 |     // Create the DataFrame from the header and rows (this pipeline stays in Spark ML; no H2O frame is used here)
168 |     val sqlContext = spark.sqlContext
169 |     val schemaDF = sqlContext.createDataFrame(rowRDD, header)
170 |     schemaDF.printSchema()
171 |     schemaDF.show(10)
172 |
173 |     val featureCols = schemaDF.columns.drop(1)
174 |
175 |     val assembler = new VectorAssembler()
176 |       .setInputCols(featureCols)
177 |       .setOutputCol("features")
178 |
179 |     val assembleDF = assembler.transform(schemaDF).select("features", "Region")
180 |     assembleDF.show()
181 |
182 |     /*
183 |     val pca = new PCA()
184 |       .setInputCol("features")
185 |       .setOutputCol("pcaFeatures")
186 |       .setK(50)
187 |       .fit(assembleDF)
188 |
189 |     val pcaDF = pca.transform(assembleDF).select("pcaFeatures", "Region").withColumnRenamed("pcaFeatures", "features")//.withColumnRenamed("Region", "label")
190 |     pcaDF.show()
191 |     *
192 |     */
193 |
194 |
195 |     val indexer = new StringIndexer()
196 |       .setInputCol("Region")
197 |       .setOutputCol("label")
198 |
199 |     val indexedDF = indexer.fit(assembleDF).transform(assembleDF).select("features", "label")
200 |     println("Indexed: ")
201 |     indexedDF.show(10)
202 |
203 |     val seed = 12345L
204 |     val splits = indexedDF.randomSplit(Array(0.75, 0.25), seed)
205 |     val (trainDF, testDF) = (splits(0), splits(1))
206 |
207 |     trainDF.cache
208 |     testDF.cache
209 |
210 |     val rf = new RandomForestClassifier()
211 |       .setLabelCol("label")
212 |       .setFeaturesCol("features")
213 |       .setSeed(1234567L)
214 |
215 |     // Grid search over the random forest hyperparameters (maxDepth, featureSubsetStrategy, impurity, maxBins, numTrees) for the best model
216 |     val paramGrid = new ParamGridBuilder()
217 |       .addGrid(rf.maxDepth, 3 ::
5 :: 15 :: 20 :: 25 :: 30 :: Nil)
218 |       .addGrid(rf.featureSubsetStrategy, "auto" :: "all" :: Nil)
219 |       .addGrid(rf.impurity, "gini" :: "entropy" :: Nil)
220 |       .addGrid(rf.maxBins, 3 :: 5 :: 10 :: 15 :: 25 :: 35 :: 45 :: Nil)
221 |       .addGrid(rf.numTrees, 5 :: 10 :: 15 :: 20 :: 30 :: Nil)
222 |       .build()
223 |
224 |     val evaluator = new MulticlassClassificationEvaluator()
225 |       .setLabelCol("label")
226 |       .setPredictionCol("prediction")
227 |
228 |     // Set up 10-fold cross validation
229 |     val numFolds = 10
230 |     val crossval = new CrossValidator()
231 |       .setEstimator(rf)
232 |       .setEvaluator(evaluator)
233 |       .setEstimatorParamMaps(paramGrid)
234 |       .setNumFolds(numFolds)
235 |
236 |     val cvModel = crossval.fit(trainDF)
237 |
238 |     // Save the workflow
239 |     //cvModel.write.overwrite().save("model/RF_model_churn")
240 |
241 |     val predictions = cvModel.transform(testDF)
242 |     predictions.show(10)
243 |
244 |     val metric = new MulticlassClassificationEvaluator()
245 |       .setLabelCol("label")
246 |       .setPredictionCol("prediction")
247 |
248 |     // Compute the classification accuracy, precision, recall and F1 measure on the test data.
249 |     // Note: setMetricName mutates the evaluator in place, so each metric is evaluated
250 |     // immediately; holding four aliases of the same evaluator would make every call report "f1".
251 |     val accuracy = metric.setMetricName("accuracy").evaluate(predictions)
252 |     val precision = metric.setMetricName("weightedPrecision").evaluate(predictions)
253 |     val recall = metric.setMetricName("weightedRecall").evaluate(predictions)
254 |     val f1 = metric.setMetricName("f1").evaluate(predictions)
255 |
256 |     // Print the performance metrics
257 |     println("Accuracy = " + accuracy)
258 |     println("Precision = " + precision)
259 |     println("Recall = " + recall)
260 |     println("F1 = " + f1)
261 |     println(s"Test Error = ${1 - accuracy}")
262 |
263 |     // Shut down Spark (no H2O context is started in this file)
264 |     spark.stop()
265 |   }
266 |
267 | }
268 |
--------------------------------------------------------------------------------
/PopulationClustering_v2/src/main/scala/org/fit/genomics/PopStratClassification.scala:
--------------------------------------------------------------------------------
1 | package org.fit.genomics
2 |
3 | import java.io._
4 |
5 | import hex.FrameSplitter
6 | import hex.deeplearning.DeepLearning
7 | import hex.deeplearning.DeepLearningModel.DeepLearningParameters
8 | import hex.deeplearning.DeepLearningModel.DeepLearningParameters.Activation
9 | import org.apache.spark.SparkContext
10 | import org.apache.spark.h2o.H2OContext
11 | import org.apache.spark.rdd.RDD
12 | import org.apache.spark.sql._
13 | import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
14 |
15 | import org.bdgenomics.adam.rdd.ADAMContext._
16 | import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele}
17 | import water.{Job, Key}
18 | import water.support.ModelMetricsSupport
19 | import water.fvec.Frame
20 |
21 | import org.apache.spark.h2o._
22 | import java.io.File
23 |
24 | import htsjdk.samtools.ValidationStringency
25 |
26 | import _root_.hex.{ModelMetrics, ModelMetricsSupervised, ModelMetricsMultinomial}
27 |
28 | import scala.collection.JavaConverters._
29 | import scala.collection.immutable.Range.inclusive
30 | import scala.io.Source
31 |
32 | object PopStratClassification {
33 |   def main(args: Array[String]): Unit = {
34 |     val genotypeFile = "C:/Users/admin-karim/Downloads/ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf"
35 |     val panelFile = "C:/Users/admin-karim/Downloads/genotypes.panel"
36 |
37 |     val
sparkSession: SparkSession =
38 |       SparkSession.builder.appName("PopStrat").master("local[*]").getOrCreate()
39 |     val sc: SparkContext = sparkSession.sparkContext
40 |
41 |     // Create a set of the populations that we want to predict
42 |     // Then create a map of sample ID -> population so that we can filter out the samples we're not interested in
43 |     //val populations = Set("GBR", "ASW", "FIN", "CHB", "CLM")
44 |     val populations = Set("FIN", "GBR", "ASW", "CHB", "CLM")
45 |
46 |     def extract(file: String,
47 |                 filter: (String, String) => Boolean): Map[String, String] = {
48 |       Source
49 |         .fromFile(file)
50 |         .getLines()
51 |         .map(line => {
52 |           val tokens = line.split(Array('\t', ' ')).toList
53 |           tokens(0) -> tokens(1)
54 |         })
55 |         .toMap
56 |         .filter(tuple => filter(tuple._1, tuple._2))
57 |     }
58 |
59 |     val panel: Map[String, String] = extract(
60 |       panelFile,
61 |       (sampleID: String, pop: String) => populations.contains(pop))
62 |
63 |     // Load the ADAM genotypes from the parquet file(s)
64 |     // Next, filter the genotypes so that we're left with only those in the populations we're interested in
65 |     //val allGenotypes: RDD[Genotype] = sc.loadGenotypes(genotypeFile, stringency = ValidationStringency.SILENT).rdd
66 |     val genotypes0 = sc.loadGenotypes("C:/Users/admin-karim/Downloads/VCF_files/1.vcf", stringency = ValidationStringency.SILENT)
67 |
68 |     //val genotypes0 = sc.loadGenotypes("sample0.vcf")
69 |     val genotypes1 = sc.loadGenotypes("C:/Users/admin-karim/Downloads/VCF_files/2.vcf")
70 |     val union = genotypes0.union(genotypes1)
71 |     val rdd: RDD[Genotype] = union.rdd
72 |
73 |     val allGenotypes: RDD[Genotype] = rdd // rdd is already an RDD[Genotype]; the original "rdd.rdd" was redundant
74 |
75 |     //allGenotypes.adamParquetSave("output")
76 |     val genotypesFiltered: RDD[Genotype] = allGenotypes.filter(genotype => {
77 |       panel.contains(genotype.getSampleId)
78 |     })
79 |
80 |     // Convert the Genotype objects to our own SampleVariant objects to try and conserve memory
81 |     case class SampleVariant(sampleId: String,
82 |                              variantId: Int,
83 |                              alternateCount: Int)
84 |     def variantId(genotype: Genotype): String = {
85 |       val name = genotype.getVariant.getContigName
86 |       val start = genotype.getVariant.getStart
87 |       val end = genotype.getVariant.getEnd
88 |       s"$name:$start:$end"
89 |     }
90 |
91 |     def alternateCount(genotype: Genotype): Int = {
92 |       genotype.getAlleles.asScala.count(_ != GenotypeAllele.REF)
93 |     }
94 |
95 |     def toVariant(genotype: Genotype): SampleVariant = {
96 |       // Intern sample IDs as they will be repeated a lot
97 |       new SampleVariant(genotype.getSampleId.intern(),
98 |                         variantId(genotype).hashCode(),
99 |                         alternateCount(genotype))
100 |     }
101 |
102 |     val variantsRDD: RDD[SampleVariant] = genotypesFiltered.map(toVariant)
103 |     //println(s"Variant RDD: " + variantsRDD.first())
104 |
105 |     // Group the variants by sample ID so we can process the variants sample-by-sample
106 |     // Then get the total number of samples. This will be used to find variants that are missing for some samples.
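// Hypothetical example (numbers made up, not from the original source): if
// sampleCount == 5 but a given variant was only called in 4 of the samples,
// its group below has size 4 != sampleCount, fails the size == sampleCount
// check, and is dropped, so every surviving variant is observed in every sample.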
107 | // Group the variants by variant ID and filter out those variants that are missing from some samples 108 | val variantsBySampleId: RDD[(String, Iterable[SampleVariant])] = 109 | variantsRDD.groupBy(_.sampleId) 110 | val sampleCount: Long = variantsBySampleId.count() 111 | println("Found " + sampleCount + " samples") 112 | 113 | val writer_0 = new PrintWriter(new File("output_1.txt")) 114 | writer_0.write("Found " + sampleCount + " samples") 115 | //writer.write(s"Confusion Matrix"+ cm) 116 | //writer.write("Prediction Matrix"+ result) 117 | writer_0.close() 118 | 119 | val variantsByVariantId: RDD[(Int, Iterable[SampleVariant])] = 120 | variantsRDD.groupBy(_.variantId).filter { 121 | case (_, sampleVariants) => sampleVariants.size == sampleCount 122 | } 123 | 124 | // Make a map of variant ID -> count of samples with an alternate count of greater than zero 125 | // then filter out those variants that are not in our desired frequency range. The objective here is simply to 126 | // reduce the number of dimensions in the data set to make it easier to train the model. 127 | // The specified range is fairly arbitrary and was chosen based on the fact that it includes a reasonable 128 | // number of variants, but not too many. 129 | val variantFrequencies: collection.Map[Int, Int] = variantsByVariantId 130 | .map { 131 | case (variantId, sampleVariants) => 132 | (variantId, sampleVariants.count(_.alternateCount > 0)) 133 | } 134 | .collectAsMap() 135 | 136 | println(variantFrequencies.max) 137 | 138 | val permittedRange = inclusive(11, 11) 139 | val filteredVariantsBySampleId: RDD[(String, Iterable[SampleVariant])] = 140 | variantsBySampleId.map { 141 | case (sampleId, sampleVariants) => 142 | val filteredSampleVariants = sampleVariants.filter( 143 | variant => 144 | permittedRange.contains( 145 | variantFrequencies.getOrElse(variant.variantId, -1))) 146 | (sampleId, filteredSampleVariants) 147 | } 148 | 149 | //println(s"Filtered Variant RDD: " + filteredVariantsBySampleId.first()) 150 | 151 | // Sort the variants for each sample ID. Each sample should now have the same number of sorted variants. 
152 |     // All items in the RDD should now have the same variants in the same order so we can just use the first
153 |     // one to construct our header
154 |     // Next construct the rows of our SchemaRDD from the variants
155 |     val sortedVariantsBySampleId: RDD[(String, Array[SampleVariant])] =
156 |       filteredVariantsBySampleId.map {
157 |         case (sampleId, variants) =>
158 |           (sampleId, variants.toArray.sortBy(_.variantId))
159 |       }
160 |
161 |     println(s"Sorted by Sample ID RDD: " + sortedVariantsBySampleId.first())
162 |
163 |     val header = StructType(
164 |       Seq(StructField("Region", StringType)) ++
165 |         sortedVariantsBySampleId
166 |           .first()
167 |           ._2
168 |           .map(variant => {
169 |             StructField(variant.variantId.toString, IntegerType)
170 |           }))
171 |
172 |     val rowRDD: RDD[Row] = sortedVariantsBySampleId.map {
173 |       case (sampleId, sortedVariants) =>
174 |         val region: Array[String] = Array(panel.getOrElse(sampleId, "Unknown"))
175 |         val alternateCounts: Array[Int] = sortedVariants.map(_.alternateCount)
176 |         Row.fromSeq(region ++ alternateCounts)
177 |     }
178 |
179 |     // Create the SchemaRDD from the header and rows and convert the SchemaRDD into a H2O dataframe
180 |     val sqlContext = sparkSession.sqlContext
181 |     val schemaDF = sqlContext.createDataFrame(rowRDD, header)
182 |     schemaDF.coalesce(1).write.format("com.databricks.spark.csv").csv("results/train.csv")
183 |     //testData.write.format("com.databricks.spark.csv").csv("results/test.csv")
184 |
185 |     schemaDF.show()
186 |
187 |     val h2oContext = H2OContext.getOrCreate(sparkSession)
188 |     import h2oContext.implicits._
189 |
190 |     val dataFrame = h2oContext.asH2OFrame(schemaDF)
191 |     dataFrame
192 |       .replace(dataFrame.find("Region"),
193 |                dataFrame.vec("Region").toCategoricalVec())
194 |       .remove()
195 |     dataFrame.update()
196 |
197 |     // Split the dataframe into 50% training, 30% test, and 20% validation data (per the 0.50/0.30 ratios below)
198 |     val frameSplitter = new FrameSplitter(
199 |       dataFrame,
200 |       Array(0.50, 0.30),
201 |       Array("training", "test", "validation").map(Key.make[Frame]),
202 |       null)
203 |
204 |     water.H2O.submitTask(frameSplitter)
205 |     val splits = frameSplitter.getResult
206 |     val training = splits(0)
207 |     val test = splits(1)
208 |     val validation = splits(2)
209 |
210 |     // Set the parameters for our deep learning model.
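// As a reading aid for the settings below (the values are the ones this file
// actually sets, not suggestions): three hidden layers of 32, 64 and 128
// rectified-linear units with dropout, L2 regularisation of 0.01, a fixed seed
// for reproducibility, and 2 training epochs, with the categorical "Region"
// column as the response.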
211 | val deepLearningParameters = new DeepLearningParameters() 212 | deepLearningParameters._train = training 213 | deepLearningParameters._valid = validation 214 | deepLearningParameters._response_column = "Region" 215 | deepLearningParameters._epochs = 2 216 | deepLearningParameters._l2 = 0.01 217 | deepLearningParameters._seed = 1234567 218 | deepLearningParameters._activation = Activation.RectifierWithDropout 219 | deepLearningParameters._hidden = Array[Int](32, 64, 128) 220 | 221 | // Train the deep learning model 222 | val deepLearning = new DeepLearning(deepLearningParameters) 223 | val deepLearningTrained = deepLearning.trainModel 224 | val trainedModel = deepLearningTrained.get 225 | 226 | val error = trainedModel.classification_error() 227 | println("Training Error: " + error) 228 | 229 | //val predict = trainedModel.score(test)('predict) 230 | //trainedModel.score(test)('predict) 231 | 232 | trainedModel.score(dataFrame)('predict) 233 | println(variantFrequencies.max) 234 | 235 | 236 | /* 237 | val h2oContext2 = H2OContext.getOrCreate(sc) 238 | import h2oContext2._ 239 | import h2oContext2.implicits._ 240 | 241 | val predictionsFromModel = asRDD[DoubleHolder](predict).collect.map(_.result.getOrElse(Double.NaN)) 242 | predictionsFromModel.foreach{ value => println(value)} 243 | * 244 | */ 245 | 246 | // Collect model metrics and evaluate model quality 247 | //val trainMetrics = ModelMetricsSupport.modelMetrics[ModelMetricsMultinomial](trainedModel, test) 248 | //val met = trainMetrics.cm() 249 | //println("Accuracy: "+ met.accuracy()) 250 | //println("MSE: "+ trainMetrics.mse) 251 | //println("RMSE: "+ trainMetrics.rmse) 252 | //println("R2: " + trainMetrics.r2) 253 | 254 | // Shutdown Spark cluster and H2O 255 | h2oContext.stop(stopSparkContext = true) 256 | sparkSession.stop() 257 | } 258 | } 259 | -------------------------------------------------------------------------------- /PopulationClustering_v2/src/main/scala/org/fit/genomics/PopStratClustering.scala: -------------------------------------------------------------------------------- 1 | package org.fit.genomics 2 | 3 | import hex.FrameSplitter 4 | import org.apache.spark.{ SparkConf, SparkContext } 5 | import org.apache.spark.h2o.H2OContext 6 | import org.apache.spark.rdd.RDD 7 | import org.apache.spark.sql._ 8 | import org.bdgenomics.adam.rdd.ADAMContext._ 9 | import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele} 10 | import org.apache.spark._ 11 | import org.apache.spark.rdd.RDD 12 | import org.apache.spark.mllib.linalg.{ Vectors, Vector } 13 | import org.apache.spark.ml.clustering.KMeans 14 | import water.fvec.Frame 15 | import java.io._ 16 | import org.apache.spark.SparkContext 17 | import org.apache.spark.h2o.H2OContext 18 | import org.apache.spark.sql.types.{ IntegerType, StringType, StructField, StructType } 19 | 20 | import org.apache.spark.ml.feature.{ VectorAssembler, Normalizer } 21 | import org.apache.spark.ml.Pipeline 22 | import org.apache.spark.ml.feature.VectorIndexer 23 | import org.apache.spark.ml.feature.PCA 24 | 25 | import water.{ Job, Key } 26 | import water.fvec.Frame 27 | 28 | import org.apache.spark.h2o._ 29 | import java.io.File 30 | import water._ 31 | 32 | import scala.collection.JavaConverters._ 33 | import scala.collection.immutable.Range.inclusive 34 | import scala.io.Source 35 | 36 | object PopStratClusterings { 37 | def main(args: Array[String]): Unit = { 38 | val genotypeFile = "C:/Users/admin-karim/Downloads/genotypes.vcf" 39 | val panelFile = 
"C:/Users/admin-karim/Downloads/genotypes.panel" 40 | 41 | val sparkSession: SparkSession = 42 | SparkSession.builder.appName("PopStrat").master("local[*]").getOrCreate() 43 | val sc: SparkContext = sparkSession.sparkContext 44 | 45 | val populations = Set("GBR", "MXL", "ASW", "CHB", "CLM") 46 | def extract(file: String, filter: (String, String) => Boolean): Map[String, String] = { 47 | Source 48 | .fromFile(file) 49 | .getLines() 50 | .map(line => { 51 | val tokens = line.split(Array('\t', ' ')).toList 52 | tokens(0) -> tokens(1) 53 | }) 54 | .toMap 55 | .filter(tuple => filter(tuple._1, tuple._2)) 56 | } 57 | 58 | val panel: Map[String, String] = extract( 59 | panelFile, 60 | (sampleID: String, pop: String) => populations.contains(pop)) 61 | val allGenotypes: RDD[Genotype] = sc.loadGenotypes(genotypeFile).rdd 62 | val genotypes: RDD[Genotype] = allGenotypes.filter(genotype => { 63 | panel.contains(genotype.getSampleId) 64 | }) 65 | 66 | // Convert the Genotype objects to our own SampleVariant objects to try and conserve memory 67 | case class SampleVariant(sampleId: String, 68 | variantId: Int, 69 | alternateCount: Int) 70 | 71 | def variantId(genotype: Genotype): String = { 72 | val name = genotype.getVariant.getContigName 73 | val start = genotype.getVariant.getStart 74 | val end = genotype.getVariant.getEnd 75 | s"$name:$start:$end" 76 | } 77 | 78 | def alternateCount(genotype: Genotype): Int = { 79 | genotype.getAlleles.asScala.count(_ != GenotypeAllele.REF) 80 | } 81 | 82 | def toVariant(genotype: Genotype): SampleVariant = { 83 | // Intern sample IDs as they will be repeated a lot 84 | new SampleVariant(genotype.getSampleId.intern(), 85 | variantId(genotype).hashCode(), 86 | alternateCount(genotype)) 87 | } 88 | 89 | val variantsRDD: RDD[SampleVariant] = genotypes.map(toVariant) 90 | val variantsBySampleId: RDD[(String, Iterable[SampleVariant])] = 91 | variantsRDD.groupBy(_.sampleId) 92 | val sampleCount: Long = variantsBySampleId.count() 93 | println("Found " + sampleCount + " samples") 94 | 95 | val variantsByVariantId: RDD[(Int, Iterable[SampleVariant])] = 96 | variantsRDD.groupBy(_.variantId).filter { 97 | case (_, sampleVariants) => sampleVariants.size == sampleCount 98 | } 99 | 100 | val variantFrequencies: collection.Map[Int, Int] = variantsByVariantId 101 | .map { 102 | case (variantId, sampleVariants) => 103 | (variantId, sampleVariants.count(_.alternateCount > 0)) 104 | } 105 | .collectAsMap() 106 | 107 | val permittedRange = inclusive(11, 11) 108 | val filteredVariantsBySampleId: RDD[(String, Iterable[SampleVariant])] = 109 | variantsBySampleId.map { 110 | case (sampleId, sampleVariants) => 111 | val filteredSampleVariants = sampleVariants.filter( 112 | variant => 113 | permittedRange.contains( 114 | variantFrequencies.getOrElse(variant.variantId, -1))) 115 | (sampleId, filteredSampleVariants) 116 | } 117 | 118 | val sortedVariantsBySampleId: RDD[(String, Array[SampleVariant])] = 119 | filteredVariantsBySampleId.map { 120 | case (sampleId, variants) => 121 | (sampleId, variants.toArray.sortBy(_.variantId)) 122 | } 123 | 124 | println(s"Sorted by Sample ID RDD: " + sortedVariantsBySampleId.first()) 125 | 126 | val header = StructType( 127 | Array(StructField("Region", StringType)) ++ 128 | sortedVariantsBySampleId 129 | .first() 130 | ._2 131 | .map(variant => { 132 | StructField(variant.variantId.toString, IntegerType) 133 | })) 134 | 135 | val rowRDD: RDD[Row] = sortedVariantsBySampleId.map { 136 | case (sampleId, sortedVariants) => 137 | val region: Array[String] = 
Array(panel.getOrElse(sampleId, "Unknown")) 138 | val alternateCounts: Array[Int] = sortedVariants.map(_.alternateCount) 139 | Row.fromSeq(region ++ alternateCounts) 140 | } 141 | 142 | //val featureVectorsRDD = rowRDD.map { x: Row => x.getAs[Vector](0) } 143 | 144 | // Create the SchemaRDD from the header and rows and convert the SchemaRDD into a Spark dataframe 145 | val sqlContext = sparkSession.sqlContext 146 | val schemaDF = sqlContext.createDataFrame(rowRDD, header).drop("Region") 147 | schemaDF.printSchema() 148 | schemaDF.show(10) 149 | 150 | val featureCols = schemaDF.columns 151 | 152 | val assembler = new VectorAssembler() 153 | .setInputCols(featureCols) 154 | .setOutputCol("features") 155 | 156 | val assembleDF = assembler.transform(schemaDF).select("features") 157 | assembleDF.show() 158 | 159 | val pca = new PCA() 160 | .setInputCol("features") 161 | .setOutputCol("pcaFeatures") 162 | .setK(50) 163 | .fit(assembleDF) 164 | 165 | val pcaDF = pca.transform(assembleDF).select("pcaFeatures").withColumnRenamed("pcaFeatures", "features") 166 | pcaDF.show() 167 | 168 | val iterations = 20 169 | for (i <- 2 to iterations) { 170 | // Trains a k-means model. 171 | val kmeans = new KMeans().setK(i).setSeed(12345L) 172 | val model = kmeans.fit(pcaDF) 173 | 174 | // Evaluate clustering by computing Within Set Sum of Squared Errors. 175 | val WSSSE = model.computeCost(pcaDF) 176 | println("Within Set Sum of Squared Errors for k = " + i + " is " + WSSSE) 177 | } 178 | sparkSession.stop() 179 | } 180 | } -------------------------------------------------------------------------------- /PopulationClustering_v2/src/main/scala/org/fit/genomics/featureExtractor.scala: -------------------------------------------------------------------------------- 1 | package org.fit.genomics 2 | 3 | import java.io._ 4 | 5 | import hex.FrameSplitter 6 | import hex.deeplearning.DeepLearning 7 | import hex.deeplearning.DeepLearningModel.DeepLearningParameters 8 | import hex.deeplearning.DeepLearningModel.DeepLearningParameters.Activation 9 | import org.apache.spark.SparkContext 10 | import org.apache.spark.h2o.H2OContext 11 | import org.apache.spark.rdd.RDD 12 | import org.apache.spark.sql._ 13 | import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} 14 | 15 | import org.bdgenomics.adam.rdd.ADAMContext._ 16 | import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele} 17 | import water.{Job, Key} 18 | import water.support.ModelMetricsSupport 19 | import water.fvec.Frame 20 | 21 | import org.apache.spark.h2o._ 22 | import java.io.File 23 | 24 | import htsjdk.samtools.ValidationStringency 25 | 26 | import _root_.hex.{ModelMetrics, ModelMetricsSupervised, ModelMetricsMultinomial} 27 | 28 | import scala.collection.JavaConverters._ 29 | import scala.collection.immutable.Range.inclusive 30 | import scala.io.Source 31 | 32 | object featureExtractor { 33 | def main(args: Array[String]): Unit = { 34 | val genotypeFile = "ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf" 35 | val panelFile = "genotypes.panel" 36 | 37 | val sparkSession: SparkSession = 38 | SparkSession.builder.appName("PopStrat").master("local[*]").getOrCreate() 39 | val sc: SparkContext = sparkSession.sparkContext 40 | 41 | // Create a set of the populations that we want to predict 42 | // Then create a map of sample ID -> population so that we can filter out the samples we're not interested in 43 | //val populations = Set("GBR", "ASW", "FIN", "CHB", "CLM") 44 | val populations = Set("FIN", "GBR", "ASW", "CHB", "CLM") 
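// extract() below treats the panel file as tab/space separated columns with the
// sample ID first and the population code second. Hypothetical rows (format
// assumed for illustration, not taken from the original data):
//   HG00096 GBR EUR
//   NA19625 ASW AFR
// Only rows whose population code is in the set above are kept.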
--------------------------------------------------------------------------------
/PopulationClustering_v2/src/main/scala/org/fit/genomics/featureExtractor.scala:
--------------------------------------------------------------------------------
package org.fit.genomics

import java.io._

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.{Genotype, GenotypeAllele}

import htsjdk.samtools.ValidationStringency

import scala.collection.JavaConverters._
import scala.collection.immutable.Range.inclusive
import scala.io.Source

object featureExtractor {
  def main(args: Array[String]): Unit = {
    val genotypeFile = "ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf"
    val panelFile = "genotypes.panel"

    val sparkSession: SparkSession =
      SparkSession.builder.appName("PopStrat").master("local[*]").getOrCreate()
    val sc: SparkContext = sparkSession.sparkContext

    // Create a set of the populations that we want to predict, then build a map of
    // sample ID -> population so that we can filter out the samples we're not interested in
    val populations = Set("FIN", "GBR", "ASW", "CHB", "CLM")

    def extract(file: String,
                filter: (String, String) => Boolean): Map[String, String] = {
      Source
        .fromFile(file)
        .getLines()
        .map(line => {
          val tokens = line.split(Array('\t', ' ')).toList
          tokens(0) -> tokens(1)
        })
        .toMap
        .filter(tuple => filter(tuple._1, tuple._2))
    }

    val panel: Map[String, String] = extract(
      panelFile,
      (sampleID: String, pop: String) => populations.contains(pop))

    // Load the genotypes from the VCF file(s) with ADAM. To build a single RDD out of
    // many VCF files at once, a glob also works, e.g.:
    //   sc.loadGenotypes("VCF_files/*.vcf", stringency = ValidationStringency.SILENT).rdd
    val genotypes0 = sc.loadGenotypes("VCF_files/1.vcf", stringency = ValidationStringency.SILENT)
    val genotypes1 = sc.loadGenotypes("VCF_files/2.vcf", stringency = ValidationStringency.SILENT)
    val allGenotypes: RDD[Genotype] = genotypes0.union(genotypes1).rdd

    // Filter the genotypes so that we're left with only those in the populations we're interested in
    val genotypesFiltered: RDD[Genotype] = allGenotypes.filter(genotype => {
      panel.contains(genotype.getSampleId)
    })

    // Convert the Genotype objects to our own lightweight SampleVariant objects to conserve memory
    case class SampleVariant(sampleId: String,
                             variantId: Int,
                             alternateCount: Int)

    def variantId(genotype: Genotype): String = {
      val name = genotype.getVariant.getContigName
      val start = genotype.getVariant.getStart
      val end = genotype.getVariant.getEnd
      s"$name:$start:$end"
    }

    def alternateCount(genotype: Genotype): Int = {
      genotype.getAlleles.asScala.count(_ != GenotypeAllele.REF)
    }

    def toVariant(genotype: Genotype): SampleVariant = {
      // Intern sample IDs as they will be repeated a lot
      SampleVariant(genotype.getSampleId.intern(),
                    variantId(genotype).hashCode(),
                    alternateCount(genotype))
    }

    val variantsRDD: RDD[SampleVariant] = genotypesFiltered.map(toVariant)
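    // Note: variantId() produces a "contig:start:end" string that toVariant() hashes
    // down to an Int to keep SampleVariant compact. Int hash collisions are possible
    // in principle, but at the scale of a single chromosome they are very unlikely,
    // and a collision would at worst merge or drop the affected variant columns.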
    // Group the variants by sample ID so we can process the variants sample-by-sample.
    // Then get the total number of samples; this will be used to find variants that
    // are missing for some samples.
    val variantsBySampleId: RDD[(String, Iterable[SampleVariant])] =
      variantsRDD.groupBy(_.sampleId)
    val sampleCount: Long = variantsBySampleId.count()
    println("Found " + sampleCount + " samples")

    val writer_0 = new PrintWriter(new File("output_1.txt"))
    writer_0.write("Found " + sampleCount + " samples")
    writer_0.close()

    // Group the variants by variant ID and filter out those variants that are missing from some samples
    val variantsByVariantId: RDD[(Int, Iterable[SampleVariant])] =
      variantsRDD.groupBy(_.variantId).filter {
        case (_, sampleVariants) => sampleVariants.size == sampleCount
      }

    // Make a map of variant ID -> count of samples with an alternate count greater than zero,
    // then filter out those variants that are not in our desired frequency range. The objective
    // here is simply to reduce the number of dimensions in the data set to make it easier to
    // train the model. The specified range is fairly arbitrary and was chosen because it keeps
    // a reasonable number of variants, but not too many.
    val variantFrequencies: collection.Map[Int, Int] = variantsByVariantId
      .map {
        case (variantId, sampleVariants) =>
          (variantId, sampleVariants.count(_.alternateCount > 0))
      }
      .collectAsMap()

    // Quick sanity check: prints the (variantId, frequency) pair with the largest variant ID
    println(variantFrequencies.max)

    val permittedRange = inclusive(11, 11)
    val filteredVariantsBySampleId: RDD[(String, Iterable[SampleVariant])] =
      variantsBySampleId.map {
        case (sampleId, sampleVariants) =>
          val filteredSampleVariants = sampleVariants.filter(
            variant =>
              permittedRange.contains(
                variantFrequencies.getOrElse(variant.variantId, -1)))
          (sampleId, filteredSampleVariants)
      }
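    // Worked example: with permittedRange = inclusive(11, 11), a variant survives the
    // filter only if exactly 11 samples carry at least one alternate allele for it.
    // Widening the range, e.g. inclusive(5, 20), keeps more variants and therefore
    // more feature columns in the resulting DataFrame.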
    // Sort the variants for each sample ID. Each sample should now have the same number
    // of sorted variants, in the same order, so we can use the first sample's variants
    // to construct our header.
    val sortedVariantsBySampleId: RDD[(String, Array[SampleVariant])] =
      filteredVariantsBySampleId.map {
        case (sampleId, variants) =>
          (sampleId, variants.toArray.sortBy(_.variantId))
      }

    println("Sorted by Sample ID RDD: " + sortedVariantsBySampleId.first())

    val header = StructType(
      Seq(StructField("Region", StringType)) ++
        sortedVariantsBySampleId
          .first()
          ._2
          .map(variant => {
            StructField(variant.variantId.toString, IntegerType)
          }))

    // Construct the rows of our DataFrame from the sorted variants
    val rowRDD: RDD[Row] = sortedVariantsBySampleId.map {
      case (sampleId, sortedVariants) =>
        val region: Array[String] = Array(panel.getOrElse(sampleId, "Unknown"))
        val alternateCounts: Array[Int] = sortedVariants.map(_.alternateCount)
        Row.fromSeq(region ++ alternateCounts)
    }

    // Create the Spark DataFrame from the header and rows
    val sqlContext = sparkSession.sqlContext
    val schemaDF = sqlContext.createDataFrame(rowRDD, header)

    // Write the resulting DataFrame as CSV to be used by the Keras-based DEC algorithm.
    // Note: Spark writes a *directory* named results/train.csv; coalesce(1) ensures it
    // contains a single part-*.csv file rather than one file per partition.
    schemaDF.coalesce(1).write.format("com.databricks.spark.csv").csv("results/train.csv")

    // Shut down Spark
    sparkSession.stop()
  }
}
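// Usage sketch (assumes the fat jar produced by the maven-assembly-plugin declared in
// this project's pom.xml; the jar name follows the artifactId and version set there):
//
//   spark-submit --class org.fit.genomics.featureExtractor --master local[*] \
//     target/PopulationClustering_v2-0.1-SNAPSHOT-jar-with-dependencies.jar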
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
This repository contains the implementation of our papers "Recurrent Deep Embedding Networks for Genotype Clustering and Ethnicity Prediction" and "Convolutional Embedded Networks for Population Scale Clustering and Bio-ancestry Inferencing". The former is available as a pre-print on arXiv (https://arxiv.org/pdf/1805.12218.pdf); the latter has been submitted to IEEE/ACM Transactions on Computational Biology and Bioinformatics and is under review.

This repo contains two different implementations: i) Deep Embedding Networks (DEC) and Convolutional Embedded Networks (CDEC) in Keras, and ii) Spark and H2O implementations of our paper "Recurrent Deep Embedding Networks for Genotype Clustering and Ethnicity Prediction".

## Implementation details
The proof of concept of our approach is implemented in Spark, ADAM, and Keras. In particular, for scalable and fast preprocessing of the huge number of genetic variants across all the chromosomes (i.e., 870 GB of data), we use ADAM and Spark to convert the genetic variants from VCF format into a Spark DataFrame. We then convert the Spark DataFrame into NumPy arrays. Finally, we use Keras to implement the CDEC and Conv-LSTM networks for population-scale clustering and ancestry inference, respectively.
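To make the Spark-to-Keras hand-off concrete, below is a minimal sketch (not the repository's exact `misc.py` code) of loading the CSV written by `featureExtractor.scala` into NumPy arrays. The column layout (label first, then per-variant alternate-allele counts, no header row) follows the feature extractor above; the square reshape mirrors the 1 x 67 x 67 input declared in `archs/genome.json` and is an illustrative assumption:

```python
import glob
import numpy as np
import pandas as pd

# Spark writes a directory of part files; pick up the single coalesced CSV.
part = glob.glob("results/train.csv/part-*.csv")[0]
df = pd.read_csv(part, header=None)

labels = df.iloc[:, 0].values                       # population label per sample
features = df.iloc[:, 1:].values.astype("float32")  # one column per variant

# Pad the flat feature vector up to a square "image" for the convolutional
# networks, e.g. 1 x 67 x 67 = 4489 values as in archs/genome.json.
side = int(np.ceil(np.sqrt(features.shape[1])))
padded = np.zeros((features.shape[0], side * side), dtype="float32")
padded[:, :features.shape[1]] = features
images = padded.reshape(-1, 1, side, side)
print(labels.shape, images.shape)
```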
Experiments were carried out on a computing cluster with 32 cores running 64-bit Ubuntu 14.04. The software stack consisted of Apache Spark v2.3.0, H2O v3.14.0.1, Sparkling Water v1.2.5, ADAM v0.22.0, and Keras v2.0.9 with the TensorFlow backend. We compare our approach with state-of-the-art tools such as ADMIXTURE and VariantSpark.

### CDEC implementation in Python with Keras
Refer to https://github.com/rezacsedu/Recurrent-Deep-Embedding-Networks/tree/master/CDEC for more details. Network training was carried out on an Nvidia TitanX GPU with CUDA and cuDNN enabled to make the overall pipeline faster.

#### Step 1: Feature extraction using Scala, ADAM, and Spark
First, download the VCF files (containing the variants) and the panel file (containing the labels) from ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/.

Then go to https://github.com/rezacsedu/VariationDEC/tree/master/PopulationClustering_v2 and use featureExtractor.scala to extract the features and save them as a DataFrame in CSV format to be used by the Keras-based DEC.

For this, make sure that you've configured Spark correctly on your machine. Alternatively, execute the script as a standalone Scala project from Eclipse or IntelliJ IDEA.

#### Step 2: The CDEC part in Keras
Go to https://github.com/rezacsedu/Recurrent-Deep-Embedding-Networks/tree/master/CDEC. There you will find several Python scripts and a sample genetic-variant feature file in CSV format for the clustering and classification:

- genome.csv: the sample genetic-variant features
- customlayers.py: defines the custom clustering layer in Keras
- keras_unpooling.py: performs the unpooling operation for the convolutional autoencoder part of the network
- misc.py: contains the data-preparation helper modules
- network.py: builds the CDEC network for the clustering
- main.py: the main script that encapsulates all the steps

#### Instruction

#### Acknowledgement
This implementation is loosely based on https://github.com/elieJalbout/Clustering-with-Deep-learning.

### DEC implementation in Python
A modified version of the Keras-based implementation (https://github.com/XifengGuo/DEC-keras) of the DEC algorithm proposed by Xie et al. is used in our approach. Network training was carried out on an Nvidia TitanX GPU with CUDA and cuDNN enabled to make the overall pipeline faster.

#### Step 1: Feature extraction using Scala, ADAM, and Spark
First, download the VCF files (containing the variants) and the panel file (containing the labels) from ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/.

Then go to https://github.com/rezacsedu/VariationDEC/tree/master/PopulationClustering_v2 and use featureExtractor.scala to extract the features and save them as a DataFrame in CSV format to be used by the Keras-based DEC.

For this, make sure that you've configured Spark correctly on your machine. Alternatively, execute the script as a standalone Scala project from Eclipse or IntelliJ IDEA.

#### Step 2: The DEC part in Keras/Python
Go to https://github.com/rezacsedu/VariationDEC/tree/master/DEC_GenotypeClustering_Keras. There you will find two Python scripts and a sample genetic-variant feature file in CSV format for the clustering and classification, respectively:

- genome.csv: the sample genetic-variant features
- DEC_Genotype_Clustering.py: for the clustering (a sketch of its core clustering layer follows below)
- LSTM_EthnicityPrediction.py: for the classification
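At the heart of DEC (and CDEC) is a clustering layer that converts the distances between embedded points and trainable cluster centres into soft assignments using a Student's t-kernel. The following is a minimal Keras sketch of that computation, written against Keras 2.0.x. It is a simplified stand-in for the clustering layers in this repo, not the exact code; the `SoftAssignment` name and the `glorot_uniform` initializer are illustrative choices:

```python
from keras import backend as K
from keras import layers

class SoftAssignment(layers.Layer):
    """Soft assignment q_ij = (1 + ||z_i - mu_j||^2)^-1, normalized over clusters j
    (Student's t-kernel with one degree of freedom, as in the DEC paper)."""

    def __init__(self, n_clusters, **kwargs):
        self.n_clusters = n_clusters
        super(SoftAssignment, self).__init__(**kwargs)

    def build(self, input_shape):
        latent_dim = input_shape[1]
        # The cluster centres are the layer's trainable weights, typically
        # initialized from a k-means run on the embedded training data.
        self.clusters = self.add_weight(name="clusters",
                                        shape=(self.n_clusters, latent_dim),
                                        initializer="glorot_uniform")
        super(SoftAssignment, self).build(input_shape)

    def call(self, z):
        # Squared Euclidean distance between every embedding and every centre:
        # (batch, 1, latent) - (n_clusters, latent) broadcasts to (batch, n_clusters, latent)
        dist = K.sum(K.square(K.expand_dims(z, axis=1) - self.clusters), axis=2)
        q = 1.0 / (1.0 + dist)
        return q / K.sum(q, axis=1, keepdims=True)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.n_clusters)
```

Training then minimizes the KL divergence between q and a sharpened target distribution p derived from q, which iteratively purifies the clusters.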
### Spark and H2O implementation in Scala
First, download the VCF files (containing the variants) and the panel file (containing the labels) from ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/. Then go to https://github.com/rezacsedu/VariationDEC/tree/master/PopulationClustering_v2, where you'll find the Scala scripts listed below:

- PopGenomicsClassificationSpark.scala: the Spark implementation of ethnicity prediction
- PopStratClassification.scala: the H2O implementation of ethnicity prediction
- PopStratClustering.scala: the H2O/Spark implementation of genotype clustering using K-means

For this, make sure that you've configured Spark and ADAM (see https://github.com/bigdatagenomics/adam) correctly on your machine. Alternatively, execute the scripts as a standalone Scala project from Eclipse or IntelliJ IDEA.

### Citation request
    @article{karim2018recurrent,
      title={Recurrent Deep Embedding Networks for Genotype Clustering and Ethnicity Prediction},
      author={Karim, Md and Cochez, Michael and Beyan, Oya Deniz and Zappa, Achille and Sahay, Ratnesh and Decker, Stefan and Rebholz-Schuhmann, Dietrich and others},
      journal={arXiv preprint arXiv:1805.12218},
      year={2018}
    }

### Contributing
For any questions, feel free to open an issue or contact us at rezaul.karim@rwth-aachen.de
--------------------------------------------------------------------------------