├── CDEC
│   ├── archs
│   │   ├── cancer.json
│   │   └── genome.json
│   ├── customlayers.py
│   ├── genome
│   │   └── last_numeric.csv
│   ├── keras_unpooling.py
│   ├── main.py
│   ├── misc.py
│   ├── network.py
│   ├── plots
│   │   └── genome
│   │       ├── autoencoder.png
│   │       ├── clustered_kld.png
│   │       ├── clustered_km.png
│   │       └── raw.png
│   └── self.trainAutoencoder.png
├── DEC_GenotypeClustering_Keras
│   ├── DEC_Genotype_Clustering.py
│   ├── LSTM_EthnicityPrediction.py
│   └── genome.csv
├── PopulationClustering_v2
│   ├── output_1.txt
│   ├── pom.xml
│   ├── results
│   │   ├── train.csv
│   │   ├── DEC_Genotype_Clustering.py
│   │   ├── LSTM_EthnicityPrediction.py
│   │   ├── genome.csv
│   │   └── part-00000-2c4830b2-4c39-48fc-909d-4868a1164190-c000.csv
│   ├── src
│   │   └── main
│   │       └── scala
│   │           └── org
│   │               └── fit
│   │                   └── genomics
│   │                       ├── PopGenomicsClassificationSpark.scala
│   │                       ├── PopStratClassification.scala
│   │                       ├── PopStratClustering.scala
│   │                       └── featureExtractor.scala
│   └── target
│       ├── classes
│       │   └── META-INF
│       │       ├── MANIFEST.MF
│       │       └── maven
│       │           └── com.deri.sels
│       │               └── PopulationClustering_v2
│       │                   ├── pom.properties
│       │                   └── pom.xml
│       └── maven-archiver
│           └── pom.properties
└── README.md
/CDEC/archs/cancer.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "name": "c-3-32_p_c-3-64_p_fc-32",
4 | "batch_size": 50,
5 | "layers": [
6 | {
7 | "type": "Input",
8 | "output_shape": [
9 | 1,
10 | 127,
11 | 127
12 | ]
13 | },
14 | {
15 | "type": "Conv2D",
16 | "num_filters": 32,
17 | "filter_size": [
18 | 3,
19 | 3
20 | ],
21 | "non_linearity": "rectify",
22 | "conv_mode": "same"
23 | },
24 | {
25 | "type": "MaxPool2D",
26 | "filter_size": [
27 | 2,
28 | 2
29 | ]
30 | },
31 | {
32 | "type": "Conv2D",
33 | "num_filters": 64,
34 | "filter_size": [
35 | 3,
36 | 3
37 | ],
38 | "non_linearity": "rectify",
39 | "conv_mode": "same"
40 | },
41 | {
42 | "type": "MaxPool2D",
43 | "filter_size": [
44 | 2,
45 | 2
46 | ]
47 | },
48 | {
49 | "type": "Dense",
50 | "num_units": 3136,
51 | "non_linearity": "rectify"
52 | },
53 | {
54 | "type": "Dense",
55 | "num_units": 32,
56 | "non_linearity": "rectify"
57 | }
58 | ]
59 | },
60 | {
61 | "name": "c-5-6_p_c-5-16_p_c-4-120",
62 | "use_batch_norm": 1,
63 | "batch_size": 100,
64 | "layers": [
65 | {
66 | "type": "Input",
67 | "output_shape": [
68 | 1,
69 | 127,
70 | 127
71 | ]
72 | },
73 | {
74 | "type": "Conv2D",
75 | "num_filters": 50,
76 | "filter_size": [
77 | 4,
78 | 4
79 | ],
80 | "non_linearity": "rectify"
81 | },
82 | {
83 | "type": "MaxPool2D*",
84 | "filter_size": [
85 | 2,
86 | 2
87 | ]
88 | },
89 | {
90 | "type": "Conv2D",
91 | "num_filters": 50,
92 | "filter_size": [
93 | 3,
94 | 3
95 | ],
96 | "non_linearity": "rectify"
97 | },
98 | {
99 | "type": "MaxPool2D*",
100 | "filter_size": [
101 | 2,
102 | 2
103 | ]
104 | },
105 | {
106 | "type": "Conv2D",
107 | "num_filters": 120,
108 | "filter_size": [
109 | 2,
110 | 2
111 | ],
112 | "non_linearity": "linear"
113 | }
114 | ]
115 | }
116 |
117 | ]
118 |
--------------------------------------------------------------------------------
/CDEC/archs/genome.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "name": "c-3-32_p_c-3-64_p_fc-32",
4 | "batch_size": 32,
5 | "layers": [
6 | {
7 | "type": "Input",
8 | "output_shape": [
9 | 1,
10 | 67,
11 | 67
12 | ]
13 | },
14 | {
15 | "type": "Conv2D",
16 | "num_filters": 32,
17 | "filter_size": [
18 | 3,
19 | 3
20 | ],
21 | "non_linearity": "rectify",
22 | "conv_mode": "same"
23 | },
24 | {
25 | "type": "MaxPool2D",
26 | "filter_size": [
27 | 2,
28 | 2
29 | ]
30 | },
31 | {
32 | "type": "Conv2D",
33 | "num_filters": 64,
34 | "filter_size": [
35 | 3,
36 | 3
37 | ],
38 | "non_linearity": "rectify",
39 | "conv_mode": "same"
40 | },
41 | {
42 | "type": "MaxPool2D",
43 | "filter_size": [
44 | 2,
45 | 2
46 | ]
47 | },
48 | {
49 | "type": "Dense",
50 | "num_units": 3136,
51 | "non_linearity": "rectify"
52 | },
53 | {
54 | "type": "Dense",
55 | "num_units": 32,
56 | "non_linearity": "rectify"
57 | }
58 | ]
59 | },
60 | {
61 | "name": "c-5-6_p_c-5-16_p_c-4-120",
62 | "use_batch_norm": 1,
63 | "batch_size": 32,
64 | "layers": [
65 | {
66 | "type": "Input",
67 | "output_shape": [
68 | 1,
69 | 67,
70 | 67
71 | ]
72 | },
73 | {
74 | "type": "Conv2D",
75 | "num_filters": 50,
76 | "filter_size": [
77 | 5,
78 | 5
79 | ],
80 | "non_linearity": "rectify"
81 | },
82 | {
83 | "type": "MaxPool2D*",
84 | "filter_size": [
85 | 2,
86 | 2
87 | ]
88 | },
89 | {
90 | "type": "Conv2D",
91 | "num_filters": 50,
92 | "filter_size": [
93 | 5,
94 | 5
95 | ],
96 | "non_linearity": "rectify"
97 | },
98 | {
99 | "type": "MaxPool2D*",
100 | "filter_size": [
101 | 2,
102 | 2
103 | ]
104 | },
105 | {
106 | "type": "Conv2D",
107 | "num_filters": 32,
108 | "filter_size": [
109 | 2,
110 | 2
111 | ],
112 | "non_linearity": "linear"
113 | }
114 | ]
115 | }
116 | ]
117 |
--------------------------------------------------------------------------------
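Note: a minimal sketch (mine, not part of the repo) of how an architecture file like the two above is consumed. CDEC/main.py reads the JSON with json.load and picks one entry by the -a/--architecture index:

```python
# Load an architecture spec and summarize its layers, mirroring the
# json.load / archs[arch_index] pattern used in CDEC/main.py.
import json

with open("archs/genome.json") as f:
    archs = json.load(f)

arch = archs[0]  # corresponds to -a 0 on the CLI
print(arch["name"], "- batch_size:", arch["batch_size"])
for layer in arch["layers"]:
    params = {k: v for k, v in layer.items() if k != "type"}
    print("  %-10s %s" % (layer["type"], params))
```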
/CDEC/customlayers.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Jul 25, 2017
3 | '''
4 |
5 | from keras import backend as K
6 | from keras import layers
7 |
8 |
9 | # Custom layers used by the CDEC networks
10 | class Unpool2DLayer(layers.Layer):
11 | """
12 |     This layer performs unpooling over the two spatial dimensions
13 |     of a 4D tensor (axes 1 and 2 in channels-last ordering).
14 | Layer borrowed from: https://swarbrickjones.wordpress.com/2015/04/29/convolutional-autoencoders-in-pythontheanolasagne/
15 | """
16 |
17 |     def __init__(self, incoming, ds, **kwargs):  # `incoming` is a Lasagne leftover; only `ds` is used
18 |         self.ds = ds  # upscale factor as (row_factor, col_factor)
19 |         super(Unpool2DLayer, self).__init__(**kwargs)
20 |
21 | def compute_output_shape(self, input_shape):
22 | output_shape = list(input_shape)
23 | output_shape[1] = input_shape[1] * self.ds[0]
24 | output_shape[2] = input_shape[2] * self.ds[1]
25 | return tuple(output_shape)
26 |
27 |     def call(self, incoming, **kwargs):
28 |         '''
29 |         Repeats each input element along the two spatial axes to produce the upscaled image
30 |         '''
31 | repaxis2 = K.repeat_elements(incoming,self.ds[0], axis=1)
32 | Unpool_layer = K.repeat_elements(repaxis2,self.ds[1], axis=2)
33 | return Unpool_layer
34 |
35 |
36 | class ClusteringLayer(layers.Layer):
37 | '''
38 | This layer gives soft assignments for the clusters based on distance from k-means based
39 | cluster centers. The weights of the layers are the cluster centers so that they can be learnt
40 | while optimizing for loss
41 | '''
42 | def __init__(self,num_of_clusters, num_samples,latent_space_dim,**kwargs):
43 | self.num_of_clusters = num_of_clusters
44 | #self.alpha = alpha
45 | #self.cluster_centers = cluster_centers
46 | self.num_samples = num_samples
47 | self.latent_space_dim = latent_space_dim
48 | #self.intial_clusters = intial_clusters
49 | super(ClusteringLayer, self).__init__(**kwargs)
50 | def build(self,intial_clusters_shape):
51 | # Create a trainable weight variable for this layer.
52 | self.W = self.add_weight(name='W',
53 | shape=intial_clusters_shape,
54 | initializer='glorot_uniform',
55 | trainable=True)
56 | super(ClusteringLayer, self).build(intial_clusters_shape) # Be sure to call this at the end
57 |
58 |
59 | def call(self,incoming,**kwargs):
60 |
61 | return getSoftAssignments(incoming,self.W,self.num_of_clusters,self.num_samples, self.latent_space_dim)
62 | def compute_output_shape(self, input_shape):
63 | return (input_shape[0], self.num_of_clusters)
64 |
65 | def get_config(self):
66 |         config = {'num_of_clusters': self.num_of_clusters, 'num_samples': self.num_samples, 'latent_space_dim': self.latent_space_dim}  # W is a weight, restored via set_weights, so it does not belong in the config
67 | base_config = super(ClusteringLayer, self).get_config()
68 | return dict(list(base_config.items()) + list(config.items()))
69 |
70 | def getSoftAssignments(latent_space, cluster_centers, num_clusters,num_samples,latent_space_dim):
71 | '''
72 | Returns cluster membership distribution for each sample
73 | :param latent_space: latent space representation of inputs
74 | :param cluster_centers: the coordinates of cluster centers in latent space
75 | :param num_clusters: total number of clusters
76 | :param latent_space_dim: dimensionality of latent space
77 | :param num_samples: total number of input samples
78 |     :return: soft assignment based on the equation q_ij = (1+|z_i - u_j|^2)^(-1) / sum_j'((1+|z_i - u_j'|^2)^(-1))
79 | '''
80 | z_expanded = K.reshape(latent_space,shape=(num_samples,1,latent_space_dim,))
81 | z_expanded = K.tile(z_expanded, (1,num_clusters,1))
82 | u_expanded = K.tile(K.expand_dims(cluster_centers,0), [num_samples, 1, 1])#[1, 10,120] after expand_dims #[100,10,120] after tile
83 | distances_from_cluster_centers = K.sqrt(K.sum((z_expanded - u_expanded)**2,axis=2))#K.norm((z_expanded - u_expanded),2,axis=2)
84 | qij_numerator = 1 + distances_from_cluster_centers**2
85 | qij_numerator = 1 / qij_numerator
86 | normalizer_q = K.sum(qij_numerator, axis=1)
87 | normalizer_q = K.reshape(normalizer_q,(num_samples, 1))
88 | #print((qij_numerator/normalizer_q).shape)
89 | return qij_numerator/normalizer_q
90 |
91 |
92 |
93 |
94 |
--------------------------------------------------------------------------------
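As a sanity check on the soft-assignment formula implemented by getSoftAssignments above, here is a small self-contained NumPy sketch (toy z and u values of my own) that computes the same q_ij and verifies each row is a probability distribution:

```python
# Toy NumPy version of the soft assignment:
# q_ij = (1 + |z_i - u_j|^2)^(-1) / sum_j' (1 + |z_i - u_j'|^2)^(-1)
import numpy as np

num_samples, num_clusters, latent_dim = 4, 3, 2
rng = np.random.RandomState(0)
z = rng.randn(num_samples, latent_dim)   # latent representations
u = rng.randn(num_clusters, latent_dim)  # cluster centers

dist_sq = ((z[:, None, :] - u[None, :, :]) ** 2).sum(axis=2)  # (samples, clusters)
q = 1.0 / (1.0 + dist_sq)
q /= q.sum(axis=1, keepdims=True)  # normalize per sample

print(np.round(q, 3))
assert np.allclose(q.sum(axis=1), 1.0)  # each row sums to 1
```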
/CDEC/keras_unpooling.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | from keras.layers.convolutional import UpSampling2D
3 | from keras.layers.convolutional import MaxPooling2D
4 | class MaxPoolingMask2D(MaxPooling2D):
5 | def __init__(self, pool_size=(2, 2), strides=None, border_mode='valid',
6 | dim_ordering='default', **kwargs):
7 | super(MaxPoolingMask2D, self).__init__(pool_size, strides, border_mode,
8 | dim_ordering, **kwargs)
9 |
10 | def _pooling_function(self, inputs, pool_size, strides,
11 | border_mode, dim_ordering):
12 | pooled = K.pool2d(inputs, pool_size, strides, border_mode,
13 | dim_ordering, pool_mode='max')
14 | upsampled = UpSampling2D(size=pool_size)(pooled)
15 | indexMask = K.tf.equal(inputs, upsampled)
16 | assert indexMask.get_shape().as_list() == inputs.get_shape().as_list()
17 | return indexMask
18 |
19 | def get_output_shape_for(self, input_shape):
20 | return input_shape
21 |
22 |
23 | def unpooling(inputs):
24 | '''
25 | do unpooling with indices, move this to separate layer if it works
26 | 1. do naive upsampling (repeat elements)
27 | 2. keep only values in mask (stored indices) and set the rest to zeros
28 | '''
29 | x = inputs[0]
30 | mask = inputs[1]
31 | mask_shape = mask.get_shape().as_list()
32 | x_shape = x.get_shape().as_list()
33 |     pool_size = (mask_shape[1] // x_shape[1], mask_shape[2] // x_shape[2])  # integer division: UpSampling2D expects int factors
34 | on_success = UpSampling2D(size=pool_size)(x)
35 | on_fail = K.zeros_like(on_success)
36 | return K.tf.where(mask, on_success, on_fail)
37 |
38 |
39 | def unpooling_output_shape(input_shape):
40 |     return input_shape[1]  # the mask's shape, i.e. the unpooled (upsampled) size
41 |
--------------------------------------------------------------------------------
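The mask-based unpooling above can be illustrated without Keras. A toy NumPy sketch (my own simplification: a single 2x2 map pooled down to one value) of the two steps described in the docstring, naive upsampling followed by masking:

```python
# Mask-based unpooling in miniature: repeat the pooled value everywhere,
# then keep it only where the original input attained the maximum.
import numpy as np

x = np.array([[1., 3.],
              [4., 2.]])                 # 2x2 input, max-pooled to a single value
pooled = np.array([[x.max()]])           # 1x1 pooled map

upsampled = np.repeat(np.repeat(pooled, 2, axis=0), 2, axis=1)  # naive upsampling
mask = (x == upsampled)                  # True only at the max position

unpooled = np.where(mask, upsampled, 0.0)
print(unpooled)
# [[0. 0.]
#  [4. 0.]]
```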
/CDEC/main.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Jul 9, 2017
3 | '''
4 | import numpy
5 | import json
6 | from misc import DatasetHelper, evaluateKMeans, visualizeData
7 | from network import DCJC, rootLogger
8 | from copy import deepcopy
9 | import argparse
10 | import os
11 | import tensorflow as tf
12 | import keras.backend as K
13 | K.set_image_dim_ordering('tf')
14 |
15 | def testOnlyClusterInitialization(dataset_name, arch, epochs):
16 | '''
17 |     Builds an autoencoder defined by architecture arch and trains it on the given dataset
18 |     :param dataset_name: Name of the dataset with which the network will be trained [MNIST, COIL20, cancer]
19 | :param arch: Architecture of the network as a dictionary. Specification for architecture can be found in readme.md
20 | :param epochs: Number of train epochs
21 | :return: None - (side effect) saves the latent space and params of trained network in an appropriate location in saved_params folder
22 | '''
23 | arch_copy = deepcopy(arch)
24 | rootLogger.info("Loading dataset")
25 | dataset = DatasetHelper(dataset_name)
26 | dataset.loadDataset()
27 | rootLogger.info("Done loading dataset")
28 | rootLogger.info("Creating network")
29 | dcjc = DCJC(arch_copy)
30 | rootLogger.info("Done creating network")
31 | rootLogger.info("Starting training")
32 |     dcjc.pretrainWithData(dataset, epochs, False)
33 |
34 |
35 | def testOnlyClusterImprovement(dataset_name, arch, epochs, method):
36 | '''
37 | Use an initialized autoencoder and train it along with clustering loss. Assumed that pretrained autoencoder params
38 | are available, i.e. testOnlyClusterInitialization has been run already with the given params
39 | :param dataset_name: Name of the dataset with which the network will be trained [MNIST, COIL20]
40 | :param arch: Architecture of the network as a dictionary. Specification for architecture can be found in readme.md
41 | :param epochs: Number of train epochs
42 |     :param method: 'KM' or 'KLD' - whether the clustering loss is the KL-divergence between the current soft-assignment distribution (Q) and a sharpened target derived from it (Q^2), or just the k-means loss
43 | :return: None - (side effect) saves latent space and params of the trained network
44 | '''
45 | arch_copy = deepcopy(arch)
46 | rootLogger.info("Loading dataset")
47 | dataset = DatasetHelper(dataset_name)
48 | dataset.loadDataset()
49 | rootLogger.info("Done loading dataset")
50 | rootLogger.info("Creating network")
51 | dcjc = DCJC(arch_copy)
52 | rootLogger.info("Starting cluster improvement")
53 | if method == 'KM':
54 | dcjc.doClusteringWithKMeansLoss(dataset, epochs)
55 | elif method == 'KLD':
56 | dcjc.doClusteringWithKLdivLoss(dataset, True, epochs)
57 |
58 |
59 | def testKMeans(dataset_name, archs):
60 | '''
61 |     Performs k-means clustering and reports metrics on the latent spaces produced by the networks defined in archs
62 |     for the given dataset. Assumes that testOnlyClusterInitialization and testOnlyClusterImprovement have been run before
63 | this for the specified archs/datasets, as the results saved by them are used for clustering
64 | :param dataset_name: Name of dataset [MNIST, COIL20]
65 | :param archs: Architectures as a dictionary
66 | :return: None - reports the accuracy and nmi clustering metrics
67 | '''
68 | rootLogger.info('Initial Cluster Quality Comparison')
69 | rootLogger.info(80 * '_')
70 | rootLogger.info('%-50s %8s %8s' % ('method', 'ACC', 'NMI'))
71 | rootLogger.info(80 * '_')
72 | dataset = DatasetHelper(dataset_name)
73 | dataset.loadDataset()
74 | rootLogger.info(evaluateKMeans(dataset.input_flat, dataset.labels, dataset.getClusterCount(), 'image')[0])
75 | for arch in archs:
76 | Z = numpy.load('saved_params/' + dataset.name + '/z_' + arch['name'] + '.npy')
77 | rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), arch['name'])[0])
78 | Z = numpy.load('saved_params/' + dataset.name + '/pc_z_' + arch['name'] + '.npy')
79 | rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), arch['name'])[0])
80 | Z = numpy.load('saved_params/' + dataset.name + '/pc_km_z_' + arch['name'] + '.npy')
81 | rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), arch['name'])[0])
82 | rootLogger.info(80 * '_')
83 |
84 |
85 | def visualizeLatentSpace(dataset_name, arch):
86 | '''
87 | Plots and saves graphs for visualized images space, autoencoder latent space, and the final clustering latent space
88 | :param dataset_name: Name of dataset [MNIST, COIL20]
89 | :param arch: Architectures as a dictionary
90 | :return: None - (side effect) saved graphs in plots/ folder
91 | '''
92 | rootLogger.info("Loading dataset")
93 | dataset = DatasetHelper(dataset_name)
94 | dataset.loadDataset()
95 | rootLogger.info("Done loading dataset")
96 |     # We consider only the first 5000 points (or fewer) for better visualization
97 | max_points = min(dataset.input_flat.shape[0], 5000)
98 | # Image space
99 | visualizeData(dataset.input_flat[0:max_points], dataset.labels[0:max_points], dataset.getClusterCount(), "plots/%s/raw.png" % dataset.name)
100 | # Latent space - autoencoder
101 | Z = numpy.load('saved_params/' + dataset.name + '/z_' + arch['name'] + '.npy')
102 | visualizeData(Z[0:max_points], dataset.labels[0:max_points], dataset.getClusterCount(), "plots/%s/autoencoder.png" % dataset.name)
103 | # Latent space - kl div clustering network
104 | Z = numpy.load('saved_params/' + dataset.name + '/pc_z_' + arch['name'] + '.npy')
105 | visualizeData(Z[0:max_points], dataset.labels[0:max_points], dataset.getClusterCount(), "plots/%s/clustered_kld.png" % dataset.name)
106 | # Latent space - kmeans clustering network
107 | Z = numpy.load('saved_params/' + dataset.name + '/pc_km_z_' + arch['name'] + '.npy')
108 | visualizeData(Z[0:max_points], dataset.labels[0:max_points], dataset.getClusterCount(), "plots/%s/clustered_km.png" % dataset.name)
109 |
110 |
111 | if __name__ == '__main__':
112 | '''
113 | usage: main.py [-h] -d DATASET -a ARCHITECTURE [--pretrain PRETRAIN]
114 |                    [-m METHOD] [--cluster CLUSTER] [--metrics] [--visualize]
115 |
116 | required arguments:
117 | -d DATASET, --dataset DATASET
118 |                         Dataset on which autoencoder is trained [MNIST, COIL20, cancer]
119 | -a ARCHITECTURE, --architecture ARCHITECTURE
120 | Index of architecture of autoencoder in the json file
121 |                         (archs/); use -m/--method to choose the loss (KLD or KM)
122 |
123 | optional arguments:
124 | -h, --help show this help message and exit
125 | --pretrain PRETRAIN Pretrain the autoencoder for specified #epochs
126 | specified by architecture on specified dataset
127 | --cluster CLUSTER Refine the autoencoder for specified #epochs with
128 | clustering loss, assumes that pretraining results are
129 | available
130 |   --metrics             Report k-means clustering metrics on the clustered
131 |                         latent space, assumes pretrain and cluster based
132 |                         training have been performed
133 |   --visualize           Visualize the image space and latent space, assumes
134 |                         pretraining and cluster based training have been
135 |                         performed
136 |
137 | '''
138 | # Load architectures from the json files
139 | mnist_archs = []
140 | coil_archs = []
141 | cancer_archs = []
142 | with open("archs/coil.json") as archs_file:
143 | coil_archs = json.load(archs_file)
144 | with open("archs/mnist.json") as archs_file:
145 | mnist_archs = json.load(archs_file)
146 | with open("archs/cancer.json") as archs_file:
147 | cancer_archs = json.load(archs_file)
148 |
149 | # Argument parsing
150 | parser = argparse.ArgumentParser()
151 | requiredArgs = parser.add_argument_group('required arguments')
152 | requiredArgs.add_argument("-d", "--dataset", help="Dataset on which autoencoder is trained [MNIST, COIL20, cancer]", required=True)
153 | requiredArgs.add_argument("-a", "--architecture", type=int, help="Index of architecture of autoencoder in the json file (archs/)", required=True)
154 | requiredArgs.add_argument("-m", "--method", help="type of loss KLD or KM")
155 | parser.add_argument("--pretrain", type=int, help="Pretrain the autoencoder for specified #epochs specified by architecture on specified dataset")
156 | parser.add_argument("--cluster", type=int, help="Refine the autoencoder for specified #epochs with clustering loss, assumes that pretraining results are available")
157 | parser.add_argument("--metrics", action='store_true', help="Report k-means clustering metrics on the clustered latent space, assumes pretrain and cluster based training have been performed")
158 | parser.add_argument("--visualize", action='store_true', help="Visualize the image space and latent space, assumes pretraining and cluster based training have been performed")
159 | args = parser.parse_args()
160 |
161 | # Train/Visualize as per the arguments
162 | dataset_name = args.dataset
163 | loss = args.method
164 | arch_index = args.architecture
165 |
166 | if dataset_name == 'MNIST':
167 | archs = mnist_archs
168 | elif dataset_name == 'COIL20':
169 | archs = coil_archs
170 | elif dataset_name == 'cancer':
171 | archs = cancer_archs
172 |
173 | if args.pretrain:
174 | testOnlyClusterInitialization(dataset_name, archs[arch_index], args.pretrain)
175 |     if args.cluster and loss == 'KLD':
176 |         testOnlyClusterImprovement(dataset_name, archs[arch_index], args.cluster, loss)
177 |     elif args.cluster and loss == 'KM':
178 |         testOnlyClusterImprovement(dataset_name, archs[arch_index], args.cluster, loss)
179 |     elif args.cluster:
180 |         print("Please specify the loss type with -m: either KLD or KM")
181 | if args.metrics:
182 | testKMeans(dataset_name, [archs[arch_index]])
183 | if args.visualize:
184 | visualizeLatentSpace(dataset_name, archs[arch_index])
185 |
--------------------------------------------------------------------------------
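For completeness, a minimal sketch (assumptions: the CDEC directory is on sys.path and the arch/dataset files referenced above exist locally) of driving the same pipeline programmatically instead of through the CLI:

```python
# Programmatic equivalent of:
#   python main.py -d cancer -a 0 --pretrain 100
#   python main.py -d cancer -a 0 -m KLD --cluster 50
#   python main.py -d cancer -a 0 --metrics --visualize
import json
from main import (testOnlyClusterInitialization, testOnlyClusterImprovement,
                  testKMeans, visualizeLatentSpace)

with open("archs/cancer.json") as f:
    archs = json.load(f)

arch = archs[0]
testOnlyClusterInitialization("cancer", arch, 100)     # pretrain the autoencoder
testOnlyClusterImprovement("cancer", arch, 50, "KLD")  # refine with clustering loss
testKMeans("cancer", [arch])                           # report ACC/NMI
visualizeLatentSpace("cancer", arch)                   # save t-SNE plots
```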
/CDEC/misc.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on Jul 11, 2017
3 | '''
4 |
5 | import _pickle as cPickle
6 | import _pickle
7 | import gzip
8 |
9 | import numpy as np
10 | from PIL import Image
11 | import matplotlib
12 |
13 | # For plotting graphs via ssh with no display
14 | # Ref: https://stackoverflow.com/questions/2801882/generating-a-png-with-matplotlib-when-display-is-undefined
15 | matplotlib.use('Agg')
16 |
17 | from matplotlib import pyplot as plt
18 | from numpy import float32
19 | from sklearn import metrics
20 | from sklearn.cluster.k_means_ import KMeans
21 | from sklearn import manifold
22 | from sklearn.utils.linear_assignment_ import linear_assignment
23 | from sklearn import preprocessing
24 | import os
25 | from keras.preprocessing.image import load_img
26 |
27 | from skimage import transform
46 |
47 | import tensorflow as tf
48 | import keras.backend as K
49 | K.set_image_dim_ordering('tf')
50 |
51 |
52 | class DatasetHelper(object):
53 | '''
54 | Utility class for handling different datasets
55 | '''
56 |
57 | def __init__(self, name):
58 | '''
59 | A dataset instance keeps dataset name, the input set, the flat version of input set
60 | and the cluster labels
61 | '''
62 | self.name = name
63 | if name == 'MNIST':
64 | self.dataset = MNISTDataset()
65 | elif name == 'STL':
66 | self.dataset = STLDataset()
67 | elif name == 'COIL20':
68 | self.dataset = COIL20Dataset()
69 | elif name == 'cancer': # added by Sher
70 | self.dataset = CANCERDataset()
71 |
72 | def loadDataset(self):
73 | '''
74 | Load the appropriate dataset based on the dataset name
75 | '''
76 | self.input, self.labels, self.input_flat = self.dataset.loadDataset()
77 |
78 | def getClusterCount(self):
79 | '''
80 |         Number of clusters in the dataset - e.g. 10 for MNIST, 20 for COIL20
81 | '''
82 | return self.dataset.cluster_count
83 |
84 | def iterate_minibatches(self, set_type, batch_size, targets=None, shuffle=False):
85 | '''
86 | Utility method for getting batches out of a dataset
87 | :param set_type: IMAGE - suitable input for CNNs or FLAT - suitable for DNN
88 | :param batch_size: Size of minibatches
89 |         :param targets: None if the output should equal the input (autoencoders); otherwise a target array aligned with the dataset, i.e. the nth sample's output is the target's nth element
90 | :param shuffle: If the dataset needs to be shuffled or not
91 | :return: generates a batches of size batch_size from the dataset, each batch is the pair (input, output)
92 | '''
93 | inputs = None
94 | if set_type == 'IMAGE':
95 | inputs = self.input
96 | if targets is None:
97 | targets = self.input
98 | elif set_type == 'FLAT':
99 | inputs = self.input_flat
100 | if targets is None:
101 | targets = self.input_flat
102 | assert len(inputs) == len(targets)
103 | if shuffle:
104 | indices = np.arange(len(inputs))
105 | np.random.shuffle(indices)
106 | for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
107 | if shuffle:
108 | excerpt = indices[start_idx:start_idx + batch_size]
109 | else:
110 | excerpt = slice(start_idx, start_idx + batch_size)
111 | yield inputs[excerpt], targets[excerpt]
112 |
113 |
114 | class MNISTDataset(object):
115 | '''
116 | Class for reading and preparing MNIST dataset
117 | '''
118 |
119 | def __init__(self):
120 | self.cluster_count = 10
121 |
122 | def loadDataset(self):
123 | f = gzip.open('mnist/mnist.pkl.gz', 'rb')
124 | train_set, _, test_set = cPickle.load(f,encoding='latin1')
125 | train_input, train_input_flat, train_labels = self.prepareDatasetForAutoencoder(train_set[0], train_set[1])
126 | test_input, test_input_flat, test_labels = self.prepareDatasetForAutoencoder(test_set[0], test_set[1])
127 | f.close()
128 | # combine test and train samples
129 | return [np.concatenate((train_input, test_input)), np.concatenate((train_labels, test_labels)),
130 | np.concatenate((train_input_flat, test_input_flat))]
131 |
132 | def prepareDatasetForAutoencoder(self, inputs, targets):
133 | '''
134 | Returns the image, flat and labels as a tuple
135 | '''
136 | X = inputs
137 | X = X.reshape((-1,28, 28,1))
138 | return (X, X.reshape((-1, 28 * 28)), targets)
139 |
140 |
141 | class CANCERDataset1(object):
142 | '''
143 |     Class for reading and preparing the CANCER dataset (TCGA CSV variant)
144 | '''
145 |
146 | def __init__(self):
147 | self.cluster_count = 5
148 |
149 | def loadDataset(self):
150 |         import pandas as pd
151 |
152 |
153 | trainDF = pd.read_csv('cancer/TCGA_train.csv')
154 | train_labels = trainDF[trainDF.columns[-1]]
155 | train_labels = np.asarray(train_labels)
156 |
157 | train_features = trainDF.drop(trainDF.columns[-1],axis=1)
158 |         train_features = train_features.values.astype(np.float32)
159 | train_features = np.asarray([[train_features[row][col] for col in range(1,16130)] for row in range(599)])
160 | train_features = np.asarray(train_features)
161 |
162 | testDF = pd.read_csv('cancer/TCGA_test.csv')
163 | test_labels = testDF[testDF.columns[-1]]
164 | test_labels = np.asarray(test_labels)
165 |
166 | test_features = testDF.drop(testDF.columns[-1],axis=1)
167 |         test_features = test_features.values.astype(np.float32)
168 | test_features = np.asarray([[test_features[row][col] for col in range(1,16130)] for row in range(200)])
169 | test_features = np.asarray(test_features)
170 |
171 | train_input, train_input_flat, train_labels = self.prepareDatasetForAutoencoder(train_features, train_labels)
172 | test_input, test_input_flat, test_labels = self.prepareDatasetForAutoencoder(test_features, test_labels)
173 |
174 | # combine test and train samples
175 | return [np.concatenate((train_input, test_input)), np.concatenate((train_labels, test_labels)),
176 | np.concatenate((train_input_flat, test_input_flat))]
177 |
178 | def prepareDatasetForAutoencoder(self, inputs, targets):
179 | '''
180 | Returns the image, flat and labels as a tuple
181 | '''
182 | X = inputs
183 | X = X.reshape((-1, 127, 127, 1))
184 | return (X, X.reshape((-1, 127 * 127)), targets)
185 |
186 | class CANCERDataset(object):
187 | '''
188 | Class for reading and preparing CANCER dataset
189 | '''
190 | def __init__(self):
191 | self.cluster_count = 4
192 |
193 | def loadDataset(self):
194 | root ='/home/rkarim/Training_data/'
195 | features = []
196 | features_flat = []
197 | for rootName,dirName,fileNames in os.walk(root):
198 | if(not rootName == root):
199 | for fileName in fileNames:
200 | imgGray = load_img(rootName+'/'+fileName,color_mode='grayscale')
201 | transformed=transform.resize(np.array(imgGray),(512,512))
202 | features += [transformed.reshape((transformed.shape[0],transformed.shape[1]))]
203 | features_flat+=[transformed.reshape((transformed.shape[0]*transformed.shape[1]*1))]
204 | features=np.stack(features)
205 | features_flat = np.stack(features_flat)
206 |         labels = features  # unlabeled images: reuse the inputs as autoencoder targets
207 | return [np.concatenate((features, features),axis=0), np.concatenate((labels, labels),axis=0),
208 | np.concatenate((features_flat,features_flat),axis=0)]
209 |
210 | def loadDataset1(self):
211 |         import pandas as pd
212 |
213 |
214 | df = pd.read_csv('cancer/TCGA_train.csv')
215 | print(len(df.columns))
216 |
217 | labels = df[df.columns[-1]]
218 | features = df.drop(df.columns[-1],axis=1)
219 |         features = features.values.astype(np.float32)
220 | features = np.asarray([[features[row][col] for col in range(1,16130)] for row in range(599)])
221 | print("Is there any NaN value?")
222 | print(np.count_nonzero(np.isnan(features)))
223 |
224 | min_max_scaler = preprocessing.MinMaxScaler()
225 | train_input = min_max_scaler.fit_transform(features)
226 |         print(np.all(np.isfinite(train_input)))  # True if every value is finite
227 |
228 | train_input_flat = train_input
229 | train_input = train_input.reshape((-1, 127, 127, 1))
230 | train_input_flat = np.reshape(train_input, (-1, 127 * 127))
231 | train_labels = np.asarray(labels)
232 |
233 | df2 = pd.read_csv('cancer/TCGA_test.csv')
234 |         labels2 = df2[df2.columns[-1]]
235 | features2 = df2.drop(df2.columns[-1],axis=1)
236 |
237 |         features2 = features2.values.astype(np.float32)
238 | features2 = np.asarray([[features2[row][col] for col in range(1,16130)] for row in range(200)])
239 |
240 | test_input = np.asarray(features2)
241 |         print(np.all(np.isfinite(test_input)))  # True if every value is finite
242 |
243 | test_input = min_max_scaler.fit_transform(test_input)
244 | test_input_flat = test_input
245 | test_input = test_input.reshape((-1, 127, 127, 1))
246 | test_input_flat = np.reshape(test_input, (-1, 127 * 127))
247 | test_labels = np.asarray(labels2)
248 |
249 | # combine test and train samples
250 | return [np.concatenate((train_input, test_input)), np.concatenate((train_labels, test_labels)),
251 | np.concatenate((train_input_flat, test_input_flat))]
252 |
253 |
254 | class STLDataset(object):
255 | '''
256 | Class for preparing and reading the STL dataset
257 | '''
258 |
259 | def __init__(self):
260 | self.cluster_count = 10
261 |
262 |     def loadDataset(self):
263 |         train_x = np.fromfile('stl/train_X.bin', dtype=np.uint8)
264 |         train_y = np.fromfile('stl/train_y.bin', dtype=np.uint8)
265 |         test_x = np.fromfile('stl/test_X.bin', dtype=np.uint8)
266 |         test_y = np.fromfile('stl/test_y.bin', dtype=np.uint8)
267 |         train_input = np.reshape(train_x, (-1, 3, 96, 96))
268 |         train_labels = train_y
269 |         train_input_flat = np.reshape(train_x, (-1, 1, 3 * 96 * 96))
270 |         test_input = np.reshape(test_x, (-1, 3, 96, 96))
271 |         test_labels = test_y
272 |         test_input_flat = np.reshape(test_x, (-1, 1, 3 * 96 * 96))
273 |         return [np.concatenate((train_input, test_input)), np.concatenate((train_labels, test_labels)),
274 |                 np.concatenate((train_input_flat, test_input_flat))]
275 |
276 |
277 | class COIL20Dataset(object):
278 | '''
279 | Class for reading and preparing the COIL20Dataset
280 | '''
281 |
282 | def __init__(self):
283 | self.cluster_count = 20
284 |
285 | def loadDataset(self):
286 | train_x = np.load('coil/coil_X.npy').astype(np.float32) / 256.0
287 | train_y = np.load('coil/coil_y.npy')
288 | train_x_flat = np.reshape(train_x, (-1, 128 * 128))
289 | return [train_x, train_y, train_x_flat]
290 |
291 |
292 | def rescaleReshapeAndSaveImage(image_sample, out_filename):
293 | '''
294 | For saving the reconstructed output as an image
295 | :param image_sample: output of the autoencoder
296 | :param out_filename: filename for the saved image
297 | :return: None (side effect) Image saved
298 | '''
299 |     image_sample = ((image_sample - np.amin(image_sample)) / (np.amax(image_sample) - np.amin(image_sample))) * 255
300 | image_sample = np.rint(image_sample).astype(int)
301 | image_sample = np.clip(image_sample, a_min=0, a_max=255).astype('uint8')
302 | img = Image.fromarray(image_sample, 'L')
303 | img.save(out_filename)
304 |
305 |
306 | def cluster_acc(y_true, y_pred):
307 | '''
308 |     Uses the Hungarian algorithm to find the best permutation mapping between predicted and true labels,
309 |     and then calculates the accuracy w.r.t. this mapping.
310 |     Implementation inspired by https://github.com/piiswrong/dec, since scikit-learn does not implement this metric
311 | :param y_true: True cluster labels
312 | :param y_pred: Predicted cluster labels
313 | :return: accuracy score for the clustering
314 | '''
315 | D = int(max(y_pred.max(), y_true.max()) + 1)
316 | w = np.zeros((D, D), dtype=np.int32)
317 | for i in range(y_pred.size):
318 | idx1 = int(y_pred[i])
319 | idx2 = int(y_true[i])
320 | w[idx1, idx2] += 1
321 | ind = linear_assignment(w.max() - w)
322 | return sum([w[i, j] for i, j in ind]) * 1.0 / y_pred.size
323 |
324 |
325 | def getClusterMetricString(method_name, labels_true, labels_pred):
326 | '''
327 | Creates a formatted string containing the method name and acc, nmi metrics - can be used for printing
328 | :param method_name: Name of the clustering method (just for printing)
329 | :param labels_true: True label for each sample
330 | :param labels_pred: Predicted label for each sample
331 | :return: Formatted string containing metrics and method name
332 | '''
333 | acc = cluster_acc(labels_true, labels_pred)
334 | nmi = metrics.normalized_mutual_info_score(labels_true, labels_pred)
335 | return '%-50s %8.3f %8.3f' % (method_name, acc, nmi)
336 |
337 |
338 | def evaluateKMeans(data, labels, nclusters, method_name):
339 | '''
340 | Clusters data with kmeans algorithm and then returns the string containing method name and metrics, and also the evaluated cluster centers
341 | :param data: Points that need to be clustered as a numpy array
342 | :param labels: True labels for the given points
343 | :param nclusters: Total number of clusters
344 | :param method_name: Name of the method from which the clustering space originates (only used for printing)
345 | :return: Formatted string containing metrics and method name, cluster centers
346 | '''
347 | kmeans = KMeans(n_clusters=nclusters, n_init=5)
348 | kmeans.fit(data)
349 | return getClusterMetricString(method_name, labels, kmeans.labels_), kmeans.cluster_centers_
350 |
351 |
352 | def visualizeData(Z, labels, num_clusters, title):
353 | '''
354 | TSNE visualization of the points in latent space Z
355 | :param Z: Numpy array containing points in latent space in which clustering was performed
356 | :param labels: True labels - used for coloring points
357 | :param num_clusters: Total number of clusters
358 | :param title: filename where the plot should be saved
359 | :return: None - (side effect) saves clustering visualization plot in specified location
360 | '''
361 | labels = labels.astype(int)
362 | tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
363 | Z_tsne = tsne.fit_transform(Z)
364 | fig = plt.figure()
365 | plt.scatter(Z_tsne[:, 0], Z_tsne[:, 1], s=2, c=labels, cmap=plt.cm.get_cmap("jet", num_clusters))
366 | plt.colorbar(ticks=range(num_clusters))
367 | fig.savefig(title, dpi=fig.dpi)
368 |
--------------------------------------------------------------------------------
/CDEC/plots/genome/autoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rezacsedu/Convolutional-embedded-networks/f52c2a3816acbf05be28a52fe93140fe31495eb0/CDEC/plots/genome/autoencoder.png
--------------------------------------------------------------------------------
/CDEC/plots/genome/clustered_kld.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rezacsedu/Convolutional-embedded-networks/f52c2a3816acbf05be28a52fe93140fe31495eb0/CDEC/plots/genome/clustered_kld.png
--------------------------------------------------------------------------------
/CDEC/plots/genome/clustered_km.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rezacsedu/Convolutional-embedded-networks/f52c2a3816acbf05be28a52fe93140fe31495eb0/CDEC/plots/genome/clustered_km.png
--------------------------------------------------------------------------------
/CDEC/plots/genome/raw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rezacsedu/Convolutional-embedded-networks/f52c2a3816acbf05be28a52fe93140fe31495eb0/CDEC/plots/genome/raw.png
--------------------------------------------------------------------------------
/CDEC/self.trainAutoencoder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rezacsedu/Convolutional-embedded-networks/f52c2a3816acbf05be28a52fe93140fe31495eb0/CDEC/self.trainAutoencoder.png
--------------------------------------------------------------------------------
/DEC_GenotypeClustering_Keras/DEC_Genotype_Clustering.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | np.random.seed(10)
4 |
5 | from time import time
6 |
7 | # Keras / scikit-learn imports follow
8 | import keras.backend as K
9 | from keras.engine.topology import Layer, InputSpec
10 | from keras.layers import Dense, Input
11 | from keras.models import Model
12 | from keras.optimizers import RMSprop
13 | from keras import callbacks
14 | from keras.initializers import VarianceScaling
15 | from sklearn.cluster import KMeans
16 | from sklearn import metrics
17 | from sklearn.metrics.cluster import normalized_mutual_info_score
18 | from sklearn.metrics.cluster import adjusted_rand_score
19 | from sklearn.metrics import accuracy_score
20 | from sklearn import manifold
21 | import keras.layers.normalization as bn
22 |
23 | df1 = pd.read_csv('/home/asif/genome.csv', header=None)
24 | print(df1.head())
25 |
26 | label = df1[0]
27 | print(label.head())
28 |
29 | from sklearn import preprocessing
30 | le = preprocessing.LabelEncoder()
31 | lbl = le.fit(label)
32 | labelss = lbl.transform(label)
33 | labelDF = pd.DataFrame(labelss)
34 |
35 |
36 | print(labelDF.head())
37 |
38 | feature = df1.drop(0, axis=1)
39 | print(feature.head())
40 |
41 | from sklearn.preprocessing import MinMaxScaler
42 | scaler = MinMaxScaler()
43 | x1 = feature.iloc[:,1:]
44 | df_scaled = pd.DataFrame(scaler.fit_transform(x1), columns=x1.columns)
45 | print(df_scaled.head())
46 |
47 | y = labelss
48 | x = df_scaled.values
49 |
50 | print(y.shape)
51 | print(x.shape)
52 |
53 | print(np.isnan(np.min(x)))  # quick check for NaNs in the features
54 |
55 | #y.shape
56 | #x.shape
57 | #print(x)
58 | #print(y)
59 |
60 | n_clusters = len(np.unique(y))
61 | print(n_clusters)
62 |
63 | kmeans = KMeans(n_clusters=n_clusters, n_init=5)
64 | y_pred_kmeans = kmeans.fit_predict(x)
65 |
66 | print(accuracy_score(y, y_pred_kmeans))  # raw accuracy; cluster ids are only meaningful up to permutation
67 |
68 | dims = [x.shape[-1], 16, 16, 32, 5]
69 | init = VarianceScaling(scale=1. / 3., mode='fan_in', distribution='uniform')
70 | pretrain_optimizer = RMSprop(lr=0.001, rho=0.01, epsilon=None, decay=0.0)
71 | pretrain_epochs = 100
72 | batch_size = 32
73 | save_dir = 'result/'
74 |
75 | def autoencoder(dims, act='relu', init='glorot_uniform'):
76 | """
77 | Fully connected auto-encoder model, symmetric.
78 | Arguments:
79 | dims: list of number of units in each layer of encoder. dims[0] is input dim, dims[-1] is units in hidden layer.
80 | The decoder is symmetric with encoder. So number of layers of the auto-encoder is 2*len(dims)-1
81 | act: activation, not applied to Input, Hidden and Output layers
82 | return:
83 | (ae_model, encoder_model), Model of autoencoder and model of encoder
84 | """
85 | n_stacks = len(dims) - 1
86 | # input
87 | input_img = Input(shape=(dims[0],), name='input')
88 | x = input_img
89 | # internal layers in encoder
90 | for i in range(n_stacks-1):
91 | x = Dense(dims[i + 1], activation=act, kernel_initializer=init, name='encoder_%d' % i)(x)
92 | #bn.BatchNormalization(momentum=0.9, epsilon=1e-06, weights=None)
93 |
94 | # hidden layer
95 | encoded = Dense(dims[-1], kernel_initializer=init, name='encoder_%d' % (n_stacks - 1))(x) # hidden layer, features are extracted from here
96 |     bn.BatchNormalization(momentum=0.9, epsilon=1e-06, weights=None)  # NOTE: has no effect unless applied to a tensor, e.g. x = BatchNormalization(...)(x)
97 |
98 | x = encoded
99 | # internal layers in decoder
100 | for i in range(n_stacks-1, 0, -1):
101 | x = Dense(dims[i], activation=act, kernel_initializer=init, name='decoder_%d' % i)(x)
102 |
103 | # output
104 | x = Dense(dims[0], kernel_initializer=init, name='decoder_0')(x)
105 |     bn.BatchNormalization(momentum=0.9, epsilon=1e-06, weights=None)  # NOTE: has no effect unless applied to a tensor
106 |
107 | decoded = x
108 | return Model(inputs=input_img, outputs=decoded, name='AE'), Model(inputs=input_img, outputs=encoded, name='encoder')
109 |
110 | autoencoder, encoder = autoencoder(dims, init=init)
111 | autoencoder.compile(optimizer=pretrain_optimizer, loss='mse')
112 | autoencoder.fit(x, x, batch_size=batch_size, epochs=pretrain_epochs) #, callbacks=cb)
113 | autoencoder.save_weights(save_dir + '/ThesisDEC_weights.h5')
114 |
115 | # reload the pretrained weights
116 | autoencoder.load_weights(save_dir + '/ThesisDEC_weights.h5')
117 |
118 |
119 | class ClusteringLayer(Layer):
120 | """
121 | Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
122 | sample belonging to each cluster. The probability is calculated with student's t-distribution.
123 |
124 | # Example
125 | ```
126 | model.add(ClusteringLayer(n_clusters=10))
127 | ```
128 | # Arguments
129 | n_clusters: number of clusters.
130 |         weights: list of Numpy arrays with shape `(n_clusters, n_features)` which represent the initial cluster centers.
131 |         alpha: degrees of freedom parameter in Student's t-distribution. Defaults to 1.0.
132 | # Input shape
133 | 2D tensor with shape: `(n_samples, n_features)`.
134 | # Output shape
135 | 2D tensor with shape: `(n_samples, n_clusters)`.
136 | """
137 |
138 | def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
139 | if 'input_shape' not in kwargs and 'input_dim' in kwargs:
140 | kwargs['input_shape'] = (kwargs.pop('input_dim'),)
141 | super(ClusteringLayer, self).__init__(**kwargs)
142 | self.n_clusters = n_clusters
143 | self.alpha = alpha
144 | self.initial_weights = weights
145 | self.input_spec = InputSpec(ndim=2)
146 |
147 | def build(self, input_shape):
148 | assert len(input_shape) == 2
149 | input_dim = input_shape[1]
150 | self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
151 | self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
152 | if self.initial_weights is not None:
153 | self.set_weights(self.initial_weights)
154 | del self.initial_weights
155 | self.built = True
156 |
157 | def call(self, inputs, **kwargs):
158 | """ student t-distribution, as same as used in t-SNE algorithm.
159 | Measure the similarity between embedded point z_i and centroid µ_j.
160 | q_ij = 1/(1+dist(x_i, µ_j)^2), then normalize it.
161 | q_ij can be interpreted as the probability of assigning sample i to cluster j.
162 | (i.e., a soft assignment)
163 | Arguments:
164 | inputs: the variable containing data, shape=(n_samples, n_features)
165 | Return:
166 | q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
167 | """
168 | q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
169 | q **= (self.alpha + 1.0) / 2.0
170 |         q = K.transpose(K.transpose(q) / K.sum(q, axis=1))  # Make sure each sample's soft assignments add up to 1.
171 | return q
172 |
173 | def compute_output_shape(self, input_shape):
174 | assert input_shape and len(input_shape) == 2
175 | return input_shape[0], self.n_clusters
176 |
177 | def get_config(self):
178 | config = {'n_clusters': self.n_clusters}
179 | base_config = super(ClusteringLayer, self).get_config()
180 | return dict(list(base_config.items()) + list(config.items()))
181 |
182 | clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
183 | model = Model(inputs=encoder.input, outputs=clustering_layer)
184 | model.compile(optimizer=RMSprop(lr=0.01, rho=0.9, epsilon=None, decay=0.0), loss='kld')
185 |
186 | kmeans = KMeans(n_clusters=n_clusters, n_init=n_clusters)
187 | y_pred = kmeans.fit_predict(encoder.predict(x))
188 |
189 | y_pred_last = np.copy(y_pred)
190 |
191 | model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
192 |
193 | # computing an auxiliary target distribution
194 | def target_distribution(q):
195 | weight = q ** 2 / q.sum(0)
196 | return (weight.T / weight.sum(1)).T
197 |
198 | loss = 0
199 | index = 0
200 | maxiter = 20000
201 | update_interval = 500
202 | index_array = np.arange(x.shape[0])
203 |
204 | tol = 0.001 # tolerance threshold to stop training
205 |
206 | for ite in range(int(maxiter)):
207 | if ite % update_interval == 0:
208 | q = model.predict(x, verbose=0)
209 | p = target_distribution(q) # update the auxiliary target distribution p
210 |
211 | # evaluate the clustering performance
212 | y_pred = q.argmax(1)
213 | if y is not None:
214 | acc = np.round(accuracy_score(y, y_pred), 5)
215 | nmi = np.round(normalized_mutual_info_score(y, y_pred), 5)
216 | ari = np.round(adjusted_rand_score(y, y_pred), 5)
217 | loss = np.round(loss, 5)
218 | print('Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' % (ite, acc, nmi, ari), ' ; loss=', loss)
219 |
220 | # check stop criterion - model convergence
221 | delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
222 | y_pred_last = np.copy(y_pred)
223 | if ite > 0 and delta_label < tol:
224 | print('delta_label ', delta_label, '< tol ', tol)
225 | print('Reached tolerance threshold. Stopping training.')
226 | break
227 | idx = index_array[index * batch_size: min((index+1) * batch_size, x.shape[0])]
228 | model.train_on_batch(x=x[idx], y=p[idx])
229 | index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0
230 |
231 | model.save_weights(save_dir + '/Thesis_DEC_model_final.h5')
232 | model.load_weights(save_dir + '/Thesis_DEC_model_final.h5')
233 |
234 | # Eval.
235 | q = model.predict(x, verbose=0)
236 | p = target_distribution(q) # update the auxiliary target distribution p
237 |
238 | # evaluate the clustering performance
239 | y_pred = q.argmax(1)
240 | if y is not None:
241 | acc = np.round(accuracy_score(y, y_pred), 5)
242 | nmi = np.round(normalized_mutual_info_score(y, y_pred), 5)
243 | ari = np.round(adjusted_rand_score(y, y_pred), 5)
244 | loss = np.round(loss, 5)
245 | print('Acc = %.5f, nmi = %.5f, ari = %.5f' % (acc, nmi, ari), ' ; loss=', loss)
246 |
247 | import seaborn as sns
248 | import sklearn.metrics
249 | import matplotlib.pyplot as plt
250 | sns.set(font_scale=1.5)
251 | confusion_matrix = sklearn.metrics.confusion_matrix(y, y_pred)
252 |
253 | plt.figure(figsize=(12, 11))
254 | sns.heatmap(confusion_matrix, annot=True, fmt="d", annot_kws={"size": 15})
255 | plt.title("Confusion matrix", fontsize=25)
256 | plt.ylabel('True label', fontsize=25)
257 | plt.xlabel('Clustering label', fontsize=25)
258 | plt.show()
259 |
260 | def visualizeData(Z, labels, num_clusters, title):
261 | '''
262 | TSNE visualization of the points in latent space Z
263 | :param Z: Numpy array containing points in latent space in which clustering was performed
264 | :param labels: True labels - used for coloring points
265 | :param num_clusters: Total number of clusters
266 | :param title: filename where the plot should be saved
267 | :return: None - (side effect) saves clustering visualization plot in specified location
268 | '''
269 | labels = labels.astype(int)
270 | tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
271 | Z_tsne = tsne.fit_transform(Z)
272 | fig = plt.figure()
273 | plt.scatter(Z_tsne[:, 0], Z_tsne[:, 1], s=2, c=labels, cmap=plt.cm.get_cmap("jet", num_clusters))
274 | plt.colorbar(ticks=range(num_clusters))
275 | fig.savefig(title, dpi=fig.dpi)
276 |
277 | from sklearn.manifold import TSNE
278 | import seaborn as sn
279 | import matplotlib.pyplot as plt
280 |
281 | data_1000 = x[0:1000,:]   # optional subsample (unused; t-SNE below runs on the full x)
282 | labels_1000 = y[0:1000]
283 |
284 | model = TSNE(n_components = 2, random_state = 0)
285 |
286 | tsne_data = model.fit_transform(x)
287 | #y_pred
288 |
289 | tsne_data = np.vstack((tsne_data.T, y)).T
290 | tsne_df = pd.DataFrame(data= tsne_data, columns= ("Dim_1","Dim_2","label"))
291 |
292 | sn.FacetGrid(tsne_df, hue= "label", size = 6).map(plt.scatter, 'Dim_1', 'Dim_2').add_legend()
293 | plt.show()
294 |
295 | visualizeData(x, y, n_clusters, "t_SNE_graph_original.png")
296 | visualizeData(x, y_pred, n_clusters, "t_SNE_graph_predicted.png")
297 |
--------------------------------------------------------------------------------
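The auxiliary target distribution p used in the training loop above is what the KLD loss pulls q toward. A toy NumPy sketch (illustrative q values of my own) showing that it sharpens confident assignments while each row remains a distribution:

```python
# DEC's auxiliary target: p_ij = (q_ij^2 / f_j) / sum_j'(q_ij'^2 / f_j'),
# where f_j = sum_i q_ij is the soft cluster frequency.
import numpy as np

def target_distribution(q):
    weight = q ** 2 / q.sum(0)
    return (weight.T / weight.sum(1)).T

q = np.array([[0.7, 0.2, 0.1],
              [0.6, 0.3, 0.1],
              [0.2, 0.2, 0.6]])
p = target_distribution(q)
print(np.round(p, 3))
assert np.allclose(p.sum(axis=1), 1.0)  # rows are still distributions
assert p[0, 0] > q[0, 0]                # confident assignments get sharper
```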
/DEC_GenotypeClustering_Keras/LSTM_EthnicityPrediction.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import glob
4 | import numpy as np
5 | import sys
6 | from time import time
7 |
8 | from sklearn.preprocessing import LabelEncoder
9 | from sklearn.model_selection import train_test_split
10 | from sklearn.metrics import precision_recall_fscore_support
11 | from sklearn import metrics
12 |
13 | from keras.models import Sequential
14 | from keras.layers import LSTM, Dense, Dropout, Activation, Flatten
15 | from keras.callbacks import TensorBoard
16 | from keras.optimizers import RMSprop
17 | from keras.regularizers import l2
18 | from keras.callbacks import EarlyStopping
19 | from sklearn.metrics import precision_recall_fscore_support, roc_auc_score
20 | from keras.utils import np_utils
21 |
22 | from keras import backend as K
23 | K.set_image_dim_ordering('tf')
24 | import matplotlib.pyplot as plt
25 | import itertools
26 |
27 | np.random.seed(10)
47 |
48 | from sklearn.metrics import confusion_matrix
49 |
50 | df1 = pd.read_csv('/home/asif/genome.csv', header=None)
51 | print(df1.head())
52 |
53 | label = df1[0]
54 | print(label.head())
55 |
56 | from sklearn import preprocessing
57 | le = preprocessing.LabelEncoder()
58 | lbl = le.fit(label)
59 | labelss = lbl.transform(label)
60 | labelDF = pd.DataFrame(labelss)
61 |
62 |
63 | print(labelDF.head())
64 |
65 | feature = df1.drop(0, axis=1)
66 | print(feature.head())
67 |
68 | from sklearn.preprocessing import MinMaxScaler
69 | scaler = MinMaxScaler()
70 | x1 = feature.iloc[:,1:]
71 | df_scaled = pd.DataFrame(scaler.fit_transform(x1), columns=x1.columns)
72 | print(df_scaled.head())
73 |
74 | y = labelss
75 | x = df_scaled.values
76 |
77 | features = x
78 | labels = y
79 |
80 | def prepare_test_train_valid():
81 |     # Hold out 25% of the data, then split that held-out portion into test and validation halves
82 |     train_x, test_x, train_y, test_y = train_test_split(features, labels, test_size=0.25, random_state=100)
83 |     test_x, valid_x, test_y, valid_y = train_test_split(test_x, test_y, test_size=0.50, random_state=100)
84 |
85 | return train_x, test_x, train_y, test_y, valid_x, valid_y
86 |
87 | def one_hot_encode(labels):
88 | n_labels = len(labels)
89 | n_unique_labels = len(np.unique(labels))
90 | one_hot_encode = np.zeros((n_labels,n_unique_labels))
91 | one_hot_encode[np.arange(n_labels), labels] = 1
92 | return one_hot_encode
93 |
94 | labels = one_hot_encode(labels)
95 |
96 | # Extract feature
97 | train_x, test_x, train_y, test_y, valid_x, valid_y = prepare_test_train_valid()
98 |
99 | print('X_train shape:', train_x.shape)
100 | print('Y_train shape:', train_y.shape)
101 |
102 | num_classes = 5
103 | data_dim = 52
104 | timesteps = 1
105 |
106 | train_x = np.reshape(train_x,(train_x.shape[0], 1, train_x.shape[1]))
107 | test_x = np.reshape(test_x,(test_x.shape[0], 1, test_x.shape[1]))
108 | valid_x = np.reshape(valid_x,(valid_x.shape[0], 1, valid_x.shape[1]))
109 |
110 | def plot_confusion_matrix(cm, classes,
111 | normalize=False,
112 | title='Confusion matrix',
113 | cmap=plt.cm.Blues):
114 | """
115 | This function prints and plots the confusion matrix.
116 | Normalization can be applied by setting `normalize=True`.
117 | """
118 | if normalize:
119 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
120 | print("Normalized confusion matrix")
121 | else:
122 | print('Confusion matrix, without normalization')
123 |
124 | print(cm)
125 |
126 | plt.imshow(cm, interpolation='nearest', cmap=cmap)
127 | plt.title(title)
128 | plt.colorbar()
129 | tick_marks = np.arange(len(classes))
130 | plt.xticks(tick_marks, classes, rotation=45)
131 | plt.yticks(tick_marks, classes)
132 |
133 | fmt = '.2f' if normalize else 'd'
134 | thresh = cm.max() / 2.
135 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
136 | plt.text(j, i, format(cm[i, j], fmt),
137 | horizontalalignment="center",
138 | color="white" if cm[i, j] > thresh else "black")
139 |
140 | plt.tight_layout()
141 | plt.ylabel('True label')
142 | plt.xlabel('Predicted label')
143 |
144 | def build_LSTM():
145 | # expected input data shape: (batch_size, timesteps, data_dim)
146 | model = Sequential()
147 | model.add(LSTM(32, return_sequences=True, input_shape=(timesteps, data_dim)))
148 |
149 | model.add(LSTM(24, return_sequences=True))
150 |
151 | #model.add(Dropout(0.2))
152 | model.add(LSTM(16, return_sequences=True))
153 | model.add(Dropout(0.2))
154 |
155 | # apply softmax to output
156 | model.add(Flatten())
157 | model.add(Dense(num_classes, activation='softmax'))
158 | return model
159 |
160 | def model_train_evaluate(model, number_epoch):
161 |     optimizer = RMSprop(lr=0.001, rho=0.01, epsilon=None, decay=0.0)
162 |
163 | # a stopping function should the validation loss stop improving
164 |     earlystop = EarlyStopping(monitor='val_loss', patience=1, verbose=0, mode='auto')  # pass via callbacks=[...] in fit() to enable early stopping
165 |
166 |
167 |     rnn_model = build_LSTM()
168 |     rnn_model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer)
169 | tensorboardRNN = TensorBoard(log_dir="RNN_logs/{}".format(time()))
170 | rnn_model.fit(train_x, train_y, validation_data=(valid_x, valid_y), callbacks=[tensorboardRNN], batch_size=128, epochs=int(number_epoch))
171 | print(rnn_model.summary())
172 |
173 | y_prob = rnn_model.predict(test_x)
174 | y_pred = y_prob.argmax(axis=-1)
175 | y_true = np.argmax(test_y, 1)
176 |
177 | roc = roc_auc_score(test_y, y_prob)
178 | print ("ROC:", round(roc,3))
179 |
180 | # evaluate the model
181 | score, accuracy = rnn_model.evaluate(test_x, test_y, batch_size=32)
182 | print("\nAccuracy = {:.2f}".format(accuracy))
183 |
184 |     # the F-score gives a similar value to the accuracy score, but is useful for cross-checking
185 | p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
186 | print ("F-Score:", round(f,2))
187 | print ("Precision:", round(p,2))
188 | print ("Recall:", round(r,2))
189 | print ("F-Score:", round(f,2))
190 |
191 | # Compute confusion matrix
192 | cnf_matrix = confusion_matrix(y_true, y_pred)
193 | np.set_printoptions(precision=2)
194 |
195 | class_names = ["FIN", "GBR", "ASW", "CHB", "CLM"]
196 |
197 | # Plot non-normalized confusion matrix
198 | plt.figure()
199 | plot_confusion_matrix(cnf_matrix, classes=class_names, title='Confusion matrix: true vs predicted label')
200 | plt.show()
201 |
202 | model = build_LSTM()  # note: model_train_evaluate() builds its own LSTM internally
203 | model_train_evaluate(model, 1000)
204 | import gc; gc.collect()
205 |
--------------------------------------------------------------------------------
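The one-hot encoding and the reshape to (samples, timesteps=1, features) above are what give the LSTM its expected 3-D input. A toy NumPy sketch (illustrative shapes, not the repo's data) of the same preparation:

```python
# One-hot encode integer labels and reshape flat feature rows to
# (samples, timesteps=1, features), as done for the LSTM above.
import numpy as np

labels = np.array([0, 2, 1, 2])            # integer class ids
features = np.arange(8.0).reshape(4, 2)    # 4 samples x 2 features

one_hot = np.zeros((labels.size, np.unique(labels).size))
one_hot[np.arange(labels.size), labels] = 1  # same trick as one_hot_encode()

x = features.reshape(features.shape[0], 1, features.shape[1])
print(one_hot.shape, x.shape)              # (4, 3) (4, 1, 2)
```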
/DEC_GenotypeClustering_Keras/genome.csv:
--------------------------------------------------------------------------------
1 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3 | CHB,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4 | CHB,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
6 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
7 | CHB,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
8 | ASW,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
9 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1
10 | ASW,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0
11 | ASW,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0
12 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0
13 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
14 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
15 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
16 | CHB,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
17 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
19 | GBR,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
20 | GBR,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
21 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
22 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
23 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
24 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
25 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
26 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
27 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
28 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
29 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
30 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
31 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
32 | CLM,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
33 | CHB,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
34 | CLM,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
35 | CLM,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
36 | FIN,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
37 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1
38 | FIN,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
39 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
40 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0
41 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
42 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
43 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
44 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
45 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
46 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
47 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
48 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
49 | ASW,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0
50 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
51 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
52 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
53 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
54 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
55 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
56 | CLM,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
57 | CHB,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
58 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
59 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
60 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
61 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
62 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
63 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
64 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
65 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
66 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
67 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
68 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0
69 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
70 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
71 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
72 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
73 | CHB,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
74 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
75 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
76 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0
77 | ASW,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
78 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0
79 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
80 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0
81 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
82 | CHB,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
83 | CLM,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
84 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
85 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
86 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
87 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
88 | FIN,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
89 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
90 | ASW,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0
91 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
92 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
93 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
94 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
95 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
96 | FIN,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
97 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
98 | FIN,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
99 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
100 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
101 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
102 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
103 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
104 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
105 | FIN,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
106 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
107 | GBR,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
108 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1
109 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
110 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
111 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
112 | FIN,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
113 | CHB,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
114 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
115 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
116 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
117 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
118 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
119 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
120 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
121 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
122 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
123 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
124 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
125 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
126 | CLM,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
127 | ASW,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
128 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
129 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
130 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
131 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
132 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1
133 | CLM,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
134 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
135 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
136 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
137 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
138 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
139 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1
140 | ASW,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1
141 | FIN,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
142 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
143 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
144 | CLM,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
145 | FIN,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
146 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
147 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
148 | CLM,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1
149 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
150 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
151 | ASW,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0
152 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
153 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
154 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
155 | GBR,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
156 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0
157 | CLM,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1
158 | CHB,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
159 | ASW,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
160 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
161 | CLM,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
162 | CLM,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
163 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
164 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
165 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
166 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
167 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
168 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
169 | CLM,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
170 | ASW,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
171 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
172 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
173 | ASW,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
174 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
175 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
176 | CLM,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
177 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
178 | CHB,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
179 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
180 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
181 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
182 | CHB,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
183 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
184 | ASW,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
185 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
186 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
187 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
188 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1
189 | GBR,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0
190 | FIN,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
191 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
192 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0
193 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0
194 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
195 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
196 | CLM,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
197 | CHB,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
198 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
199 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
200 |
--------------------------------------------------------------------------------
/PopulationClustering_v2/output_1.txt:
--------------------------------------------------------------------------------
1 | Found 199 samples
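The count above matches the 199 data rows in genome.csv; a quick, illustrative way to reproduce it (the path is a placeholder):

import pandas as pd

# count the samples in the genotype matrix; path is a placeholder
df = pd.read_csv('genome.csv', header=None)
print("Found %d samples" % len(df))  # Found 199 samples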
--------------------------------------------------------------------------------
/PopulationClustering_v2/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |   <modelVersion>4.0.0</modelVersion>
6 |   <groupId>com.deri.sels</groupId>
7 |   <artifactId>PopulationClustering_v2</artifactId>
8 |   <version>0.1-SNAPSHOT</version>
9 |
10 |   <properties>
11 |     <spark.version>2.2.1</spark.version>
12 |     <scala.version>2.11.8</scala.version>
13 |     <h2o.version>3.16.0.2</h2o.version>
14 |     <sparkling.water.version>2.2.6</sparkling.water.version>  <!-- property name inferred; only the value survives -->
15 |     <adam.version>0.23.0</adam.version>  <!-- property name inferred; only the value survives -->
16 |   </properties>
17 |
18 |   <repositories>
19 |     <repository>
20 |       <id>scala-tools.org</id>
21 |       <name>Scala-tools Maven2 Repository</name>
22 |       <url>http://scala-tools.org/repo-releases</url>
23 |     </repository>
24 |     <repository>
25 |       <id>snapshots-repo</id>
26 |       <url>https://oss.sonatype.org/content/repositories/snapshots</url>
27 |       <releases>
28 |         <enabled>false</enabled>
29 |       </releases>
30 |       <snapshots>
31 |         <enabled>true</enabled>
32 |         <updatePolicy>daily</updatePolicy>
33 |       </snapshots>
34 |     </repository>
35 |   </repositories>
36 |
37 |   <dependencies>
38 |     <dependency>
39 |       <groupId>org.bdgenomics.adam</groupId>
40 |       <artifactId>adam-core_2.11</artifactId>
41 |       <version>0.23.0</version>
42 |     </dependency>
43 |     <dependency>
44 |       <groupId>ai.h2o</groupId>
45 |       <artifactId>sparkling-water-core_2.11</artifactId>
46 |       <version>2.2.6</version>
47 |     </dependency>
48 |     <dependency>
49 |       <groupId>ai.h2o</groupId>
50 |       <artifactId>sparkling-water-examples_2.11</artifactId>
51 |       <version>2.2.6</version>
52 |     </dependency>
53 |     <dependency>
54 |       <groupId>org.apache.directory.studio</groupId>
55 |       <artifactId>org.apache.commons.io</artifactId>
56 |       <version>2.4</version>
57 |     </dependency>
58 |     <dependency>
59 |       <groupId>org.apache.spark</groupId>
60 |       <artifactId>spark-core_2.11</artifactId>
61 |       <version>${spark.version}</version>
62 |     </dependency>
63 |     <dependency>
64 |       <groupId>ai.h2o</groupId>
65 |       <artifactId>h2o-core</artifactId>
66 |       <version>${h2o.version}</version>
67 |     </dependency>
68 |     <dependency>
69 |       <groupId>ai.h2o</groupId>
70 |       <artifactId>h2o-scala_2.11</artifactId>
71 |       <version>${h2o.version}</version>
72 |     </dependency>
73 |     <dependency>
74 |       <groupId>ai.h2o</groupId>
75 |       <artifactId>h2o-algos</artifactId>
76 |       <version>${h2o.version}</version>
77 |     </dependency>
78 |     <dependency>
79 |       <groupId>ai.h2o</groupId>
80 |       <artifactId>h2o-app</artifactId>
81 |       <version>${h2o.version}</version>
82 |     </dependency>
83 |     <dependency>
84 |       <groupId>ai.h2o</groupId>
85 |       <artifactId>h2o-persist-hdfs</artifactId>
86 |       <version>${h2o.version}</version>
87 |     </dependency>
88 |     <dependency>
89 |       <groupId>org.scala-lang</groupId>
90 |       <artifactId>scala-library</artifactId>
91 |       <version>${scala.version}</version>
92 |     </dependency>
93 |     <dependency>
94 |       <groupId>ai.h2o</groupId>
95 |       <artifactId>google-analytics-java</artifactId>
96 |       <version>1.1.2-H2O-CUSTOM</version>
97 |     </dependency>
98 |     <dependency>
99 |       <groupId>joda-time</groupId>
100 |       <artifactId>joda-time</artifactId>
101 |       <version>2.9.9</version>
102 |     </dependency>
103 |   </dependencies>
104 |
105 |   <build>
106 |     <plugins>
107 |       <plugin>
108 |         <groupId>org.apache.maven.plugins</groupId>
109 |         <artifactId>maven-eclipse-plugin</artifactId>
110 |         <version>2.9</version>
111 |         <configuration>
112 |           <downloadSources>true</downloadSources>  <!-- element names inferred from the surviving true/false values -->
113 |           <downloadJavadocs>false</downloadJavadocs>
114 |         </configuration>
115 |       </plugin>
116 |       <plugin>
117 |         <groupId>org.apache.maven.plugins</groupId>
118 |         <artifactId>maven-compiler-plugin</artifactId>
119 |         <version>3.5.1</version>
120 |         <configuration>
121 |           <source>${jdk.version}</source>
122 |           <target>${jdk.version}</target>
123 |         </configuration>
124 |       </plugin>
125 |       <plugin>
126 |         <artifactId>maven-shade-plugin</artifactId>
127 |         <version>2.4.3</version>
128 |         <executions>
129 |           <execution>
130 |             <phase>package</phase>
131 |             <goals>
132 |               <goal>shade</goal>
133 |             </goals>
134 |             <configuration>
135 |               <createDependencyReducedPom>false</createDependencyReducedPom>  <!-- element name inferred -->
136 |               <filters>
137 |                 <filter>
138 |                   <artifact>*:*</artifact>
139 |                   <excludes>
140 |                     <exclude>META-INF/*.SF</exclude>
141 |                     <exclude>META-INF/*.DSA</exclude>
142 |                     <exclude>META-INF/*.RSA</exclude>
143 |                   </excludes>
144 |                 </filter>
145 |               </filters>
146 |             </configuration>
147 |           </execution>
148 |         </executions>
149 |       </plugin>
150 |       <plugin>
151 |         <groupId>org.apache.maven.plugins</groupId>
152 |         <artifactId>maven-assembly-plugin</artifactId>
153 |         <version>2.4.1</version>
154 |         <configuration>
155 |           <descriptorRefs>
156 |             <descriptorRef>jar-with-dependencies</descriptorRef>
157 |           </descriptorRefs>
158 |           <archive>
159 |             <manifest>
160 |               <mainClass>org.fit.genomics.PopStratClassification</mainClass>
161 |             </manifest>
162 |             <manifestEntries>
163 |               <oozie.launcher.mapreduce.job.user.classpath.first>true</oozie.launcher.mapreduce.job.user.classpath.first>
164 |             </manifestEntries>
165 |           </archive>
166 |         </configuration>
167 |         <executions>
168 |           <execution>
169 |             <id>make-assembly</id>
170 |             <phase>package</phase>
171 |             <goals>
172 |               <goal>single</goal>
173 |             </goals>
174 |           </execution>
175 |         </executions>
176 |       </plugin>
177 |     </plugins>
178 |   </build>
179 | </project>
--------------------------------------------------------------------------------
/PopulationClustering_v2/results/train.csv/DEC_Genotype_Clustering.py:
--------------------------------------------------------------------------------
1 | from keras.datasets import mnist
2 | import numpy as np
3 | import pandas as pd
4 | np.random.seed(10)
5 |
6 | from time import time
7 | import numpy as np
8 | import keras.backend as K
9 | from keras.engine.topology import Layer, InputSpec
10 | from keras.layers import Dense, Input
11 | from keras.models import Model
12 | from keras.optimizers import RMSprop
13 | from keras import callbacks
14 | from keras.initializers import VarianceScaling
15 | from sklearn.cluster import KMeans
16 | from sklearn import metrics
17 | from sklearn.metrics.cluster import normalized_mutual_info_score
18 | from sklearn.metrics.cluster import adjusted_rand_score
19 | from sklearn.metrics import accuracy_score
20 | from sklearn import manifold
21 | import keras.layers.normalization as bn
22 |
23 | df1 = pd.read_csv('/home/asif/genome.csv', header=None)
24 | print(df1.head())
25 |
26 | label = df1[0]
27 | print(label.head())
28 |
29 | from sklearn import preprocessing
30 | le = preprocessing.LabelEncoder()
31 | lbl = le.fit(label)
32 | labelss = lbl.transform(label)
33 | labelDF = pd.DataFrame(labelss)
34 |
35 | #labelArr =
36 | print(labelDF.head())
37 |
38 | feature = df1.drop(0, axis=1)
39 | print(feature.head())
40 |
41 | from sklearn.preprocessing import MinMaxScaler
42 | scaler = MinMaxScaler()
43 | x1 = feature.iloc[:,1:]  # note: this also drops the first SNP column, leaving 52 of the 53 feature columns
44 | df_scaled = pd.DataFrame(scaler.fit_transform(x1), columns=x1.columns)
45 | df_scaled.head()
46 |
47 | y = labelss
48 | x = df_scaled.values
49 |
50 | print(y.shape)
51 | print(x.shape)
52 |
53 | print(np.isnan(np.min(x)))
54 |
55 | #y.shape
56 | #x.shape
57 | #print(x)
58 | #print(y)
59 |
60 | n_clusters = len(np.unique(y))
61 | print(n_clusters)
62 |
63 | kmeans = KMeans(n_clusters=n_clusters, n_init=5)
64 | y_pred_kmeans = kmeans.fit_predict(x)
65 |
66 | print(accuracy_score(y, y_pred_kmeans))  # note: k-means cluster IDs are arbitrary, so this raw accuracy is only meaningful after matching clusters to true labels
67 |
68 | dims = [x.shape[-1], 16, 16, 32, 5]
69 | init = VarianceScaling(scale=1. / 3., mode='fan_in', distribution='uniform')
70 | pretrain_optimizer = RMSprop(lr=0.001, rho=0.01, epsilon=None, decay=0.0)
71 | pretrain_epochs = 100
72 | batch_size = 32
73 | save_dir = 'result/'
74 |
75 | def autoencoder(dims, act='relu', init='glorot_uniform'):
76 | """
77 | Fully connected auto-encoder model, symmetric.
78 | Arguments:
79 | dims: list of number of units in each layer of encoder. dims[0] is input dim, dims[-1] is units in hidden layer.
80 | The decoder is symmetric with encoder. So number of layers of the auto-encoder is 2*len(dims)-1
81 | act: activation for the internal layers; not applied to the input, the hidden (embedding) layer, or the output layer
82 | return:
83 | (ae_model, encoder_model), Model of autoencoder and model of encoder
84 | """
85 | n_stacks = len(dims) - 1
86 | # input
87 | input_img = Input(shape=(dims[0],), name='input')
88 | x = input_img
89 | # internal layers in encoder
90 | for i in range(n_stacks-1):
91 | x = Dense(dims[i + 1], activation=act, kernel_initializer=init, name='encoder_%d' % i)(x)
92 | #bn.BatchNormalization(momentum=0.9, epsilon=1e-06, weights=None)
93 |
94 | # hidden layer
95 | encoded = Dense(dims[-1], kernel_initializer=init, name='encoder_%d' % (n_stacks - 1))(x) # hidden layer, features are extracted from here
96 | bn.BatchNormalization(momentum=0.9, epsilon=1e-06, weights=None)  # note: this constructs a BatchNormalization layer but never applies it to the graph, so it is a no-op
97 |
98 | x = encoded
99 | # internal layers in decoder
100 | for i in range(n_stacks-1, 0, -1):
101 | x = Dense(dims[i], activation=act, kernel_initializer=init, name='decoder_%d' % i)(x)
102 |
103 | # output
104 | x = Dense(dims[0], kernel_initializer=init, name='decoder_0')(x)
105 | bn.BatchNormalization(momentum=0.9, epsilon=1e-06, weights=None)  # note: constructed but never applied; a no-op as written
106 |
107 | decoded = x
108 | return Model(inputs=input_img, outputs=decoded, name='AE'), Model(inputs=input_img, outputs=encoded, name='encoder')
109 |
110 | autoencoder, encoder = autoencoder(dims, init=init)
111 | autoencoder.compile(optimizer=pretrain_optimizer, loss='mse')
112 | autoencoder.fit(x, x, batch_size=batch_size, epochs=pretrain_epochs) #, callbacks=cb)
113 | autoencoder.save_weights(save_dir + '/ThesisDEC_weights.h5')
114 |
115 | # reload the pretrained weights before attaching the clustering layer
116 | autoencoder.load_weights(save_dir + '/ThesisDEC_weights.h5')
117 |
118 |
119 | class ClusteringLayer(Layer):
120 | """
121 | Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
122 | sample belonging to each cluster. The probability is calculated with student's t-distribution.
123 |
124 | # Example
125 | ```
126 | model.add(ClusteringLayer(n_clusters=10))
127 | ```
128 | # Arguments
129 | n_clusters: number of clusters.
130 | weights: list of Numpy arrays with shape `(n_clusters, n_features)` which represent the initial cluster centers.
131 | alpha: degrees of freedom parameter in Student's t-distribution. Default to 1.0.
132 | # Input shape
133 | 2D tensor with shape: `(n_samples, n_features)`.
134 | # Output shape
135 | 2D tensor with shape: `(n_samples, n_clusters)`.
136 | """
137 |
138 | def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
139 | if 'input_shape' not in kwargs and 'input_dim' in kwargs:
140 | kwargs['input_shape'] = (kwargs.pop('input_dim'),)
141 | super(ClusteringLayer, self).__init__(**kwargs)
142 | self.n_clusters = n_clusters
143 | self.alpha = alpha
144 | self.initial_weights = weights
145 | self.input_spec = InputSpec(ndim=2)
146 |
147 | def build(self, input_shape):
148 | assert len(input_shape) == 2
149 | input_dim = input_shape[1]
150 | self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
151 | self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')  # pass shape by keyword so it is not mistaken for the name argument on newer Keras
152 | if self.initial_weights is not None:
153 | self.set_weights(self.initial_weights)
154 | del self.initial_weights
155 | self.built = True
156 |
157 | def call(self, inputs, **kwargs):
158 | """ student t-distribution, as same as used in t-SNE algorithm.
159 | Measure the similarity between embedded point z_i and centroid µ_j.
160 | q_ij = 1/(1+dist(x_i, µ_j)^2), then normalize it.
161 | q_ij can be interpreted as the probability of assigning sample i to cluster j.
162 | (i.e., a soft assignment)
163 | Arguments:
164 | inputs: the variable containing data, shape=(n_samples, n_features)
165 | Return:
166 | q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
167 | """
168 | q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
169 | q **= (self.alpha + 1.0) / 2.0
170 | q = K.transpose(K.transpose(q) / K.sum(q, axis=1)) # normalize so each sample's soft assignments sum to 1
171 | return q
172 |
173 | def compute_output_shape(self, input_shape):
174 | assert input_shape and len(input_shape) == 2
175 | return input_shape[0], self.n_clusters
176 |
177 | def get_config(self):
178 | config = {'n_clusters': self.n_clusters}
179 | base_config = super(ClusteringLayer, self).get_config()
180 | return dict(list(base_config.items()) + list(config.items()))
181 |
182 | clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
183 | model = Model(inputs=encoder.input, outputs=clustering_layer)
184 | model.compile(optimizer=RMSprop(lr=0.01, rho=0.9, epsilon=None, decay=0.0), loss='kld')
185 |
186 | kmeans = KMeans(n_clusters=n_clusters, n_init=n_clusters)
187 | y_pred = kmeans.fit_predict(encoder.predict(x))
188 |
189 | y_pred_last = np.copy(y_pred)
190 |
191 | model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
192 |
193 | # computing an auxiliary target distribution
194 | def target_distribution(q):
195 | weight = q ** 2 / q.sum(0)
196 | return (weight.T / weight.sum(1)).T
197 |
198 | loss = 0
199 | index = 0
200 | maxiter = 20000
201 | update_interval = 500
202 | index_array = np.arange(x.shape[0])
203 |
204 | tol = 0.001 # tolerance threshold to stop training
205 |
206 | for ite in range(int(maxiter)):
207 | if ite % update_interval == 0:
208 | q = model.predict(x, verbose=0)
209 | p = target_distribution(q) # update the auxiliary target distribution p
210 |
211 | # evaluate the clustering performance
212 | y_pred = q.argmax(1)
213 | if y is not None:
214 | acc = np.round(accuracy_score(y, y_pred), 5)
215 | nmi = np.round(normalized_mutual_info_score(y, y_pred), 5)
216 | ari = np.round(adjusted_rand_score(y, y_pred), 5)
217 | loss = np.round(loss, 5)
218 | print('Iter %d: acc = %.5f, nmi = %.5f, ari = %.5f' % (ite, acc, nmi, ari), ' ; loss=', loss)
219 |
220 | # check stop criterion - model convergence
221 | delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
222 | y_pred_last = np.copy(y_pred)
223 | if ite > 0 and delta_label < tol:
224 | print('delta_label ', delta_label, '< tol ', tol)
225 | print('Reached tolerance threshold. Stopping training.')
226 | break
227 | idx = index_array[index * batch_size: min((index+1) * batch_size, x.shape[0])]
228 | model.train_on_batch(x=x[idx], y=p[idx])
229 | index = index + 1 if (index + 1) * batch_size <= x.shape[0] else 0
230 |
231 | model.save_weights(save_dir + '/Thesis_DEC_model_final.h5')
232 | model.load_weights(save_dir + '/Thesis_DEC_model_final.h5')
233 |
234 | # Eval.
235 | q = model.predict(x, verbose=0)
236 | p = target_distribution(q) # update the auxiliary target distribution p
237 |
238 | # evaluate the clustering performance
239 | y_pred = q.argmax(1)
240 | if y is not None:
241 | acc = np.round(accuracy_score(y, y_pred), 5)
242 | nmi = np.round(normalized_mutual_info_score(y, y_pred), 5)
243 | ari = np.round(adjusted_rand_score(y, y_pred), 5)
244 | loss = np.round(loss, 5)
245 | print('Acc = %.5f, nmi = %.5f, ari = %.5f' % (acc, nmi, ari), ' ; loss=', loss)
246 |
247 | import seaborn as sns
248 | import sklearn.metrics
249 | import matplotlib.pyplot as plt
250 | sns.set(font_scale=1.5)
251 | confusion_matrix = sklearn.metrics.confusion_matrix(y, y_pred)
252 |
253 | plt.figure(figsize=(12, 11))
254 | sns.heatmap(confusion_matrix, annot=True, fmt="d", annot_kws={"size": 15});
255 | plt.title("Confusion matrix", fontsize=25)
256 | plt.ylabel('True label', fontsize=25)
257 | plt.xlabel('Clustering label', fontsize=25)
258 | plt.show()
259 |
260 | def visualizeData(Z, labels, num_clusters, title):
261 | '''
262 | TSNE visualization of the points in latent space Z
263 | :param Z: Numpy array containing points in latent space in which clustering was performed
264 | :param labels: True labels - used for coloring points
265 | :param num_clusters: Total number of clusters
266 | :param title: filename where the plot should be saved
267 | :return: None - (side effect) saves clustering visualization plot in specified location
268 | '''
269 | labels = labels.astype(int)
270 | tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
271 | Z_tsne = tsne.fit_transform(Z)
272 | fig = plt.figure()
273 | plt.scatter(Z_tsne[:, 0], Z_tsne[:, 1], s=2, c=labels, cmap=plt.cm.get_cmap("jet", num_clusters))
274 | plt.colorbar(ticks=range(num_clusters))
275 | fig.savefig(title, dpi=fig.dpi)
276 |
277 | from sklearn.manifold import TSNE
278 | import seaborn as sn
279 | import matplotlib.pyplot as plt
280 |
281 | data_1000 = x[0:1000,:]  # note: these 1000-sample slices are not used below; t-SNE runs on the full x
282 | labels_1000 = y[0:1000]
283 |
284 | tsne_model = TSNE(n_components = 2, random_state = 0)
285 |
286 | tsne_data = tsne_model.fit_transform(x)
287 | #y_pred
288 |
289 | tsne_data = np.vstack((tsne_data.T, y)).T
290 | tsne_df = pd.DataFrame(data= tsne_data, columns= ("Dim_1","Dim_2","label"))
291 |
292 | sn.FacetGrid(tsne_df, hue= "label", size = 6).map(plt.scatter, 'Dim_1', 'Dim_2').add_legend()
293 | plt.show()
294 |
295 | visualizeData(x, y, n_clusters, "t_SNE_graph_original.png")
296 | visualizeData(x, y_pred, n_clusters, "t_SNE_graph_predicted.png")
297 |
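To make the two pieces of math above concrete (the soft assignment in ClusteringLayer.call and the auxiliary target in target_distribution), here is a small NumPy-only sketch of the same formulas on toy numbers, together with the Hungarian-matching accuracy that is the usual way to score a clustering against true labels; the raw accuracy_score calls above compare arbitrary cluster IDs directly to labels. All numbers are illustrative, and only numpy and scipy are assumed:

import numpy as np
from scipy.optimize import linear_sum_assignment

# toy embedded points (3 samples in a 2-D latent space) and 2 cluster centres
z = np.array([[0.0, 0.0], [1.0, 1.0], [0.9, 1.1]])
mu = np.array([[0.0, 0.0], [1.0, 1.0]])
alpha = 1.0

# soft assignment, as in ClusteringLayer.call:
# q_ij = (1 + ||z_i - mu_j||^2 / alpha)^(-(alpha+1)/2), then row-normalised
d2 = ((z[:, None, :] - mu[None, :, :]) ** 2).sum(axis=2)
q = (1.0 + d2 / alpha) ** (-(alpha + 1.0) / 2.0)
q = q / q.sum(axis=1, keepdims=True)

# auxiliary target, as in target_distribution:
# p_ij = (q_ij^2 / f_j) / sum_j'(q_ij'^2 / f_j') with f_j = sum_i q_ij;
# squaring sharpens confident assignments, which is the self-training
# signal the 'kld' loss pulls q towards
w = q ** 2 / q.sum(axis=0)
p = (w.T / w.sum(axis=1)).T

print(np.round(q, 3))  # each row sums to 1
print(np.round(p, 3))  # sharper than q

# clustering "accuracy" needs a cluster-to-label matching first; a standard
# choice is the Hungarian algorithm over the contingency table
def cluster_acc(y_true, y_pred):
    k = int(max(y_pred.max(), y_true.max())) + 1
    cost = np.zeros((k, k), dtype=int)
    for yt, yp in zip(y_true, y_pred):
        cost[yp, yt] += 1
    row, col = linear_sum_assignment(-cost)  # maximise matched counts
    return cost[row, col].sum() / y_true.size

y_true = np.array([0, 0, 1, 1, 1])
y_kmeans = np.array([1, 1, 0, 0, 2])
print(cluster_acc(y_true, y_kmeans))  # 0.8 after optimal matching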
--------------------------------------------------------------------------------
/PopulationClustering_v2/results/train.csv/LSTM_EthnicityPrediction.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 | import glob
4 | import numpy as np
5 | import sys
6 | from time import time
7 |
8 | from sklearn.preprocessing import LabelEncoder
9 | from sklearn.model_selection import train_test_split
10 | from sklearn.metrics import precision_recall_fscore_support
11 | from sklearn import metrics
12 |
13 | from keras.models import Sequential
14 | from keras.layers import LSTM, Dense, Dropout, Activation, Flatten
15 | from keras.callbacks import TensorBoard
16 | from keras.optimizers import RMSprop
17 | from keras.regularizers import l2
18 | from keras.callbacks import EarlyStopping
19 | from sklearn.metrics import precision_recall_fscore_support, roc_auc_score
20 | from keras.utils import np_utils
21 |
22 | from keras import backend as K
23 | K.set_image_dim_ordering('tf')
24 | import matplotlib.pyplot as plt
25 | import itertools
26 |
27 | import numpy as np
28 | import pandas as pd
29 | np.random.seed(10)
30 |
31 | from time import time
32 | import numpy as np
33 | import keras.backend as K
34 | from keras.engine.topology import Layer, InputSpec
35 | from keras.layers import Dense, Input
36 | from keras.models import Model
37 | from keras.optimizers import RMSprop
38 | from keras import callbacks
39 | from keras.initializers import VarianceScaling
40 | from sklearn.cluster import KMeans
41 | from sklearn import metrics
42 | from sklearn.metrics.cluster import normalized_mutual_info_score
43 | from sklearn.metrics.cluster import adjusted_rand_score
44 | from sklearn.metrics import accuracy_score
45 | from sklearn import manifold
46 | import keras.layers.normalization as bn
47 |
48 | from sklearn.metrics import confusion_matrix
49 |
50 | df1 = pd.read_csv('/home/asif/genome.csv', header=None)
51 | print(df1.head())
52 |
53 | label = df1[0]
54 | print(label.head())
55 |
56 | from sklearn import preprocessing
57 | le = preprocessing.LabelEncoder()
58 | lbl = le.fit(label)
59 | labelss = lbl.transform(label)
60 | labelDF = pd.DataFrame(labelss)
61 |
62 | #labelArr =
63 | print(labelDF.head())
64 |
65 | feature = df1.drop(0, axis=1)
66 | print(feature.head())
67 |
68 | from sklearn.preprocessing import MinMaxScaler
69 | scaler = MinMaxScaler()
70 | x1 = feature.iloc[:,1:]  # note: this also drops the first SNP column, leaving 52 of the 53 feature columns
71 | df_scaled = pd.DataFrame(scaler.fit_transform(x1), columns=x1.columns)
72 | df_scaled.head()
73 |
74 | y = labelss
75 | x = df_scaled.values
76 |
77 | features = x
78 | labels = y
79 |
80 | def prepare_test_train_valid():
81 | # Hold out 25% of the data, then split the held-out part into test and validation halves
82 | train_x, test_x, train_y, test_y = train_test_split(features, labels, test_size=0.25, random_state=100)
83 | test_x, valid_x, test_y, valid_y = train_test_split(test_x, test_y, test_size=0.50, random_state=100)
84 |
85 | return train_x, test_x, train_y, test_y, valid_x, valid_y
86 |
87 | def one_hot_encode(labels):
88 | n_labels = len(labels)
89 | n_unique_labels = len(np.unique(labels))
90 | one_hot_encode = np.zeros((n_labels,n_unique_labels))
91 | one_hot_encode[np.arange(n_labels), labels] = 1
92 | return one_hot_encode
93 |
94 | labels = one_hot_encode(labels)
95 |
96 | # Extract feature
97 | train_x, test_x, train_y, test_y, valid_x, valid_y = prepare_test_train_valid()
98 |
99 | print('X_train shape:', train_x.shape)
100 | print('Y_train shape:', train_y.shape)
101 |
102 | num_classes = 5
103 | data_dim = 52
104 | timesteps = 1
105 |
106 | train_x = np.reshape(train_x,(train_x.shape[0], 1, train_x.shape[1]))
107 | test_x = np.reshape(test_x,(test_x.shape[0], 1, test_x.shape[1]))
108 | valid_x = np.reshape(valid_x,(valid_x.shape[0], 1, valid_x.shape[1]))
109 |
110 | def plot_confusion_matrix(cm, classes,
111 | normalize=False,
112 | title='Confusion matrix',
113 | cmap=plt.cm.Blues):
114 | """
115 | This function prints and plots the confusion matrix.
116 | Normalization can be applied by setting `normalize=True`.
117 | """
118 | if normalize:
119 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
120 | print("Normalized confusion matrix")
121 | else:
122 | print('Confusion matrix, without normalization')
123 |
124 | print(cm)
125 |
126 | plt.imshow(cm, interpolation='nearest', cmap=cmap)
127 | plt.title(title)
128 | plt.colorbar()
129 | tick_marks = np.arange(len(classes))
130 | plt.xticks(tick_marks, classes, rotation=45)
131 | plt.yticks(tick_marks, classes)
132 |
133 | fmt = '.2f' if normalize else 'd'
134 | thresh = cm.max() / 2.
135 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
136 | plt.text(j, i, format(cm[i, j], fmt),
137 | horizontalalignment="center",
138 | color="white" if cm[i, j] > thresh else "black")
139 |
140 | plt.tight_layout()
141 | plt.ylabel('True label')
142 | plt.xlabel('Predicted label')
143 |
144 | def build_LSTM(): #OK
145 | # expected input data shape: (batch_size, timesteps, data_dim)
146 | model = Sequential()
147 | model.add(LSTM(32, return_sequences=True, input_shape=(timesteps, data_dim)))
148 |
149 | model.add(LSTM(24, return_sequences=True))
150 |
151 | #model.add(Dropout(0.2))
152 | model.add(LSTM(16, return_sequences=True))
153 | model.add(Dropout(0.2))
154 |
155 | # apply softmax to output
156 | model.add(Flatten())
157 | model.add(Dense(num_classes, activation='softmax'))
158 | return model
159 |
160 | def model_train_evaluate(model, number_epoch):
161 | rmsprop = RMSprop(lr=0.001, rho=0.01, epsilon=None, decay=0.0)
162 |
163 | # stop training early should the validation loss stop improving
164 | earlystop = EarlyStopping(monitor='val_loss', patience=1, verbose=0, mode='auto')
165 |
166 | #if model in ['RNN']:
167 | rnn_model = build_LSTM() #OK
168 | rnn_model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=rmsprop)
169 | tensorboardRNN = TensorBoard(log_dir="RNN_logs/{}".format(time()))
170 | rnn_model.fit(train_x, train_y, validation_data=(valid_x, valid_y), callbacks=[tensorboardRNN, earlystop], batch_size=128, epochs=int(number_epoch))
171 | print(rnn_model.summary())
172 |
173 | y_prob = rnn_model.predict(test_x)
174 | y_pred = y_prob.argmax(axis=-1)
175 | y_true = np.argmax(test_y, 1)
176 |
177 | roc = roc_auc_score(test_y, y_prob)
178 | print ("ROC:", round(roc,3))
179 |
180 | # evaluate the model
181 | score, accuracy = rnn_model.evaluate(test_x, test_y, batch_size=32)
182 | print("\nAccuracy = {:.2f}".format(accuracy))
183 |
184 | # the F-score gives a similar value to the accuracy score, but is useful for cross-checking
185 | p,r,f,s = precision_recall_fscore_support(y_true, y_pred, average='micro')
186 | print ("Precision:", round(p,2))
187 | print ("Recall:", round(r,2))
188 | print ("F-Score:", round(f,2))
189 |
190 |
191 | # Compute confusion matrix
192 | cnf_matrix = confusion_matrix(y_true, y_pred)
193 | np.set_printoptions(precision=2)
194 |
195 | class_names = ["FIN", "GBR", "ASW", "CHB", "CLM"]
196 |
197 | # Plot non-normalized confusion matrix
198 | plt.figure()
199 | plot_confusion_matrix(cnf_matrix, classes=class_names, title='Confusion matrix: true vs predicted label')
200 | plt.show()
201 |
202 | model = build_LSTM()
203 | model_train_evaluate(model, 1000)  # note: model_train_evaluate() builds its own LSTM internally, so the model argument is effectively unused
204 | import gc; gc.collect()
205 |
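Since the reshape at lines 106-108 is what lets these flat 52-column SNP vectors feed an LSTM at all, a short shape walk-through may help. The arrays below are random stand-ins for the real features; sizes match the script, values are illustrative:

import numpy as np

num_samples, num_classes, data_dim, timesteps = 8, 5, 52, 1

# stand-ins for the scaled SNP matrix and integer population labels
x = np.random.rand(num_samples, data_dim)
y = np.random.randint(0, num_classes, size=num_samples)

# one-hot encoding, mirroring one_hot_encode() above
onehot = np.zeros((num_samples, num_classes))
onehot[np.arange(num_samples), y] = 1

# each sample becomes a length-1 "sequence" of 52 features, matching
# input_shape=(timesteps, data_dim) in build_LSTM(); with timesteps=1 the
# LSTM sees a single step, so it acts like a gated dense stack here
x_seq = x.reshape(num_samples, timesteps, data_dim)
print(x_seq.shape, onehot.shape)  # (8, 1, 52) (8, 5)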
--------------------------------------------------------------------------------
/PopulationClustering_v2/results/train.csv/genome.csv:
--------------------------------------------------------------------------------
1 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3 | CHB,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4 | CHB,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
6 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
7 | CHB,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
8 | ASW,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
9 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1
10 | ASW,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0
11 | ASW,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0
12 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0
13 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
14 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
15 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
16 | CHB,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
17 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
19 | GBR,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
20 | GBR,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
21 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
22 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
23 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
24 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
25 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
26 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
27 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
28 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
29 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
30 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
31 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
32 | CLM,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
33 | CHB,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
34 | CLM,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
35 | CLM,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
36 | FIN,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
37 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1
38 | FIN,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
39 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
40 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0
41 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
42 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
43 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
44 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
45 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
46 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
47 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
48 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
49 | ASW,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0
50 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
51 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
52 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
53 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
54 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
55 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
56 | CLM,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
57 | CHB,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
58 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
59 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
60 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
61 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
62 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
63 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
64 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
65 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
66 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
67 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
68 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0
69 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
70 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
71 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
72 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
73 | CHB,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
74 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
75 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
76 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0
77 | ASW,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
78 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0
79 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
80 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0
81 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
82 | CHB,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
83 | CLM,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
84 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
85 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
86 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
87 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
88 | FIN,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
89 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
90 | ASW,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0
91 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
92 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
93 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
94 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
95 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
96 | FIN,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
97 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
98 | FIN,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
99 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
100 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
101 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
102 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
103 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
104 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
105 | FIN,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
106 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
107 | GBR,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
108 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1
109 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
110 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
111 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
112 | FIN,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
113 | CHB,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
114 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
115 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
116 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
117 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
118 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
119 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
120 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
121 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
122 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
123 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
124 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
125 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
126 | CLM,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
127 | ASW,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
128 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
129 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
130 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
131 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
132 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1
133 | CLM,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
134 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
135 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
136 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
137 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
138 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
139 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1
140 | ASW,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1
141 | FIN,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
142 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
143 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
144 | CLM,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
145 | FIN,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
146 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
147 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
148 | CLM,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1
149 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
150 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
151 | ASW,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0
152 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
153 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
154 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
155 | GBR,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
156 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0
157 | CLM,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1
158 | CHB,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
159 | ASW,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
160 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
161 | CLM,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
162 | CLM,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
163 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
164 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
165 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
166 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
167 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
168 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
169 | CLM,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
170 | ASW,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
171 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
172 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
173 | ASW,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
174 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
175 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
176 | CLM,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
177 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
178 | CHB,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
179 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
180 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
181 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
182 | CHB,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
183 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
184 | ASW,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
185 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
186 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
187 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
188 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1
189 | GBR,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0
190 | FIN,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
191 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
192 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0
193 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0
194 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
195 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
196 | CLM,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
197 | CHB,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
198 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
199 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
200 |
--------------------------------------------------------------------------------
/PopulationClustering_v2/results/train.csv/part-00000-2c4830b2-4c39-48fc-909d-4868a1164190-c000.csv:
--------------------------------------------------------------------------------
1 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3 | CHB,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4 | CHB,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
6 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
7 | CHB,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
8 | ASW,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
9 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1
10 | ASW,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0
11 | ASW,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0
12 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0
13 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
14 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
15 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
16 | CHB,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
17 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
18 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
19 | GBR,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
20 | GBR,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
21 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
22 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
23 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
24 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
25 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
26 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
27 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
28 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
29 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
30 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
31 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
32 | CLM,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
33 | CHB,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
34 | CLM,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
35 | CLM,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
36 | FIN,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
37 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1
38 | FIN,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
39 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
40 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0
41 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
42 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
43 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
44 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
45 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
46 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
47 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
48 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
49 | ASW,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0
50 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
51 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
52 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
53 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
54 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
55 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
56 | CLM,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
57 | CHB,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
58 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
59 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
60 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
61 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
62 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
63 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0
64 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
65 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
66 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
67 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
68 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0
69 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
70 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
71 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
72 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
73 | CHB,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
74 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
75 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
76 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0
77 | ASW,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
78 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0
79 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
80 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0
81 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
82 | CHB,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
83 | CLM,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
84 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
85 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
86 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
87 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
88 | FIN,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
89 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
90 | ASW,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0
91 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
92 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
93 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
94 | ASW,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
95 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
96 | FIN,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
97 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
98 | FIN,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
99 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
100 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
101 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
102 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
103 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
104 | GBR,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
105 | FIN,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
106 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
107 | GBR,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
108 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1
109 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
110 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
111 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
112 | FIN,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
113 | CHB,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
114 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
115 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
116 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
117 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
118 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
119 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
120 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
121 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
122 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
123 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
124 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
125 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
126 | CLM,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
127 | ASW,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
128 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
129 | FIN,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
130 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
131 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
132 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1
133 | CLM,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
134 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
135 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0
136 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
137 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
138 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
139 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1
140 | ASW,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1
141 | FIN,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
142 | CLM,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0
143 | GBR,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
144 | CLM,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
145 | FIN,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
146 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
147 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
148 | CLM,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1
149 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
150 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
151 | ASW,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0
152 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
153 | CLM,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
154 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
155 | GBR,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
156 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0
157 | CLM,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1
158 | CHB,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
159 | ASW,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
160 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
161 | CLM,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
162 | CLM,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
163 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
164 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
165 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
166 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
167 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
168 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
169 | CLM,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
170 | ASW,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
171 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
172 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
173 | ASW,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
174 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
175 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
176 | CLM,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
177 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
178 | CHB,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
179 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
180 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
181 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
182 | CHB,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
183 | FIN,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
184 | ASW,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
185 | GBR,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
186 | GBR,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
187 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
188 | GBR,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1
189 | GBR,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,1,0,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,1,0
190 | FIN,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
191 | FIN,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
192 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0
193 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,0,1,0
194 | CHB,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
195 | CLM,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
196 | CLM,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
197 | CHB,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
198 | ASW,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
199 | CHB,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
200 |
--------------------------------------------------------------------------------
/PopulationClustering_v2/src/main/scala/org/fit/genomics/PopGenomicsClassificationSpark.scala:
--------------------------------------------------------------------------------
1 | package org.fit.genomics
2 |
3 | import hex.FrameSplitter
4 | import org.apache.spark.SparkContext
5 | import org.apache.spark.h2o.H2OContext
6 | import org.bdgenomics.adam.rdd.ADAMContext._
7 | import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele }
8 | import water.{ Job, Key }
9 | import water.fvec.Frame
10 |
11 | import org.apache.spark.h2o._
12 | import java.io.File
13 | import java.io._
14 | import scala.collection.JavaConverters._
15 | import scala.collection.immutable.Range.inclusive
16 | import scala.io.Source
17 |
18 | import org.apache.spark.rdd.RDD
19 | import org.apache.spark.sql._
20 | import org.apache.spark.sql.types.{ IntegerType, StringType, StructField, StructType }
21 | import org.apache.spark.ml.feature.{ VectorAssembler, Normalizer }
22 | import org.apache.spark.ml.Pipeline
23 | import org.apache.spark.ml.feature.VectorIndexer
24 | import org.apache.spark.ml.feature.StringIndexer
25 | import org.apache.spark.ml.feature.PCA
26 |
27 | import org.apache.spark.ml.classification.{ RandomForestClassifier, RandomForestClassificationModel }
28 | import org.apache.spark.ml.evaluation.{ MulticlassClassificationEvaluator }
29 | import org.apache.spark.ml.tuning.{ ParamGridBuilder, CrossValidator }
30 |
31 | object PopGenomicsClassificationSpark {
32 | def main(args: Array[String]): Unit = {
33 | val genotypeFile = "C:/Users/admin-karim/Downloads/genotypes.vcf"
34 | val panelFile = "C:/Users/admin-karim/Downloads/genotypes.panel"
35 |
36 | val spark:SparkSession = SparkSession
37 | .builder()
38 | .appName("PopStrat")
39 | .master("local[*]")
40 | .config("spark.sql.warehouse.dir", "C:/Exp/")
41 | .getOrCreate()
42 |
43 | val sc: SparkContext = spark.sparkContext
44 |
45 | // Create a set of the populations that we want to predict
46 | // Then create a map of sample ID -> population so that we can filter out the samples we're not interested in
47 | //val populations = Set("GBR", "ASW", "FIN", "CHB", "CLM")
48 | val populations = Set("FIN", "GBR", "ASW", "CHB", "CLM")
49 |
50 | def extract(file: String,
51 | filter: (String, String) => Boolean): Map[String, String] = {
52 | Source
53 | .fromFile(file)
54 | .getLines()
55 | .map(line => {
56 | val tokens = line.split(Array('\t', ' ')).toList
57 | tokens(0) -> tokens(1)
58 | })
59 | .toMap
60 | .filter(tuple => filter(tuple._1, tuple._2))
61 | }
62 |
63 | val panel: Map[String, String] = extract(
64 | panelFile,
65 | (sampleID: String, pop: String) => populations.contains(pop))
66 |
67 | // Load the ADAM genotypes from the parquet file(s)
68 | // Next, filter the genotypes so that we're left with only those in the populations we're interested in
69 | val allGenotypes: RDD[Genotype] = sc.loadGenotypes(genotypeFile).rdd
70 | //allGenotypes.adamParquetSave("output")
71 | val genotypes: RDD[Genotype] = allGenotypes.filter(genotype => {
72 | panel.contains(genotype.getSampleId)
73 | })
74 |
75 | // Convert the Genotype objects to our own SampleVariant objects to try and conserve memory
76 | case class SampleVariant(sampleId: String,
77 | variantId: Int,
78 | alternateCount: Int)
79 |
80 | def variantId(genotype: Genotype): String = {
81 | val name = genotype.getVariant.getContigName
82 | val start = genotype.getVariant.getStart
83 | val end = genotype.getVariant.getEnd
84 | s"$name:$start:$end"
85 | }
86 |
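   |     // Counts the ALT (non-reference) alleles in a call: for diploid genotypes
   |     // this yields 0, 1 or 2; note that NO_CALL alleles are counted as well.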
87 | def alternateCount(genotype: Genotype): Int = {
88 | genotype.getAlleles.asScala.count(_ != GenotypeAllele.REF)
89 | }
90 |
91 | def toVariant(genotype: Genotype): SampleVariant = {
92 | // Intern sample IDs as they will be repeated a lot
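   |       // Hashing the "name:start:end" string down to an Int keeps each record
   |       // small, at the (unhandled) risk of hash collisions between variant IDs.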
93 | new SampleVariant(genotype.getSampleId.intern(),
94 | variantId(genotype).hashCode(),
95 | alternateCount(genotype))
96 | }
97 |
98 | val variantsRDD: RDD[SampleVariant] = genotypes.map(toVariant)
99 | //println(s"Variant RDD: " + variantsRDD.first())
100 |
101 | // Group the variants by sample ID so we can process the variants sample-by-sample
102 | // Then get the total number of samples. This will be used to find variants that are missing for some samples.
103 | // Group the variants by variant ID and filter out those variants that are missing from some samples
104 | val variantsBySampleId: RDD[(String, Iterable[SampleVariant])] =
105 | variantsRDD.groupBy(_.sampleId)
106 | val sampleCount: Long = variantsBySampleId.count()
107 | println("Found " + sampleCount + " samples")
108 |
109 | val variantsByVariantId: RDD[(Int, Iterable[SampleVariant])] =
110 | variantsRDD.groupBy(_.variantId).filter {
111 | case (_, sampleVariants) => sampleVariants.size == sampleCount
112 | }
113 |
114 | // Make a map of variant ID -> count of samples with an alternate count of greater than zero
115 | // then filter out those variants that are not in our desired frequency range. The objective here is simply to
116 | // reduce the number of dimensions in the data set to make it easier to train the model.
117 | // The specified range is fairly arbitrary and was chosen based on the fact that it includes a reasonable
118 | // number of variants, but not too many.
119 | val variantFrequencies: collection.Map[Int, Int] = variantsByVariantId
120 | .map {
121 | case (variantId, sampleVariants) =>
122 | (variantId, sampleVariants.count(_.alternateCount > 0))
123 | }
124 | .collectAsMap()
125 |
126 | val permittedRange = inclusive(11, 11)
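   |     // Only variants whose alternate allele occurs in exactly 11 samples survive
   |     // this filter; widening the range would keep more feature dimensions.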
127 | val filteredVariantsBySampleId: RDD[(String, Iterable[SampleVariant])] =
128 | variantsBySampleId.map {
129 | case (sampleId, sampleVariants) =>
130 | val filteredSampleVariants = sampleVariants.filter(
131 | variant =>
132 | permittedRange.contains(
133 | variantFrequencies.getOrElse(variant.variantId, -1)))
134 | (sampleId, filteredSampleVariants)
135 | }
136 |
137 | //println(s"Filtered Variant RDD: " + filteredVariantsBySampleId.first())
138 |
139 | // Sort the variants for each sample ID. Each sample should now have the same number of sorted variants.
140 | // All items in the RDD should now have the same variants in the same order so we can just use the first
141 | // one to construct our header
142 | // Next construct the rows of our SchemaRDD from the variants
143 | val sortedVariantsBySampleId: RDD[(String, Array[SampleVariant])] =
144 | filteredVariantsBySampleId.map {
145 | case (sampleId, variants) =>
146 | (sampleId, variants.toArray.sortBy(_.variantId))
147 | }
148 |
149 | println(s"Sorted by Sample ID RDD: " + sortedVariantsBySampleId.first())
150 |
151 | val header = StructType(
152 | Seq(StructField("Region", StringType)) ++
153 | sortedVariantsBySampleId
154 | .first()
155 | ._2
156 | .map(variant => {
157 | StructField(variant.variantId.toString, IntegerType)
158 | }))
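   |     // The resulting schema is: Region (string) followed by one IntegerType
   |     // column per retained variant, each named by its hashed variant ID.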
159 |
160 | val rowRDD: RDD[Row] = sortedVariantsBySampleId.map {
161 | case (sampleId, sortedVariants) =>
162 | val region: Array[String] = Array(panel.getOrElse(sampleId, "Unknown"))
163 | val alternateCounts: Array[Int] = sortedVariants.map(_.alternateCount)
164 | Row.fromSeq(region ++ alternateCounts)
165 | }
166 |
167 | // Create the SchemaRDD from the header and rows and convert the SchemaRDD into a H2O dataframe
168 | val sqlContext = spark.sqlContext
169 | val schemaDF = sqlContext.createDataFrame(rowRDD, header)
170 | schemaDF.printSchema()
171 | schemaDF.show(10)
172 |
173 | val featureCols = schemaDF.columns.drop(1)
174 |
175 | val assembler = new VectorAssembler()
176 | .setInputCols(featureCols)
177 | .setOutputCol("features")
178 |
179 | val assembleDF = assembler.transform(schemaDF).select("features", "Region")
180 | assembleDF.show()
181 |
182 | /*
183 | val pca = new PCA()
184 | .setInputCol("features")
185 | .setOutputCol("pcaFeatures")
186 | .setK(50)
187 | .fit(assembleDF)
188 |
189 | val pcaDF = pca.transform(assembleDF).select("pcaFeatures", "Region").withColumnRenamed("pcaFeatures", "features")//.withColumnRenamed("Region", "label")
190 | pcaDF.show()
191 | *
192 | */
193 |
194 |
195 | val indexer = new StringIndexer()
196 | .setInputCol("Region")
197 | .setOutputCol("label")
198 |
199 | val indexedDF = indexer.fit(assembleDF).transform(assembleDF).select("features", "label")
200 | println("Indeexed: ")
201 | indexedDF.show(10)
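   |     // Example (commented out): to map numeric predictions back to region names
   |     // after scoring, keep the fitted StringIndexerModel and use IndexToString
   |     // (import org.apache.spark.ml.feature.IndexToString):
   |     //val labelModel = indexer.fit(assembleDF)
   |     //val labelConverter = new IndexToString()
   |     //  .setInputCol("prediction")
   |     //  .setOutputCol("predictedRegion")
   |     //  .setLabels(labelModel.labels)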
202 |
203 | val seed = 12345L
204 | val splits = indexedDF.randomSplit(Array(0.75, 0.25), seed)
205 | val (trainDF, testDF) = (splits(0), splits(1))
206 |
207 | trainDF.cache
208 | testDF.cache
209 |
210 | val rf = new RandomForestClassifier()
211 | .setLabelCol("label")
212 | .setFeaturesCol("features")
213 | .setSeed(1234567L)
214 |
215 | // Search through decision tree's maxDepth parameter for best model
216 | val paramGrid = new ParamGridBuilder()
217 | .addGrid(rf.maxDepth, 3 :: 5 :: 15 :: 20 :: 25 :: 30 :: Nil)
218 | .addGrid(rf.featureSubsetStrategy, "auto" :: "all" :: Nil)
219 | .addGrid(rf.impurity, "gini" :: "entropy" :: Nil)
220 | .addGrid(rf.maxBins, 3 :: 5 :: 10 :: 15 :: 25 :: 35 :: 45 :: Nil)
221 | .addGrid(rf.numTrees, 5 :: 10 :: 15 :: 20 :: 30 :: Nil)
222 | .build()
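   |     // This grid spans 6 * 2 * 2 * 7 * 5 = 840 parameter combinations; with the
   |     // 10-fold cross-validation below that is 8,400 model fits, so expect a long
   |     // runtime on a single machine.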
223 |
224 | val evaluator = new MulticlassClassificationEvaluator()
225 | .setLabelCol("label")
226 | .setPredictionCol("prediction")
227 |
228 | // Set up 10-fold cross validation
229 | val numFolds = 10
230 | val crossval = new CrossValidator()
231 | .setEstimator(rf)
232 | .setEvaluator(evaluator)
233 | .setEstimatorParamMaps(paramGrid)
234 | .setNumFolds(numFolds)
235 |
236 | val cvModel = crossval.fit(trainDF)
237 |
238 | // Save the workflow
239 | //cvModel.write.overwrite().save("model/RF_model_churn")
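   |     // Example (commented out): inspect the winning hyper-parameters, and reload
   |     // a previously saved model. CrossValidatorModel.load needs an extra import
   |     // of org.apache.spark.ml.tuning.CrossValidatorModel; the path is illustrative.
   |     //println("Best params: " + cvModel.bestModel.extractParamMap())
   |     //val sameModel = CrossValidatorModel.load("model/RF_model_churn")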
240 |
241 | val predictions = cvModel.transform(testDF)
242 | predictions.show(10)
243 |
244 | val metric = new MulticlassClassificationEvaluator()
245 | .setLabelCol("label")
246 | .setPredictionCol("prediction")
247 |
248 |     // Note: setMetricName mutates and returns this same evaluator instance, so
249 |     // binding its result to four separate vals would alias one object and end up
250 |     // evaluating the last metric ("f1") four times.
251 | 
252 |     // Compute the classification accuracy, precision, recall and F1 measure on
253 |     // the test data, setting the metric name just before each evaluation.
254 |     val accuracy = metric.setMetricName("accuracy").evaluate(predictions)
255 |     val precision = metric.setMetricName("weightedPrecision").evaluate(predictions)
256 |     val recall = metric.setMetricName("weightedRecall").evaluate(predictions)
257 |     val f1 = metric.setMetricName("f1").evaluate(predictions)
258 |
259 | // Print the performance metrics
260 | println("Accuracy = " + accuracy);
261 | println("Precision = " + precision)
262 | println("Recall = " + recall)
263 | println("F1 = " + f1)
264 | println(s"Test Error = ${1 - accuracy}")
265 |
266 | // Shutdown Spark cluster and H2O
267 | spark.stop()
268 | }
269 |
270 | }
271 |
--------------------------------------------------------------------------------
/PopulationClustering_v2/src/main/scala/org/fit/genomics/PopStratClassification.scala:
--------------------------------------------------------------------------------
1 | package org.fit.genomics
2 |
3 | import java.io._
4 |
5 | import hex.FrameSplitter
6 | import hex.deeplearning.DeepLearning
7 | import hex.deeplearning.DeepLearningModel.DeepLearningParameters
8 | import hex.deeplearning.DeepLearningModel.DeepLearningParameters.Activation
9 | import org.apache.spark.SparkContext
10 | import org.apache.spark.h2o.H2OContext
11 | import org.apache.spark.rdd.RDD
12 | import org.apache.spark.sql._
13 | import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
14 |
15 | import org.bdgenomics.adam.rdd.ADAMContext._
16 | import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele}
17 | import water.{Job, Key}
18 | import water.support.ModelMetricsSupport
19 | import water.fvec.Frame
20 |
21 | import org.apache.spark.h2o._
22 | import java.io.File
23 |
24 | import htsjdk.samtools.ValidationStringency
25 |
26 | import _root_.hex.{ModelMetrics, ModelMetricsSupervised, ModelMetricsMultinomial}
27 |
28 | import scala.collection.JavaConverters._
29 | import scala.collection.immutable.Range.inclusive
30 | import scala.io.Source
31 |
32 | object PopStratClassification {
33 | def main(args: Array[String]): Unit = {
34 | val genotypeFile = "C:/Users/admin-karim/Downloads/ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf"
35 | val panelFile = "C:/Users/admin-karim/Downloads/genotypes.panel"
36 |
37 | val sparkSession: SparkSession =
38 | SparkSession.builder.appName("PopStrat").master("local[*]").getOrCreate()
39 | val sc: SparkContext = sparkSession.sparkContext
40 |
41 | // Create a set of the populations that we want to predict
42 | // Then create a map of sample ID -> population so that we can filter out the samples we're not interested in
43 | //val populations = Set("GBR", "ASW", "FIN", "CHB", "CLM")
44 | val populations = Set("FIN", "GBR", "ASW", "CHB", "CLM")
45 |
46 | def extract(file: String,
47 | filter: (String, String) => Boolean): Map[String, String] = {
48 | Source
49 | .fromFile(file)
50 | .getLines()
51 | .map(line => {
52 | val tokens = line.split(Array('\t', ' ')).toList
53 | tokens(0) -> tokens(1)
54 | })
55 | .toMap
56 | .filter(tuple => filter(tuple._1, tuple._2))
57 | }
58 |
59 | val panel: Map[String, String] = extract(
60 | panelFile,
61 | (sampleID: String, pop: String) => populations.contains(pop))
62 |
63 | // Load the ADAM genotypes from the parquet file(s)
64 | // Next, filter the genotypes so that we're left with only those in the populations we're interested in
65 | //val allGenotypes: RDD[Genotype] = sc.loadGenotypes(genotypeFile, stringency = ValidationStringency.SILENT).rdd
66 | val genotypes0 = sc.loadGenotypes("C:/Users/admin-karim/Downloads/VCF_files/1.vcf", stringency = ValidationStringency.SILENT)
67 |
68 | //val genotypes0 = sc.loadGenotypes("sample0.vcf")
69 | val genotypes1 = sc.loadGenotypes("C:/Users/admin-karim/Downloads/VCF_files/2.vcf")
70 | val union = genotypes0.union(genotypes1)
71 | val rdd: RDD[Genotype] = union.rdd
72 |
73 |     val allGenotypes: RDD[Genotype] = rdd // rdd is already an RDD[Genotype]; a second .rdd call would not compile
74 |
75 | //allGenotypes.adamParquetSave("output")
76 | val genotypesFiltered: RDD[Genotype] = allGenotypes.filter(genotype => {
77 | panel.contains(genotype.getSampleId)
78 | })
79 |
80 | // Convert the Genotype objects to our own SampleVariant objects to try and conserve memory
81 | case class SampleVariant(sampleId: String,
82 | variantId: Int,
83 | alternateCount: Int)
84 | def variantId(genotype: Genotype): String = {
85 | val name = genotype.getVariant.getContigName
86 | val start = genotype.getVariant.getStart
87 | val end = genotype.getVariant.getEnd
88 | s"$name:$start:$end"
89 | }
90 |
91 | def alternateCount(genotype: Genotype): Int = {
92 | genotype.getAlleles.asScala.count(_ != GenotypeAllele.REF)
93 | }
94 |
95 | def toVariant(genotype: Genotype): SampleVariant = {
96 | // Intern sample IDs as they will be repeated a lot
97 | new SampleVariant(genotype.getSampleId.intern(),
98 | variantId(genotype).hashCode(),
99 | alternateCount(genotype))
100 | }
101 |
102 | val variantsRDD: RDD[SampleVariant] = genotypesFiltered.map(toVariant)
103 | //println(s"Variant RDD: " + variantsRDD.first())
104 |
105 | // Group the variants by sample ID so we can process the variants sample-by-sample
106 | // Then get the total number of samples. This will be used to find variants that are missing for some samples.
107 | // Group the variants by variant ID and filter out those variants that are missing from some samples
108 | val variantsBySampleId: RDD[(String, Iterable[SampleVariant])] =
109 | variantsRDD.groupBy(_.sampleId)
110 | val sampleCount: Long = variantsBySampleId.count()
111 | println("Found " + sampleCount + " samples")
112 |
113 | val writer_0 = new PrintWriter(new File("output_1.txt"))
114 | writer_0.write("Found " + sampleCount + " samples")
115 | //writer.write(s"Confusion Matrix"+ cm)
116 | //writer.write("Prediction Matrix"+ result)
117 | writer_0.close()
118 |
119 | val variantsByVariantId: RDD[(Int, Iterable[SampleVariant])] =
120 | variantsRDD.groupBy(_.variantId).filter {
121 | case (_, sampleVariants) => sampleVariants.size == sampleCount
122 | }
123 |
124 | // Make a map of variant ID -> count of samples with an alternate count of greater than zero
125 | // then filter out those variants that are not in our desired frequency range. The objective here is simply to
126 | // reduce the number of dimensions in the data set to make it easier to train the model.
127 | // The specified range is fairly arbitrary and was chosen based on the fact that it includes a reasonable
128 | // number of variants, but not too many.
129 | val variantFrequencies: collection.Map[Int, Int] = variantsByVariantId
130 | .map {
131 | case (variantId, sampleVariants) =>
132 | (variantId, sampleVariants.count(_.alternateCount > 0))
133 | }
134 | .collectAsMap()
135 |
136 | println(variantFrequencies.max)
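   |     // Note: Map.max compares (key, value) pairs and orders by key first, so this
   |     // prints the entry with the largest variant-ID hash, not the highest count.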
137 |
138 | val permittedRange = inclusive(11, 11)
139 | val filteredVariantsBySampleId: RDD[(String, Iterable[SampleVariant])] =
140 | variantsBySampleId.map {
141 | case (sampleId, sampleVariants) =>
142 | val filteredSampleVariants = sampleVariants.filter(
143 | variant =>
144 | permittedRange.contains(
145 | variantFrequencies.getOrElse(variant.variantId, -1)))
146 | (sampleId, filteredSampleVariants)
147 | }
148 |
149 | //println(s"Filtered Variant RDD: " + filteredVariantsBySampleId.first())
150 |
151 | // Sort the variants for each sample ID. Each sample should now have the same number of sorted variants.
152 | // All items in the RDD should now have the same variants in the same order so we can just use the first
153 | // one to construct our header
154 | // Next construct the rows of our SchemaRDD from the variants
155 | val sortedVariantsBySampleId: RDD[(String, Array[SampleVariant])] =
156 | filteredVariantsBySampleId.map {
157 | case (sampleId, variants) =>
158 | (sampleId, variants.toArray.sortBy(_.variantId))
159 | }
160 |
161 | println(s"Sorted by Sample ID RDD: " + sortedVariantsBySampleId.first())
162 |
163 | val header = StructType(
164 | Seq(StructField("Region", StringType)) ++
165 | sortedVariantsBySampleId
166 | .first()
167 | ._2
168 | .map(variant => {
169 | StructField(variant.variantId.toString, IntegerType)
170 | }))
171 |
172 | val rowRDD: RDD[Row] = sortedVariantsBySampleId.map {
173 | case (sampleId, sortedVariants) =>
174 | val region: Array[String] = Array(panel.getOrElse(sampleId, "Unknown"))
175 | val alternateCounts: Array[Int] = sortedVariants.map(_.alternateCount)
176 | Row.fromSeq(region ++ alternateCounts)
177 | }
178 |
179 | // Create the SchemaRDD from the header and rows and convert the SchemaRDD into a H2O dataframe
180 | val sqlContext = sparkSession.sqlContext
181 | val schemaDF = sqlContext.createDataFrame(rowRDD, header)
182 | schemaDF.coalesce(1).write.format("com.databricks.spark.csv").csv("results/train.csv")
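   |     // coalesce(1) forces a single output partition, which is why results/train.csv
   |     // is a directory holding exactly one part-00000-*.csv file (included above).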
183 | //testData.write.format("com.databricks.spark.csv").csv("results/test.csv")
184 |
185 | schemaDF.show()
186 |
187 | val h2oContext = H2OContext.getOrCreate(sparkSession)
188 | import h2oContext.implicits._
189 |
190 | val dataFrame = h2oContext.asH2OFrame(schemaDF)
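   |     // H2O expects the response column to be a categorical (enum) vector for
   |     // multinomial classification, so the "Region" vec is swapped for a
   |     // categorical copy below and the original vec is removed.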
191 | dataFrame
192 | .replace(dataFrame.find("Region"),
193 | dataFrame.vec("Region").toCategoricalVec())
194 | .remove()
195 | dataFrame.update()
196 |
197 |     // Split the dataframe into 50% training, 30% test, and 20% validation data
198 | val frameSplitter = new FrameSplitter(
199 | dataFrame,
200 | Array(0.50, 0.30),
201 | Array("training", "test", "validation").map(Key.make[Frame]),
202 | null)
203 |
204 | water.H2O.submitTask(frameSplitter)
205 | val splits = frameSplitter.getResult
206 | val training = splits(0)
207 | val test = splits(1)
208 | val validation = splits(2)
209 |
210 | // Set the parameters for our deep learning model.
211 | val deepLearningParameters = new DeepLearningParameters()
212 | deepLearningParameters._train = training
213 | deepLearningParameters._valid = validation
214 | deepLearningParameters._response_column = "Region"
215 | deepLearningParameters._epochs = 2
216 | deepLearningParameters._l2 = 0.01
217 | deepLearningParameters._seed = 1234567
218 | deepLearningParameters._activation = Activation.RectifierWithDropout
219 | deepLearningParameters._hidden = Array[Int](32, 64, 128)
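   |     // Note: _epochs = 2 keeps this run short and is best treated as a smoke
   |     // test; train longer and tune _l2 and _hidden for a serious model.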
220 |
221 | // Train the deep learning model
222 | val deepLearning = new DeepLearning(deepLearningParameters)
223 | val deepLearningTrained = deepLearning.trainModel
224 | val trainedModel = deepLearningTrained.get
225 |
226 | val error = trainedModel.classification_error()
227 | println("Training Error: " + error)
228 |
229 | //val predict = trainedModel.score(test)('predict)
230 | //trainedModel.score(test)('predict)
231 |
232 | trainedModel.score(dataFrame)('predict)
233 | println(variantFrequencies.max)
234 |
235 |
236 | /*
237 | val h2oContext2 = H2OContext.getOrCreate(sc)
238 | import h2oContext2._
239 | import h2oContext2.implicits._
240 |
241 | val predictionsFromModel = asRDD[DoubleHolder](predict).collect.map(_.result.getOrElse(Double.NaN))
242 | predictionsFromModel.foreach{ value => println(value)}
243 | *
244 | */
245 |
246 | // Collect model metrics and evaluate model quality
247 | //val trainMetrics = ModelMetricsSupport.modelMetrics[ModelMetricsMultinomial](trainedModel, test)
248 | //val met = trainMetrics.cm()
249 | //println("Accuracy: "+ met.accuracy())
250 | //println("MSE: "+ trainMetrics.mse)
251 | //println("RMSE: "+ trainMetrics.rmse)
252 | //println("R2: " + trainMetrics.r2)
253 |
254 | // Shutdown Spark cluster and H2O
255 | h2oContext.stop(stopSparkContext = true)
256 | sparkSession.stop()
257 | }
258 | }
259 |
--------------------------------------------------------------------------------
/PopulationClustering_v2/src/main/scala/org/fit/genomics/PopStratClustering.scala:
--------------------------------------------------------------------------------
1 | package org.fit.genomics
2 |
3 | import hex.FrameSplitter
4 | import org.apache.spark.{ SparkConf, SparkContext }
5 | import org.apache.spark.h2o.H2OContext
6 | import org.apache.spark.rdd.RDD
7 | import org.apache.spark.sql._
8 | import org.bdgenomics.adam.rdd.ADAMContext._
9 | import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele}
10 | import org.apache.spark._
11 |
12 | import org.apache.spark.mllib.linalg.{ Vectors, Vector }
13 | import org.apache.spark.ml.clustering.KMeans
14 | import water.fvec.Frame
15 | import java.io._
16 |
17 |
18 | import org.apache.spark.sql.types.{ IntegerType, StringType, StructField, StructType }
19 |
20 | import org.apache.spark.ml.feature.{ VectorAssembler, Normalizer }
21 | import org.apache.spark.ml.Pipeline
22 | import org.apache.spark.ml.feature.VectorIndexer
23 | import org.apache.spark.ml.feature.PCA
24 |
25 | import water.{ Job, Key }
26 |
27 |
28 | import org.apache.spark.h2o._
29 | import java.io.File
30 | import water._
31 |
32 | import scala.collection.JavaConverters._
33 | import scala.collection.immutable.Range.inclusive
34 | import scala.io.Source
35 |
36 | object PopStratClusterings {
37 | def main(args: Array[String]): Unit = {
38 | val genotypeFile = "C:/Users/admin-karim/Downloads/genotypes.vcf"
39 | val panelFile = "C:/Users/admin-karim/Downloads/genotypes.panel"
40 |
41 | val sparkSession: SparkSession =
42 | SparkSession.builder.appName("PopStrat").master("local[*]").getOrCreate()
43 | val sc: SparkContext = sparkSession.sparkContext
44 |
45 | val populations = Set("GBR", "MXL", "ASW", "CHB", "CLM")
46 | def extract(file: String, filter: (String, String) => Boolean): Map[String, String] = {
47 | Source
48 | .fromFile(file)
49 | .getLines()
50 | .map(line => {
51 | val tokens = line.split(Array('\t', ' ')).toList
52 | tokens(0) -> tokens(1)
53 | })
54 | .toMap
55 | .filter(tuple => filter(tuple._1, tuple._2))
56 | }
57 |
58 | val panel: Map[String, String] = extract(
59 | panelFile,
60 | (sampleID: String, pop: String) => populations.contains(pop))
61 | val allGenotypes: RDD[Genotype] = sc.loadGenotypes(genotypeFile).rdd
62 | val genotypes: RDD[Genotype] = allGenotypes.filter(genotype => {
63 | panel.contains(genotype.getSampleId)
64 | })
65 |
66 | // Convert the Genotype objects to our own SampleVariant objects to try and conserve memory
67 | case class SampleVariant(sampleId: String,
68 | variantId: Int,
69 | alternateCount: Int)
70 |
71 | def variantId(genotype: Genotype): String = {
72 | val name = genotype.getVariant.getContigName
73 | val start = genotype.getVariant.getStart
74 | val end = genotype.getVariant.getEnd
75 | s"$name:$start:$end"
76 | }
77 |
78 | def alternateCount(genotype: Genotype): Int = {
79 | genotype.getAlleles.asScala.count(_ != GenotypeAllele.REF)
80 | }
81 |
82 | def toVariant(genotype: Genotype): SampleVariant = {
83 | // Intern sample IDs as they will be repeated a lot
84 | new SampleVariant(genotype.getSampleId.intern(),
85 | variantId(genotype).hashCode(),
86 | alternateCount(genotype))
87 | }
88 |
89 | val variantsRDD: RDD[SampleVariant] = genotypes.map(toVariant)
90 | val variantsBySampleId: RDD[(String, Iterable[SampleVariant])] =
91 | variantsRDD.groupBy(_.sampleId)
92 | val sampleCount: Long = variantsBySampleId.count()
93 | println("Found " + sampleCount + " samples")
94 |
95 | val variantsByVariantId: RDD[(Int, Iterable[SampleVariant])] =
96 | variantsRDD.groupBy(_.variantId).filter {
97 | case (_, sampleVariants) => sampleVariants.size == sampleCount
98 | }
99 |
100 | val variantFrequencies: collection.Map[Int, Int] = variantsByVariantId
101 | .map {
102 | case (variantId, sampleVariants) =>
103 | (variantId, sampleVariants.count(_.alternateCount > 0))
104 | }
105 | .collectAsMap()
106 |
107 | val permittedRange = inclusive(11, 11) // keep only variants whose alternate allele occurs in exactly 11 samples, purely to reduce dimensionality
108 | val filteredVariantsBySampleId: RDD[(String, Iterable[SampleVariant])] =
109 | variantsBySampleId.map {
110 | case (sampleId, sampleVariants) =>
111 | val filteredSampleVariants = sampleVariants.filter(
112 | variant =>
113 | permittedRange.contains(
114 | variantFrequencies.getOrElse(variant.variantId, -1)))
115 | (sampleId, filteredSampleVariants)
116 | }
117 |
118 | val sortedVariantsBySampleId: RDD[(String, Array[SampleVariant])] =
119 | filteredVariantsBySampleId.map {
120 | case (sampleId, variants) =>
121 | (sampleId, variants.toArray.sortBy(_.variantId))
122 | }
123 |
124 | println("Sorted by Sample ID RDD: " + sortedVariantsBySampleId.first())
125 |
126 | val header = StructType(
127 | Array(StructField("Region", StringType)) ++
128 | sortedVariantsBySampleId
129 | .first()
130 | ._2
131 | .map(variant => {
132 | StructField(variant.variantId.toString, IntegerType)
133 | }))
134 |
135 | val rowRDD: RDD[Row] = sortedVariantsBySampleId.map {
136 | case (sampleId, sortedVariants) =>
137 | val region: Array[String] = Array(panel.getOrElse(sampleId, "Unknown"))
138 | val alternateCounts: Array[Int] = sortedVariants.map(_.alternateCount)
139 | Row.fromSeq(region ++ alternateCounts)
140 | }
141 |
142 | //val featureVectorsRDD = rowRDD.map { x: Row => x.getAs[Vector](0) }
143 |
144 | // Create the SchemaRDD from the header and rows and convert the SchemaRDD into a Spark dataframe
145 | val sqlContext = sparkSession.sqlContext
146 | val schemaDF = sqlContext.createDataFrame(rowRDD, header).drop("Region")
147 | schemaDF.printSchema()
148 | schemaDF.show(10)
149 |
150 | val featureCols = schemaDF.columns
151 |
152 | val assembler = new VectorAssembler()
153 | .setInputCols(featureCols)
154 | .setOutputCol("features")
155 |
156 | val assembleDF = assembler.transform(schemaDF).select("features")
157 | assembleDF.show()
158 |
159 | val pca = new PCA()
160 | .setInputCol("features")
161 | .setOutputCol("pcaFeatures")
162 | .setK(50)
163 | .fit(assembleDF)
164 |
165 | val pcaDF = pca.transform(assembleDF).select("pcaFeatures").withColumnRenamed("pcaFeatures", "features")
166 | pcaDF.show()
167 |
168 | val maxK = 20
169 | for (k <- 2 to maxK) {
170 | // Train a k-means model with k clusters
171 | val kmeans = new KMeans().setK(k).setSeed(12345L)
172 | val model = kmeans.fit(pcaDF)
173 |
174 | // Evaluate the clustering by computing the Within Set Sum of Squared Errors; the "elbow" of this curve suggests a good k
175 | val WSSSE = model.computeCost(pcaDF)
176 | println("Within Set Sum of Squared Errors for k = " + k + " is " + WSSSE)
177 | }
178 | sparkSession.stop()
179 | }
180 | }
--------------------------------------------------------------------------------
/PopulationClustering_v2/src/main/scala/org/fit/genomics/featureExtractor.scala:
--------------------------------------------------------------------------------
1 | package org.fit.genomics
2 |
3 | import java.io._
4 |
5 | import hex.FrameSplitter
6 | import hex.deeplearning.DeepLearning
7 | import hex.deeplearning.DeepLearningModel.DeepLearningParameters
8 | import hex.deeplearning.DeepLearningModel.DeepLearningParameters.Activation
9 | import org.apache.spark.SparkContext
10 | import org.apache.spark.h2o.H2OContext
11 | import org.apache.spark.rdd.RDD
12 | import org.apache.spark.sql._
13 | import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
14 |
15 | import org.bdgenomics.adam.rdd.ADAMContext._
16 | import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele}
17 | import water.{Job, Key}
18 | import water.support.ModelMetricsSupport
19 | import water.fvec.Frame
20 |
21 | import org.apache.spark.h2o._
22 | import java.io.File
23 |
24 | import htsjdk.samtools.ValidationStringency
25 |
26 | import _root_.hex.{ModelMetrics, ModelMetricsSupervised, ModelMetricsMultinomial}
27 |
28 | import scala.collection.JavaConverters._
29 | import scala.collection.immutable.Range.inclusive
30 | import scala.io.Source
31 |
32 | object featureExtractor {
33 | def main(args: Array[String]): Unit = {
34 | val genotypeFile = "ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf"
35 | val panelFile = "genotypes.panel"
36 |
37 | val sparkSession: SparkSession =
38 | SparkSession.builder.appName("PopStrat").master("local[*]").getOrCreate()
39 | val sc: SparkContext = sparkSession.sparkContext
40 |
41 | // Create a set of the populations that we want to predict
42 | // Then create a map of sample ID -> population so that we can filter out the samples we're not interested in
43 | //val populations = Set("GBR", "ASW", "FIN", "CHB", "CLM")
44 | val populations = Set("FIN", "GBR", "ASW", "CHB", "CLM")
45 |
46 | def extract(file: String,
47 | filter: (String, String) => Boolean): Map[String, String] = {
48 | Source
49 | .fromFile(file)
50 | .getLines()
51 | .map(line => {
52 | val tokens = line.split(Array('\t', ' ')).toList
53 | tokens(0) -> tokens(1)
54 | })
55 | .toMap
56 | .filter(tuple => filter(tuple._1, tuple._2))
57 | }
58 |
59 | val panel: Map[String, String] = extract(
60 | panelFile,
61 | (sampleID: String, pop: String) => populations.contains(pop))
62 |
63 | // Load the ADAM genotypes from the parquet file(s)
64 |
65 | // Next, filter the genotypes so that we're left with only those in the populations we're interested in
66 | //val allGenotypes: RDD[Genotype] = sc.loadGenotypes(genotypeFile, stringency = ValidationStringency.SILENT).rdd
67 |
68 | // If you want to generate an RDD from multiple VCF files, use a glob pattern:
69 | //val allGenotypes: RDD[Genotype] = sc.loadGenotypes("VCF_files/*.vcf", stringency = ValidationStringency.SILENT).rdd
70 |
71 | val genotypes0 = sc.loadGenotypes("VCF_files/1.vcf", stringency = ValidationStringency.SILENT)
72 |
73 | //val genotypes0 = sc.loadGenotypes("sample0.vcf")
74 | val genotypes1 = sc.loadGenotypes("VCF_files/2.vcf")
75 | val union = genotypes0.union(genotypes1)
76 | val rdd: RDD[Genotype] = union.rdd
77 |
78 | val allGenotypes: RDD[Genotype] = rdd
79 |
80 | //allGenotypes.adamParquetSave("output")
81 | val genotypesFiltered: RDD[Genotype] = allGenotypes.filter(genotype => {
82 | panel.contains(genotype.getSampleId)
83 | })
84 |
85 | // Convert the Genotype objects to our own SampleVariant objects to try and conserve memory
86 | case class SampleVariant(sampleId: String,
87 | variantId: Int,
88 | alternateCount: Int)
89 | def variantId(genotype: Genotype): String = {
90 | val name = genotype.getVariant.getContigName
91 | val start = genotype.getVariant.getStart
92 | val end = genotype.getVariant.getEnd
93 | s"$name:$start:$end"
94 | }
95 |
96 | def alternateCount(genotype: Genotype): Int = {
97 | genotype.getAlleles.asScala.count(_ != GenotypeAllele.REF)
98 | }
99 |
100 | def toVariant(genotype: Genotype): SampleVariant = {
101 | // Intern sample IDs as they will be repeated a lot
102 | new SampleVariant(genotype.getSampleId.intern(),
103 | variantId(genotype).hashCode(),
104 | alternateCount(genotype))
105 | }
106 |
107 | val variantsRDD: RDD[SampleVariant] = genotypesFiltered.map(toVariant)
108 | //println(s"Variant RDD: " + variantsRDD.first())
109 |
110 | // Group the variants by sample ID so we can process the variants sample-by-sample
111 | // Then get the total number of samples. This will be used to find variants that are missing for some samples.
112 | // Group the variants by variant ID and filter out those variants that are missing from some samples
113 | val variantsBySampleId: RDD[(String, Iterable[SampleVariant])] =
114 | variantsRDD.groupBy(_.sampleId)
115 | val sampleCount: Long = variantsBySampleId.count()
116 | println("Found " + sampleCount + " samples")
117 |
118 | val writer_0 = new PrintWriter(new File("output_1.txt"))
119 | writer_0.write("Found " + sampleCount + " samples")
120 | //writer.write(s"Confusion Matrix"+ cm)
121 | //writer.write("Prediction Matrix"+ result)
122 | writer_0.close()
123 |
124 | val variantsByVariantId: RDD[(Int, Iterable[SampleVariant])] =
125 | variantsRDD.groupBy(_.variantId).filter {
126 | case (_, sampleVariants) => sampleVariants.size == sampleCount
127 | }
128 |
129 | // Make a map of variant ID -> count of samples with an alternate count of greater than zero
130 | // then filter out those variants that are not in our desired frequency range. The objective here is simply to
131 | // reduce the number of dimensions in the data set to make it easier to train the model.
132 | // The specified range is fairly arbitrary and was chosen based on the fact that it includes a reasonable
133 | // number of variants, but not too many.
134 | val variantFrequencies: collection.Map[Int, Int] = variantsByVariantId
135 | .map {
136 | case (variantId, sampleVariants) =>
137 | (variantId, sampleVariants.count(_.alternateCount > 0))
138 | }
139 | .collectAsMap()
140 |
141 | println(variantFrequencies.max)
142 |
143 | val permittedRange = inclusive(11, 11)
144 | val filteredVariantsBySampleId: RDD[(String, Iterable[SampleVariant])] =
145 | variantsBySampleId.map {
146 | case (sampleId, sampleVariants) =>
147 | val filteredSampleVariants = sampleVariants.filter(
148 | variant =>
149 | permittedRange.contains(
150 | variantFrequencies.getOrElse(variant.variantId, -1)))
151 | (sampleId, filteredSampleVariants)
152 | }
153 |
154 | //println(s"Filtered Variant RDD: " + filteredVariantsBySampleId.first())
155 |
156 | // Sort the variants for each sample ID. Each sample should now have the same number of sorted variants.
157 | // All items in the RDD should now have the same variants in the same order so we can just use the first
158 | // one to construct our header
159 | // Next construct the rows of our SchemaRDD from the variants
160 | val sortedVariantsBySampleId: RDD[(String, Array[SampleVariant])] =
161 | filteredVariantsBySampleId.map {
162 | case (sampleId, variants) =>
163 | (sampleId, variants.toArray.sortBy(_.variantId))
164 | }
165 |
166 | println("Sorted by Sample ID RDD: " + sortedVariantsBySampleId.first())
167 |
168 | val header = StructType(
169 | Seq(StructField("Region", StringType)) ++
170 | sortedVariantsBySampleId
171 | .first()
172 | ._2
173 | .map(variant => {
174 | StructField(variant.variantId.toString, IntegerType)
175 | }))
176 |
177 | val rowRDD: RDD[Row] = sortedVariantsBySampleId.map {
178 | case (sampleId, sortedVariants) =>
179 | val region: Array[String] = Array(panel.getOrElse(sampleId, "Unknown"))
180 | val alternateCounts: Array[Int] = sortedVariants.map(_.alternateCount)
181 | Row.fromSeq(region ++ alternateCounts)
182 | }
183 |
184 | // Create the Spark DataFrame from the header and rows (written out as CSV below; no H2O frame is involved here)
185 | val sqlContext = sparkSession.sqlContext
186 | val schemaDF = sqlContext.createDataFrame(rowRDD, header)
187 |
188 | // Write the resultant DataFrame as CSV file to be used by Keras-based DEC algorithm
189 | schemaDF.coalesce(1).write.format("com.databricks.spark.csv").csv("results/train.csv")
190 | //testData.write.format("com.databricks.spark.csv").csv("results/test.csv")
191 |
192 | // Shut down the Spark session (this script never creates an H2OContext, so there is no H2O cluster to stop)
193 | sparkSession.stop()
194 |
195 | }
196 | }
197 |
--------------------------------------------------------------------------------
/PopulationClustering_v2/target/classes/META-INF/MANIFEST.MF:
--------------------------------------------------------------------------------
1 | Manifest-Version: 1.0
2 | Built-By: admin-karim
3 | Build-Jdk: 1.8.0_171
4 | Created-By: Maven Integration for Eclipse
5 |
6 |
--------------------------------------------------------------------------------
/PopulationClustering_v2/target/classes/META-INF/maven/com.deri.sels/PopulationClustering_v2/pom.properties:
--------------------------------------------------------------------------------
1 | #Generated by Maven Integration for Eclipse
2 | #Fri Aug 17 13:58:22 CEST 2018
3 | version=0.1-SNAPSHOT
4 | groupId=com.deri.sels
5 | m2e.projectName=PopulationClustering_v2
6 | m2e.projectLocation=C\:\\Users\\admin-karim\\Downloads\\WS\\PopulationClustering_v2
7 | artifactId=PopulationClustering_v2
8 |
--------------------------------------------------------------------------------
/PopulationClustering_v2/target/classes/META-INF/maven/com.deri.sels/PopulationClustering_v2/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 |   <modelVersion>4.0.0</modelVersion>
4 |   <groupId>com.deri.sels</groupId>
5 |   <artifactId>PopulationClustering_v2</artifactId>
6 |   <version>0.1-SNAPSHOT</version>
7 |
8 |   <properties>
9 |     <spark.version>2.2.1</spark.version>
10 |     <scala.version>2.11.8</scala.version>
11 |     <h2o.version>3.16.0.2</h2o.version>
12 |     <sparkling.water.version>2.2.6</sparkling.water.version>
13 |     <adam.version>0.23.0</adam.version>
14 |   </properties>
15 |
16 |   <repositories>
17 |     <repository>
18 |       <id>scala-tools.org</id>
19 |       <name>Scala-tools Maven2 Repository</name>
20 |       <url>http://scala-tools.org/repo-releases</url>
21 |     </repository>
22 |     <repository>
23 |       <id>snapshots-repo</id>
24 |       <url>https://oss.sonatype.org/content/repositories/snapshots</url>
25 |       <releases>
26 |         <enabled>false</enabled>
27 |       </releases>
28 |       <snapshots>
29 |         <enabled>true</enabled>
30 |         <updatePolicy>daily</updatePolicy>
31 |       </snapshots>
32 |     </repository>
33 |   </repositories>
34 |
35 |   <dependencies>
36 |     <dependency>
37 |       <groupId>org.bdgenomics.adam</groupId>
38 |       <artifactId>adam-core_2.11</artifactId>
39 |       <version>0.23.0</version>
40 |     </dependency>
41 |     <dependency>
42 |       <groupId>ai.h2o</groupId>
43 |       <artifactId>sparkling-water-core_2.11</artifactId>
44 |       <version>2.2.6</version>
45 |     </dependency>
46 |     <dependency>
47 |       <groupId>ai.h2o</groupId>
48 |       <artifactId>sparkling-water-examples_2.11</artifactId>
49 |       <version>2.2.6</version>
50 |     </dependency>
51 |     <dependency>
52 |       <groupId>org.apache.directory.studio</groupId>
53 |       <artifactId>org.apache.commons.io</artifactId>
54 |       <version>2.4</version>
55 |     </dependency>
56 |     <dependency>
57 |       <groupId>org.apache.spark</groupId>
58 |       <artifactId>spark-core_2.11</artifactId>
59 |       <version>${spark.version}</version>
60 |     </dependency>
61 |     <dependency>
62 |       <groupId>ai.h2o</groupId>
63 |       <artifactId>h2o-core</artifactId>
64 |       <version>${h2o.version}</version>
65 |     </dependency>
66 |     <dependency>
67 |       <groupId>ai.h2o</groupId>
68 |       <artifactId>h2o-scala_2.11</artifactId>
69 |       <version>${h2o.version}</version>
70 |     </dependency>
71 |     <dependency>
72 |       <groupId>ai.h2o</groupId>
73 |       <artifactId>h2o-algos</artifactId>
74 |       <version>${h2o.version}</version>
75 |     </dependency>
76 |     <dependency>
77 |       <groupId>ai.h2o</groupId>
78 |       <artifactId>h2o-app</artifactId>
79 |       <version>${h2o.version}</version>
80 |     </dependency>
81 |     <dependency>
82 |       <groupId>ai.h2o</groupId>
83 |       <artifactId>h2o-persist-hdfs</artifactId>
84 |       <version>${h2o.version}</version>
85 |     </dependency>
86 |     <dependency>
87 |       <groupId>org.scala-lang</groupId>
88 |       <artifactId>scala-library</artifactId>
89 |       <version>${scala.version}</version>
90 |     </dependency>
91 |     <dependency>
92 |       <groupId>ai.h2o</groupId>
93 |       <artifactId>google-analytics-java</artifactId>
94 |       <version>1.1.2-H2O-CUSTOM</version>
95 |     </dependency>
96 |     <dependency>
97 |       <groupId>joda-time</groupId>
98 |       <artifactId>joda-time</artifactId>
99 |       <version>2.9.9</version>
100 |     </dependency>
101 |   </dependencies>
102 |
103 |   <build>
104 |     <plugins>
105 |       <plugin>
106 |         <groupId>org.apache.maven.plugins</groupId>
107 |         <artifactId>maven-eclipse-plugin</artifactId>
108 |         <version>2.9</version>
109 |         <configuration>
110 |           <downloadSources>true</downloadSources>
111 |           <downloadJavadocs>false</downloadJavadocs>
112 |         </configuration>
113 |       </plugin>
114 |       <plugin>
115 |         <groupId>org.apache.maven.plugins</groupId>
116 |         <artifactId>maven-compiler-plugin</artifactId>
117 |         <version>3.5.1</version>
118 |         <configuration>
119 |           <source>${jdk.version}</source>
120 |           <target>${jdk.version}</target>
121 |         </configuration>
122 |       </plugin>
123 |       <plugin>
124 |         <artifactId>maven-shade-plugin</artifactId>
125 |         <version>2.4.3</version>
126 |         <executions>
127 |           <execution>
128 |             <phase>package</phase>
129 |             <goals>
130 |               <goal>shade</goal>
131 |             </goals>
132 |             <configuration>
133 |               <shadedArtifactAttached>false</shadedArtifactAttached>
134 |               <filters>
135 |                 <filter>
136 |                   <artifact>*:*</artifact>
137 |                   <excludes>
138 |                     <exclude>META-INF/*.SF</exclude>
139 |                     <exclude>META-INF/*.DSA</exclude>
140 |                     <exclude>META-INF/*.RSA</exclude>
141 |                   </excludes>
142 |                 </filter>
143 |               </filters>
144 |             </configuration>
145 |           </execution>
146 |         </executions>
147 |       </plugin>
148 |       <plugin>
149 |         <groupId>org.apache.maven.plugins</groupId>
150 |         <artifactId>maven-assembly-plugin</artifactId>
151 |         <version>2.4.1</version>
152 |         <configuration>
153 |           <descriptorRefs>
154 |             <descriptorRef>jar-with-dependencies</descriptorRef>
155 |           </descriptorRefs>
156 |           <archive>
157 |             <manifest>
158 |               <mainClass>org.fit.genomics.PopStratClassification</mainClass>
159 |             </manifest>
160 |             <manifestEntries>
161 |               <oozie.launcher.mapreduce.job.user.classpath.first>true</oozie.launcher.mapreduce.job.user.classpath.first>
162 |             </manifestEntries>
163 |           </archive>
164 |         </configuration>
165 |         <executions>
166 |           <execution>
167 |             <id>make-assembly</id>
168 |             <phase>package</phase>
169 |             <goals>
170 |               <goal>single</goal>
171 |             </goals>
172 |           </execution>
173 |         </executions>
174 |       </plugin>
175 |     </plugins>
176 |   </build>
177 | </project>
--------------------------------------------------------------------------------
/PopulationClustering_v2/target/maven-archiver/pom.properties:
--------------------------------------------------------------------------------
1 | #Generated by Maven
2 | #Thu Aug 02 01:47:00 CEST 2018
3 | version=0.1-SNAPSHOT
4 | groupId=com.deri.sels
5 | artifactId=PopulationClustering_v2
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This repository contains the implementation of our papers "Recurrent Deep Embedding Networks for Genotype Clustering and Ethnicity Prediction" and "Convolutional Embedded Networks for Population Scale Clustering and Bio-ancestry Inferencing". The former is available as a pre-print on arXiv (https://arxiv.org/pdf/1805.12218.pdf). The latter has been submitted to IEEE/ACM Transactions on Computational Biology and Bioinformatics and is under review.
2 |
3 | This repo contains two different implementations: i) Deep Embedding Networks (DEC) and Recurrent Deep Embedding Networks (CDEC) in Keras, and ii) Spark and H2O implementations of our paper titled "Recurrent Deep Embedding Networks for Genotype Clustering and Ethnicity Prediction".
4 |
5 | ## Implementation details
6 | The proof of concept of our approach is implemented in Spark, ADAM, and Keras. In particular, for scalable and faster preprocessing of the huge number of genetic variants across all the chromosomes (i.e. 870 GB of data), we used ADAM and Spark to convert the genetic variants from VCF format into a Spark DataFrame. We then convert the Spark DataFrame into NumPy arrays. Finally, we use Keras to implement the Conv-LSTM and CDEC networks for population-scale clustering and ancestry inference, respectively.
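
As a rough illustration of the first half of this pipeline (VCF to per-sample variant features), here is a condensed, self-contained Scala sketch pared down from featureExtractor.scala in this repo; the input path and object name are placeholders, and the DataFrame-to-NumPy/Keras half is not shown:

```scala
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele }
import scala.collection.JavaConverters._

object FeatureSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder.appName("PopStrat").master("local[*]").getOrCreate()

    // Load genotypes from a VCF file with ADAM (placeholder path)
    val genotypes: RDD[Genotype] = spark.sparkContext.loadGenotypes("VCF_files/1.vcf").rdd

    // One record per (sample, variant): count the alleles that differ from the reference
    val features = genotypes.map { g =>
      val v = g.getVariant
      val variantId = s"${v.getContigName}:${v.getStart}:${v.getEnd}"
      val altCount = g.getAlleles.asScala.count(_ != GenotypeAllele.REF)
      (g.getSampleId.intern(), variantId, altCount)
    }

    features.take(5).foreach(println)
    spark.stop()
  }
}
```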
7 |
8 | Experiments were carried out on a computing cluster with 32 cores running 64-bit Ubuntu 14.04, with a software stack consisting of Apache Spark v2.3.0, H2O v3.14.0.1, Sparkling Water v1.2.5, ADAM v0.22.0, and Keras v2.0.9 with the TensorFlow backend. We compare our approach with the state-of-the-art such as ADMIXTURE and VariationSpark.
9 |
10 | ### CDEC implementation in Python with Keras
11 | Refer to https://github.com/rezacsedu/Recurrent-Deep-Embedding-Networks/tree/master/CDEC for more details. Network training was carried out on an Nvidia TitanX GPU with CUDA and cuDNN enabled to make the overall pipeline faster.
12 |
13 | #### Step 1: Feature extraction using Scala, Adam, and Spark
14 | For this, first, download the VCF files (containing the variants) and the panel file (containing the labels) from ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/.
15 |
16 | Then go to https://github.com/rezacsedu/VariationDEC/tree/master/PopulationClustering_v2 and use featureExtractor.scala
17 | to extract the features and save them as a DataFrame in CSV format, to be used by the Keras-based DEC.
18 |
19 | For this, make sure that you've configured Spark correctly on your machine. Alternatively, execute this script as a standalone Scala project from Eclipse or IntelliJ IDEA.
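
As a rough example, assuming the jar-with-dependencies produced by the Maven assembly plugin configured in pom.xml, a run might look like `spark-submit --class org.fit.genomics.featureExtractor target/PopulationClustering_v2-0.1-SNAPSHOT-jar-with-dependencies.jar` (the exact jar name depends on your build, and the VCF/panel paths inside the script should be adjusted first).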
20 |
21 | #### Step 2: This is the CDEC part in Keras
22 | Go to https://github.com/rezacsedu/Recurrent-Deep-Embedding-Networks/tree/master/CDEC. There you will find several Python scripts and a sample genetic-variant feature file in CSV format for clustering and classification, respectively.
23 |
24 | - genome.csv: the sample genetic-variant features
25 | - customlayers.py: creates the custom clustering layer in Keras
26 | - keras_unpooling.py: performs the unpooling operation for the Conv autoencoder part of the network
27 | - misc.py: contains the data-preparation helper modules
28 | - network.py: builds the CDEC network for the clustering
29 | - main.py: the main class that encapsulates all the steps.
30 |
33 | #### Acknowledgement
34 | This implementation is partly based on https://github.com/elieJalbout/Clustering-with-Deep-learning.
35 |
36 | ### DEC implementation in Python
37 | A modified version of the Keras-based DEC implementation (https://github.com/XifengGuo/DEC-keras) of the algorithm proposed by Xie et al. is used in our approach. Network training was carried out on an Nvidia TitanX GPU with CUDA and cuDNN enabled to make the overall pipeline faster.
38 |
39 | #### Step 1: Feature extraction using Scala, Adam and Spark
40 | For this, first, download the VCF files (containing the variants) and the panel file (containing the labels) from ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/.
41 |
42 | Then go to https://github.com/rezacsedu/VariationDEC/tree/master/PopulationClustering_v2 and use featureExtractor.scala
43 | to extract the features and save them as a DataFrame in CSV format, to be used by the Keras-based DEC.
44 |
45 | For this, make sure that you've configured Spark correctly on your machine. Alternatively, execute this script as a standalone Scala project from Eclipse or IntelliJ IDEA.
46 |
47 | #### Step 2: This is the DEC part in Keras/Python
48 | Go to https://github.com/rezacsedu/VariationDEC/tree/master/DEC_GenotypeClustering_Keras. There you will find two Python scripts and a sample genetic-variant feature file in CSV format for clustering and classification, respectively.
49 |
50 | - genome.csv: the sample genetic-variant features
51 | - DEC_Genotype_Clustering.py: for the clustering
52 | - LSTM_EthnicityPrediction.py: for the classification
53 |
54 | ### Spark and H2O implementation in Scala
55 | For this, first download the VCF files (containing the variants) and the panel file (containing the labels) from ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/. Then go to https://github.com/rezacsedu/VariationDEC/tree/master/PopulationClustering_v2, where you'll find the Scala scripts listed below:
56 |
57 | - PopGenomicsClassificationSpark.scala: the Spark implementation of ethnicity prediction
58 | - PopStratClassification.scala: the H2O implementation of ethnicity prediction
59 | - PopStratClustering.scala: the Spark implementation of genotype clustering using K-means (with PCA for dimensionality reduction)
60 |
61 | For this, make sure that you've configured Spark and ADAM (see https://github.com/bigdatagenomics/adam) correctly on your machine. Alternatively, execute these scripts as standalone Scala projects from Eclipse or IntelliJ IDEA. A condensed sketch of the clustering step follows.
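
For orientation, the clustering script assembles the numeric variant columns into a single feature vector, projects it onto 50 principal components, and then sweeps k from 2 to 20, printing the Within Set Sum of Squared Errors (WSSSE) so that the "elbow" can be spotted. Below is a condensed sketch of that core, assuming `schemaDF` holds only the numeric variant columns (with the Region label already dropped, as in PopStratClustering.scala):

```scala
import org.apache.spark.ml.clustering.KMeans
import org.apache.spark.ml.feature.{ PCA, VectorAssembler }
import org.apache.spark.sql.DataFrame

// Condensed from PopStratClustering.scala: PCA down to 50 components,
// then an elbow sweep over k using WSSSE.
def elbowSweep(schemaDF: DataFrame, maxK: Int = 20): Unit = {
  val assembled = new VectorAssembler()
    .setInputCols(schemaDF.columns)
    .setOutputCol("features")
    .transform(schemaDF)
    .select("features")

  val pcaDF = new PCA()
    .setInputCol("features")
    .setOutputCol("pcaFeatures")
    .setK(50)
    .fit(assembled)
    .transform(assembled)
    .select("pcaFeatures")
    .withColumnRenamed("pcaFeatures", "features")

  for (k <- 2 to maxK) {
    val model = new KMeans().setK(k).setSeed(12345L).fit(pcaDF)
    println(s"WSSSE for k = $k is ${model.computeCost(pcaDF)}")
  }
}
```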
62 |
63 | ### Citation request
64 | @inproceedings{karim2018recurrent,
65 |   title={Recurrent Deep Embedding Networks for Genotype Clustering and Ethnicity Prediction},
66 |   author={Karim, Md and Cochez, Michael and Beyan, Oya Deniz and Zappa, Achille and Sahay, Ratnesh and Decker, Stefan and Rebholz-Schuhmann, Dietrich and others},
67 |   booktitle={arXiv preprint arXiv:1805.12218},
68 |   year={2018}
69 | }
70 |
71 | ### Contributing
72 | For any questions, feel free to open an issue or contact us at rezaul.karim@rwth-aachen.de.
73 |
--------------------------------------------------------------------------------