├── LICENSE.md ├── Models └── .gitkeep ├── README.md ├── Results ├── Waseem_Hovy_auth.txt ├── Waseem_Hovy_hidden-auth.txt ├── Waseem_Hovy_hidden-baseline.txt ├── Waseem_Hovy_lr-auth.txt ├── Waseem_Hovy_lr-baseline.txt ├── Waseem_Hovy_sum-auth.txt └── Waseem_Hovy_sum-baseline.txt ├── TwitterData ├── README.md └── twitter_data_waseem_hovy.csv ├── __init__.py ├── cross_validate.py ├── featureExtractor ├── __init__.py ├── dnn_features.py ├── feature_extractor.py ├── graph_features.py └── ngram_features.py ├── grid_search.py ├── main_classifier.py ├── requirements.txt ├── resources ├── __init__.py ├── authors.txt ├── node2vec │ ├── .gitignore │ ├── LICENSE.md │ ├── README.md │ ├── requirements.txt │ └── src │ │ ├── main.py │ │ └── node2vec.py ├── stopwords.txt ├── structural.py └── textual.py ├── test.py ├── twitter_access.py └── twitter_model.py /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Pushkar Mishra 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pushkarmishra/AuthorProfilingAbuseDetection/6322467b26f53aca7d231c0ab92182879b9375ff/Models/.gitkeep -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Author Profiling for Abuse Detection 2 | 3 | Code for paper "Author Profiling for Abuse Detection", in Proceedings of the 27th International Conference on Computational Linguistics (COLING) 2018 4 | 5 | If you use this code, please cite our paper: 6 | ``` 7 | @inproceedings{mishra-etal-2018-author, 8 | title = "Author Profiling for Abuse Detection", 9 | author = "Mishra, Pushkar and 10 | Del Tredici, Marco and 11 | Yannakoudakis, Helen and 12 | Shutova, Ekaterina", 13 | booktitle = "Proceedings of the 27th International Conference on Computational Linguistics", 14 | month = aug, 15 | year = "2018", 16 | address = "Santa Fe, New Mexico, USA", 17 | publisher = "Association for Computational Linguistics", 18 | url = "https://www.aclweb.org/anthology/C18-1093", 19 | pages = "1088--1098", 20 | } 21 | ``` 22 | 23 | Python3.5+ required to run the code. 
Dependencies can be installed with `pip install -r requirements.txt` followed by `python -m nltk.downloader punkt` 24 | 25 | The dataset for the code is provided in the _TwitterData/twitter_data_waseem_hovy.csv_ file as a list of _\[tweet ID, annotation\]_ pairs. 26 | To run the code, please use a Twitter API (_twitter_access.py_ employs Tweepy) to retrieve the tweets for the given tweet IDs. Replace the dataset file with a 27 | file of the same name that has a list of _\[tweet ID, tweet, annotation\]_ triples. 28 | Additionally, _twitter_access.py_ contains functions to retrieve follower-following relationships amongst the authors of the tweets (specified in _resources/authors.txt_). Once the relationships have been retrieved, please use _Node2vec_ (see _resources/node2vec_) to produce embeddings for each of the authors and store them in a file named _authors.emb_ in the _resources_ directory. 29 | 30 | To run the best method (LR + AUTH): 31 | `python twitter_model.py -c 16202 -m lna` 32 | 33 | 34 |
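The run commands above and below assume that this hydration step has already been carried out. As a rough, hedged sketch of what it involves (using Tweepy 3.x with placeholder credentials; this is only an illustration, not the repository's actual _twitter_access.py_):

```python
import csv
import tweepy

# Placeholder Twitter API credentials; replace with your own keys.
auth = tweepy.OAuthHandler('CONSUMER_KEY', 'CONSUMER_SECRET')
auth.set_access_token('ACCESS_TOKEN', 'ACCESS_TOKEN_SECRET')
api = tweepy.API(auth, wait_on_rate_limit=True)

path = 'TwitterData/twitter_data_waseem_hovy.csv'
with open(path) as f:
    pairs = list(csv.reader(f))        # [tweet ID, annotation] pairs (assumes no header row)

triples = []
for tweet_id, annotation in pairs:
    try:
        status = api.get_status(tweet_id, tweet_mode='extended')
    except tweepy.TweepError:          # deleted, protected or suspended tweets are skipped
        continue
    triples.append([tweet_id, status.full_text, annotation])

with open(path, 'w', newline='') as f:  # same file name, now with [tweet ID, tweet, annotation] triples
    csv.writer(f).writerows(triples)
```

Tweets that can no longer be retrieved are skipped, so the class distribution may differ slightly from the one used in the paper.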
To run the other methods:
35 | * AUTH: `python twitter_model.py -c 16202 -m a`
36 | * LR: `python twitter_model.py -c 16202 -m ln`
37 | * WS: `python twitter_model.py -c 16202 -m ws`
38 | * HS: `python twitter_model.py -c 16202 -m hs`
39 | * WS + AUTH: `python twitter_model.py -c 16202 -m wsa`
40 | * HS + AUTH: `python twitter_model.py -c 16202 -m hsa`
41 |
42 | For the HS- and WS-based methods, adding the `-ft` flag to the command ensures that the pre-trained deep neural models from the _Models_ directory
43 | are not used and all training instead happens from scratch. This requires that the file of pre-trained GloVe embeddings is downloaded from the GloVe project page,
44 | unzipped and placed in the _resources_ directory prior to execution.
45 |
46 |
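The numbered overview below describes the complete training-testing flow. As a compact, hedged illustration of how steps 3-7 and the final 10-fold averaging fit together, here is a simplified scikit-learn sketch that uses plain n-gram counts only (the author embeddings and neural models of the actual pipeline are omitted):

```python
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import StratifiedKFold

def cross_validate(tweets, labels, n_splits=10):
    tweets, labels = np.asarray(tweets), np.asarray(labels)
    fold_metrics = []
    for train_idx, test_idx in StratifiedKFold(n_splits, shuffle=True, random_state=0).split(tweets, labels):
        # step 4: fit an n-gram count model on the training tweets only
        vectorizer = CountVectorizer(ngram_range=(1, 2)).fit(tweets[train_idx])
        # steps 5-6: featurise the tweets and train the classifier (author embeddings omitted here)
        clf = LogisticRegression(max_iter=1000)
        clf.fit(vectorizer.transform(tweets[train_idx]), labels[train_idx])
        # step 7: predict on the held-out fold and compute precision, recall and F1
        preds = clf.predict(vectorizer.transform(tweets[test_idx]))
        fold_metrics.append(precision_recall_fscore_support(labels[test_idx], preds, average='weighted')[:3])
    return np.mean(fold_metrics, axis=0)   # precision, recall, F1 averaged over the folds
```

In the repository, the feature extractor (see _featureExtractor/_) plays the role of the vectoriser above and additionally appends each author's node2vec embedding, as described in step 5 of the overview.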
An overview of the complete training-testing flow is as follows:
47 | 1. For each tweet in the dataset, its author's identity is obtained using functions available in the _twitter_access.py_ file. For each author,
48 | information about which other authors from the dataset follow them on Twitter is also obtained in order to create a community graph where nodes
49 | are authors and edges denote follow relationships.
50 | 2. Node2vec is applied to the community graph to generate embeddings for the nodes, i.e., the authors. These author embeddings are saved to the
51 | _authors.emb_ file in the _resources_ directory.
52 | 3. The dataset is randomly split into a train set and a test set.
53 | 4. Tweets in the train set are used to produce an n-gram count-based model or a deep neural model, depending on the method being used.
54 | 5. A feature extractor is instantiated that uses the models from step 4 along with the author embeddings from step 2 to convert tweets to feature vectors.
55 | 6. An LR/GBDT classifier is trained on the feature vectors extracted for the tweets in the train set. A part of the train set is held out as
56 | validation data to prevent over-fitting.
57 | 7. The trained classifier predicts classes for the tweets in the test set, and precision, recall and F1 are calculated.
58 |
59 | In the 10-fold CV, steps 3-7 are run 10 times (each time with a different set of tweets as the test set) and the final precision, recall and
60 | F1 are calculated by averaging results across the 10 runs.
61 |
--------------------------------------------------------------------------------
/Results/Waseem_Hovy_auth.txt:
--------------------------------------------------------------------------------
1 | /Library/Frameworks/Python.framework/Versions/3.6/bin/python3.6 /Users/pushkarmishra/Desktop/AuthorProfileAbuseDetection/twitter_model.py -c 30000
2 | Using Theano backend.
3 | 2018-03-10 01:05:38 - CVLog - INFO - 10-fold cross validation procedure has begun 4 | 2018-03-10 01:05:38 - CVLog - INFO - Validation round 1 of 10 starting 5 | 2018-03-10 01:05:38 - TrainingLog - INFO - Initiating training of main classifier 6 | 2018-03-10 01:05:56 - TrainingLog - INFO - Feature extractor ready 7 | 2018-03-10 01:05:56 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 8 | 2018-03-10 01:05:56 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 9 | 2018-03-10 01:05:56 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 10 | 2018-03-10 01:05:56 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 11 | 2018-03-10 01:05:56 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 12 | 2018-03-10 01:05:56 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 13 | 2018-03-10 01:05:56 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 14 | 2018-03-10 01:05:56 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 15 | 2018-03-10 01:05:56 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 16 | 2018-03-10 01:05:56 - TrainingLog - INFO - 10001 of 14581 feature vectors prepared for training 17 | 2018-03-10 01:05:56 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 18 | 2018-03-10 01:05:56 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 19 | 2018-03-10 01:05:56 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 20 | 2018-03-10 01:05:56 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 21 | 2018-03-10 01:06:03 - TrainingLog - INFO - Main classifier training finished 22 | 2018-03-10 01:06:04 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8217821782178217 23 | 2018-03-10 01:06:04 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8159203980099502 24 | 2018-03-10 01:06:04 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8039867109634552 25 | 2018-03-10 01:06:04 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.7930174563591023 26 | 2018-03-10 01:06:04 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.7964071856287425 27 | 2018-03-10 01:06:04 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.7853577371048253 28 | 2018-03-10 01:06:04 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.776034236804565 29 | 2018-03-10 01:06:04 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.7777777777777778 30 | 2018-03-10 01:06:04 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.7746947835738068 31 | 2018-03-10 01:06:04 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.7612387612387612 32 | 2018-03-10 01:06:04 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.7638510445049954 33 | 2018-03-10 01:06:04 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.765195670274771 34 | 2018-03-10 01:06:04 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.7732513451191392 35 | 2018-03-10 01:06:04 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.7723054960742327 36 | 2018-03-10 01:06:04 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.7714856762158561 37 | 2018-03-10 01:06:04 - TestLog - INFO - 1601 samples classified. 
Accuracy up till now is 0.7676452217364147 38 | 2018-03-10 01:06:04 - TestLog - INFO - Total 1621 samples classified with accuracy 0.7686613201727329 39 | 2018-03-10 01:06:04 - TestLog - INFO - AUROC is 0.8904875087081418 40 | 2018-03-10 01:06:04 - TestLog - INFO - Classification report: 41 | precision recall f1-score support 42 | 43 | 0 1.00000 0.00515 0.01026 194 44 | 1 0.69275 0.75873 0.72424 315 45 | 2 0.78902 0.90468 0.84290 1112 46 | 47 | avg / total 0.79556 0.76866 0.72019 1621 48 | 49 | 2018-03-10 01:06:04 - TestLog - INFO - Confusion matrix: 50 | [[ 1 0 193] 51 | [ 0 239 76] 52 | [ 0 106 1006]] 53 | 2018-03-10 01:06:04 - CVLog - INFO - Validation round 2 of 10 starting 54 | 2018-03-10 01:06:04 - TrainingLog - INFO - Initiating training of main classifier 55 | 2018-03-10 01:06:20 - TrainingLog - INFO - Feature extractor ready 56 | 2018-03-10 01:06:20 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 57 | 2018-03-10 01:06:20 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 58 | 2018-03-10 01:06:20 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 59 | 2018-03-10 01:06:20 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 60 | 2018-03-10 01:06:20 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 61 | 2018-03-10 01:06:20 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 62 | 2018-03-10 01:06:20 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 63 | 2018-03-10 01:06:20 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 64 | 2018-03-10 01:06:20 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 65 | 2018-03-10 01:06:20 - TrainingLog - INFO - 10001 of 14581 feature vectors prepared for training 66 | 2018-03-10 01:06:20 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 67 | 2018-03-10 01:06:20 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 68 | 2018-03-10 01:06:20 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 69 | 2018-03-10 01:06:20 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 70 | 2018-03-10 01:06:28 - TrainingLog - INFO - Main classifier training finished 71 | 2018-03-10 01:06:28 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.6732673267326733 72 | 2018-03-10 01:06:28 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.7213930348258707 73 | 2018-03-10 01:06:28 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.7242524916943521 74 | 2018-03-10 01:06:28 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.7082294264339152 75 | 2018-03-10 01:06:28 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.7125748502994012 76 | 2018-03-10 01:06:28 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.7221297836938436 77 | 2018-03-10 01:06:28 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.7232524964336662 78 | 2018-03-10 01:06:28 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.7365792759051186 79 | 2018-03-10 01:06:28 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.7380688124306326 80 | 2018-03-10 01:06:28 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.7482517482517482 81 | 2018-03-10 01:06:28 - TestLog - INFO - 1101 samples classified. 
Accuracy up till now is 0.7493188010899182 82 | 2018-03-10 01:06:28 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.7577019150707743 83 | 2018-03-10 01:06:28 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.7617217524980784 84 | 2018-03-10 01:06:28 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.7673090649536045 85 | 2018-03-10 01:06:28 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.7694870086608927 86 | 2018-03-10 01:06:28 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.7657713928794504 87 | 2018-03-10 01:06:28 - TestLog - INFO - Total 1621 samples classified with accuracy 0.7655768044417026 88 | 2018-03-10 01:06:28 - TestLog - INFO - AUROC is 0.882812427956678 89 | 2018-03-10 01:06:28 - TestLog - INFO - Classification report: 90 | precision recall f1-score support 91 | 92 | 0 0.33333 0.00515 0.01015 194 93 | 1 0.68067 0.77143 0.72321 315 94 | 2 0.79064 0.89658 0.84029 1112 95 | 96 | avg / total 0.71454 0.76558 0.71819 1621 97 | 98 | 2018-03-10 01:06:28 - TestLog - INFO - Confusion matrix: 99 | [[ 1 1 192] 100 | [ 0 243 72] 101 | [ 2 113 997]] 102 | 2018-03-10 01:06:28 - CVLog - INFO - Validation round 3 of 10 starting 103 | 2018-03-10 01:06:28 - TrainingLog - INFO - Initiating training of main classifier 104 | 2018-03-10 01:06:44 - TrainingLog - INFO - Feature extractor ready 105 | 2018-03-10 01:06:44 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 106 | 2018-03-10 01:06:44 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 107 | 2018-03-10 01:06:44 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 108 | 2018-03-10 01:06:44 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 109 | 2018-03-10 01:06:44 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 110 | 2018-03-10 01:06:44 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 111 | 2018-03-10 01:06:44 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 112 | 2018-03-10 01:06:44 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 113 | 2018-03-10 01:06:44 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 114 | 2018-03-10 01:06:44 - TrainingLog - INFO - 10001 of 14581 feature vectors prepared for training 115 | 2018-03-10 01:06:44 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 116 | 2018-03-10 01:06:44 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 117 | 2018-03-10 01:06:44 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 118 | 2018-03-10 01:06:44 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 119 | 2018-03-10 01:06:52 - TrainingLog - INFO - Main classifier training finished 120 | 2018-03-10 01:06:52 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.7524752475247525 121 | 2018-03-10 01:06:52 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.7661691542288557 122 | 2018-03-10 01:06:52 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.7740863787375415 123 | 2018-03-10 01:06:52 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.7630922693266833 124 | 2018-03-10 01:06:52 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.7624750499001997 125 | 2018-03-10 01:06:52 - TestLog - INFO - 601 samples classified. 
Accuracy up till now is 0.7487520798668885 126 | 2018-03-10 01:06:52 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.7560627674750356 127 | 2018-03-10 01:06:52 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.7590511860174781 128 | 2018-03-10 01:06:52 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.7635960044395117 129 | 2018-03-10 01:06:52 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.7572427572427572 130 | 2018-03-10 01:06:52 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.7620345140781108 131 | 2018-03-10 01:06:52 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.761865112406328 132 | 2018-03-10 01:06:52 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.7632590315142198 133 | 2018-03-10 01:06:52 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.7673090649536045 134 | 2018-03-10 01:06:53 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.7634910059960026 135 | 2018-03-10 01:06:53 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.7645221736414741 136 | 2018-03-10 01:06:53 - TestLog - INFO - Total 1621 samples classified with accuracy 0.7643429981492905 137 | 2018-03-10 01:06:53 - TestLog - INFO - AUROC is 0.8903265999142658 138 | 2018-03-10 01:06:53 - TestLog - INFO - Classification report: 139 | precision recall f1-score support 140 | 141 | 0 1.00000 0.00515 0.01026 194 142 | 1 0.67898 0.75873 0.71664 315 143 | 2 0.78785 0.89838 0.83950 1112 144 | 145 | avg / total 0.79209 0.76434 0.71638 1621 146 | 147 | 2018-03-10 01:06:53 - TestLog - INFO - Confusion matrix: 148 | [[ 1 0 193] 149 | [ 0 239 76] 150 | [ 0 113 999]] 151 | 2018-03-10 01:06:53 - CVLog - INFO - Validation round 4 of 10 starting 152 | 2018-03-10 01:06:53 - TrainingLog - INFO - Initiating training of main classifier 153 | 2018-03-10 01:07:09 - TrainingLog - INFO - Feature extractor ready 154 | 2018-03-10 01:07:09 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 155 | 2018-03-10 01:07:09 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 156 | 2018-03-10 01:07:09 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 157 | 2018-03-10 01:07:09 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 158 | 2018-03-10 01:07:09 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 159 | 2018-03-10 01:07:09 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 160 | 2018-03-10 01:07:09 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 161 | 2018-03-10 01:07:09 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 162 | 2018-03-10 01:07:09 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 163 | 2018-03-10 01:07:09 - TrainingLog - INFO - 10001 of 14581 feature vectors prepared for training 164 | 2018-03-10 01:07:09 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 165 | 2018-03-10 01:07:09 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 166 | 2018-03-10 01:07:09 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 167 | 2018-03-10 01:07:09 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 168 | 2018-03-10 01:07:17 - TrainingLog - INFO - Main classifier training finished 169 | 2018-03-10 01:07:17 - TestLog - INFO - 101 samples 
classified. Accuracy up till now is 0.7029702970297029 170 | 2018-03-10 01:07:17 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.736318407960199 171 | 2018-03-10 01:07:17 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.7475083056478405 172 | 2018-03-10 01:07:17 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.7655860349127181 173 | 2018-03-10 01:07:17 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.7624750499001997 174 | 2018-03-10 01:07:17 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.762063227953411 175 | 2018-03-10 01:07:17 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.7631954350927247 176 | 2018-03-10 01:07:17 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.7615480649188514 177 | 2018-03-10 01:07:17 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.7547169811320755 178 | 2018-03-10 01:07:17 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.7572427572427572 179 | 2018-03-10 01:07:17 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.7529518619436876 180 | 2018-03-10 01:07:17 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.7543713572023314 181 | 2018-03-10 01:07:18 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.7524980784012298 182 | 2018-03-10 01:07:18 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.7508922198429693 183 | 2018-03-10 01:07:18 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.754163890739507 184 | 2018-03-10 01:07:18 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.7526545908806995 185 | 2018-03-10 01:07:18 - TestLog - INFO - Total 1621 samples classified with accuracy 0.7526218383713756 186 | 2018-03-10 01:07:18 - TestLog - INFO - AUROC is 0.8857194782076196 187 | 2018-03-10 01:07:18 - TestLog - INFO - Classification report: 188 | precision recall f1-score support 189 | 190 | 0 1.00000 0.00515 0.01026 194 191 | 1 0.64171 0.76190 0.69666 315 192 | 2 0.78571 0.88040 0.83036 1112 193 | 194 | avg / total 0.78338 0.75262 0.70623 1621 195 | 196 | 2018-03-10 01:07:18 - TestLog - INFO - Confusion matrix: 197 | [[ 1 1 192] 198 | [ 0 240 75] 199 | [ 0 133 979]] 200 | 2018-03-10 01:07:18 - CVLog - INFO - Validation round 5 of 10 starting 201 | 2018-03-10 01:07:18 - TrainingLog - INFO - Initiating training of main classifier 202 | 2018-03-10 01:07:37 - TrainingLog - INFO - Feature extractor ready 203 | 2018-03-10 01:07:37 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 204 | 2018-03-10 01:07:37 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 205 | 2018-03-10 01:07:37 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 206 | 2018-03-10 01:07:37 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 207 | 2018-03-10 01:07:37 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 208 | 2018-03-10 01:07:37 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 209 | 2018-03-10 01:07:37 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 210 | 2018-03-10 01:07:37 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 211 | 2018-03-10 01:07:37 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 212 | 2018-03-10 01:07:37 - TrainingLog - INFO - 10001 of 14581 feature vectors 
prepared for training 213 | 2018-03-10 01:07:37 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 214 | 2018-03-10 01:07:37 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 215 | 2018-03-10 01:07:37 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 216 | 2018-03-10 01:07:37 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 217 | 2018-03-10 01:07:46 - TrainingLog - INFO - Main classifier training finished 218 | 2018-03-10 01:07:46 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.7623762376237624 219 | 2018-03-10 01:07:46 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.7562189054726368 220 | 2018-03-10 01:07:46 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.7873754152823921 221 | 2018-03-10 01:07:46 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.7930174563591023 222 | 2018-03-10 01:07:46 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.7724550898203593 223 | 2018-03-10 01:07:46 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.7603993344425957 224 | 2018-03-10 01:07:46 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.7703281027104137 225 | 2018-03-10 01:07:46 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.7765293383270911 226 | 2018-03-10 01:07:46 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.7758046614872364 227 | 2018-03-10 01:07:46 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.7682317682317682 228 | 2018-03-10 01:07:46 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.7665758401453224 229 | 2018-03-10 01:07:46 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.768526228143214 230 | 2018-03-10 01:07:46 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.7701767870868562 231 | 2018-03-10 01:07:46 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.7651677373304783 232 | 2018-03-10 01:07:46 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.7641572285143238 233 | 2018-03-10 01:07:46 - TestLog - INFO - 1601 samples classified. 
Accuracy up till now is 0.7582760774515928 234 | 2018-03-10 01:07:46 - TestLog - INFO - Total 1621 samples classified with accuracy 0.757557063541024 235 | 2018-03-10 01:07:46 - TestLog - INFO - AUROC is 0.8808544966232935 236 | 2018-03-10 01:07:46 - TestLog - INFO - Classification report: 237 | precision recall f1-score support 238 | 239 | 0 0.00000 0.00000 0.00000 194 240 | 1 0.65833 0.75238 0.70222 315 241 | 2 0.78651 0.89119 0.83558 1112 242 | 243 | avg / total 0.66747 0.75756 0.70966 1621 244 | 245 | 2018-03-10 01:07:46 - TestLog - INFO - Confusion matrix: 246 | [[ 0 3 191] 247 | [ 0 237 78] 248 | [ 1 120 991]] 249 | 2018-03-10 01:07:46 - CVLog - INFO - Validation round 6 of 10 starting 250 | 2018-03-10 01:07:46 - TrainingLog - INFO - Initiating training of main classifier 251 | 2018-03-10 01:08:03 - TrainingLog - INFO - Feature extractor ready 252 | 2018-03-10 01:08:03 - TrainingLog - INFO - 1001 of 14582 feature vectors prepared for training 253 | 2018-03-10 01:08:03 - TrainingLog - INFO - 2001 of 14582 feature vectors prepared for training 254 | 2018-03-10 01:08:03 - TrainingLog - INFO - 3001 of 14582 feature vectors prepared for training 255 | 2018-03-10 01:08:03 - TrainingLog - INFO - 4001 of 14582 feature vectors prepared for training 256 | 2018-03-10 01:08:03 - TrainingLog - INFO - 5001 of 14582 feature vectors prepared for training 257 | 2018-03-10 01:08:03 - TrainingLog - INFO - 6001 of 14582 feature vectors prepared for training 258 | 2018-03-10 01:08:03 - TrainingLog - INFO - 7001 of 14582 feature vectors prepared for training 259 | 2018-03-10 01:08:03 - TrainingLog - INFO - 8001 of 14582 feature vectors prepared for training 260 | 2018-03-10 01:08:03 - TrainingLog - INFO - 9001 of 14582 feature vectors prepared for training 261 | 2018-03-10 01:08:03 - TrainingLog - INFO - 10001 of 14582 feature vectors prepared for training 262 | 2018-03-10 01:08:03 - TrainingLog - INFO - 11001 of 14582 feature vectors prepared for training 263 | 2018-03-10 01:08:03 - TrainingLog - INFO - 12001 of 14582 feature vectors prepared for training 264 | 2018-03-10 01:08:03 - TrainingLog - INFO - 13001 of 14582 feature vectors prepared for training 265 | 2018-03-10 01:08:03 - TrainingLog - INFO - 14001 of 14582 feature vectors prepared for training 266 | 2018-03-10 01:08:10 - TrainingLog - INFO - Main classifier training finished 267 | 2018-03-10 01:08:11 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8118811881188119 268 | 2018-03-10 01:08:11 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.7562189054726368 269 | 2018-03-10 01:08:11 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.7740863787375415 270 | 2018-03-10 01:08:11 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.7880299251870324 271 | 2018-03-10 01:08:11 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.7924151696606786 272 | 2018-03-10 01:08:11 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.7853577371048253 273 | 2018-03-10 01:08:11 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.7902995720399429 274 | 2018-03-10 01:08:11 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.7852684144818977 275 | 2018-03-10 01:08:11 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.7857935627081021 276 | 2018-03-10 01:08:11 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.7792207792207793 277 | 2018-03-10 01:08:11 - TestLog - INFO - 1101 samples classified. 
Accuracy up till now is 0.7756584922797457 278 | 2018-03-10 01:08:11 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.771856786011657 279 | 2018-03-10 01:08:11 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.7778631821675634 280 | 2018-03-10 01:08:11 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.7744468236973591 281 | 2018-03-10 01:08:11 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.7634910059960026 282 | 2018-03-10 01:08:11 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.7645221736414741 283 | 2018-03-10 01:08:11 - TestLog - INFO - Total 1620 samples classified with accuracy 0.7635802469135803 284 | 2018-03-10 01:08:11 - TestLog - INFO - AUROC is 0.8866142691231639 285 | 2018-03-10 01:08:11 - TestLog - INFO - Classification report: 286 | precision recall f1-score support 287 | 288 | 0 1.00000 0.01031 0.02041 194 289 | 1 0.67131 0.76508 0.71513 315 290 | 2 0.78952 0.89469 0.83882 1111 291 | 292 | avg / total 0.79174 0.76358 0.71676 1620 293 | 294 | 2018-03-10 01:08:11 - TestLog - INFO - Confusion matrix: 295 | [[ 2 1 191] 296 | [ 0 241 74] 297 | [ 0 117 994]] 298 | 2018-03-10 01:08:11 - CVLog - INFO - Validation round 7 of 10 starting 299 | 2018-03-10 01:08:11 - TrainingLog - INFO - Initiating training of main classifier 300 | 2018-03-10 01:08:27 - TrainingLog - INFO - Feature extractor ready 301 | 2018-03-10 01:08:27 - TrainingLog - INFO - 1001 of 14582 feature vectors prepared for training 302 | 2018-03-10 01:08:27 - TrainingLog - INFO - 2001 of 14582 feature vectors prepared for training 303 | 2018-03-10 01:08:27 - TrainingLog - INFO - 3001 of 14582 feature vectors prepared for training 304 | 2018-03-10 01:08:27 - TrainingLog - INFO - 4001 of 14582 feature vectors prepared for training 305 | 2018-03-10 01:08:27 - TrainingLog - INFO - 5001 of 14582 feature vectors prepared for training 306 | 2018-03-10 01:08:27 - TrainingLog - INFO - 6001 of 14582 feature vectors prepared for training 307 | 2018-03-10 01:08:27 - TrainingLog - INFO - 7001 of 14582 feature vectors prepared for training 308 | 2018-03-10 01:08:27 - TrainingLog - INFO - 8001 of 14582 feature vectors prepared for training 309 | 2018-03-10 01:08:27 - TrainingLog - INFO - 9001 of 14582 feature vectors prepared for training 310 | 2018-03-10 01:08:27 - TrainingLog - INFO - 10001 of 14582 feature vectors prepared for training 311 | 2018-03-10 01:08:27 - TrainingLog - INFO - 11001 of 14582 feature vectors prepared for training 312 | 2018-03-10 01:08:27 - TrainingLog - INFO - 12001 of 14582 feature vectors prepared for training 313 | 2018-03-10 01:08:27 - TrainingLog - INFO - 13001 of 14582 feature vectors prepared for training 314 | 2018-03-10 01:08:27 - TrainingLog - INFO - 14001 of 14582 feature vectors prepared for training 315 | 2018-03-10 01:08:35 - TrainingLog - INFO - Main classifier training finished 316 | 2018-03-10 01:08:35 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.7821782178217822 317 | 2018-03-10 01:08:35 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8159203980099502 318 | 2018-03-10 01:08:35 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8106312292358804 319 | 2018-03-10 01:08:35 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8154613466334164 320 | 2018-03-10 01:08:35 - TestLog - INFO - 501 samples classified. 
Accuracy up till now is 0.8003992015968064 321 | 2018-03-10 01:08:35 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.7953410981697171 322 | 2018-03-10 01:08:35 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.7902995720399429 323 | 2018-03-10 01:08:35 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.7852684144818977 324 | 2018-03-10 01:08:35 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.7869034406215316 325 | 2018-03-10 01:08:35 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.7812187812187812 326 | 2018-03-10 01:08:36 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.773841961852861 327 | 2018-03-10 01:08:36 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.768526228143214 328 | 2018-03-10 01:08:36 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.7701767870868562 329 | 2018-03-10 01:08:36 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.76802284082798 330 | 2018-03-10 01:08:36 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.7641572285143238 331 | 2018-03-10 01:08:36 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.7632729544034978 332 | 2018-03-10 01:08:36 - TestLog - INFO - Total 1620 samples classified with accuracy 0.7623456790123457 333 | 2018-03-10 01:08:36 - TestLog - INFO - AUROC is 0.8935140467577484 334 | 2018-03-10 01:08:36 - TestLog - INFO - Classification report: 335 | precision recall f1-score support 336 | 337 | 0 0.00000 0.00000 0.00000 194 338 | 1 0.67318 0.76508 0.71620 315 339 | 2 0.78764 0.89469 0.83776 1111 340 | 341 | avg / total 0.67106 0.76235 0.71380 1620 342 | 343 | 2018-03-10 01:08:36 - TestLog - INFO - Confusion matrix: 344 | [[ 0 0 194] 345 | [ 0 241 74] 346 | [ 0 117 994]] 347 | 2018-03-10 01:08:36 - CVLog - INFO - Validation round 8 of 10 starting 348 | 2018-03-10 01:08:36 - TrainingLog - INFO - Initiating training of main classifier 349 | 2018-03-10 01:08:52 - TrainingLog - INFO - Feature extractor ready 350 | 2018-03-10 01:08:52 - TrainingLog - INFO - 1001 of 14582 feature vectors prepared for training 351 | 2018-03-10 01:08:52 - TrainingLog - INFO - 2001 of 14582 feature vectors prepared for training 352 | 2018-03-10 01:08:52 - TrainingLog - INFO - 3001 of 14582 feature vectors prepared for training 353 | 2018-03-10 01:08:52 - TrainingLog - INFO - 4001 of 14582 feature vectors prepared for training 354 | 2018-03-10 01:08:52 - TrainingLog - INFO - 5001 of 14582 feature vectors prepared for training 355 | 2018-03-10 01:08:52 - TrainingLog - INFO - 6001 of 14582 feature vectors prepared for training 356 | 2018-03-10 01:08:52 - TrainingLog - INFO - 7001 of 14582 feature vectors prepared for training 357 | 2018-03-10 01:08:52 - TrainingLog - INFO - 8001 of 14582 feature vectors prepared for training 358 | 2018-03-10 01:08:52 - TrainingLog - INFO - 9001 of 14582 feature vectors prepared for training 359 | 2018-03-10 01:08:52 - TrainingLog - INFO - 10001 of 14582 feature vectors prepared for training 360 | 2018-03-10 01:08:52 - TrainingLog - INFO - 11001 of 14582 feature vectors prepared for training 361 | 2018-03-10 01:08:52 - TrainingLog - INFO - 12001 of 14582 feature vectors prepared for training 362 | 2018-03-10 01:08:52 - TrainingLog - INFO - 13001 of 14582 feature vectors prepared for training 363 | 2018-03-10 01:08:52 - TrainingLog - INFO - 14001 of 14582 feature vectors prepared for training 364 | 2018-03-10 01:09:00 - 
TrainingLog - INFO - Main classifier training finished 365 | 2018-03-10 01:09:00 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.7524752475247525 366 | 2018-03-10 01:09:00 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.7562189054726368 367 | 2018-03-10 01:09:00 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.7674418604651163 368 | 2018-03-10 01:09:00 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.7630922693266833 369 | 2018-03-10 01:09:00 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.7744510978043913 370 | 2018-03-10 01:09:00 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.7703826955074875 371 | 2018-03-10 01:09:00 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.7631954350927247 372 | 2018-03-10 01:09:00 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.7590511860174781 373 | 2018-03-10 01:09:00 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.7591564927857936 374 | 2018-03-10 01:09:00 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.7552447552447552 375 | 2018-03-10 01:09:00 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.7484105358764759 376 | 2018-03-10 01:09:00 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.7485428809325562 377 | 2018-03-10 01:09:00 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.7486548808608763 378 | 2018-03-10 01:09:00 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.7516059957173448 379 | 2018-03-10 01:09:00 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.7534976682211859 380 | 2018-03-10 01:09:01 - TestLog - INFO - 1601 samples classified. 
Accuracy up till now is 0.7507807620237351 381 | 2018-03-10 01:09:01 - TestLog - INFO - Total 1620 samples classified with accuracy 0.7506172839506173 382 | 2018-03-10 01:09:01 - TestLog - INFO - AUROC is 0.8805339837381058 383 | 2018-03-10 01:09:01 - TestLog - INFO - Classification report: 384 | precision recall f1-score support 385 | 386 | 0 0.00000 0.00000 0.00000 194 387 | 1 0.64607 0.73016 0.68554 315 388 | 2 0.78006 0.88749 0.83032 1111 389 | 390 | avg / total 0.66059 0.75062 0.70273 1620 391 | 392 | 2018-03-10 01:09:01 - TestLog - INFO - Confusion matrix: 393 | [[ 0 1 193] 394 | [ 0 230 85] 395 | [ 0 125 986]] 396 | 2018-03-10 01:09:01 - CVLog - INFO - Validation round 9 of 10 starting 397 | 2018-03-10 01:09:01 - TrainingLog - INFO - Initiating training of main classifier 398 | 2018-03-10 01:09:17 - TrainingLog - INFO - Feature extractor ready 399 | 2018-03-10 01:09:17 - TrainingLog - INFO - 1001 of 14583 feature vectors prepared for training 400 | 2018-03-10 01:09:17 - TrainingLog - INFO - 2001 of 14583 feature vectors prepared for training 401 | 2018-03-10 01:09:17 - TrainingLog - INFO - 3001 of 14583 feature vectors prepared for training 402 | 2018-03-10 01:09:17 - TrainingLog - INFO - 4001 of 14583 feature vectors prepared for training 403 | 2018-03-10 01:09:17 - TrainingLog - INFO - 5001 of 14583 feature vectors prepared for training 404 | 2018-03-10 01:09:17 - TrainingLog - INFO - 6001 of 14583 feature vectors prepared for training 405 | 2018-03-10 01:09:17 - TrainingLog - INFO - 7001 of 14583 feature vectors prepared for training 406 | 2018-03-10 01:09:17 - TrainingLog - INFO - 8001 of 14583 feature vectors prepared for training 407 | 2018-03-10 01:09:17 - TrainingLog - INFO - 9001 of 14583 feature vectors prepared for training 408 | 2018-03-10 01:09:17 - TrainingLog - INFO - 10001 of 14583 feature vectors prepared for training 409 | 2018-03-10 01:09:17 - TrainingLog - INFO - 11001 of 14583 feature vectors prepared for training 410 | 2018-03-10 01:09:17 - TrainingLog - INFO - 12001 of 14583 feature vectors prepared for training 411 | 2018-03-10 01:09:17 - TrainingLog - INFO - 13001 of 14583 feature vectors prepared for training 412 | 2018-03-10 01:09:17 - TrainingLog - INFO - 14001 of 14583 feature vectors prepared for training 413 | 2018-03-10 01:09:25 - TrainingLog - INFO - Main classifier training finished 414 | 2018-03-10 01:09:25 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.693069306930693 415 | 2018-03-10 01:09:25 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.7164179104477612 416 | 2018-03-10 01:09:25 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.7342192691029901 417 | 2018-03-10 01:09:25 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.7306733167082294 418 | 2018-03-10 01:09:25 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.7445109780439122 419 | 2018-03-10 01:09:25 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.7554076539101497 420 | 2018-03-10 01:09:25 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.7574893009985735 421 | 2018-03-10 01:09:25 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.7565543071161048 422 | 2018-03-10 01:09:26 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.755826859045505 423 | 2018-03-10 01:09:26 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.7602397602397603 424 | 2018-03-10 01:09:26 - TestLog - INFO - 1101 samples classified. 
Accuracy up till now is 0.7593097184377838 425 | 2018-03-10 01:09:26 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.7552039966694422 426 | 2018-03-10 01:09:26 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.754803996925442 427 | 2018-03-10 01:09:26 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.7508922198429693 428 | 2018-03-10 01:09:26 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.7481678880746169 429 | 2018-03-10 01:09:26 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.7489069331667708 430 | 2018-03-10 01:09:26 - TestLog - INFO - Total 1619 samples classified with accuracy 0.7492279184681903 431 | 2018-03-10 01:09:26 - TestLog - INFO - AUROC is 0.8715033293006965 432 | 2018-03-10 01:09:26 - TestLog - INFO - Classification report: 433 | precision recall f1-score support 434 | 435 | 0 0.00000 0.00000 0.00000 194 436 | 1 0.64655 0.71656 0.67976 314 437 | 2 0.77734 0.88929 0.82955 1111 438 | 439 | avg / total 0.65883 0.74923 0.70110 1619 440 | 441 | 2018-03-10 01:09:26 - TestLog - INFO - Confusion matrix: 442 | [[ 0 0 194] 443 | [ 0 225 89] 444 | [ 0 123 988]] 445 | 2018-03-10 01:09:26 - CVLog - INFO - Validation round 10 of 10 starting 446 | 2018-03-10 01:09:26 - TrainingLog - INFO - Initiating training of main classifier 447 | 2018-03-10 01:09:44 - TrainingLog - INFO - Feature extractor ready 448 | 2018-03-10 01:09:44 - TrainingLog - INFO - 1001 of 14584 feature vectors prepared for training 449 | 2018-03-10 01:09:44 - TrainingLog - INFO - 2001 of 14584 feature vectors prepared for training 450 | 2018-03-10 01:09:44 - TrainingLog - INFO - 3001 of 14584 feature vectors prepared for training 451 | 2018-03-10 01:09:44 - TrainingLog - INFO - 4001 of 14584 feature vectors prepared for training 452 | 2018-03-10 01:09:44 - TrainingLog - INFO - 5001 of 14584 feature vectors prepared for training 453 | 2018-03-10 01:09:44 - TrainingLog - INFO - 6001 of 14584 feature vectors prepared for training 454 | 2018-03-10 01:09:44 - TrainingLog - INFO - 7001 of 14584 feature vectors prepared for training 455 | 2018-03-10 01:09:44 - TrainingLog - INFO - 8001 of 14584 feature vectors prepared for training 456 | 2018-03-10 01:09:44 - TrainingLog - INFO - 9001 of 14584 feature vectors prepared for training 457 | 2018-03-10 01:09:44 - TrainingLog - INFO - 10001 of 14584 feature vectors prepared for training 458 | 2018-03-10 01:09:44 - TrainingLog - INFO - 11001 of 14584 feature vectors prepared for training 459 | 2018-03-10 01:09:44 - TrainingLog - INFO - 12001 of 14584 feature vectors prepared for training 460 | 2018-03-10 01:09:44 - TrainingLog - INFO - 13001 of 14584 feature vectors prepared for training 461 | 2018-03-10 01:09:44 - TrainingLog - INFO - 14001 of 14584 feature vectors prepared for training 462 | 2018-03-10 01:09:54 - TrainingLog - INFO - Main classifier training finished 463 | 2018-03-10 01:09:54 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.801980198019802 464 | 2018-03-10 01:09:54 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.7910447761194029 465 | 2018-03-10 01:09:54 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.7973421926910299 466 | 2018-03-10 01:09:54 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.7880299251870324 467 | 2018-03-10 01:09:54 - TestLog - INFO - 501 samples classified. 
Accuracy up till now is 0.7904191616766467 468 | 2018-03-10 01:09:54 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.7820299500831946 469 | 2018-03-10 01:09:54 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.7803138373751783 470 | 2018-03-10 01:09:54 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.787765293383271 471 | 2018-03-10 01:09:54 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.779134295227525 472 | 2018-03-10 01:09:54 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.7752247752247752 473 | 2018-03-10 01:09:54 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.7683923705722071 474 | 2018-03-10 01:09:54 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.7735220649458784 475 | 2018-03-10 01:09:54 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.7732513451191392 476 | 2018-03-10 01:09:54 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.7715917201998572 477 | 2018-03-10 01:09:54 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.7721518987341772 478 | 2018-03-10 01:09:54 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.7695190505933791 479 | 2018-03-10 01:09:54 - TestLog - INFO - Total 1618 samples classified with accuracy 0.7707045735475896 480 | 2018-03-10 01:09:54 - TestLog - INFO - AUROC is 0.8862904796467865 481 | 2018-03-10 01:09:54 - TestLog - INFO - Classification report: 482 | precision recall f1-score support 483 | 484 | 0 0.00000 0.00000 0.00000 193 485 | 1 0.69565 0.76433 0.72838 314 486 | 2 0.79104 0.90639 0.84480 1111 487 | 488 | avg / total 0.67817 0.77070 0.72143 1618 489 | 490 | 2018-03-10 01:09:54 - TestLog - INFO - Confusion matrix: 491 | [[ 0 1 192] 492 | [ 0 240 74] 493 | [ 0 104 1007]] 494 | 2018-03-10 01:09:54 - CVLog - INFO - 495 | 496 | 2018-03-10 01:09:54 - CVLog - INFO - Summary (precision, recall, F1, accuracy): 497 | 2018-03-10 01:09:54 - CVLog - INFO - Metrics for round 1: [0.7955627361048222, 0.76866132017273292, 0.72019113865246276, 0.76866132017273292] 498 | 2018-03-10 01:09:54 - CVLog - INFO - Metrics for round 2: [0.71454208613102133, 0.76557680444170262, 0.71818673327773275, 0.76557680444170262] 499 | 2018-03-10 01:09:54 - CVLog - INFO - Metrics for round 3: [0.79208666140226136, 0.76434299814929052, 0.71637951897370034, 0.76434299814929052] 500 | 2018-03-10 01:09:54 - CVLog - INFO - Metrics for round 4: [0.78337651026985855, 0.75262183837137564, 0.70623305594035346, 0.75262183837137564] 501 | 2018-03-10 01:09:54 - CVLog - INFO - Metrics for round 5: [0.66747182319359988, 0.75755706354102403, 0.70966498934203581, 0.75755706354102403] 502 | 2018-03-10 01:09:54 - CVLog - INFO - Metrics for round 6: [0.79173710077018711, 0.76358024691358029, 0.7167615260174981, 0.76358024691358029] 503 | 2018-03-10 01:09:54 - CVLog - INFO - Metrics for round 7: [0.67106150224688976, 0.76234567901234573, 0.71379694172703356, 0.76234567901234573] 504 | 2018-03-10 01:09:54 - CVLog - INFO - Metrics for round 8: [0.66059355087083482, 0.75061728395061733, 0.70273283385092422, 0.75061728395061733] 505 | 2018-03-10 01:09:54 - CVLog - INFO - Metrics for round 9: [0.65882812422365578, 0.74922791846819026, 0.70109926442074211, 0.74922791846819026] 506 | 2018-03-10 01:09:54 - CVLog - INFO - Metrics for round 10: [0.67817399807005707, 0.77070457354758959, 0.72143478099159908, 0.77070457354758959] 507 | 2018-03-10 01:09:54 - CVLog - INFO - 508 | 509 | 2018-03-10 
01:09:54 - CVLog - INFO - Final average metrics: 0.7213434093283188, 0.7605235726568448, 0.7126480783194082, 0.7605235726568448 510 | 511 | Process finished with exit code 0 512 | -------------------------------------------------------------------------------- /Results/Waseem_Hovy_hidden-auth.txt: -------------------------------------------------------------------------------- 1 | /Library/Frameworks/Python.framework/Versions/3.6/bin/python3.6 /Users/pushkarmishra/Desktop/AuthorProfileAbuseDetection/twitter_model.py -c 30000 2 | Using Theano backend. 3 | 2018-03-10 00:48:08 - CVLog - INFO - 10-fold cross validation procedure has begun 4 | 2018-03-10 00:48:08 - CVLog - INFO - Validation round 1 of 10 starting 5 | 2018-03-10 00:48:08 - TrainingLog - INFO - Initiating training of main classifier 6 | 2018-03-10 00:48:28 - TrainingLog - INFO - Feature extractor ready 7 | 2018-03-10 00:48:35 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 8 | 2018-03-10 00:48:41 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 9 | 2018-03-10 00:48:46 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 10 | 2018-03-10 00:48:52 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 11 | 2018-03-10 00:48:58 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 12 | 2018-03-10 00:49:02 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 13 | 2018-03-10 00:49:06 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 14 | 2018-03-10 00:49:10 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 15 | 2018-03-10 00:49:14 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 16 | 2018-03-10 00:49:18 - TrainingLog - INFO - 10001 of 14581 feature vectors prepared for training 17 | 2018-03-10 00:49:23 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 18 | 2018-03-10 00:49:27 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 19 | 2018-03-10 00:49:31 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 20 | 2018-03-10 00:49:35 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 21 | 2018-03-10 00:49:52 - TrainingLog - INFO - Main classifier training finished 22 | 2018-03-10 00:49:53 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8415841584158416 23 | 2018-03-10 00:49:54 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8557213930348259 24 | 2018-03-10 00:49:54 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8372093023255814 25 | 2018-03-10 00:49:55 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8403990024937655 26 | 2018-03-10 00:49:56 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8562874251497006 27 | 2018-03-10 00:49:57 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8635607321131448 28 | 2018-03-10 00:49:57 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8587731811697575 29 | 2018-03-10 00:49:58 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8614232209737828 30 | 2018-03-10 00:49:59 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8612652608213096 31 | 2018-03-10 00:49:59 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8591408591408591 32 | 2018-03-10 00:50:00 - TestLog - INFO - 1101 samples classified. 
Accuracy up till now is 0.8619436875567665 33 | 2018-03-10 00:50:01 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8659450457951707 34 | 2018-03-10 00:50:01 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8662567255956957 35 | 2018-03-10 00:50:02 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8679514632405425 36 | 2018-03-10 00:50:03 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8674217188540972 37 | 2018-03-10 00:50:04 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.8688319800124922 38 | 2018-03-10 00:50:04 - TestLog - INFO - Total 1621 samples classified with accuracy 0.8704503392967304 39 | 2018-03-10 00:50:04 - TestLog - INFO - AUROC is 0.9603672569920351 40 | 2018-03-10 00:50:04 - TestLog - INFO - Classification report: 41 | precision recall f1-score support 42 | 43 | 0 0.73000 0.75258 0.74112 194 44 | 1 0.83553 0.80635 0.82068 315 45 | 2 0.90510 0.90917 0.90713 1112 46 | 47 | avg / total 0.87063 0.87045 0.87046 1621 48 | 49 | 2018-03-10 00:50:04 - TestLog - INFO - Confusion matrix: 50 | [[ 146 1 47] 51 | [ 2 254 59] 52 | [ 52 49 1011]] 53 | 2018-03-10 00:50:04 - CVLog - INFO - Validation round 2 of 10 starting 54 | 2018-03-10 00:50:04 - TrainingLog - INFO - Initiating training of main classifier 55 | 2018-03-10 00:50:25 - TrainingLog - INFO - Feature extractor ready 56 | 2018-03-10 00:50:35 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 57 | 2018-03-10 00:50:41 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 58 | 2018-03-10 00:50:47 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 59 | 2018-03-10 00:50:53 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 60 | 2018-03-10 00:51:00 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 61 | 2018-03-10 00:51:07 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 62 | 2018-03-10 00:51:14 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 63 | 2018-03-10 00:51:21 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 64 | 2018-03-10 00:51:28 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 65 | 2018-03-10 00:51:36 - TrainingLog - INFO - 10001 of 14581 feature vectors prepared for training 66 | 2018-03-10 00:51:43 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 67 | 2018-03-10 00:51:50 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 68 | 2018-03-10 00:51:59 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 69 | 2018-03-10 00:52:07 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 70 | 2018-03-10 00:52:24 - TrainingLog - INFO - Main classifier training finished 71 | 2018-03-10 00:52:25 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8910891089108911 72 | 2018-03-10 00:52:25 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8905472636815921 73 | 2018-03-10 00:52:26 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8803986710963455 74 | 2018-03-10 00:52:26 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8678304239401496 75 | 2018-03-10 00:52:26 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.872255489021956 76 | 2018-03-10 00:52:27 - TestLog - INFO - 601 samples classified. 
Accuracy up till now is 0.870216306156406 77 | 2018-03-10 00:52:27 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8744650499286734 78 | 2018-03-10 00:52:28 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8726591760299626 79 | 2018-03-10 00:52:28 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8745837957824639 80 | 2018-03-10 00:52:29 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8791208791208791 81 | 2018-03-10 00:52:29 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.8737511353315168 82 | 2018-03-10 00:52:29 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8726061615320566 83 | 2018-03-10 00:52:30 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8693312836279785 84 | 2018-03-10 00:52:30 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8665239114917915 85 | 2018-03-10 00:52:31 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.866089273817455 86 | 2018-03-10 00:52:31 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.8657089319175515 87 | 2018-03-10 00:52:31 - TestLog - INFO - Total 1621 samples classified with accuracy 0.864898210980876 88 | 2018-03-10 00:52:31 - TestLog - INFO - AUROC is 0.9606214246607155 89 | 2018-03-10 00:52:31 - TestLog - INFO - Classification report: 90 | precision recall f1-score support 91 | 92 | 0 0.72222 0.73711 0.72959 194 93 | 1 0.82524 0.80952 0.81731 315 94 | 2 0.90126 0.90288 0.90207 1112 95 | 96 | avg / total 0.86506 0.86490 0.86495 1621 97 | 98 | 2018-03-10 00:52:31 - TestLog - INFO - Confusion matrix: 99 | [[ 143 1 50] 100 | [ 0 255 60] 101 | [ 55 53 1004]] 102 | 2018-03-10 00:52:31 - CVLog - INFO - Validation round 3 of 10 starting 103 | 2018-03-10 00:52:31 - TrainingLog - INFO - Initiating training of main classifier 104 | 2018-03-10 00:52:48 - TrainingLog - INFO - Feature extractor ready 105 | 2018-03-10 00:52:56 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 106 | 2018-03-10 00:53:03 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 107 | 2018-03-10 00:53:11 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 108 | 2018-03-10 00:53:18 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 109 | 2018-03-10 00:53:25 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 110 | 2018-03-10 00:53:33 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 111 | 2018-03-10 00:53:41 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 112 | 2018-03-10 00:53:47 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 113 | 2018-03-10 00:53:53 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 114 | 2018-03-10 00:53:59 - TrainingLog - INFO - 10001 of 14581 feature vectors prepared for training 115 | 2018-03-10 00:54:05 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 116 | 2018-03-10 00:54:11 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 117 | 2018-03-10 00:54:18 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 118 | 2018-03-10 00:54:24 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 119 | 2018-03-10 00:54:43 - TrainingLog - INFO - Main classifier training finished 120 | 2018-03-10 00:54:43 - TestLog - INFO - 101 samples classified. 
Accuracy up till now is 0.8811881188118812 121 | 2018-03-10 00:54:44 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8955223880597015 122 | 2018-03-10 00:54:45 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.893687707641196 123 | 2018-03-10 00:54:45 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8902743142144638 124 | 2018-03-10 00:54:46 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8882235528942116 125 | 2018-03-10 00:54:47 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8801996672212978 126 | 2018-03-10 00:54:48 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8730385164051355 127 | 2018-03-10 00:54:48 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8639200998751561 128 | 2018-03-10 00:54:49 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8634850166481687 129 | 2018-03-10 00:54:50 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8621378621378621 130 | 2018-03-10 00:54:50 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.8646684831970936 131 | 2018-03-10 00:54:51 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8684429641965029 132 | 2018-03-10 00:54:52 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8700999231360492 133 | 2018-03-10 00:54:52 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8715203426124197 134 | 2018-03-10 00:54:53 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8720852764823451 135 | 2018-03-10 00:54:54 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.8707058088694566 136 | 2018-03-10 00:54:54 - TestLog - INFO - Total 1621 samples classified with accuracy 0.8698334361505243 137 | 2018-03-10 00:54:54 - TestLog - INFO - AUROC is 0.9605434641915648 138 | 2018-03-10 00:54:54 - TestLog - INFO - Classification report: 139 | precision recall f1-score support 140 | 141 | 0 0.73737 0.75258 0.74490 194 142 | 1 0.82166 0.81905 0.82035 315 143 | 2 0.90712 0.90468 0.90590 1112 144 | 145 | avg / total 0.87020 0.86983 0.87001 1621 146 | 147 | 2018-03-10 00:54:54 - TestLog - INFO - Confusion matrix: 148 | [[ 146 0 48] 149 | [ 2 258 55] 150 | [ 50 56 1006]] 151 | 2018-03-10 00:54:54 - CVLog - INFO - Validation round 4 of 10 starting 152 | 2018-03-10 00:54:54 - TrainingLog - INFO - Initiating training of main classifier 153 | 2018-03-10 00:55:16 - TrainingLog - INFO - Feature extractor ready 154 | 2018-03-10 00:55:24 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 155 | 2018-03-10 00:55:30 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 156 | 2018-03-10 00:55:36 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 157 | 2018-03-10 00:55:43 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 158 | 2018-03-10 00:55:49 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 159 | 2018-03-10 00:55:56 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 160 | 2018-03-10 00:56:03 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 161 | 2018-03-10 00:56:10 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 162 | 2018-03-10 00:56:17 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 163 | 2018-03-10 00:56:23 - TrainingLog - INFO - 10001 of 14581 feature vectors prepared for 
training 164 | 2018-03-10 00:56:29 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 165 | 2018-03-10 00:56:36 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 166 | 2018-03-10 00:56:43 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 167 | 2018-03-10 00:56:50 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 168 | 2018-03-10 00:57:06 - TrainingLog - INFO - Main classifier training finished 169 | 2018-03-10 00:57:07 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8514851485148515 170 | 2018-03-10 00:57:07 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8905472636815921 171 | 2018-03-10 00:57:08 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8770764119601329 172 | 2018-03-10 00:57:08 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8927680798004988 173 | 2018-03-10 00:57:08 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8982035928143712 174 | 2018-03-10 00:57:09 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8935108153078203 175 | 2018-03-10 00:57:09 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8873038516405135 176 | 2018-03-10 00:57:10 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8826466916354557 177 | 2018-03-10 00:57:10 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.881243063263041 178 | 2018-03-10 00:57:11 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8821178821178821 179 | 2018-03-10 00:57:11 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.8801089918256131 180 | 2018-03-10 00:57:12 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8776019983347211 181 | 2018-03-10 00:57:12 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8724058416602614 182 | 2018-03-10 00:57:13 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8715203426124197 183 | 2018-03-10 00:57:13 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.871419053964024 184 | 2018-03-10 00:57:14 - TestLog - INFO - 1601 samples classified. 
Accuracy up till now is 0.8744534665833854 185 | 2018-03-10 00:57:14 - TestLog - INFO - Total 1621 samples classified with accuracy 0.8753855644663788 186 | 2018-03-10 00:57:14 - TestLog - INFO - AUROC is 0.9598398472423768 187 | 2018-03-10 00:57:14 - TestLog - INFO - Classification report: 188 | precision recall f1-score support 189 | 190 | 0 0.79213 0.72680 0.75806 194 191 | 1 0.82315 0.81270 0.81789 315 192 | 2 0.90283 0.91906 0.91087 1112 193 | 194 | avg / total 0.87410 0.87539 0.87452 1621 195 | 196 | 2018-03-10 00:57:14 - TestLog - INFO - Confusion matrix: 197 | [[ 141 2 51] 198 | [ 0 256 59] 199 | [ 37 53 1022]] 200 | 2018-03-10 00:57:14 - CVLog - INFO - Validation round 5 of 10 starting 201 | 2018-03-10 00:57:14 - TrainingLog - INFO - Initiating training of main classifier 202 | 2018-03-10 00:57:30 - TrainingLog - INFO - Feature extractor ready 203 | 2018-03-10 00:57:40 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 204 | 2018-03-10 00:57:47 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 205 | 2018-03-10 00:57:53 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 206 | 2018-03-10 00:58:00 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 207 | 2018-03-10 00:58:06 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 208 | 2018-03-10 00:58:13 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 209 | 2018-03-10 00:58:22 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 210 | 2018-03-10 00:58:29 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 211 | 2018-03-10 00:58:35 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 212 | 2018-03-10 00:58:41 - TrainingLog - INFO - 10001 of 14581 feature vectors prepared for training 213 | 2018-03-10 00:58:47 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 214 | 2018-03-10 00:58:55 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 215 | 2018-03-10 00:59:03 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 216 | 2018-03-10 00:59:10 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 217 | 2018-03-10 00:59:31 - TrainingLog - INFO - Main classifier training finished 218 | 2018-03-10 00:59:32 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.900990099009901 219 | 2018-03-10 00:59:33 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8805970149253731 220 | 2018-03-10 00:59:33 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8770764119601329 221 | 2018-03-10 00:59:34 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8827930174563591 222 | 2018-03-10 00:59:34 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8762475049900199 223 | 2018-03-10 00:59:35 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.870216306156406 224 | 2018-03-10 00:59:35 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8744650499286734 225 | 2018-03-10 00:59:36 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8789013732833958 226 | 2018-03-10 00:59:36 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8790233074361821 227 | 2018-03-10 00:59:37 - TestLog - INFO - 1001 samples classified. 
Accuracy up till now is 0.8791208791208791 228 | 2018-03-10 00:59:38 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.8801089918256131 229 | 2018-03-10 00:59:38 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8825978351373855 230 | 2018-03-10 00:59:39 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8831667947732513 231 | 2018-03-10 00:59:39 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8807994289793005 232 | 2018-03-10 00:59:40 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8800799467021986 233 | 2018-03-10 00:59:40 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.876951905059338 234 | 2018-03-10 00:59:40 - TestLog - INFO - Total 1621 samples classified with accuracy 0.8778531770512029 235 | 2018-03-10 00:59:40 - TestLog - INFO - AUROC is 0.9594299612390799 236 | 2018-03-10 00:59:40 - TestLog - INFO - Classification report: 237 | precision recall f1-score support 238 | 239 | 0 0.76404 0.70103 0.73118 194 240 | 1 0.85574 0.82857 0.84194 315 241 | 2 0.90158 0.92266 0.91200 1112 242 | 243 | avg / total 0.87621 0.87785 0.87674 1621 244 | 245 | 2018-03-10 00:59:40 - TestLog - INFO - Confusion matrix: 246 | [[ 136 0 58] 247 | [ 0 261 54] 248 | [ 42 44 1026]] 249 | 2018-03-10 00:59:40 - CVLog - INFO - Validation round 6 of 10 starting 250 | 2018-03-10 00:59:40 - TrainingLog - INFO - Initiating training of main classifier 251 | 2018-03-10 00:59:55 - TrainingLog - INFO - Feature extractor ready 252 | 2018-03-10 01:00:03 - TrainingLog - INFO - 1001 of 14582 feature vectors prepared for training 253 | 2018-03-10 01:00:09 - TrainingLog - INFO - 2001 of 14582 feature vectors prepared for training 254 | 2018-03-10 01:00:15 - TrainingLog - INFO - 3001 of 14582 feature vectors prepared for training 255 | 2018-03-10 01:00:21 - TrainingLog - INFO - 4001 of 14582 feature vectors prepared for training 256 | 2018-03-10 01:00:27 - TrainingLog - INFO - 5001 of 14582 feature vectors prepared for training 257 | 2018-03-10 01:00:32 - TrainingLog - INFO - 6001 of 14582 feature vectors prepared for training 258 | 2018-03-10 01:00:38 - TrainingLog - INFO - 7001 of 14582 feature vectors prepared for training 259 | 2018-03-10 01:00:44 - TrainingLog - INFO - 8001 of 14582 feature vectors prepared for training 260 | 2018-03-10 01:00:50 - TrainingLog - INFO - 9001 of 14582 feature vectors prepared for training 261 | 2018-03-10 01:00:57 - TrainingLog - INFO - 10001 of 14582 feature vectors prepared for training 262 | 2018-03-10 01:01:04 - TrainingLog - INFO - 11001 of 14582 feature vectors prepared for training 263 | 2018-03-10 01:01:09 - TrainingLog - INFO - 12001 of 14582 feature vectors prepared for training 264 | 2018-03-10 01:01:15 - TrainingLog - INFO - 13001 of 14582 feature vectors prepared for training 265 | 2018-03-10 01:01:21 - TrainingLog - INFO - 14001 of 14582 feature vectors prepared for training 266 | 2018-03-10 01:01:36 - TrainingLog - INFO - Main classifier training finished 267 | 2018-03-10 01:01:36 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8910891089108911 268 | 2018-03-10 01:01:37 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8756218905472637 269 | 2018-03-10 01:01:37 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8704318936877077 270 | 2018-03-10 01:01:38 - TestLog - INFO - 401 samples classified. 
Accuracy up till now is 0.8902743142144638 271 | 2018-03-10 01:01:38 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8982035928143712 272 | 2018-03-10 01:01:39 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8968386023294509 273 | 2018-03-10 01:01:39 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8958630527817404 274 | 2018-03-10 01:01:40 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8963795255930087 275 | 2018-03-10 01:01:40 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8912319644839067 276 | 2018-03-10 01:01:41 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8921078921078921 277 | 2018-03-10 01:01:41 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.8873751135331517 278 | 2018-03-10 01:01:41 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.884263114071607 279 | 2018-03-10 01:01:42 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8854727132974635 280 | 2018-03-10 01:01:42 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.880085653104925 281 | 2018-03-10 01:01:43 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8760826115922719 282 | 2018-03-10 01:01:43 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.8800749531542785 283 | 2018-03-10 01:01:43 - TestLog - INFO - Total 1620 samples classified with accuracy 0.8802469135802469 284 | 2018-03-10 01:01:43 - TestLog - INFO - AUROC is 0.9605301476112963 285 | 2018-03-10 01:01:43 - TestLog - INFO - Classification report: 286 | precision recall f1-score support 287 | 288 | 0 0.74611 0.74227 0.74419 194 289 | 1 0.86424 0.82857 0.84603 315 290 | 2 0.90756 0.91899 0.91324 1111 291 | 292 | avg / total 0.87980 0.88025 0.87993 1620 293 | 294 | 2018-03-10 01:01:43 - TestLog - INFO - Confusion matrix: 295 | [[ 144 0 50] 296 | [ 0 261 54] 297 | [ 49 41 1021]] 298 | 2018-03-10 01:01:43 - CVLog - INFO - Validation round 7 of 10 starting 299 | 2018-03-10 01:01:43 - TrainingLog - INFO - Initiating training of main classifier 300 | 2018-03-10 01:01:59 - TrainingLog - INFO - Feature extractor ready 301 | 2018-03-10 01:02:07 - TrainingLog - INFO - 1001 of 14582 feature vectors prepared for training 302 | 2018-03-10 01:02:13 - TrainingLog - INFO - 2001 of 14582 feature vectors prepared for training 303 | 2018-03-10 01:02:21 - TrainingLog - INFO - 3001 of 14582 feature vectors prepared for training 304 | 2018-03-10 01:02:29 - TrainingLog - INFO - 4001 of 14582 feature vectors prepared for training 305 | 2018-03-10 01:02:35 - TrainingLog - INFO - 5001 of 14582 feature vectors prepared for training 306 | 2018-03-10 01:02:40 - TrainingLog - INFO - 6001 of 14582 feature vectors prepared for training 307 | 2018-03-10 01:02:46 - TrainingLog - INFO - 7001 of 14582 feature vectors prepared for training 308 | 2018-03-10 01:02:52 - TrainingLog - INFO - 8001 of 14582 feature vectors prepared for training 309 | 2018-03-10 01:02:57 - TrainingLog - INFO - 9001 of 14582 feature vectors prepared for training 310 | 2018-03-10 01:03:03 - TrainingLog - INFO - 10001 of 14582 feature vectors prepared for training 311 | 2018-03-10 01:03:08 - TrainingLog - INFO - 11001 of 14582 feature vectors prepared for training 312 | 2018-03-10 01:03:14 - TrainingLog - INFO - 12001 of 14582 feature vectors prepared for training 313 | 2018-03-10 01:03:19 - TrainingLog - INFO - 13001 of 14582 feature vectors prepared for training 314 | 2018-03-10 
01:03:25 - TrainingLog - INFO - 14001 of 14582 feature vectors prepared for training 315 | 2018-03-10 01:03:37 - TrainingLog - INFO - Main classifier training finished 316 | 2018-03-10 01:03:38 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8514851485148515 317 | 2018-03-10 01:03:39 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8855721393034826 318 | 2018-03-10 01:03:39 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.893687707641196 319 | 2018-03-10 01:03:40 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.9027431421446384 320 | 2018-03-10 01:03:41 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8922155688622755 321 | 2018-03-10 01:03:41 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.891846921797005 322 | 2018-03-10 01:03:42 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8972895863052782 323 | 2018-03-10 01:03:43 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.898876404494382 324 | 2018-03-10 01:03:43 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8990011098779135 325 | 2018-03-10 01:03:44 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8981018981018981 326 | 2018-03-10 01:03:44 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.8946412352406903 327 | 2018-03-10 01:03:45 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8900915903413822 328 | 2018-03-10 01:03:46 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8862413528055342 329 | 2018-03-10 01:03:46 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8872234118486795 330 | 2018-03-10 01:03:47 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8834110592938041 331 | 2018-03-10 01:03:47 - TestLog - INFO - 1601 samples classified. 
Accuracy up till now is 0.8838226108682073 332 | 2018-03-10 01:03:47 - TestLog - INFO - Total 1620 samples classified with accuracy 0.8833333333333333 333 | 2018-03-10 01:03:47 - TestLog - INFO - AUROC is 0.9644468378794926 334 | 2018-03-10 01:03:47 - TestLog - INFO - Classification report: 335 | precision recall f1-score support 336 | 337 | 0 0.74742 0.74742 0.74742 194 338 | 1 0.85342 0.83175 0.84244 315 339 | 2 0.91510 0.92169 0.91839 1111 340 | 341 | avg / total 0.88303 0.88333 0.88315 1620 342 | 343 | 2018-03-10 01:03:47 - TestLog - INFO - Confusion matrix: 344 | [[ 145 3 46] 345 | [ 4 262 49] 346 | [ 45 42 1024]] 347 | 2018-03-10 01:03:47 - CVLog - INFO - Validation round 8 of 10 starting 348 | 2018-03-10 01:03:48 - TrainingLog - INFO - Initiating training of main classifier 349 | 2018-03-10 01:04:04 - TrainingLog - INFO - Feature extractor ready 350 | 2018-03-10 01:04:11 - TrainingLog - INFO - 1001 of 14582 feature vectors prepared for training 351 | 2018-03-10 01:04:17 - TrainingLog - INFO - 2001 of 14582 feature vectors prepared for training 352 | 2018-03-10 01:04:22 - TrainingLog - INFO - 3001 of 14582 feature vectors prepared for training 353 | 2018-03-10 01:04:28 - TrainingLog - INFO - 4001 of 14582 feature vectors prepared for training 354 | 2018-03-10 01:04:34 - TrainingLog - INFO - 5001 of 14582 feature vectors prepared for training 355 | 2018-03-10 01:04:39 - TrainingLog - INFO - 6001 of 14582 feature vectors prepared for training 356 | 2018-03-10 01:04:45 - TrainingLog - INFO - 7001 of 14582 feature vectors prepared for training 357 | 2018-03-10 01:04:50 - TrainingLog - INFO - 8001 of 14582 feature vectors prepared for training 358 | 2018-03-10 01:04:56 - TrainingLog - INFO - 9001 of 14582 feature vectors prepared for training 359 | 2018-03-10 01:05:02 - TrainingLog - INFO - 10001 of 14582 feature vectors prepared for training 360 | 2018-03-10 01:05:07 - TrainingLog - INFO - 11001 of 14582 feature vectors prepared for training 361 | 2018-03-10 01:05:13 - TrainingLog - INFO - 12001 of 14582 feature vectors prepared for training 362 | 2018-03-10 01:05:18 - TrainingLog - INFO - 13001 of 14582 feature vectors prepared for training 363 | 2018-03-10 01:05:24 - TrainingLog - INFO - 14001 of 14582 feature vectors prepared for training 364 | 2018-03-10 01:05:36 - TrainingLog - INFO - Main classifier training finished 365 | 2018-03-10 01:05:37 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8910891089108911 366 | 2018-03-10 01:05:37 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8805970149253731 367 | 2018-03-10 01:05:38 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8903654485049833 368 | 2018-03-10 01:05:38 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8877805486284289 369 | 2018-03-10 01:05:39 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8862275449101796 370 | 2018-03-10 01:05:39 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8868552412645591 371 | 2018-03-10 01:05:39 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.884450784593438 372 | 2018-03-10 01:05:40 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8888888888888888 373 | 2018-03-10 01:05:40 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8912319644839067 374 | 2018-03-10 01:05:41 - TestLog - INFO - 1001 samples classified. 
Accuracy up till now is 0.8881118881118881 375 | 2018-03-10 01:05:41 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.888283378746594 376 | 2018-03-10 01:05:42 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8892589508742714 377 | 2018-03-10 01:05:42 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8847040737893928 378 | 2018-03-10 01:05:42 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8843683083511777 379 | 2018-03-10 01:05:43 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8807461692205196 380 | 2018-03-10 01:05:43 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.8782011242973142 381 | 2018-03-10 01:05:43 - TestLog - INFO - Total 1620 samples classified with accuracy 0.8783950617283951 382 | 2018-03-10 01:05:43 - TestLog - INFO - AUROC is 0.9646955779919653 383 | 2018-03-10 01:05:43 - TestLog - INFO - Classification report: 384 | precision recall f1-score support 385 | 386 | 0 0.78613 0.70103 0.74114 194 387 | 1 0.87108 0.79365 0.83056 315 388 | 2 0.89397 0.93339 0.91325 1111 389 | 390 | avg / total 0.87660 0.87840 0.87656 1620 391 | 392 | 2018-03-10 01:05:43 - TestLog - INFO - Confusion matrix: 393 | [[ 136 0 58] 394 | [ 0 250 65] 395 | [ 37 37 1037]] 396 | 2018-03-10 01:05:43 - CVLog - INFO - Validation round 9 of 10 starting 397 | 2018-03-10 01:05:44 - TrainingLog - INFO - Initiating training of main classifier 398 | 2018-03-10 01:06:00 - TrainingLog - INFO - Feature extractor ready 399 | 2018-03-10 01:06:08 - TrainingLog - INFO - 1001 of 14583 feature vectors prepared for training 400 | 2018-03-10 01:06:14 - TrainingLog - INFO - 2001 of 14583 feature vectors prepared for training 401 | 2018-03-10 01:06:19 - TrainingLog - INFO - 3001 of 14583 feature vectors prepared for training 402 | 2018-03-10 01:06:26 - TrainingLog - INFO - 4001 of 14583 feature vectors prepared for training 403 | 2018-03-10 01:06:31 - TrainingLog - INFO - 5001 of 14583 feature vectors prepared for training 404 | 2018-03-10 01:06:37 - TrainingLog - INFO - 6001 of 14583 feature vectors prepared for training 405 | 2018-03-10 01:06:43 - TrainingLog - INFO - 7001 of 14583 feature vectors prepared for training 406 | 2018-03-10 01:06:49 - TrainingLog - INFO - 8001 of 14583 feature vectors prepared for training 407 | 2018-03-10 01:06:55 - TrainingLog - INFO - 9001 of 14583 feature vectors prepared for training 408 | 2018-03-10 01:07:01 - TrainingLog - INFO - 10001 of 14583 feature vectors prepared for training 409 | 2018-03-10 01:07:06 - TrainingLog - INFO - 11001 of 14583 feature vectors prepared for training 410 | 2018-03-10 01:07:13 - TrainingLog - INFO - 12001 of 14583 feature vectors prepared for training 411 | 2018-03-10 01:07:19 - TrainingLog - INFO - 13001 of 14583 feature vectors prepared for training 412 | 2018-03-10 01:07:25 - TrainingLog - INFO - 14001 of 14583 feature vectors prepared for training 413 | 2018-03-10 01:07:41 - TrainingLog - INFO - Main classifier training finished 414 | 2018-03-10 01:07:42 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8712871287128713 415 | 2018-03-10 01:07:43 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8606965174129353 416 | 2018-03-10 01:07:43 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8903654485049833 417 | 2018-03-10 01:07:44 - TestLog - INFO - 401 samples classified. 
Accuracy up till now is 0.8778054862842892 418 | 2018-03-10 01:07:44 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8662674650698603 419 | 2018-03-10 01:07:45 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8635607321131448 420 | 2018-03-10 01:07:46 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8630527817403709 421 | 2018-03-10 01:07:46 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8664169787765293 422 | 2018-03-10 01:07:47 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8723640399556049 423 | 2018-03-10 01:07:47 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8681318681318682 424 | 2018-03-10 01:07:47 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.8683015440508629 425 | 2018-03-10 01:07:48 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8734388009991674 426 | 2018-03-10 01:07:48 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8754803996925442 427 | 2018-03-10 01:07:49 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8736616702355461 428 | 2018-03-10 01:07:49 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8747501665556295 429 | 2018-03-10 01:07:50 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.8744534665833854 430 | 2018-03-10 01:07:50 - TestLog - INFO - Total 1619 samples classified with accuracy 0.8746139592340951 431 | 2018-03-10 01:07:50 - TestLog - INFO - AUROC is 0.9607994436010016 432 | 2018-03-10 01:07:50 - TestLog - INFO - Classification report: 433 | precision recall f1-score support 434 | 435 | 0 0.72464 0.77320 0.74813 194 436 | 1 0.84950 0.80892 0.82871 314 437 | 2 0.90925 0.91089 0.91007 1111 438 | 439 | avg / total 0.87554 0.87461 0.87489 1619 440 | 441 | 2018-03-10 01:07:50 - TestLog - INFO - Confusion matrix: 442 | [[ 150 2 42] 443 | [ 1 254 59] 444 | [ 56 43 1012]] 445 | 2018-03-10 01:07:50 - CVLog - INFO - Validation round 10 of 10 starting 446 | 2018-03-10 01:07:50 - TrainingLog - INFO - Initiating training of main classifier 447 | 2018-03-10 01:08:07 - TrainingLog - INFO - Feature extractor ready 448 | 2018-03-10 01:08:16 - TrainingLog - INFO - 1001 of 14584 feature vectors prepared for training 449 | 2018-03-10 01:08:22 - TrainingLog - INFO - 2001 of 14584 feature vectors prepared for training 450 | 2018-03-10 01:08:27 - TrainingLog - INFO - 3001 of 14584 feature vectors prepared for training 451 | 2018-03-10 01:08:34 - TrainingLog - INFO - 4001 of 14584 feature vectors prepared for training 452 | 2018-03-10 01:08:40 - TrainingLog - INFO - 5001 of 14584 feature vectors prepared for training 453 | 2018-03-10 01:08:45 - TrainingLog - INFO - 6001 of 14584 feature vectors prepared for training 454 | 2018-03-10 01:08:51 - TrainingLog - INFO - 7001 of 14584 feature vectors prepared for training 455 | 2018-03-10 01:08:58 - TrainingLog - INFO - 8001 of 14584 feature vectors prepared for training 456 | 2018-03-10 01:09:04 - TrainingLog - INFO - 9001 of 14584 feature vectors prepared for training 457 | 2018-03-10 01:09:09 - TrainingLog - INFO - 10001 of 14584 feature vectors prepared for training 458 | 2018-03-10 01:09:15 - TrainingLog - INFO - 11001 of 14584 feature vectors prepared for training 459 | 2018-03-10 01:09:22 - TrainingLog - INFO - 12001 of 14584 feature vectors prepared for training 460 | 2018-03-10 01:09:29 - TrainingLog - INFO - 13001 of 14584 feature vectors prepared for training 461 | 2018-03-10 
01:09:34 - TrainingLog - INFO - 14001 of 14584 feature vectors prepared for training 462 | 2018-03-10 01:09:51 - TrainingLog - INFO - Main classifier training finished 463 | 2018-03-10 01:09:51 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.900990099009901 464 | 2018-03-10 01:09:52 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8955223880597015 465 | 2018-03-10 01:09:53 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.867109634551495 466 | 2018-03-10 01:09:54 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8553615960099751 467 | 2018-03-10 01:09:55 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8602794411177644 468 | 2018-03-10 01:09:55 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8635607321131448 469 | 2018-03-10 01:09:56 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8616262482168331 470 | 2018-03-10 01:09:56 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8626716604244694 471 | 2018-03-10 01:09:57 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8645948945615982 472 | 2018-03-10 01:09:58 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8611388611388612 473 | 2018-03-10 01:09:58 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.8637602179836512 474 | 2018-03-10 01:09:59 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8659450457951707 475 | 2018-03-10 01:09:59 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8639508070714835 476 | 2018-03-10 01:10:00 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.860813704496788 477 | 2018-03-10 01:10:01 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8580946035976016 478 | 2018-03-10 01:10:01 - TestLog - INFO - 1601 samples classified. 
Accuracy up till now is 0.858213616489694 479 | 2018-03-10 01:10:01 - TestLog - INFO - Total 1618 samples classified with accuracy 0.8572311495673671 480 | 2018-03-10 01:10:01 - TestLog - INFO - AUROC is 0.9508771093530785 481 | 2018-03-10 01:10:01 - TestLog - INFO - Classification report: 482 | precision recall f1-score support 483 | 484 | 0 0.69154 0.72021 0.70558 193 485 | 1 0.81553 0.80255 0.80899 314 486 | 2 0.89892 0.89649 0.89770 1111 487 | 488 | avg / total 0.85800 0.85723 0.85757 1618 489 | 490 | 2018-03-10 01:10:01 - TestLog - INFO - Confusion matrix: 491 | [[139 0 54] 492 | [ 4 252 58] 493 | [ 58 57 996]] 494 | 2018-03-10 01:10:01 - CVLog - INFO - 495 | 496 | 2018-03-10 01:10:01 - CVLog - INFO - Summary (precision, recall, F1, accuracy): 497 | 2018-03-10 01:10:01 - CVLog - INFO - Metrics for round 1: [0.87062632615791136, 0.87045033929673044, 0.87046424951431234, 0.87045033929673044] 498 | 2018-03-10 01:10:01 - CVLog - INFO - Metrics for round 2: [0.86505863908500913, 0.86489821098087605, 0.86495414738832921, 0.86489821098087605] 499 | 2018-03-10 01:10:01 - CVLog - INFO - Metrics for round 3: [0.87019958772708716, 0.86983343615052433, 0.87000569175104703, 0.86983343615052433] 500 | 2018-03-10 01:10:01 - CVLog - INFO - Metrics for round 4: [0.87409637551049046, 0.87538556446637883, 0.87451669615902761, 0.87538556446637883] 501 | 2018-03-10 01:10:01 - CVLog - INFO - Metrics for round 5: [0.87621281392349637, 0.87785317705120292, 0.87674468833124375, 0.87785317705120292] 502 | 2018-03-10 01:10:01 - CVLog - INFO - Metrics for round 6: [0.879799651573832, 0.88024691358024687, 0.8799250723311256, 0.88024691358024687] 503 | 2018-03-10 01:10:01 - CVLog - INFO - Metrics for round 7: [0.88302872802523003, 0.8833333333333333, 0.88314582241896122, 0.8833333333333333] 504 | 2018-03-10 01:10:01 - CVLog - INFO - Metrics for round 8: [0.87660160745545845, 0.8783950617283951, 0.87656493736708452, 0.8783950617283951] 505 | 2018-03-10 01:10:01 - CVLog - INFO - Metrics for round 9: [0.8755427279344199, 0.87461395923409513, 0.8748872264272185, 0.87461395923409513] 506 | 2018-03-10 01:10:01 - CVLog - INFO - Metrics for round 10: [0.85799881489306673, 0.85723114956736712, 0.85756902928668621, 0.85723114956736712] 507 | 2018-03-10 01:10:01 - CVLog - INFO - 508 | 509 | 2018-03-10 01:10:01 - CVLog - INFO - Final average metrics: 0.8729165272286001, 0.873224114538915, 0.8728777560975036, 0.873224114538915 510 | 511 | Process finished with exit code 0 512 | -------------------------------------------------------------------------------- /Results/Waseem_Hovy_hidden-baseline.txt: -------------------------------------------------------------------------------- 1 | /Library/Frameworks/Python.framework/Versions/3.6/bin/python3.6 /Users/pushkarmishra/Desktop/AuthorProfileAbuseDetection/twitter_model.py -c 30000 2 | Using Theano backend. 
3 | 2018-03-06 17:25:33 - CVLog - INFO - 10-fold cross validation procedure has begun 4 | 2018-03-06 17:25:33 - CVLog - INFO - Validation round 1 of 10 starting 5 | 2018-03-06 17:25:33 - TrainingLog - INFO - Initiating training of main classifier 6 | 2018-03-06 17:25:53 - TrainingLog - INFO - Feature extractor ready 7 | 2018-03-06 17:26:06 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 8 | 2018-03-06 17:26:13 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 9 | 2018-03-06 17:26:20 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 10 | 2018-03-06 17:26:27 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 11 | 2018-03-06 17:26:34 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 12 | 2018-03-06 17:26:40 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 13 | 2018-03-06 17:26:46 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 14 | 2018-03-06 17:26:53 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 15 | 2018-03-06 17:27:00 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 16 | 2018-03-06 17:27:07 - TrainingLog - INFO - 10001 of 14581 feature vectors prepared for training 17 | 2018-03-06 17:27:13 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 18 | 2018-03-06 17:27:19 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 19 | 2018-03-06 17:27:26 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 20 | 2018-03-06 17:27:32 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 21 | 2018-03-06 17:27:47 - TrainingLog - INFO - Main classifier training finished 22 | 2018-03-06 17:27:48 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8316831683168316 23 | 2018-03-06 17:27:49 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8159203980099502 24 | 2018-03-06 17:27:51 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8205980066445183 25 | 2018-03-06 17:27:52 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8129675810473815 26 | 2018-03-06 17:27:54 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8243512974051896 27 | 2018-03-06 17:27:55 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8386023294509152 28 | 2018-03-06 17:27:57 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8388017118402282 29 | 2018-03-06 17:27:58 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8389513108614233 30 | 2018-03-06 17:28:00 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8379578246392897 31 | 2018-03-06 17:28:01 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8371628371628371 32 | 2018-03-06 17:28:02 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.8365122615803815 33 | 2018-03-06 17:28:04 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8401332223147377 34 | 2018-03-06 17:28:04 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8378170637970792 35 | 2018-03-06 17:28:05 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8379728765167738 36 | 2018-03-06 17:28:06 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8374417055296469 37 | 2018-03-06 17:28:07 - TestLog - INFO - 1601 samples classified. 
Accuracy up till now is 0.8376014990630856 38 | 2018-03-06 17:28:07 - TestLog - INFO - Total 1621 samples classified with accuracy 0.838371375694016 39 | 2018-03-06 17:28:07 - TestLog - INFO - AUROC is 0.9214737176884372 40 | 2018-03-06 17:28:07 - TestLog - INFO - Classification report: 41 | precision recall f1-score support 42 | 43 | 0 0.76316 0.74742 0.75521 194 44 | 1 0.74823 0.66984 0.70687 315 45 | 2 0.87293 0.90198 0.88722 1112 46 | 47 | avg / total 0.83556 0.83837 0.83637 1621 48 | 49 | 2018-03-06 17:28:07 - TestLog - INFO - Confusion matrix: 50 | [[ 145 4 45] 51 | [ 3 211 101] 52 | [ 42 67 1003]] 53 | 2018-03-06 17:28:07 - CVLog - INFO - Validation round 2 of 10 starting 54 | 2018-03-06 17:28:07 - TrainingLog - INFO - Initiating training of main classifier 55 | 2018-03-06 17:28:27 - TrainingLog - INFO - Feature extractor ready 56 | 2018-03-06 17:28:35 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 57 | 2018-03-06 17:28:41 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 58 | 2018-03-06 17:28:48 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 59 | 2018-03-06 17:28:55 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 60 | 2018-03-06 17:29:01 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 61 | 2018-03-06 17:29:09 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 62 | 2018-03-06 17:29:15 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 63 | 2018-03-06 17:29:22 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 64 | 2018-03-06 17:29:31 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 65 | 2018-03-06 17:29:39 - TrainingLog - INFO - 10001 of 14581 feature vectors prepared for training 66 | 2018-03-06 17:29:46 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 67 | 2018-03-06 17:29:53 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 68 | 2018-03-06 17:30:01 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 69 | 2018-03-06 17:30:09 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 70 | 2018-03-06 17:30:26 - TrainingLog - INFO - Main classifier training finished 71 | 2018-03-06 17:30:27 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8415841584158416 72 | 2018-03-06 17:30:28 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8706467661691543 73 | 2018-03-06 17:30:29 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8504983388704319 74 | 2018-03-06 17:30:30 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8403990024937655 75 | 2018-03-06 17:30:31 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8363273453093812 76 | 2018-03-06 17:30:33 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8402662229617305 77 | 2018-03-06 17:30:34 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8459343794579173 78 | 2018-03-06 17:30:36 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8401997503121099 79 | 2018-03-06 17:30:37 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8357380688124306 80 | 2018-03-06 17:30:38 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8391608391608392 81 | 2018-03-06 17:30:39 - TestLog - INFO - 1101 samples classified. 
Accuracy up till now is 0.8310626702997275 82 | 2018-03-06 17:30:41 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.832639467110741 83 | 2018-03-06 17:30:43 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8285933897002306 84 | 2018-03-06 17:30:44 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8251249107780158 85 | 2018-03-06 17:30:45 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8221185876082612 86 | 2018-03-06 17:30:46 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.8219862585883823 87 | 2018-03-06 17:30:47 - TestLog - INFO - Total 1621 samples classified with accuracy 0.8217149907464528 88 | 2018-03-06 17:30:47 - TestLog - INFO - AUROC is 0.9248618781665271 89 | 2018-03-06 17:30:47 - TestLog - INFO - Classification report: 90 | precision recall f1-score support 91 | 92 | 0 0.72021 0.71649 0.71835 194 93 | 1 0.72982 0.66032 0.69333 315 94 | 2 0.86177 0.88579 0.87361 1112 95 | 96 | avg / total 0.81919 0.82171 0.82000 1621 97 | 98 | 2018-03-06 17:30:47 - TestLog - INFO - Confusion matrix: 99 | [[139 2 53] 100 | [ 2 208 105] 101 | [ 52 75 985]] 102 | 2018-03-06 17:30:47 - CVLog - INFO - Validation round 3 of 10 starting 103 | 2018-03-06 17:30:47 - TrainingLog - INFO - Initiating training of main classifier 104 | 2018-03-06 17:31:10 - TrainingLog - INFO - Feature extractor ready 105 | 2018-03-06 17:31:18 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 106 | 2018-03-06 17:31:25 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 107 | 2018-03-06 17:31:33 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 108 | 2018-03-06 17:31:40 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 109 | 2018-03-06 17:31:48 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 110 | 2018-03-06 17:31:56 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 111 | 2018-03-06 17:32:03 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 112 | 2018-03-06 17:32:09 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 113 | 2018-03-06 17:32:17 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 114 | 2018-03-06 17:32:28 - TrainingLog - INFO - 10001 of 14581 feature vectors prepared for training 115 | 2018-03-06 17:32:37 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 116 | 2018-03-06 17:32:45 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 117 | 2018-03-06 17:32:53 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 118 | 2018-03-06 17:33:03 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 119 | 2018-03-06 17:33:21 - TrainingLog - INFO - Main classifier training finished 120 | 2018-03-06 17:33:22 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8514851485148515 121 | 2018-03-06 17:33:23 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8656716417910447 122 | 2018-03-06 17:33:25 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8571428571428571 123 | 2018-03-06 17:33:26 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8478802992518704 124 | 2018-03-06 17:33:28 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.846307385229541 125 | 2018-03-06 17:33:30 - TestLog - INFO - 601 samples classified. 
Accuracy up till now is 0.848585690515807 126 | 2018-03-06 17:33:31 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8416547788873039 127 | 2018-03-06 17:33:32 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8314606741573034 128 | 2018-03-06 17:33:34 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8312985571587126 129 | 2018-03-06 17:33:35 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8271728271728271 130 | 2018-03-06 17:33:36 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.8265213442325159 131 | 2018-03-06 17:33:38 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8309741881765196 132 | 2018-03-06 17:33:39 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8339738662567256 133 | 2018-03-06 17:33:41 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8358315488936474 134 | 2018-03-06 17:33:42 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8367754830113258 135 | 2018-03-06 17:33:44 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.835103060587133 136 | 2018-03-06 17:33:44 - TestLog - INFO - Total 1621 samples classified with accuracy 0.8340530536705737 137 | 2018-03-06 17:33:44 - TestLog - INFO - AUROC is 0.9239567880714993 138 | 2018-03-06 17:33:44 - TestLog - INFO - Classification report: 139 | precision recall f1-score support 140 | 141 | 0 0.71287 0.74227 0.72727 194 142 | 1 0.75779 0.69524 0.72517 315 143 | 2 0.87522 0.88939 0.88225 1112 144 | 145 | avg / total 0.83297 0.83405 0.83318 1621 146 | 147 | 2018-03-06 17:33:44 - TestLog - INFO - Confusion matrix: 148 | [[144 1 49] 149 | [ 4 219 92] 150 | [ 54 69 989]] 151 | 2018-03-06 17:33:44 - CVLog - INFO - Validation round 4 of 10 starting 152 | 2018-03-06 17:33:44 - TrainingLog - INFO - Initiating training of main classifier 153 | 2018-03-06 17:34:06 - TrainingLog - INFO - Feature extractor ready 154 | 2018-03-06 17:34:15 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 155 | 2018-03-06 17:34:22 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 156 | 2018-03-06 17:34:30 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 157 | 2018-03-06 17:34:38 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 158 | 2018-03-06 17:34:49 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 159 | 2018-03-06 17:34:58 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 160 | 2018-03-06 17:35:08 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 161 | 2018-03-06 17:35:18 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 162 | 2018-03-06 17:35:27 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 163 | 2018-03-06 17:35:36 - TrainingLog - INFO - 10001 of 14581 feature vectors prepared for training 164 | 2018-03-06 17:35:46 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 165 | 2018-03-06 17:35:56 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 166 | 2018-03-06 17:36:07 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 167 | 2018-03-06 17:36:16 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 168 | 2018-03-06 17:36:34 - TrainingLog - INFO - Main classifier training finished 169 | 2018-03-06 17:36:35 - TestLog - INFO - 101 samples 
classified. Accuracy up till now is 0.8514851485148515 170 | 2018-03-06 17:36:36 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8855721393034826 171 | 2018-03-06 17:36:37 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.867109634551495 172 | 2018-03-06 17:36:38 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8778054862842892 173 | 2018-03-06 17:36:39 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8842315369261478 174 | 2018-03-06 17:36:40 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8768718801996672 175 | 2018-03-06 17:36:41 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8730385164051355 176 | 2018-03-06 17:36:42 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8589263420724095 177 | 2018-03-06 17:36:44 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8546059933407325 178 | 2018-03-06 17:36:46 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8541458541458542 179 | 2018-03-06 17:36:47 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.846503178928247 180 | 2018-03-06 17:36:49 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8442964196502915 181 | 2018-03-06 17:36:50 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8393543428132206 182 | 2018-03-06 17:36:51 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8386866523911491 183 | 2018-03-06 17:36:53 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8367754830113258 184 | 2018-03-06 17:36:54 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.8425983760149907 185 | 2018-03-06 17:36:54 - TestLog - INFO - Total 1621 samples classified with accuracy 0.8433066008636644 186 | 2018-03-06 17:36:54 - TestLog - INFO - AUROC is 0.9252341574439802 187 | 2018-03-06 17:36:54 - TestLog - INFO - Classification report: 188 | precision recall f1-score support 189 | 190 | 0 0.78453 0.73196 0.75733 194 191 | 1 0.76157 0.67937 0.71812 315 192 | 2 0.87230 0.90917 0.89036 1112 193 | 194 | avg / total 0.84028 0.84331 0.84097 1621 195 | 196 | 2018-03-06 17:36:54 - TestLog - INFO - Confusion matrix: 197 | [[ 142 3 49] 198 | [ 2 214 99] 199 | [ 37 64 1011]] 200 | 2018-03-06 17:36:54 - CVLog - INFO - Validation round 5 of 10 starting 201 | 2018-03-06 17:36:54 - TrainingLog - INFO - Initiating training of main classifier 202 | 2018-03-06 17:37:24 - TrainingLog - INFO - Feature extractor ready 203 | 2018-03-06 17:37:37 - TrainingLog - INFO - 1001 of 14581 feature vectors prepared for training 204 | 2018-03-06 17:37:46 - TrainingLog - INFO - 2001 of 14581 feature vectors prepared for training 205 | 2018-03-06 17:37:56 - TrainingLog - INFO - 3001 of 14581 feature vectors prepared for training 206 | 2018-03-06 17:38:06 - TrainingLog - INFO - 4001 of 14581 feature vectors prepared for training 207 | 2018-03-06 17:38:16 - TrainingLog - INFO - 5001 of 14581 feature vectors prepared for training 208 | 2018-03-06 17:38:26 - TrainingLog - INFO - 6001 of 14581 feature vectors prepared for training 209 | 2018-03-06 17:38:38 - TrainingLog - INFO - 7001 of 14581 feature vectors prepared for training 210 | 2018-03-06 17:38:49 - TrainingLog - INFO - 8001 of 14581 feature vectors prepared for training 211 | 2018-03-06 17:38:59 - TrainingLog - INFO - 9001 of 14581 feature vectors prepared for training 212 | 2018-03-06 17:39:09 - TrainingLog - INFO - 10001 of 14581 feature vectors 
prepared for training 213 | 2018-03-06 17:39:18 - TrainingLog - INFO - 11001 of 14581 feature vectors prepared for training 214 | 2018-03-06 17:39:27 - TrainingLog - INFO - 12001 of 14581 feature vectors prepared for training 215 | 2018-03-06 17:39:37 - TrainingLog - INFO - 13001 of 14581 feature vectors prepared for training 216 | 2018-03-06 17:39:48 - TrainingLog - INFO - 14001 of 14581 feature vectors prepared for training 217 | 2018-03-06 17:40:08 - TrainingLog - INFO - Main classifier training finished 218 | 2018-03-06 17:40:09 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8613861386138614 219 | 2018-03-06 17:40:12 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8208955223880597 220 | 2018-03-06 17:40:13 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8172757475083057 221 | 2018-03-06 17:40:14 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8329177057356608 222 | 2018-03-06 17:40:15 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8323353293413174 223 | 2018-03-06 17:40:16 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8336106489184693 224 | 2018-03-06 17:40:17 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8402282453637661 225 | 2018-03-06 17:40:18 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8439450686641697 226 | 2018-03-06 17:40:20 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8423973362930077 227 | 2018-03-06 17:40:21 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8471528471528471 228 | 2018-03-06 17:40:23 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.849227974568574 229 | 2018-03-06 17:40:24 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8492922564529559 230 | 2018-03-06 17:40:26 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.851652574942352 231 | 2018-03-06 17:40:27 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8479657387580299 232 | 2018-03-06 17:40:29 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8481012658227848 233 | 2018-03-06 17:40:31 - TestLog - INFO - 1601 samples classified. 
Accuracy up till now is 0.8450968144909432 234 | 2018-03-06 17:40:32 - TestLog - INFO - Total 1621 samples classified with accuracy 0.8457742134484886 235 | 2018-03-06 17:40:32 - TestLog - INFO - AUROC is 0.9265803964921612 236 | 2018-03-06 17:40:32 - TestLog - INFO - Classification report: 237 | precision recall f1-score support 238 | 239 | 0 0.76744 0.68041 0.72131 194 240 | 1 0.77627 0.72698 0.75082 315 241 | 2 0.87522 0.90827 0.89144 1112 242 | 243 | avg / total 0.84309 0.84577 0.84375 1621 244 | 245 | 2018-03-06 17:40:32 - TestLog - INFO - Confusion matrix: 246 | [[ 132 3 59] 247 | [ 1 229 85] 248 | [ 39 63 1010]] 249 | 2018-03-06 17:40:32 - CVLog - INFO - Validation round 6 of 10 starting 250 | 2018-03-06 17:40:32 - TrainingLog - INFO - Initiating training of main classifier 251 | 2018-03-06 17:41:00 - TrainingLog - INFO - Feature extractor ready 252 | 2018-03-06 17:41:09 - TrainingLog - INFO - 1001 of 14582 feature vectors prepared for training 253 | 2018-03-06 17:41:19 - TrainingLog - INFO - 2001 of 14582 feature vectors prepared for training 254 | 2018-03-06 17:41:29 - TrainingLog - INFO - 3001 of 14582 feature vectors prepared for training 255 | 2018-03-06 17:41:39 - TrainingLog - INFO - 4001 of 14582 feature vectors prepared for training 256 | 2018-03-06 17:41:50 - TrainingLog - INFO - 5001 of 14582 feature vectors prepared for training 257 | 2018-03-06 17:42:02 - TrainingLog - INFO - 6001 of 14582 feature vectors prepared for training 258 | 2018-03-06 17:42:13 - TrainingLog - INFO - 7001 of 14582 feature vectors prepared for training 259 | 2018-03-06 17:42:24 - TrainingLog - INFO - 8001 of 14582 feature vectors prepared for training 260 | 2018-03-06 17:42:35 - TrainingLog - INFO - 9001 of 14582 feature vectors prepared for training 261 | 2018-03-06 17:42:44 - TrainingLog - INFO - 10001 of 14582 feature vectors prepared for training 262 | 2018-03-06 17:42:55 - TrainingLog - INFO - 11001 of 14582 feature vectors prepared for training 263 | 2018-03-06 17:43:05 - TrainingLog - INFO - 12001 of 14582 feature vectors prepared for training 264 | 2018-03-06 17:43:15 - TrainingLog - INFO - 13001 of 14582 feature vectors prepared for training 265 | 2018-03-06 17:43:26 - TrainingLog - INFO - 14001 of 14582 feature vectors prepared for training 266 | 2018-03-06 17:43:45 - TrainingLog - INFO - Main classifier training finished 267 | 2018-03-06 17:43:46 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8910891089108911 268 | 2018-03-06 17:43:47 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.835820895522388 269 | 2018-03-06 17:43:48 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8504983388704319 270 | 2018-03-06 17:43:49 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.85785536159601 271 | 2018-03-06 17:43:50 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8642714570858283 272 | 2018-03-06 17:43:51 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8569051580698835 273 | 2018-03-06 17:43:53 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8573466476462197 274 | 2018-03-06 17:43:56 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8576779026217228 275 | 2018-03-06 17:43:57 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8523862375138734 276 | 2018-03-06 17:43:58 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8521478521478522 277 | 2018-03-06 17:44:00 - TestLog - INFO - 1101 samples classified. 
Accuracy up till now is 0.849227974568574 278 | 2018-03-06 17:44:01 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8484596169858452 279 | 2018-03-06 17:44:02 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8485780169100692 280 | 2018-03-06 17:44:04 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8465381870092791 281 | 2018-03-06 17:44:05 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8421052631578947 282 | 2018-03-06 17:44:06 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.8450968144909432 283 | 2018-03-06 17:44:07 - TestLog - INFO - Total 1620 samples classified with accuracy 0.845679012345679 284 | 2018-03-06 17:44:07 - TestLog - INFO - AUROC is 0.9191414585478324 285 | 2018-03-06 17:44:07 - TestLog - INFO - Classification report: 286 | precision recall f1-score support 287 | 288 | 0 0.75691 0.70619 0.73067 194 289 | 1 0.78322 0.71111 0.74542 315 290 | 2 0.87511 0.90819 0.89134 1111 291 | 292 | avg / total 0.84309 0.84568 0.84373 1620 293 | 294 | 2018-03-06 17:44:07 - TestLog - INFO - Confusion matrix: 295 | [[ 137 4 53] 296 | [ 0 224 91] 297 | [ 44 58 1009]] 298 | 2018-03-06 17:44:07 - CVLog - INFO - Validation round 7 of 10 starting 299 | 2018-03-06 17:44:07 - TrainingLog - INFO - Initiating training of main classifier 300 | 2018-03-06 17:44:38 - TrainingLog - INFO - Feature extractor ready 301 | 2018-03-06 17:44:51 - TrainingLog - INFO - 1001 of 14582 feature vectors prepared for training 302 | 2018-03-06 17:45:00 - TrainingLog - INFO - 2001 of 14582 feature vectors prepared for training 303 | 2018-03-06 17:45:10 - TrainingLog - INFO - 3001 of 14582 feature vectors prepared for training 304 | 2018-03-06 17:45:18 - TrainingLog - INFO - 4001 of 14582 feature vectors prepared for training 305 | 2018-03-06 17:45:27 - TrainingLog - INFO - 5001 of 14582 feature vectors prepared for training 306 | 2018-03-06 17:45:36 - TrainingLog - INFO - 6001 of 14582 feature vectors prepared for training 307 | 2018-03-06 17:45:48 - TrainingLog - INFO - 7001 of 14582 feature vectors prepared for training 308 | 2018-03-06 17:45:58 - TrainingLog - INFO - 8001 of 14582 feature vectors prepared for training 309 | 2018-03-06 17:46:07 - TrainingLog - INFO - 9001 of 14582 feature vectors prepared for training 310 | 2018-03-06 17:46:18 - TrainingLog - INFO - 10001 of 14582 feature vectors prepared for training 311 | 2018-03-06 17:46:28 - TrainingLog - INFO - 11001 of 14582 feature vectors prepared for training 312 | 2018-03-06 17:46:40 - TrainingLog - INFO - 12001 of 14582 feature vectors prepared for training 313 | 2018-03-06 17:46:50 - TrainingLog - INFO - 13001 of 14582 feature vectors prepared for training 314 | 2018-03-06 17:47:00 - TrainingLog - INFO - 14001 of 14582 feature vectors prepared for training 315 | 2018-03-06 17:47:18 - TrainingLog - INFO - Main classifier training finished 316 | 2018-03-06 17:47:20 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8415841584158416 317 | 2018-03-06 17:47:22 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8756218905472637 318 | 2018-03-06 17:47:23 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8704318936877077 319 | 2018-03-06 17:47:24 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8728179551122195 320 | 2018-03-06 17:47:25 - TestLog - INFO - 501 samples classified. 
Accuracy up till now is 0.8522954091816367 321 | 2018-03-06 17:47:26 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8535773710482529 322 | 2018-03-06 17:47:27 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.854493580599144 323 | 2018-03-06 17:47:29 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8514357053682896 324 | 2018-03-06 17:47:31 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8512763596004439 325 | 2018-03-06 17:47:33 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8511488511488512 326 | 2018-03-06 17:47:35 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.8483197093551317 327 | 2018-03-06 17:47:37 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8401332223147377 328 | 2018-03-06 17:47:38 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8355111452728671 329 | 2018-03-06 17:47:39 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8336902212705211 330 | 2018-03-06 17:47:41 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.832111925383078 331 | 2018-03-06 17:47:43 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.835103060587133 332 | 2018-03-06 17:47:43 - TestLog - INFO - Total 1620 samples classified with accuracy 0.8351851851851851 333 | 2018-03-06 17:47:43 - TestLog - INFO - AUROC is 0.9241269014770084 334 | 2018-03-06 17:47:43 - TestLog - INFO - Classification report: 335 | precision recall f1-score support 336 | 337 | 0 0.73232 0.74742 0.73980 194 338 | 1 0.74138 0.68254 0.71074 315 339 | 2 0.87721 0.89379 0.88542 1111 340 | 341 | avg / total 0.83345 0.83519 0.83402 1620 342 | 343 | 2018-03-06 17:47:43 - TestLog - INFO - Confusion matrix: 344 | [[145 4 45] 345 | [ 6 215 94] 346 | [ 47 71 993]] 347 | 2018-03-06 17:47:43 - CVLog - INFO - Validation round 8 of 10 starting 348 | 2018-03-06 17:47:44 - TrainingLog - INFO - Initiating training of main classifier 349 | 2018-03-06 17:48:16 - TrainingLog - INFO - Feature extractor ready 350 | 2018-03-06 17:48:28 - TrainingLog - INFO - 1001 of 14582 feature vectors prepared for training 351 | 2018-03-06 17:48:38 - TrainingLog - INFO - 2001 of 14582 feature vectors prepared for training 352 | 2018-03-06 17:48:49 - TrainingLog - INFO - 3001 of 14582 feature vectors prepared for training 353 | 2018-03-06 17:48:59 - TrainingLog - INFO - 4001 of 14582 feature vectors prepared for training 354 | 2018-03-06 17:49:08 - TrainingLog - INFO - 5001 of 14582 feature vectors prepared for training 355 | 2018-03-06 17:49:17 - TrainingLog - INFO - 6001 of 14582 feature vectors prepared for training 356 | 2018-03-06 17:49:26 - TrainingLog - INFO - 7001 of 14582 feature vectors prepared for training 357 | 2018-03-06 17:49:34 - TrainingLog - INFO - 8001 of 14582 feature vectors prepared for training 358 | 2018-03-06 17:49:41 - TrainingLog - INFO - 9001 of 14582 feature vectors prepared for training 359 | 2018-03-06 17:49:49 - TrainingLog - INFO - 10001 of 14582 feature vectors prepared for training 360 | 2018-03-06 17:49:57 - TrainingLog - INFO - 11001 of 14582 feature vectors prepared for training 361 | 2018-03-06 17:50:04 - TrainingLog - INFO - 12001 of 14582 feature vectors prepared for training 362 | 2018-03-06 17:50:11 - TrainingLog - INFO - 13001 of 14582 feature vectors prepared for training 363 | 2018-03-06 17:50:21 - TrainingLog - INFO - 14001 of 14582 feature vectors prepared for training 364 | 2018-03-06 17:50:39 - 
TrainingLog - INFO - Main classifier training finished 365 | 2018-03-06 17:50:40 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.900990099009901 366 | 2018-03-06 17:50:41 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8805970149253731 367 | 2018-03-06 17:50:42 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8737541528239202 368 | 2018-03-06 17:50:43 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8603491271820449 369 | 2018-03-06 17:50:44 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8522954091816367 370 | 2018-03-06 17:50:45 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8452579034941764 371 | 2018-03-06 17:50:46 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8459343794579173 372 | 2018-03-06 17:50:47 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.846441947565543 373 | 2018-03-06 17:50:48 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8523862375138734 374 | 2018-03-06 17:50:49 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8511488511488512 375 | 2018-03-06 17:50:50 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.8555858310626703 376 | 2018-03-06 17:50:51 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8567860116569526 377 | 2018-03-06 17:50:53 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8524212144504227 378 | 2018-03-06 17:50:54 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8501070663811563 379 | 2018-03-06 17:50:55 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8421052631578947 380 | 2018-03-06 17:50:57 - TestLog - INFO - 1601 samples classified. 
Accuracy up till now is 0.8413491567770144 381 | 2018-03-06 17:50:57 - TestLog - INFO - Total 1620 samples classified with accuracy 0.841358024691358 382 | 2018-03-06 17:50:57 - TestLog - INFO - AUROC is 0.9299742699303101 383 | 2018-03-06 17:50:57 - TestLog - INFO - Classification report: 384 | precision recall f1-score support 385 | 386 | 0 0.75843 0.69588 0.72581 194 387 | 1 0.79468 0.66349 0.72318 315 388 | 2 0.86429 0.91719 0.88996 1111 389 | 390 | avg / total 0.83808 0.84136 0.83787 1620 391 | 392 | 2018-03-06 17:50:57 - TestLog - INFO - Confusion matrix: 393 | [[ 135 1 58] 394 | [ 4 209 102] 395 | [ 39 53 1019]] 396 | 2018-03-06 17:50:57 - CVLog - INFO - Validation round 9 of 10 starting 397 | 2018-03-06 17:50:57 - TrainingLog - INFO - Initiating training of main classifier 398 | 2018-03-06 17:51:25 - TrainingLog - INFO - Feature extractor ready 399 | 2018-03-06 17:51:38 - TrainingLog - INFO - 1001 of 14583 feature vectors prepared for training 400 | 2018-03-06 17:51:45 - TrainingLog - INFO - 2001 of 14583 feature vectors prepared for training 401 | 2018-03-06 17:51:51 - TrainingLog - INFO - 3001 of 14583 feature vectors prepared for training 402 | 2018-03-06 17:51:57 - TrainingLog - INFO - 4001 of 14583 feature vectors prepared for training 403 | 2018-03-06 17:52:04 - TrainingLog - INFO - 5001 of 14583 feature vectors prepared for training 404 | 2018-03-06 17:52:11 - TrainingLog - INFO - 6001 of 14583 feature vectors prepared for training 405 | 2018-03-06 17:52:17 - TrainingLog - INFO - 7001 of 14583 feature vectors prepared for training 406 | 2018-03-06 17:52:24 - TrainingLog - INFO - 8001 of 14583 feature vectors prepared for training 407 | 2018-03-06 17:52:31 - TrainingLog - INFO - 9001 of 14583 feature vectors prepared for training 408 | 2018-03-06 17:52:38 - TrainingLog - INFO - 10001 of 14583 feature vectors prepared for training 409 | 2018-03-06 17:52:45 - TrainingLog - INFO - 11001 of 14583 feature vectors prepared for training 410 | 2018-03-06 17:52:53 - TrainingLog - INFO - 12001 of 14583 feature vectors prepared for training 411 | 2018-03-06 17:53:02 - TrainingLog - INFO - 13001 of 14583 feature vectors prepared for training 412 | 2018-03-06 17:53:09 - TrainingLog - INFO - 14001 of 14583 feature vectors prepared for training 413 | 2018-03-06 17:53:26 - TrainingLog - INFO - Main classifier training finished 414 | 2018-03-06 17:53:27 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8217821782178217 415 | 2018-03-06 17:53:28 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8308457711442786 416 | 2018-03-06 17:53:28 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8571428571428571 417 | 2018-03-06 17:53:29 - TestLog - INFO - 401 samples classified. Accuracy up till now is 0.8478802992518704 418 | 2018-03-06 17:53:30 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.846307385229541 419 | 2018-03-06 17:53:31 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8419301164725458 420 | 2018-03-06 17:53:32 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8416547788873039 421 | 2018-03-06 17:53:33 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8414481897627965 422 | 2018-03-06 17:53:33 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8446170921198668 423 | 2018-03-06 17:53:34 - TestLog - INFO - 1001 samples classified. 
Accuracy up till now is 0.8411588411588412 424 | 2018-03-06 17:53:35 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.8392370572207084 425 | 2018-03-06 17:53:36 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8451290591174022 426 | 2018-03-06 17:53:38 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8447348193697156 427 | 2018-03-06 17:53:40 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8422555317630264 428 | 2018-03-06 17:53:41 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.843437708194537 429 | 2018-03-06 17:53:43 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.8457214241099313 430 | 2018-03-06 17:53:43 - TestLog - INFO - Total 1619 samples classified with accuracy 0.8449660284126004 431 | 2018-03-06 17:53:43 - TestLog - INFO - AUROC is 0.9303725809457754 432 | 2018-03-06 17:53:43 - TestLog - INFO - Classification report: 433 | precision recall f1-score support 434 | 435 | 0 0.71569 0.75258 0.73367 194 436 | 1 0.78947 0.71656 0.75125 314 437 | 2 0.88230 0.89739 0.88978 1111 438 | 439 | avg / total 0.84433 0.84497 0.84421 1619 440 | 441 | 2018-03-06 17:53:43 - TestLog - INFO - Confusion matrix: 442 | [[146 2 46] 443 | [ 2 225 87] 444 | [ 56 58 997]] 445 | 2018-03-06 17:53:43 - CVLog - INFO - Validation round 10 of 10 starting 446 | 2018-03-06 17:53:43 - TrainingLog - INFO - Initiating training of main classifier 447 | 2018-03-06 17:54:13 - TrainingLog - INFO - Feature extractor ready 448 | 2018-03-06 17:54:26 - TrainingLog - INFO - 1001 of 14584 feature vectors prepared for training 449 | 2018-03-06 17:54:35 - TrainingLog - INFO - 2001 of 14584 feature vectors prepared for training 450 | 2018-03-06 17:54:43 - TrainingLog - INFO - 3001 of 14584 feature vectors prepared for training 451 | 2018-03-06 17:54:52 - TrainingLog - INFO - 4001 of 14584 feature vectors prepared for training 452 | 2018-03-06 17:55:02 - TrainingLog - INFO - 5001 of 14584 feature vectors prepared for training 453 | 2018-03-06 17:55:12 - TrainingLog - INFO - 6001 of 14584 feature vectors prepared for training 454 | 2018-03-06 17:55:20 - TrainingLog - INFO - 7001 of 14584 feature vectors prepared for training 455 | 2018-03-06 17:55:27 - TrainingLog - INFO - 8001 of 14584 feature vectors prepared for training 456 | 2018-03-06 17:55:35 - TrainingLog - INFO - 9001 of 14584 feature vectors prepared for training 457 | 2018-03-06 17:55:43 - TrainingLog - INFO - 10001 of 14584 feature vectors prepared for training 458 | 2018-03-06 17:55:50 - TrainingLog - INFO - 11001 of 14584 feature vectors prepared for training 459 | 2018-03-06 17:55:59 - TrainingLog - INFO - 12001 of 14584 feature vectors prepared for training 460 | 2018-03-06 17:56:07 - TrainingLog - INFO - 13001 of 14584 feature vectors prepared for training 461 | 2018-03-06 17:56:16 - TrainingLog - INFO - 14001 of 14584 feature vectors prepared for training 462 | 2018-03-06 17:56:34 - TrainingLog - INFO - Main classifier training finished 463 | 2018-03-06 17:56:35 - TestLog - INFO - 101 samples classified. Accuracy up till now is 0.8316831683168316 464 | 2018-03-06 17:56:37 - TestLog - INFO - 201 samples classified. Accuracy up till now is 0.8407960199004975 465 | 2018-03-06 17:56:38 - TestLog - INFO - 301 samples classified. Accuracy up till now is 0.8239202657807309 466 | 2018-03-06 17:56:39 - TestLog - INFO - 401 samples classified. 
Accuracy up till now is 0.8154613466334164 467 | 2018-03-06 17:56:40 - TestLog - INFO - 501 samples classified. Accuracy up till now is 0.8223552894211577 468 | 2018-03-06 17:56:41 - TestLog - INFO - 601 samples classified. Accuracy up till now is 0.8186356073211315 469 | 2018-03-06 17:56:42 - TestLog - INFO - 701 samples classified. Accuracy up till now is 0.8231098430813124 470 | 2018-03-06 17:56:43 - TestLog - INFO - 801 samples classified. Accuracy up till now is 0.8277153558052435 471 | 2018-03-06 17:56:44 - TestLog - INFO - 901 samples classified. Accuracy up till now is 0.8246392896781354 472 | 2018-03-06 17:56:46 - TestLog - INFO - 1001 samples classified. Accuracy up till now is 0.8191808191808192 473 | 2018-03-06 17:56:47 - TestLog - INFO - 1101 samples classified. Accuracy up till now is 0.821071752951862 474 | 2018-03-06 17:56:48 - TestLog - INFO - 1201 samples classified. Accuracy up till now is 0.8259783513738551 475 | 2018-03-06 17:56:50 - TestLog - INFO - 1301 samples classified. Accuracy up till now is 0.8270561106840891 476 | 2018-03-06 17:56:52 - TestLog - INFO - 1401 samples classified. Accuracy up till now is 0.8251249107780158 477 | 2018-03-06 17:56:53 - TestLog - INFO - 1501 samples classified. Accuracy up till now is 0.8241172551632245 478 | 2018-03-06 17:56:54 - TestLog - INFO - 1601 samples classified. Accuracy up till now is 0.8219862585883823 479 | 2018-03-06 17:56:54 - TestLog - INFO - Total 1618 samples classified with accuracy 0.8207663782447466 480 | 2018-03-06 17:56:54 - TestLog - INFO - AUROC is 0.9136706536351435 481 | 2018-03-06 17:56:54 - TestLog - INFO - Classification report: 482 | precision recall f1-score support 483 | 484 | 0 0.70352 0.72539 0.71429 193 485 | 1 0.72203 0.67834 0.69951 314 486 | 2 0.86744 0.87759 0.87248 1111 487 | 488 | avg / total 0.81967 0.82077 0.82004 1618 489 | 490 | 2018-03-06 17:56:54 - TestLog - INFO - Confusion matrix: 491 | [[140 0 53] 492 | [ 5 213 96] 493 | [ 54 82 975]] 494 | 2018-03-06 17:56:54 - CVLog - INFO - 495 | 496 | 2018-03-06 17:56:54 - CVLog - INFO - Summary (precision, recall, F1, accuracy): 497 | 2018-03-06 17:56:54 - CVLog - INFO - Metrics for round 1: [0.83556175230603469, 0.83837137569401599, 0.83637273252004629, 0.83837137569401599] 498 | 2018-03-06 17:56:54 - CVLog - INFO - Metrics for round 2: [0.81918578559173216, 0.82171499074645282, 0.81999886068535532, 0.82171499074645282] 499 | 2018-03-06 17:56:54 - CVLog - INFO - Metrics for round 3: [0.83297067831425919, 0.8340530536705737, 0.83317571219803721, 0.8340530536705737] 500 | 2018-03-06 17:56:54 - CVLog - INFO - Metrics for round 4: [0.84027998709890628, 0.84330660086366438, 0.84096689610598185, 0.84330660086366438] 501 | 2018-03-06 17:56:54 - CVLog - INFO - Metrics for round 5: [0.84309071306120764, 0.84577421344848858, 0.84375225855879799, 0.84577421344848858] 502 | 2018-03-06 17:56:54 - CVLog - INFO - Metrics for round 6: [0.84308550146894823, 0.84567901234567899, 0.8437282640121796, 0.84567901234567899] 503 | 2018-03-06 17:56:54 - CVLog - INFO - Metrics for round 7: [0.83344679736879934, 0.83518518518518514, 0.83401714933401772, 0.83518518518518514] 504 | 2018-03-06 17:56:54 - CVLog - INFO - Metrics for round 8: [0.83807789187295734, 0.84135802469135801, 0.83787080524391999, 0.84135802469135801] 505 | 2018-03-06 17:56:54 - CVLog - INFO - Metrics for round 9: [0.84433240103943663, 0.84496602841260038, 0.84420746803452851, 0.84496602841260038] 506 | 2018-03-06 17:56:54 - CVLog - INFO - Metrics for round 10: [0.81966677883108718, 
0.82076637824474663, 0.82004408041692833, 0.82076637824474663] 507 | 2018-03-06 17:56:54 - CVLog - INFO - 508 | 509 | 2018-03-06 17:56:54 - CVLog - INFO - Final average metrics: 0.8349698286953368, 0.8371174863302764, 0.8354134227109793, 0.8371174863302764 510 | 511 | Process finished with exit code 0 512 | -------------------------------------------------------------------------------- /TwitterData/README.md: -------------------------------------------------------------------------------- 1 | # Twitter Data 2 | 3 | 16,202 tweets annotated as 0 (racism), 1 (sexism), or 2 (none). 4 | This is a subset of the dataset made available by Waseem and Hovy 5 | in the Proceedings of the NAACL 2016 Student Research Workshop. 6 | 7 | The original dataset can be found here: . 8 | It contains 16,907 tweet IDs along with corresponding annotations. We could only 9 | retrieve 16,202 of the tweets since some of them have been deleted or had their visibility 10 | limited. 11 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pushkarmishra/AuthorProfilingAbuseDetection/6322467b26f53aca7d231c0ab92182879b9375ff/__init__.py -------------------------------------------------------------------------------- /cross_validate.py: -------------------------------------------------------------------------------- 1 | from main_classifier import MainClassifier 2 | from sklearn.model_selection import StratifiedKFold 3 | from test import test 4 | 5 | import coloredlogs 6 | import logging 7 | 8 | 9 | logger = logging.getLogger('CVLog') 10 | coloredlogs.install(logger=logger, level='DEBUG', 11 | fmt='%(asctime)s - %(name)s - %(levelname)s' 12 | ' - %(message)s') 13 | 14 | EMB_MODEL = [ 15 | 'Emb_2018-03-04_12-22-03.453692.h5', 16 | 'Emb_2018-03-04_12-29-57.342629.h5', 17 | 'Emb_2018-03-04_12-38-56.418197.h5', 18 | 'Emb_2018-03-04_12-46-41.840651.h5', 19 | 'Emb_2018-03-04_12-54-29.838667.h5', 20 | 'Emb_2018-03-04_13-02-14.060916.h5', 21 | 'Emb_2018-03-04_13-09-58.910309.h5', 22 | 'Emb_2018-03-04_13-17-44.565754.h5', 23 | 'Emb_2018-03-04_13-25-30.865847.h5', 24 | 'Emb_2018-03-04_13-33-38.104125.h5', 25 | ] 26 | 27 | def run_cv(text_ids, all_texts, categories, CONFIG, folds=10): 28 | logger.info('{}-fold cross validation procedure has begun'.format(folds)) 29 | 30 | k_fold = StratifiedKFold(n_splits=folds, shuffle=True, random_state=7) 31 | metrics = [] 32 | count = 0 33 | for train_idx, test_idx in k_fold.split(all_texts, categories): 34 | count += 1 35 | logger.info('Validation round {} of {} starting' 36 | .format(count, folds)) 37 | 38 | ids_train, X_train, y_train = [], [], [] 39 | for idx in train_idx: 40 | ids_train.append(text_ids[idx]) 41 | X_train.append(all_texts[idx]) 42 | y_train.append(categories[idx]) 43 | 44 | ids_test, X_test, y_test = [], [], [] 45 | for idx in test_idx: 46 | ids_test.append(text_ids[idx]) 47 | X_test.append(all_texts[idx]) 48 | y_test.append(categories[idx]) 49 | 50 | if CONFIG['EMB_MODEL'] is None: 51 | CONFIG['EMB_MODEL'] = EMB_MODEL[count - 1] 52 | else: 53 | CONFIG['EMB_MODEL'] = None 54 | 55 | classifier = MainClassifier(CONFIG) 56 | classifier.train(ids_train, X_train, y_train) 57 | 58 | metrics.append(test(ids_test, X_test, y_test, classifier)) 59 | 60 | # Average metrics 61 | logger.info('\n') 62 | logger.info('Summary (precision, recall, F1, accuracy):') 63 | 64 | prec = rec = f1 = acc = 0.0 65 | for (i, metric) in
enumerate(metrics): 66 | logger.info('Metrics for round {}: {}'.format(i + 1, metric)) 67 | prec += metric[0] 68 | rec += metric[1] 69 | f1 += metric[2] 70 | acc += metric[3] 71 | 72 | logger.info('\n') 73 | logger.info('Final average metrics: {}, {}, {}, {}'.format(prec/folds, 74 | rec/folds, 75 | f1/folds, 76 | acc/folds)) 77 | -------------------------------------------------------------------------------- /featureExtractor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pushkarmishra/AuthorProfilingAbuseDetection/6322467b26f53aca7d231c0ab92182879b9375ff/featureExtractor/__init__.py -------------------------------------------------------------------------------- /featureExtractor/dnn_features.py: -------------------------------------------------------------------------------- 1 | from keras.callbacks import EarlyStopping 2 | from keras.callbacks import ModelCheckpoint 3 | from keras.layers import Dense 4 | from keras.layers import Dropout 5 | from keras.layers import Embedding 6 | from keras.layers import GRU 7 | from keras.layers import Input 8 | from keras.models import load_model 9 | from keras.models import Model 10 | from keras.preprocessing.sequence import pad_sequences 11 | from keras.utils import np_utils 12 | from resources.textual import process_words 13 | from resources.structural import word_tokenizer 14 | from string import punctuation 15 | 16 | import datetime 17 | import io 18 | import logging 19 | import numpy 20 | import os 21 | import pickle 22 | import re 23 | import time 24 | 25 | 26 | class DNNFeatures: 27 | 28 | def __init__(self, CONFIG): 29 | self.EMBED_DIM = CONFIG['EMB_DIM'] 30 | self.EMB_FILE = CONFIG['EMB_FILE'] 31 | self.MIN_DF = CONFIG['EMB_MIN_DF'] 32 | self.MAX_DF = CONFIG['EMB_MAX_DF'] 33 | self.MAX_VOCAB = CONFIG['EMB_MAX_VCB'] 34 | self.WORD_MIN_FREQ = CONFIG['WORD_MIN_FREQ'] 35 | self.EPOCH = CONFIG['DNN_EPOCH'] 36 | self.BATCH_SIZE = CONFIG['DNN_BATCH'] 37 | self.VAL_SPLIT = CONFIG['DNN_VAL_SPLIT'] 38 | self.HIDDEN_UNITS = CONFIG['DNN_HIDDEN_UNITS'] 39 | self.BASE = CONFIG['BASE'] 40 | 41 | self.model = None 42 | self.prediction_model = None 43 | self.vocab = None 44 | self.word_freq = None 45 | 46 | if CONFIG['EMB_MODEL'] is not None: 47 | saved_vocab = CONFIG['EMB_MODEL'].split('.h5')[0] + '.pkl' 48 | self.model = load_model(os.path.join(self.BASE, 'Models', 49 | CONFIG['EMB_MODEL'])) 50 | 51 | with open(os.path.join(self.BASE, 'Models', saved_vocab), 'rb') as vocab_file: 52 | self.vocab, self.word_freq = pickle.load(vocab_file) 53 | 54 | 55 | def tokenize_text(self, texts): 56 | text_tokens = [] 57 | for (i, text) in enumerate(texts): 58 | text = re.sub('[' + punctuation + ']', ' ', text) 59 | text = re.sub('\\b[0-9]+\\b', '', text) 60 | text = process_words(text) 61 | 62 | tokens = word_tokenizer(text) 63 | text_tokens.append(tokens) 64 | 65 | return text_tokens 66 | 67 | 68 | def build_vocab(self, text_tokens): 69 | self.word_freq = {} 70 | for text_token in text_tokens: 71 | for token in set(text_token): 72 | self.word_freq[token] = self.word_freq.get(token, 0) + 1 73 | self.word_freq = [(f, w) for (w, f) in self.word_freq.items()] 74 | self.word_freq.sort(reverse=True) 75 | 76 | token_counts = [] 77 | for (count, token) in self.word_freq: 78 | if self.MAX_DF != -1 and count > self.MAX_DF: 79 | continue 80 | if count < self.MIN_DF: 81 | continue 82 | token_counts.append((count, token)) 83 | 84 | token_counts.sort(reverse=True) 85 | if self.MAX_VOCAB != -1: 86 | 
token_counts = token_counts[:self.MAX_VOCAB] 87 | # NIV: not in vocab token, i.e., out of vocab 88 | token_counts.append((0, 'NIV')) 89 | 90 | self.vocab = {} 91 | for (i, (count, token)) in enumerate(token_counts): 92 | self.vocab[token] = i + 1 93 | 94 | 95 | def transform_texts(self, text_tokens): 96 | transformed = [] 97 | for text_token in text_tokens: 98 | entry = [] 99 | for token in text_token: 100 | entry.append(self.vocab.get(token, self.vocab['NIV'])) 101 | transformed.append(entry) 102 | 103 | return transformed 104 | 105 | 106 | def prepare_model(self, emb_dimension, seq_length, num_categories): 107 | input = Input(shape=(seq_length,), dtype='int32') 108 | embed = Embedding(input_dim=len(self.vocab) + 1, 109 | output_dim=emb_dimension, 110 | input_length=seq_length, 111 | mask_zero=True, trainable=True)(input) 112 | dropout_1 = Dropout(0.25)(embed) 113 | gru_1 = GRU(self.HIDDEN_UNITS, return_sequences=True)(dropout_1) 114 | dropout_2 = Dropout(0.25)(gru_1) 115 | gru_2 = GRU(self.HIDDEN_UNITS)(dropout_2) 116 | dropout_3 = Dropout(0.50)(gru_2) 117 | softmax = Dense(num_categories, activation='softmax')(dropout_3) 118 | 119 | self.model = Model(inputs=input, outputs=softmax) 120 | self.model.compile(optimizer='adam', 121 | loss='categorical_crossentropy', 122 | metrics=['accuracy']) 123 | 124 | 125 | def train(self, texts, classes): 126 | logger = logging.getLogger('TrainingLog') 127 | 128 | tokens = self.tokenize_text(texts) 129 | self.build_vocab(tokens) 130 | logger.info('Vocabulary of size {} built for embeddings' 131 | .format(len(self.vocab))) 132 | 133 | X = self.transform_texts(tokens) 134 | X = pad_sequences(X) 135 | 136 | seq_length = X.shape[1] 137 | class_weights = {} 138 | for clazz in classes: 139 | class_weights[clazz] = class_weights.get(clazz, 0) + 1 140 | for clazz in class_weights: 141 | class_weights[clazz] /= (1.0 * len(classes)) 142 | 143 | y = numpy.array(classes) 144 | y = np_utils.to_categorical(y, len(class_weights)) 145 | 146 | self.prepare_model(self.EMBED_DIM, seq_length, len(class_weights)) 147 | if self.EMB_FILE is not None: 148 | trained_vectors = self.initialise_embeddings( 149 | os.path.join(self.BASE, 'resources', self.EMB_FILE)) 150 | self.model.layers[1].set_weights([trained_vectors]) 151 | 152 | # Train DNN 153 | best_model = 'Emb_best_' + str(time.time()) + '.h5' 154 | checkpoint = ModelCheckpoint(os.path.join(self.BASE, 'Models', best_model), 155 | monitor='val_loss', verbose=1, 156 | save_best_only=True, mode='auto') 157 | earlyStopping = EarlyStopping(monitor='val_loss', 158 | patience=3, verbose=0, 159 | mode='auto') 160 | callbacks = [checkpoint, earlyStopping] 161 | 162 | self.model.fit(X, y, epochs=self.EPOCH, 163 | class_weight=class_weights, 164 | batch_size=self.BATCH_SIZE, 165 | validation_split=self.VAL_SPLIT, 166 | callbacks=callbacks, verbose=2) 167 | self.model.load_weights(os.path.join(self.BASE, 'Models', best_model)) 168 | 169 | # Save model 170 | logger.info('DNN training finished') 171 | cur_time = str(datetime.datetime.now()).replace(':', '-') \ 172 | .replace(' ', '_') 173 | model_name = 'Emb_' + cur_time + '.h5' 174 | self.model.save(os.path.join(self.BASE, 'Models', model_name)) 175 | vocab_name = 'Emb_' + cur_time + '.pkl' 176 | with open(os.path.join(self.BASE, 'Models', vocab_name), 'wb') as vocab_file: 177 | pickle.dump([self.vocab, self.word_freq], vocab_file) 178 | 179 | return model_name 180 | 181 | 182 | def sum_word_embeddings(self, text): 183 | tokens = self.tokenize_text([text]) 184 | X = 
self.transform_texts(tokens)[0] 185 | 186 | embed = numpy.zeros(self.EMBED_DIM) 187 | embeddings = self.model.layers[1].get_weights()[0] 188 | 189 | for (i, word) in enumerate(X): 190 | embed += embeddings[word] 191 | embed = np_utils.normalize(embed)[0] 192 | 193 | return embed 194 | 195 | 196 | def last_hidden_state(self, text): 197 | if self.prediction_model is None: 198 | self.prediction_model = Model(inputs=self.model.input, 199 | outputs=self.model.layers[-3].output) 200 | 201 | tokens = self.tokenize_text([text]) 202 | indexes = self.transform_texts(tokens)[0] 203 | seq_length = self.model.layers[1].input_length 204 | 205 | while len(indexes) < seq_length: 206 | indexes.append(0) 207 | indexes = indexes[:seq_length] 208 | 209 | X = numpy.array([indexes]) 210 | return self.prediction_model.predict(X)[0] 211 | 212 | 213 | def predict(self, text): 214 | if self.prediction_model is None: 215 | self.prediction_model = Model(inputs=self.model.input, 216 | outputs=self.model.output) 217 | 218 | tokens = self.tokenize_text([text]) 219 | indexes = self.transform_texts(tokens)[0] 220 | seq_length = self.model.layers[1].input_length 221 | 222 | while len(indexes) < seq_length: 223 | indexes.append(0) 224 | indexes = indexes[:seq_length] 225 | 226 | X = numpy.array([indexes]) 227 | return self.prediction_model.predict(X) 228 | 229 | 230 | def initialise_embeddings(self, filename): 231 | logger = logging.getLogger('TrainingLog') 232 | weights = numpy.random.uniform(size=(len(self.vocab) + 1, 233 | self.EMBED_DIM), 234 | low=-0.05, high=0.05) 235 | 236 | with io.open(filename, 'r', encoding='utf-8') as vectors: 237 | for vector in vectors: 238 | tokens = vector.split(' ') 239 | word = tokens[0] 240 | embed = [float(val) for val in tokens[1:]] 241 | 242 | if word not in self.vocab: 243 | continue 244 | weights[self.vocab[word]] = numpy.array(embed) 245 | logger.info('{} vectors initialised'.format(len(self.vocab))) 246 | 247 | return weights 248 | -------------------------------------------------------------------------------- /featureExtractor/feature_extractor.py: -------------------------------------------------------------------------------- 1 | from featureExtractor.dnn_features import DNNFeatures 2 | from featureExtractor.graph_features import GraphFeatures 3 | from featureExtractor.ngram_features import NGramFeatures 4 | 5 | 6 | class FeatureExtractor: 7 | 8 | def __init__(self, CONFIG): 9 | self.METHOD = CONFIG['METHOD'] 10 | 11 | if 'hs' in self.METHOD or 'ws' in self.METHOD: 12 | self.dnn = DNNFeatures(CONFIG) 13 | if 'n' in self.METHOD: 14 | self.ngram = NGramFeatures(CONFIG) 15 | if 'a' in self.METHOD: 16 | self.graph = GraphFeatures(CONFIG) 17 | 18 | 19 | def extract_features(self, text, text_id=None): 20 | features = [] 21 | 22 | if 'hs' in self.METHOD or 'ws' in self.METHOD: 23 | self.get_dnn_features(features, text) 24 | if 'n' in self.METHOD: 25 | self.get_ngram_features(features, text) 26 | if 'a' in self.METHOD: 27 | self.get_graph_features(features, text_id) 28 | 29 | return features 30 | 31 | 32 | def get_dnn_features(self, features, text): 33 | if 'ws' in self.METHOD: 34 | features += self.dnn.sum_word_embeddings(text).tolist() 35 | else: 36 | features += self.dnn.last_hidden_state(text).tolist() 37 | 38 | 39 | def get_ngram_features(self, features, text): 40 | features += self.ngram.extract(text).tolist() 41 | 42 | 43 | def get_graph_features(self, features, text_id): 44 | features += self.graph.extract(text_id).tolist() 45 | 
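For orientation, here is a minimal usage sketch of the `FeatureExtractor` class above; it is not part of the repository. It assumes a previously saved n-gram model in _Models/_ and the author files _resources/authors.txt_ and _resources/authors.emb_ already exist; the method string `'na'`, the model filename, and the tweet ID are illustrative placeholders.

```python
# Minimal usage sketch of FeatureExtractor (illustrative only; not part of the repo).
# Assumes Models/ holds a previously saved TfidfVectorizer and resources/ holds
# authors.txt and authors.emb; the file name and tweet ID below are placeholders.
from featureExtractor.feature_extractor import FeatureExtractor

CONFIG = {
    'METHOD': 'na',        # 'n' -> char n-gram features, 'a' -> author (graph) embeddings
    'BASE': '.',           # repository root containing Models/ and resources/
    'NGRAM_MODEL': 'NGramModel_2018-03-04_12-00-00.000000.pkl',  # placeholder file name
    'TF_USE_IDF': True,
    'TF_NRANGE': (1, 4),
    'TF_SUBLIN': True,
    'TF_MAX_FEAT': 5000,
}

extractor = FeatureExtractor(CONFIG)
# One flat feature vector per tweet: tf-idf n-gram values followed by the author's
# node2vec embedding (all zeros if the tweet's author is unknown).
features = extractor.extract_features('example tweet text', text_id='572342978255048705')
print(len(features))
```

Note that the extractor only inspects the method string for 'n', 'a', 'ws' and 'hs'; the 'l' flag is consumed later by MainClassifier, which switches from LightGBM to logistic regression when it is present.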
-------------------------------------------------------------------------------- /featureExtractor/graph_features.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import os 3 | 4 | 5 | class GraphFeatures: 6 | 7 | def __init__(self, CONFIG): 8 | self.BASE = CONFIG['BASE'] 9 | self.EMBED_DIM = 200 10 | 11 | self.authors = {} 12 | with open(os.path.join(self.BASE, 'resources', 'authors.txt')) as authors: 13 | for line in authors.readlines(): 14 | text_id, author_id = line.strip().split() 15 | self.authors[text_id] = author_id 16 | 17 | self.embeddings = {} 18 | with open(os.path.join(self.BASE, 'resources', 'authors.emb')) as embeds: 19 | for line in embeds.readlines(): 20 | tokens = line.strip().split() 21 | author_id = tokens[0] 22 | embed = [float(x) for x in tokens[1:]] 23 | self.embeddings[author_id] = numpy.array(embed) 24 | 25 | 26 | def extract(self, text_id): 27 | author_id = self.authors.get(text_id, None) 28 | if author_id is None: 29 | return numpy.zeros(self.EMBED_DIM) 30 | 31 | return self.embeddings.get(author_id, numpy.zeros(self.EMBED_DIM)) 32 | -------------------------------------------------------------------------------- /featureExtractor/ngram_features.py: -------------------------------------------------------------------------------- 1 | from sklearn.externals import joblib 2 | from sklearn.feature_extraction.text import TfidfVectorizer 3 | 4 | import datetime 5 | import logging 6 | import os 7 | 8 | 9 | class NGramFeatures: 10 | 11 | def __init__(self, CONFIG): 12 | self.USE_IDF = CONFIG['TF_USE_IDF'] 13 | self.NRANGE = CONFIG['TF_NRANGE'] 14 | self.SUBLIN = CONFIG['TF_SUBLIN'] 15 | self.MAX_FEAT = CONFIG['TF_MAX_FEAT'] 16 | self.BASE = CONFIG['BASE'] 17 | 18 | self.model = None 19 | if CONFIG['NGRAM_MODEL'] is not None: 20 | self.model = joblib.load(os.path.join(self.BASE, 'Models', 21 | CONFIG['NGRAM_MODEL'])) 22 | 23 | 24 | def extract(self, text): 25 | return self.model.transform([text]).toarray()[0] 26 | 27 | 28 | def train(self, all_texts): 29 | self.model = TfidfVectorizer(analyzer='char', 30 | ngram_range=self.NRANGE, 31 | max_features=self.MAX_FEAT, 32 | use_idf=self.USE_IDF, 33 | sublinear_tf=self.SUBLIN) 34 | self.model.fit(all_texts) 35 | 36 | # Save N-gram vocabulary 37 | cur_time = str(datetime.datetime.now()).replace(':', '-') \ 38 | .replace(' ', '_') 39 | model_name = 'NGramModel_' + cur_time + '.pkl' 40 | joblib.dump(self.model, os.path.join(self.BASE, 'Models', model_name)) 41 | 42 | logger = logging.getLogger('TrainingLog') 43 | logger.info('N-gram vectorization finished with vocabulary' 44 | ' size {}'.format(len(self.model.vocabulary_))) 45 | 46 | return model_name 47 | -------------------------------------------------------------------------------- /grid_search.py: -------------------------------------------------------------------------------- 1 | from featureExtractor.feature_extractor import FeatureExtractor 2 | from sklearn.model_selection import GridSearchCV 3 | from sklearn.linear_model import LogisticRegression 4 | from lightgbm import LGBMClassifier 5 | 6 | import coloredlogs 7 | import logging 8 | import numpy 9 | 10 | 11 | logger = logging.getLogger('GridSearchLog') 12 | coloredlogs.install(logger=logger, level='DEBUG', 13 | fmt='%(asctime)s - %(name)s - %(levelname)s' 14 | ' - %(message)s') 15 | 16 | 17 | def gbc_details(): 18 | classifier = LGBMClassifier(silent=False) 19 | parameters = {'num_leaves': [15, 31, 63, 127], 20 | 'min_child_weight': [1, 5, 7, 10, 20], 21 | 
'min_child_samples': [1, 5, 10, 15, 20], 22 | 'learning_rate': [0.01, 0.05, 0.08, 0.1, 0.25], 23 | 'n_estimators': [80, 100, 125, 150, 200]} 24 | return (classifier, parameters) 25 | 26 | 27 | def lr_details(): 28 | classifier = LogisticRegression(verbose=True, max_iter=1000) 29 | parameters = {'C': [0.01, 0.1, 0.25, 0.5, 0.75, 30 | 1.0, 10.0, 25.0, 50.0, 100.0]} 31 | return (classifier, parameters) 32 | 33 | 34 | def perform_grid_search(texts_ids, all_texts, classes, args, CONFIG): 35 | estimator = args[0] 36 | size = CONFIG['GRID_SEARCH_SIZE'] 37 | CONFIG['EMB_MODEL'] = args[1] 38 | CONFIG['NGRAM_MODEL'] = args[2] 39 | 40 | feature_extractor = FeatureExtractor(CONFIG) 41 | (classifier, parameters) = eval(estimator + '_details' + '()') 42 | 43 | data = [] 44 | for (i, text) in enumerate(all_texts[:size]): 45 | features = feature_extractor.extract_features(text, texts_ids[i]) 46 | data.append(features) 47 | 48 | if i % 1000 == 0 and i > 0: 49 | logger.info('{} of {} feature vectors prepared ' 50 | 'for grid search'.format(i + 1, size)) 51 | data = numpy.array(data) 52 | categories = numpy.array(classes[:size]) 53 | 54 | clf = GridSearchCV(classifier, parameters, cv=5) 55 | clf.fit(data, categories) 56 | 57 | logger.info('Grid search results:\n{}'.format(clf.cv_results_)) 58 | logger.info('Best param set: {}'.format(clf.best_params_)) 59 | -------------------------------------------------------------------------------- /main_classifier.py: -------------------------------------------------------------------------------- 1 | from featureExtractor.dnn_features import DNNFeatures 2 | from featureExtractor.feature_extractor import FeatureExtractor 3 | from featureExtractor.ngram_features import NGramFeatures 4 | from sklearn.linear_model import LogisticRegression 5 | from sklearn.externals import joblib 6 | 7 | import coloredlogs 8 | import copy 9 | import datetime 10 | import lightgbm 11 | import logging 12 | import numpy 13 | import os 14 | 15 | 16 | logger = logging.getLogger('TrainingLog') 17 | coloredlogs.install(logger=logger, level='DEBUG', 18 | fmt='%(asctime)s - %(name)s - %(levelname)s' 19 | ' - %(message)s') 20 | 21 | 22 | class MainClassifier: 23 | 24 | def __init__(self, CONFIG): 25 | self.CONFIG = copy.deepcopy(CONFIG) 26 | self.BASE = CONFIG['BASE'] 27 | 28 | self. 
featureExtract = None 29 | self.classifier = None 30 | if CONFIG['CLASSIFIER'] is not None: 31 | self.classifier = joblib.load(os.path.join(self.BASE, 'Models', 32 | CONFIG['CLASSIFIER'])) 33 | 34 | 35 | def train(self, text_ids, all_texts, classes): 36 | logger = logging.getLogger('TrainingLog') 37 | logger.info('Initiating training of main classifier') 38 | 39 | # Prepare feature extractor 40 | if self.CONFIG['EMB_MODEL'] is None and \ 41 | ('ws' in self.CONFIG['METHOD'] or 'hs' in self.CONFIG['METHOD']): 42 | self.CONFIG['EMB_MODEL'] = \ 43 | DNNFeatures(self.CONFIG).train(all_texts, classes) 44 | 45 | if self.CONFIG['NGRAM_MODEL'] is None and 'n' in self.CONFIG['METHOD']: 46 | self.CONFIG['NGRAM_MODEL'] = \ 47 | NGramFeatures(self.CONFIG).train(all_texts) 48 | 49 | self.featureExtract = FeatureExtractor(self.CONFIG) 50 | logger.info('Feature extractor ready') 51 | 52 | # Prepare data 53 | data = [] 54 | for (i, text) in enumerate(all_texts): 55 | features = self.featureExtract.extract_features(text, text_ids[i]) 56 | data.append(features) 57 | 58 | if i % 1000 == 0 and i > 0: 59 | logger.info('{} of {} feature vectors prepared ' 60 | 'for training'.format(i + 1, len(all_texts))) 61 | train_X, train_Y = numpy.array(data), numpy.array(classes) 62 | 63 | # Train classifier 64 | train_data = lightgbm.Dataset(train_X, train_Y) 65 | params = { 66 | 'learning_rate': self.CONFIG['GB_LEARN_RATE'], 67 | 'num_leaves': self.CONFIG['GB_LEAVES'], 68 | 'min_child_weight': self.CONFIG['GB_LEAF_WEIGHT'], 69 | 'min_child_samples': self.CONFIG['GB_LEAF_SAMPLES'], 70 | 'objective': 'multiclass', 71 | 'num_class': len(set(classes)), 72 | 'metric': {'multi_logloss'}, 73 | } 74 | if 'l' not in self.CONFIG['METHOD']: 75 | self.classifier = lightgbm.train(params, train_data, 76 | self.CONFIG['GB_ITERATIONS']) 77 | else: 78 | self.classifier = LogisticRegression(C=self.CONFIG['LR_C']) 79 | self.classifier.fit(train_X, train_Y) 80 | 81 | # Save classifier 82 | cur_time = str(datetime.datetime.now()).replace(':', '-') \ 83 | .replace(' ', '_') 84 | self.CONFIG['CLASSIFIER'] = 'Classifier_' + cur_time + '.pkl' 85 | joblib.dump(self.classifier, os.path.join(self.BASE, 'Models', 86 | self.CONFIG['CLASSIFIER'])) 87 | 88 | logger = logging.getLogger('TrainingLog') 89 | logger.info('Main classifier training finished') 90 | 91 | return self.CONFIG['CLASSIFIER'] 92 | 93 | 94 | def classify(self, text_id, text, prob=False): 95 | # Prepare classifier 96 | if self.classifier is None: 97 | logger = logging.getLogger('TrainingLog') 98 | models = os.listdir(os.path.join(self.BASE, 'Models')) 99 | models.sort(reverse=True) 100 | 101 | for model in models: 102 | if model.startswith('Classifier') and model.endswith('.pkl'): 103 | self.CONFIG['CLASSIFIER'] = model 104 | break 105 | 106 | logger.info('Using Classifier Model {}' 107 | .format(self.CONFIG['CLASSIFIER'])) 108 | self.classifier = joblib.load(os.path.join(self.BASE, 'Models', 109 | self.CONFIG['CLASSIFIER'])) 110 | 111 | # Prepare feature extractor 112 | if self.featureExtract is None: 113 | logger = logging.getLogger('TrainingLog') 114 | models = os.listdir(os.path.join(self.BASE, 'Models')) 115 | models.sort(reverse=True) 116 | 117 | if self.CONFIG['EMB_MODEL'] is None: 118 | for model in models: 119 | if model.startswith('Emb_') and model.endswith('.h5'): 120 | self.CONFIG['EMB_MODEL'] = model 121 | break 122 | 123 | if self.CONFIG['NGRAM_MODEL'] is None: 124 | for model in models: 125 | if model.startswith('NGram') and model.endswith('.pkl'): 126 | 
self.CONFIG['NGRAM_MODEL'] = model 127 | break 128 | 129 | logger.info('Using Embedding Model {} and N-gram Model {}' 130 | .format(self.CONFIG['EMB_MODEL'], 131 | self.CONFIG['NGRAM_MODEL'])) 132 | 133 | self.featureExtract = FeatureExtractor(self.CONFIG) 134 | logger.info('Feature extractor ready') 135 | 136 | # Classify 137 | features = self.featureExtract.extract_features(text, text_id) 138 | features = numpy.array([features]) 139 | if isinstance(self.classifier, LogisticRegression): 140 | prediction = self.classifier.predict_proba(features)[0].tolist() 141 | else: prediction = self.classifier.predict(features)[0].tolist() 142 | 143 | if prob: 144 | return (prediction.index(max(prediction)), prediction) 145 | return prediction.index(max(prediction)) 146 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto==2.48.0 2 | bz2file==0.98 3 | certifi==2017.7.27.1 4 | chardet==3.0.4 5 | coloredlogs==7.3 6 | h5py==2.7.1 7 | humanfriendly==4.4.1 8 | idna==2.6 9 | Keras==2.0.8 10 | lightgbm==2.0.10 11 | nltk==3.2.5 12 | numpy==1.13.3 13 | requests==2.18.4 14 | scikit-learn==0.19.1 15 | scipy==1.0.0 16 | six==1.11.0 17 | sklearn==0.0 18 | smart-open==1.5.3 19 | Theano==0.9.0 20 | urllib3==1.22 21 | -------------------------------------------------------------------------------- /resources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pushkarmishra/AuthorProfilingAbuseDetection/6322467b26f53aca7d231c0ab92182879b9375ff/resources/__init__.py -------------------------------------------------------------------------------- /resources/node2vec/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .DS_Store 3 | target 4 | bin 5 | build 6 | .gradle 7 | *.iml 8 | *.ipr 9 | *.iws 10 | *.log 11 | .classpath 12 | .project 13 | .settings 14 | .idea -------------------------------------------------------------------------------- /resources/node2vec/LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Aditya Grover 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /resources/node2vec/README.md: -------------------------------------------------------------------------------- 1 | # node2vec 2 | 3 | This repository provides a reference implementation of *node2vec* as described in the paper:
4 | > node2vec: Scalable Feature Learning for Networks.
5 | > Aditya Grover and Jure Leskovec.
6 | > Knowledge Discovery and Data Mining, 2016.
7 | > 8 | 9 | The *node2vec* algorithm learns continuous representations for nodes in any (un)directed, (un)weighted graph. Please check the [project page](https://snap.stanford.edu/node2vec/) for more details. 10 | 11 | ### Basic Usage 12 | 13 | #### Example 14 | To run *node2vec* on Zachary's karate club network, execute the following command from the project home directory:
15 | ``python src/main.py --input graph/karate.edgelist --output emb/karate.emd`` 16 | 17 | #### Options 18 | You can check out the other options available to use with *node2vec* using:
19 | ``python src/main.py --help`` 20 | 21 | #### Input 22 | The supported input format is an edgelist: 23 | 24 | node1_id_int node2_id_int 25 | 26 | The graph is assumed to be undirected and unweighted by default. These options can be changed by setting the appropriate flags. 27 | 28 | #### Output 29 | The output file has *n+1* lines for a graph with *n* vertices. 30 | The first line has the following format: 31 | 32 | num_of_nodes dim_of_representation 33 | 34 | The next *n* lines are as follows: 35 | 36 | node_id dim1 dim2 ... dimd 37 | 38 | where dim1, ... , dimd is the *d*-dimensional representation learned by *node2vec*. 39 | 40 | ### Citing 41 | If you find *node2vec* useful for your research, please consider citing the following paper: 42 | 43 | @inproceedings{node2vec-kdd2016, 44 | author = {Grover, Aditya and Leskovec, Jure}, 45 | title = {node2vec: Scalable Feature Learning for Networks}, 46 | booktitle = {Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, 47 | year = {2016} 48 | } 49 | 50 | 51 | ### Miscellaneous 52 | 53 | Please send any questions you might have about the code and/or the algorithm to . 54 | 55 | *Note:* This is only a reference implementation of the *node2vec* algorithm and could benefit from several performance enhancement schemes, some of which are discussed in the paper. 56 | -------------------------------------------------------------------------------- /resources/node2vec/requirements.txt: -------------------------------------------------------------------------------- 1 | networkx==1.11 2 | numpy==1.11.2 3 | gensim==0.13.3 4 | -------------------------------------------------------------------------------- /resources/node2vec/src/main.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Reference implementation of node2vec. 3 | 4 | Author: Aditya Grover 5 | 6 | For more details, refer to the paper: 7 | node2vec: Scalable Feature Learning for Networks 8 | Aditya Grover and Jure Leskovec 9 | Knowledge Discovery and Data Mining (KDD), 2016 10 | ''' 11 | 12 | import argparse 13 | import numpy as np 14 | import networkx as nx 15 | import node2vec 16 | from gensim.models import Word2Vec 17 | 18 | def parse_args(): 19 | ''' 20 | Parses the node2vec arguments. 21 | ''' 22 | parser = argparse.ArgumentParser(description="Run node2vec.") 23 | 24 | parser.add_argument('--input', nargs='?', default='graph/karate.edgelist', 25 | help='Input graph path') 26 | 27 | parser.add_argument('--output', nargs='?', default='emb/karate.emb', 28 | help='Embeddings path') 29 | 30 | parser.add_argument('--dimensions', type=int, default=128, 31 | help='Number of dimensions. Default is 128.') 32 | 33 | parser.add_argument('--walk-length', type=int, default=80, 34 | help='Length of walk per source. Default is 80.') 35 | 36 | parser.add_argument('--num-walks', type=int, default=10, 37 | help='Number of walks per source. Default is 10.') 38 | 39 | parser.add_argument('--window-size', type=int, default=10, 40 | help='Context size for optimization. Default is 10.') 41 | 42 | parser.add_argument('--iter', default=1, type=int, 43 | help='Number of epochs in SGD') 44 | 45 | parser.add_argument('--workers', type=int, default=8, 46 | help='Number of parallel workers. Default is 8.') 47 | 48 | parser.add_argument('--p', type=float, default=1, 49 | help='Return hyperparameter. Default is 1.') 50 | 51 | parser.add_argument('--q', type=float, default=1, 52 | help='Inout hyperparameter. 
Default is 1.') 53 | 54 | parser.add_argument('--weighted', dest='weighted', action='store_true', 55 | help='Boolean specifying (un)weighted. Default is unweighted.') 56 | parser.add_argument('--unweighted', dest='unweighted', action='store_false') 57 | parser.set_defaults(weighted=False) 58 | 59 | parser.add_argument('--directed', dest='directed', action='store_true', 60 | help='Graph is (un)directed. Default is undirected.') 61 | parser.add_argument('--undirected', dest='undirected', action='store_false') 62 | parser.set_defaults(directed=False) 63 | 64 | return parser.parse_args() 65 | 66 | def read_graph(): 67 | ''' 68 | Reads the input network in networkx. 69 | ''' 70 | if args.weighted: 71 | G = nx.read_edgelist(args.input, nodetype=int, data=(('weight',float),), create_using=nx.DiGraph()) 72 | else: 73 | G = nx.read_edgelist(args.input, nodetype=int, create_using=nx.DiGraph()) 74 | for edge in G.edges(): 75 | G[edge[0]][edge[1]]['weight'] = 1 76 | 77 | if not args.directed: 78 | G = G.to_undirected() 79 | 80 | return G 81 | 82 | def learn_embeddings(walks): 83 | ''' 84 | Learn embeddings by optimizing the Skipgram objective using SGD. 85 | ''' 86 | walks = [list(map(str, walk)) for walk in walks] 87 | model = Word2Vec(walks, size=args.dimensions, window=args.window_size, min_count=0, sg=1, workers=args.workers, iter=args.iter) 88 | model.wv.save_word2vec_format(args.output) 89 | 90 | return 91 | 92 | def main(args): 93 | ''' 94 | Pipeline for representational learning for all nodes in a graph. 95 | ''' 96 | nx_G = read_graph() 97 | G = node2vec.Graph(nx_G, args.directed, args.p, args.q) 98 | G.preprocess_transition_probs() 99 | walks = G.simulate_walks(args.num_walks, args.walk_length) 100 | learn_embeddings(walks) 101 | 102 | if __name__ == "__main__": 103 | args = parse_args() 104 | main(args) 105 | -------------------------------------------------------------------------------- /resources/node2vec/src/node2vec.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | import random 4 | 5 | 6 | class Graph(): 7 | def __init__(self, nx_G, is_directed, p, q): 8 | self.G = nx_G 9 | self.is_directed = is_directed 10 | self.p = p 11 | self.q = q 12 | 13 | def node2vec_walk(self, walk_length, start_node): 14 | ''' 15 | Simulate a random walk starting from start node. 16 | ''' 17 | G = self.G 18 | alias_nodes = self.alias_nodes 19 | alias_edges = self.alias_edges 20 | 21 | walk = [start_node] 22 | 23 | while len(walk) < walk_length: 24 | cur = walk[-1] 25 | cur_nbrs = sorted(G.neighbors(cur)) 26 | if len(cur_nbrs) > 0: 27 | if len(walk) == 1: 28 | walk.append(cur_nbrs[alias_draw(alias_nodes[cur][0], alias_nodes[cur][1])]) 29 | else: 30 | prev = walk[-2] 31 | next = cur_nbrs[alias_draw(alias_edges[(prev, cur)][0], 32 | alias_edges[(prev, cur)][1])] 33 | walk.append(next) 34 | else: 35 | break 36 | 37 | return walk 38 | 39 | def simulate_walks(self, num_walks, walk_length): 40 | ''' 41 | Repeatedly simulate random walks from each node. 42 | ''' 43 | G = self.G 44 | walks = [] 45 | nodes = list(G.nodes()) 46 | print('Walk iteration:') 47 | for walk_iter in range(num_walks): 48 | print(str(walk_iter+1), '/', str(num_walks)) 49 | random.shuffle(nodes) 50 | for node in nodes: 51 | walks.append(self.node2vec_walk(walk_length=walk_length, start_node=node)) 52 | 53 | return walks 54 | 55 | def get_alias_edge(self, src, dst): 56 | ''' 57 | Get the alias edge setup lists for a given edge. 
58 | ''' 59 | G = self.G 60 | p = self.p 61 | q = self.q 62 | 63 | unnormalized_probs = [] 64 | for dst_nbr in sorted(G.neighbors(dst)): 65 | if dst_nbr == src: 66 | unnormalized_probs.append(G[dst][dst_nbr]['weight']/p) 67 | elif G.has_edge(dst_nbr, src): 68 | unnormalized_probs.append(G[dst][dst_nbr]['weight']) 69 | else: 70 | unnormalized_probs.append(G[dst][dst_nbr]['weight']/q) 71 | norm_const = sum(unnormalized_probs) 72 | normalized_probs = [float(u_prob)/norm_const for u_prob in unnormalized_probs] 73 | 74 | return alias_setup(normalized_probs) 75 | 76 | def preprocess_transition_probs(self): 77 | ''' 78 | Preprocessing of transition probabilities for guiding the random walks. 79 | ''' 80 | G = self.G 81 | is_directed = self.is_directed 82 | 83 | alias_nodes = {} 84 | for node in G.nodes(): 85 | unnormalized_probs = [G[node][nbr]['weight'] for nbr in sorted(G.neighbors(node))] 86 | norm_const = sum(unnormalized_probs) 87 | normalized_probs = [float(u_prob)/norm_const for u_prob in unnormalized_probs] 88 | alias_nodes[node] = alias_setup(normalized_probs) 89 | 90 | alias_edges = {} 91 | triads = {} 92 | 93 | if is_directed: 94 | for edge in G.edges(): 95 | alias_edges[edge] = self.get_alias_edge(edge[0], edge[1]) 96 | else: 97 | for edge in G.edges(): 98 | alias_edges[edge] = self.get_alias_edge(edge[0], edge[1]) 99 | alias_edges[(edge[1], edge[0])] = self.get_alias_edge(edge[1], edge[0]) 100 | 101 | self.alias_nodes = alias_nodes 102 | self.alias_edges = alias_edges 103 | 104 | return 105 | 106 | 107 | def alias_setup(probs): 108 | ''' 109 | Compute utility lists for non-uniform sampling from discrete distributions. 110 | Refer to https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/ 111 | for details 112 | ''' 113 | K = len(probs) 114 | q = np.zeros(K) 115 | J = np.zeros(K, dtype=np.int) 116 | 117 | smaller = [] 118 | larger = [] 119 | for kk, prob in enumerate(probs): 120 | q[kk] = K*prob 121 | if q[kk] < 1.0: 122 | smaller.append(kk) 123 | else: 124 | larger.append(kk) 125 | 126 | while len(smaller) > 0 and len(larger) > 0: 127 | small = smaller.pop() 128 | large = larger.pop() 129 | 130 | J[small] = large 131 | q[large] = q[large] + q[small] - 1.0 132 | if q[large] < 1.0: 133 | smaller.append(large) 134 | else: 135 | larger.append(large) 136 | 137 | return J, q 138 | 139 | def alias_draw(J, q): 140 | ''' 141 | Draw sample from a non-uniform discrete distribution using alias sampling. 
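        A bin kk is chosen uniformly at random from the K bins; with
        probability q[kk] the sample is kk itself, otherwise its alias J[kk]
        is returned. Combined with the O(K) setup in alias_setup, each draw
        takes O(1) time.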
142 | ''' 143 | K = len(J) 144 | 145 | kk = int(np.floor(np.random.rand()*K)) 146 | if np.random.rand() < q[kk]: 147 | return kk 148 | else: 149 | return J[kk] -------------------------------------------------------------------------------- /resources/stopwords.txt: -------------------------------------------------------------------------------- 1 | a 2 | about 3 | above 4 | accordingly 5 | across 6 | after 7 | again 8 | all 9 | almost 10 | alone 11 | along 12 | already 13 | also 14 | although 15 | altogether 16 | always 17 | am 18 | among 19 | amongst 20 | an 21 | and 22 | any 23 | another 24 | anybody 25 | anyone 26 | anything 27 | anyway 28 | anyways 29 | anywhere 30 | are 31 | around 32 | as 33 | ask 34 | at 35 | away 36 | b 37 | back 38 | be 39 | because 40 | become 41 | been 42 | before 43 | began 44 | begin 45 | begun 46 | behind 47 | below 48 | between 49 | both 50 | but 51 | by 52 | c 53 | can 54 | cannot 55 | cant 56 | can't 57 | certain 58 | certainly 59 | clear 60 | clearly 61 | could 62 | couldnt 63 | couldn't 64 | d 65 | despite 66 | did 67 | didnt 68 | didn't 69 | do 70 | does 71 | doesnt 72 | doesn't 73 | done 74 | dont 75 | don't 76 | down 77 | due 78 | e 79 | each 80 | earlier 81 | either 82 | end 83 | enough 84 | especially 85 | even 86 | evenly 87 | ever 88 | every 89 | everybody 90 | everyone 91 | everything 92 | everywhere 93 | example 94 | except 95 | f 96 | final 97 | find 98 | first 99 | for 100 | from 101 | full 102 | fully 103 | further 104 | furthermore 105 | g 106 | gave 107 | generate 108 | get 109 | given 110 | go 111 | got 112 | h 113 | ha 114 | had 115 | hadnt 116 | hadn't 117 | hasnt 118 | hasn't 119 | have 120 | havent 121 | haven't 122 | he 123 | hence 124 | her 125 | here 126 | herself 127 | hi 128 | him 129 | himself 130 | how 131 | however 132 | i 133 | if 134 | import 135 | in 136 | into 137 | is 138 | isnt 139 | isn't 140 | it 141 | its 142 | itself 143 | j 144 | just 145 | k 146 | keep 147 | l 148 | last 149 | later 150 | least 151 | less 152 | let 153 | ll 154 | m 155 | many 156 | may 157 | me 158 | might 159 | more 160 | most 161 | mostly 162 | mr 163 | much 164 | must 165 | mustnt 166 | mustn't 167 | my 168 | myself 169 | n 170 | necessary 171 | neither 172 | next 173 | no 174 | nobody 175 | nothing 176 | now 177 | nowhere 178 | number 179 | o 180 | of 181 | off 182 | often 183 | on 184 | once 185 | only 186 | onto 187 | open 188 | or 189 | other 190 | otherwise 191 | ought 192 | our 193 | ourself 194 | ourselves 195 | out 196 | over 197 | own 198 | p 199 | per 200 | perhaps 201 | possible 202 | possibly 203 | q 204 | r 205 | rather 206 | re 207 | really 208 | right 209 | rt 210 | s 211 | said 212 | same 213 | seem 214 | shall 215 | shant 216 | shan't 217 | she 218 | should 219 | shouldnt 220 | shouldn't 221 | since 222 | so 223 | some 224 | somebody 225 | someone 226 | something 227 | somethings 228 | somewhere 229 | still 230 | such 231 | sure 232 | t 233 | taken 234 | than 235 | that 236 | the 237 | their 238 | them 239 | themselve 240 | themselves 241 | then 242 | there 243 | therefore 244 | these 245 | they 246 | thing 247 | think 248 | this 249 | those 250 | though 251 | through 252 | thus 253 | to 254 | today 255 | together 256 | too 257 | took 258 | toward 259 | towards 260 | turn 261 | u 262 | under 263 | until 264 | up 265 | upon 266 | us 267 | v 268 | ve 269 | very 270 | w 271 | want 272 | was 273 | wasnt 274 | wasn't 275 | way 276 | we 277 | well 278 | went 279 | were 280 | what 281 | whatsoever 282 | when 283 | where 284 | whereas 
285 | wherever 286 | whether 287 | why 288 | which 289 | while 290 | who 291 | whole 292 | whom 293 | whose 294 | will 295 | with 296 | within 297 | without 298 | would 299 | wouldnt 300 | wouldn't 301 | wont 302 | won't 303 | x 304 | y 305 | ya 306 | year 307 | yes 308 | yet 309 | you 310 | your 311 | yours 312 | yourself 313 | yourselves 314 | z 315 | -------------------------------------------------------------------------------- /resources/structural.py: -------------------------------------------------------------------------------- 1 | from nltk import word_tokenize 2 | from nltk import sent_tokenize 3 | from nltk import PorterStemmer 4 | 5 | import re 6 | 7 | 8 | def word_stem(token): 9 | stem = PorterStemmer() 10 | return stem.stem(token) 11 | 12 | 13 | def word_tokenizer(text): 14 | return word_tokenize(text) 15 | 16 | 17 | def remove_non_words(all_words): 18 | only_words = [] 19 | pattern = re.compile('[a-zA-Z]+') 20 | 21 | for word in all_words: 22 | if pattern.match(word) != None: 23 | only_words.append(word) 24 | return only_words 25 | 26 | 27 | def sentence_tokenizer(text): 28 | return sent_tokenize(text) 29 | -------------------------------------------------------------------------------- /resources/textual.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | 5 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 6 | stop_words = set(open(os.path.join(BASE_DIR, 'stopwords.txt'), 'r').read().split()) 7 | 8 | 9 | def clean_tweet(text): 10 | space_pattern = '\\s+' 11 | giant_url_regex = ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|' 12 | '[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+') 13 | mention_regex = '@[\w\-]+' 14 | rt_regex = '\\b[Rr][Tt]\\b' 15 | 16 | cleaned_tweet = re.sub(giant_url_regex, '_URL_', text) 17 | cleaned_tweet = re.sub(mention_regex, '_MTN_', cleaned_tweet) 18 | cleaned_tweet = re.sub(rt_regex, '', cleaned_tweet) 19 | cleaned_tweet = re.sub(space_pattern, ' ', cleaned_tweet) 20 | 21 | return cleaned_tweet 22 | 23 | 24 | def clean_detox(text): 25 | space_pattern = '\\s+' 26 | giant_url_regex = ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|' 27 | '[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+') 28 | line_token_pattern = 'NEWLINE_TOKEN' 29 | 30 | cleaned_text = re.sub(giant_url_regex, '_URL_', text) 31 | cleaned_text = re.sub(line_token_pattern, ' ', cleaned_text) 32 | cleaned_text = re.sub(space_pattern, ' ', cleaned_text) 33 | 34 | return cleaned_text 35 | 36 | 37 | def process_words(text): 38 | space_pattern = '\\s+' 39 | text = re.sub(space_pattern, ' ', text) 40 | 41 | words = text.split(' ') 42 | text = [] 43 | for word in words: 44 | word = word.lower() 45 | if word not in stop_words: 46 | text.append(word) 47 | 48 | return ' '.join(text) 49 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | from main_classifier import MainClassifier 2 | from sklearn.metrics import accuracy_score 3 | from sklearn.metrics import classification_report 4 | from sklearn.metrics import confusion_matrix 5 | from sklearn.metrics import precision_recall_fscore_support 6 | from sklearn.metrics import roc_auc_score 7 | 8 | import coloredlogs 9 | import logging 10 | import numpy 11 | 12 | 13 | logger = logging.getLogger('TestLog') 14 | coloredlogs.install(logger=logger, level='DEBUG', 15 | fmt='%(asctime)s - %(name)s - %(levelname)s' 16 | ' - %(message)s') 17 | 18 | 19 | def one_hot(y): 20 | m = y.shape[0] 
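    # y is either a 1-D array of integer class labels or a 2-D matrix of
    # scores/indicators; both cases are converted to an (m, n) one-hot matrix,
    # e.g. one_hot(numpy.array([0, 2, 1])) -> [[1,0,0], [0,0,1], [0,1,0]].
    # compute_roc_auc below uses this form of the true labels for roc_auc_score.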
21 | 22 | if len(y.shape) == 1: 23 | n = len(set(y.ravel())) 24 | idxs = y.astype(int) 25 | else: 26 | idxs = y.argmax(axis=1) 27 | n = y.shape[1] 28 | 29 | y_oh = numpy.zeros((m, n)) 30 | y_oh[list(range(m)), idxs] = 1 31 | 32 | return y_oh 33 | 34 | 35 | def compute_roc_auc(classes, probs): 36 | classes_arr = one_hot(numpy.array(classes)) 37 | prob_arr = numpy.array(probs) 38 | 39 | return roc_auc_score(classes_arr, prob_arr, average='macro') 40 | 41 | 42 | def test(text_ids, texts, classes, classifier): 43 | classes_pred = [] 44 | probs = [] 45 | count_match = 0 46 | for (i, text) in enumerate(texts): 47 | (clazz, prob_score) = classifier.classify(text_ids[i], text, prob=True) 48 | probs.append(prob_score) 49 | classes_pred.append(clazz) 50 | if clazz == classes[i]: 51 | count_match += 1 52 | 53 | if i > 0 and i % 100 == 0: 54 | accuracy = (1.0 * count_match) / (i + 1) 55 | logger.info('{} samples classified. Accuracy up till ' 56 | 'now is {}'.format(i + 1, accuracy)) 57 | 58 | # Calculate metrics 59 | accuracy = (1.0 * count_match) / len(classes) 60 | report = classification_report(classes, classes_pred, digits=5) 61 | conf_matrix = confusion_matrix(classes, classes_pred) 62 | roc_auc = compute_roc_auc(classes, probs) 63 | 64 | # Log results 65 | logger.info('Total {} samples classified with accuracy ' 66 | '{}'.format(len(classes), accuracy)) 67 | logger.info('AUROC is {}'.format(roc_auc)) 68 | logger.info('Classification report:\n{}'.format(report)) 69 | logger.info('Confusion matrix:\n{}'.format(conf_matrix)) 70 | 71 | metrics = precision_recall_fscore_support(classes, classes_pred, 72 | average='weighted') 73 | metrics = [metrics[0], metrics[1], metrics[2], 74 | accuracy_score(classes, classes_pred)] 75 | 76 | return metrics 77 | -------------------------------------------------------------------------------- /twitter_access.py: -------------------------------------------------------------------------------- 1 | # from matplotlib import pyplot 2 | from networkx.drawing.nx_agraph import write_dot 3 | from tweepy import OAuthHandler 4 | 5 | import coloredlogs 6 | import csv 7 | import json 8 | import logging 9 | import networkx 10 | import os 11 | import time 12 | import tweepy 13 | 14 | 15 | logger = logging.getLogger('TwitterAccess') 16 | coloredlogs.install(logger=logger, level='DEBUG', 17 | fmt='%(asctime)s - %(name)s - %(levelname)s' 18 | ' - %(message)s') 19 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 20 | SLEEP_TIME = 1000 21 | 22 | 23 | class TwitterAccess: 24 | 25 | def __init__(self): 26 | self.api = self.load_api() 27 | 28 | 29 | def load_api(self): 30 | consumer_key = '' 31 | consumer_secret = '' 32 | access_token = '' 33 | access_secret = '' 34 | auth = OAuthHandler(consumer_key, consumer_secret) 35 | auth.set_access_token(access_token, access_secret) 36 | 37 | # Load the twitter API via Tweepy 38 | return tweepy.API(auth) 39 | 40 | 41 | # Status Methods 42 | def tweet_text_from_tweet_id(self, idx): 43 | tweet = self.api.get_status(idx) 44 | return tweet.text 45 | 46 | 47 | # User Methods 48 | def get_followers(self, screen_name): 49 | user_ids = [] 50 | for page in tweepy.Cursor(self.api.followers_ids, 51 | screen_name=screen_name).pages(): 52 | user_ids.extend(page) 53 | time.sleep(60) 54 | 55 | return user_ids 56 | 57 | 58 | def user_from_tweet_id(self, idx): 59 | status = self.api.get_status(idx) 60 | return (status.user.id_str, status.user.screen_name) 61 | 62 | 63 | def get_follow_info(self, x, y): 64 | return self.api.show_friendship(source_id=x, 
target_id=y) 65 | 66 | 67 | def username_from_user_id(self, idx): 68 | user = self.api.get_user(user_id=idx) 69 | return user.screen_name 70 | 71 | 72 | def timeline_from_username(self, screen_name): 73 | timeline = self.api.user_timeline(screen_name=screen_name) 74 | return timeline 75 | 76 | 77 | class Graph: 78 | 79 | def __init__(self, tweet_ids=None, nodes_file=None, edges_file=None): 80 | self.TWEET_IDS = tweet_ids 81 | self.ACCESSOR = TwitterAccess() 82 | self.GRAPH = networkx.Graph() 83 | self.NODES = {} 84 | self.EDGES = set() 85 | 86 | if nodes_file is not None: 87 | with open(os.path.join(BASE_DIR, 'resources', nodes_file)) as nodes: 88 | self.NODES = json.load(nodes) 89 | else: 90 | self.prepare_nodes() 91 | 92 | if edges_file is not None: 93 | with open(os.path.join(BASE_DIR, 'resources', edges_file)) as edges: 94 | for line in edges.readlines(): 95 | x, y = line.strip().split(',') 96 | self.EDGES.add((x, y)) 97 | 98 | 99 | def prepare_nodes(self): 100 | if self.TWEET_IDS is None: 101 | return 102 | 103 | def fill_node_data(idx): 104 | user = self.ACCESSOR.user_from_tweet_id(idx) 105 | self.NODES[user[0]] = user[1] 106 | 107 | for idx in self.TWEET_IDS: 108 | try: 109 | fill_node_data(idx) 110 | except tweepy.error.RateLimitError: 111 | try: 112 | logger.info('Hit rate limit; waiting and retrying') 113 | time.sleep(SLEEP_TIME) 114 | fill_node_data(idx) 115 | except: 116 | break 117 | except Exception as e: 118 | logger.error('Problem with tweet id {}: {}'.format(idx, e)) 119 | 120 | with open(os.path.join(BASE_DIR, 'resources', 121 | 'authors.json'), 'w') as nodes_file: 122 | json.dump(self.NODES, nodes_file) 123 | 124 | 125 | def add_follower_edges(self): 126 | edges_file = open(os.path.join(BASE_DIR, 'resources', 127 | 'author_edges.txt'), 'a') 128 | def fill_edge_data(x): 129 | followers = self.ACCESSOR.get_followers(self.NODES[x]) 130 | followers = set([str(f) for f in followers]) 131 | 132 | for y in self.NODES: 133 | if (x, y) in self.EDGES: 134 | continue 135 | 136 | if y in followers: 137 | self.EDGES.add((y, x)) 138 | print('{},{}'.format(y, x), file=edges_file) 139 | edges_file.flush() 140 | logger.info('Followers of user {} added'.format(self.NODES[x])) 141 | 142 | for x in self.NODES.keys(): 143 | try: 144 | fill_edge_data(x) 145 | except tweepy.error.RateLimitError: 146 | try: 147 | logger.info('Hit rate limit; waiting and retrying') 148 | time.sleep(SLEEP_TIME) 149 | fill_edge_data(x) 150 | except: 151 | break 152 | except Exception as e: 153 | logger.error('Problem with user {}: {}'.format(self.NODES[x], e)) 154 | edges_file.close() 155 | 156 | 157 | def form_graph(self): 158 | for node_id in self.NODES.keys(): 159 | self.GRAPH.add_node(self.NODES[node_id]) 160 | for edge in self.EDGES: 161 | self.GRAPH.add_edge(self.NODES[edge[0]], self.NODES[edge[1]]) 162 | 163 | 164 | def print_graph(self): 165 | write_dot(self.GRAPH, 'graph.dot') 166 | # networkx.draw(self.GRAPH) 167 | # pyplot.savefig('graph.png') 168 | 169 | 170 | def main(): 171 | f = open(os.path.join(BASE_DIR, 'TwitterData', 'twitter_data_waseem_hovy.csv'), 172 | 'r', encoding='utf-8') 173 | csv_read = csv.reader(f) 174 | 175 | count = 0 176 | tweet_ids = [] 177 | for line in csv_read: 178 | count += 1 179 | if count == 1: 180 | continue 181 | 182 | idx, text, cat = line 183 | tweet_ids.append(idx) 184 | 185 | graph = Graph(tweet_ids=None, nodes_file='authors.json', 186 | edges_file='author_edges.txt') 187 | graph.add_follower_edges() 188 | graph.form_graph() 189 | # graph.print_graph() 190 | 191 | 192 
| if __name__ == "__main__": 193 | main() 194 | -------------------------------------------------------------------------------- /twitter_model.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import os 3 | import random 4 | os.environ['PYTHONHASHSEED'] = '0' 5 | numpy.random.seed(57) 6 | random.seed(75) 7 | os.environ['KERAS_BACKEND'] = 'theano' 8 | 9 | if os.environ['KERAS_BACKEND'] == 'tensorflow': 10 | import tensorflow 11 | tensorflow.set_random_seed(35) 12 | 13 | from cross_validate import run_cv 14 | from grid_search import perform_grid_search 15 | from main_classifier import MainClassifier 16 | from resources.textual import clean_tweet 17 | from test import test 18 | 19 | import argparse 20 | import csv 21 | 22 | 23 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 24 | 25 | CONFIG = { 26 | 'EMB_FILE': 'glove.twitter.27B.200d.txt', 27 | 'EMB_MODEL': None, 28 | 'EMB_DIM': 200, 29 | 'EMB_MIN_DF': 1, 30 | 'EMB_MAX_DF': -1, 31 | 'EMB_MAX_VCB': 50000, 32 | 'WORD_MIN_FREQ': 2, 33 | 'DNN_EPOCH': 50, 34 | 'DNN_BATCH': 64, 35 | 'DNN_VAL_SPLIT': 0.04, 36 | 'DNN_HIDDEN_UNITS': 128, 37 | 'GB_LEAVES': 31, 38 | 'GB_LEAF_WEIGHT': 7, 39 | 'GB_LEAF_SAMPLES': 10, 40 | 'GB_ITERATIONS': 125, 41 | 'GB_LEARN_RATE': 0.08, 42 | 'LR_C': 25, 43 | 'NGRAM_MODEL': None, 44 | 'TF_NRANGE': (1, 4), 45 | 'TF_SUBLIN': False, 46 | 'TF_MAX_FEAT': 10000, 47 | 'TF_USE_IDF': False, 48 | 'CLASSIFIER': None, 49 | 'METHOD': None, 50 | 'GRID_SEARCH_SIZE': 25000, 51 | 'BASE': BASE_DIR, 52 | } 53 | 54 | 55 | def read_data(data_file): 56 | read_f = open(data_file, 'r', encoding='utf-8') 57 | csv_read = csv.reader(read_f) 58 | 59 | texts = [] 60 | classes = [] 61 | ids = [] 62 | count = 0 63 | 64 | for line in csv_read: 65 | count += 1 66 | if count == 1: 67 | continue 68 | 69 | id, text, clazz = line 70 | classes.append(int(clazz)) 71 | texts.append(text) 72 | ids.append(id) 73 | 74 | return (ids, texts, classes) 75 | 76 | 77 | def check_classifier(): 78 | classifier = MainClassifier(CONFIG) 79 | classifier.classify(None, '') 80 | while(True): 81 | text = input() 82 | category = classifier.classify(None, text) 83 | print(category) 84 | 85 | 86 | def parse_arguments(): 87 | parser = argparse.ArgumentParser(description='Experimentation with' 88 | ' Twitter datasets') 89 | 90 | parser.add_argument('-c', '--cross_val', action='store', type=int, 91 | dest='cross_val_size', 92 | help='Part of dataset to be used for cross validation') 93 | 94 | parser.add_argument('-g', '--grid_search', action='store', type=str, 95 | nargs=3, dest='grid_params', 96 | metavar=('ESTIMATOR: gbc/svm', 'FEATURES', 'FEATURES'), 97 | help='Model and features to be used for grid search') 98 | 99 | parser.add_argument('-t', '--train_test', action='store', type=int, 100 | dest='train_test_split', default=10000, 101 | help='Split point of data for training and testing') 102 | 103 | parser.add_argument('-m', '--method', action='store', type=str, 104 | dest='method', default='lna', 105 | help='Method to run') 106 | 107 | parser.add_argument('-ft', '--full-train', action='store_true', 108 | dest='full_train', 109 | help='Presence of flag will ensure pre-trained ' 110 | 'models are not used') 111 | 112 | return parser.parse_args() 113 | 114 | 115 | if __name__ == "__main__": 116 | args = parse_arguments() 117 | 118 | data_file = os.path.join(BASE_DIR, 'TwitterData', 'twitter_data_waseem_hovy.csv') 119 | (ids, texts, classes) = read_data(data_file) 120 | texts = [clean_tweet(t) for t in texts] 121 | 122 
| CONFIG['METHOD'] = args.method 123 | CONFIG['EMB_MODEL'] = '' if args.full_train else None 124 | if args.cross_val_size is not None: 125 | run_cv(ids[:args.cross_val_size], 126 | texts[:args.cross_val_size], 127 | classes[:args.cross_val_size], 128 | CONFIG) 129 | 130 | elif args.grid_params is not None: 131 | perform_grid_search(ids, texts, classes, args.grid_params, CONFIG) 132 | 133 | else: 134 | classifier = MainClassifier(CONFIG) 135 | 136 | split = args.train_test_split 137 | classifier.train(ids[:split], texts[:split], classes[:split]) 138 | test(ids[split:], texts[split:], classes[split:], classifier) 139 | --------------------------------------------------------------------------------
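For convenience, a minimal end-to-end invocation of the node2vec reference implementation above. The file names `toy.edgelist`/`toy.emb`, the example edges, and the hyperparameter values are illustrative only, and the run assumes the pinned versions in `resources/node2vec/requirements.txt`:

```
# toy.edgelist (undirected, unweighted; one "node1_id_int node2_id_int" edge per line):
#   1 2
#   1 3
#   2 3
#   3 4
python resources/node2vec/src/main.py --input toy.edgelist --output toy.emb \
    --dimensions 64 --walk-length 40 --num-walks 5 --p 1 --q 0.5
# toy.emb begins with the "num_of_nodes dim_of_representation" header (here "4 64"),
# followed by one "node_id dim1 ... dim64" line per node.
```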
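A small sanity check of the alias-sampling utilities, sketched under the assumptions that `resources/node2vec/src` is on the Python path and that the pinned NumPy is used (newer NumPy releases removed `np.int`, which `alias_setup` relies on): repeated calls to `alias_draw` should reproduce the distribution passed to `alias_setup`.

```
from collections import Counter

from node2vec import alias_setup, alias_draw  # resources/node2vec/src/node2vec.py

probs = [0.5, 0.3, 0.2]
J, q = alias_setup(probs)                                  # O(K) preprocessing
counts = Counter(alias_draw(J, q) for _ in range(100000))  # O(1) per draw
print({k: round(v / 100000.0, 3) for k, v in sorted(counts.items())})
# expected to be close to {0: 0.5, 1: 0.3, 2: 0.2}
```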