├── LICENSE
├── README.md
├── data
    ├── small_samples.csv
    └── train.csv.zip
├── data_helper.py
├── predict.py
├── predicted_results_1516404693
    └── predictions_all.csv
├── text_cnn_rnn.py
├── train.py
├── trained_results_1516404693
    ├── best_model.ckpt.data-00000-of-00001
    ├── best_model.ckpt.index
    ├── best_model.ckpt.meta
    ├── checkpoint
    ├── embeddings.pickle
    ├── labels.json
    ├── trained_parameters.json
    └── words_index.json
└── training_config.json


/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ### Project: Classify Kaggle San Francisco Crime Description
 2 | 
 3 | ### Highlights:
 4 |   - This is a **multi-class text classification (sentence classification)** problem.
 5 |   - The goal of this project is to **classify Kaggle San Francisco Crime Description into 39 classes**.
 6 |   - This model was built with **CNN, RNN (LSTM and GRU) and Word Embeddings** on **TensorFlow**.
 7 | 
 8 | ### Data: [Kaggle San Francisco Crime](https://www.kaggle.com/c/sf-crime/data)
 9 |   - Input: **Descript**
10 |   - Output: **Category**
11 |   - Examples:
12 | 
13 |     Descript   | Category
14 |     -----------|-----------
15 |     GRAND THEFT FROM LOCKED AUTO|LARCENY/THEFT
16 |     POSSESSION OF NARCOTICS PARAPHERNALIA|DRUG/NARCOTIC
17 |     AIDED CASE, MENTAL DISTURBED|NON-CRIMINAL
18 |     AGGRAVATED ASSAULT WITH BODILY FORCE|ASSAULT
19 |     ATTEMPTED ROBBERY ON THE STREET WITH A GUN|ROBBERY
20 |     
21 | ### Train:
22 |   - Command: python3 train.py train_data.file train_parameters.json
23 |   - Example: ```python3 train.py ./data/train.csv.zip ./training_config.json```
24 | 
25 | ### Predict:
26 |   - Command: python3 predict.py ./trained_results_dir/ new_data.csv
27 |   - Example: ```python3 predict.py ./trained_results_1516404693/ ./data/small_samples.csv```
28 |   
29 | ### Reference:
30 |  - [Implementing a CNN for Text Classification in TensorFlow](http://www.wildml.com/2015/12/implementing-a-cnn-for-text-classification-in-tensorflow/)
31 | 


--------------------------------------------------------------------------------
/data/train.csv.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiegzhan/multi-class-text-classification-cnn-rnn/5db803b7c156655ead270cb9788148dc8c7f6afd/data/train.csv.zip


--------------------------------------------------------------------------------
/data_helper.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import re
  3 | import sys
  4 | import json
  5 | import pickle
  6 | import logging
  7 | import itertools
  8 | import numpy as np
  9 | import pandas as pd
 10 | import gensim as gs
 11 | from pprint import pprint
 12 | from collections import Counter
 13 | from tensorflow.contrib import learn
 14 | 
# Lower the root logger threshold so INFO-level messages from this module are emitted.
logging.getLogger().setLevel(logging.INFO)
 16 | 
 17 | def clean_str(s):
 18 | 	s = re.sub(r"[^A-Za-z0-9:(),!?\'\`]", " ", s)
 19 | 	s = re.sub(r" : ", ":", s)
 20 | 	s = re.sub(r"\'s", " \'s", s)
 21 | 	s = re.sub(r"\'ve", " \'ve", s)
 22 | 	s = re.sub(r"n\'t", " n\'t", s)
 23 | 	s = re.sub(r"\'re", " \'re", s)
 24 | 	s = re.sub(r"\'d", " \'d", s)
 25 | 	s = re.sub(r"\'ll", " \'ll", s)
 26 | 	s = re.sub(r",", " , ", s)
 27 | 	s = re.sub(r"!", " ! ", s)
 28 | 	s = re.sub(r"\(", " \( ", s)
 29 | 	s = re.sub(r"\)", " \) ", s)
 30 | 	s = re.sub(r"\?", " \? ", s)
 31 | 	s = re.sub(r"\s{2,}", " ", s)
 32 | 	return s.strip().lower()
 33 | 
 34 | def load_embeddings(vocabulary):
 35 | 	word_embeddings = {}
 36 | 	for word in vocabulary:
 37 | 		word_embeddings[word] = np.random.uniform(-0.25, 0.25, 300)
 38 | 	return word_embeddings
 39 | 
 40 | def pad_sentences(sentences, padding_word="<PAD/>", forced_sequence_length=None):
 41 | 	"""Pad setences during training or prediction"""
 42 | 	if forced_sequence_length is None: # Train
 43 | 		sequence_length = max(len(x) for x in sentences)
 44 | 	else: # Prediction
 45 | 		logging.critical('This is prediction, reading the trained sequence length')
 46 | 		sequence_length = forced_sequence_length
 47 | 	logging.critical('The maximum length is {}'.format(sequence_length))
 48 | 
 49 | 	padded_sentences = []
 50 | 	for i in range(len(sentences)):
 51 | 		sentence = sentences[i]
 52 | 		num_padding = sequence_length - len(sentence)
 53 | 
 54 | 		if num_padding < 0: # Prediction: cut off the sentence if it is longer than the sequence length
 55 | 			logging.info('This sentence has to be cut off because it is longer than trained sequence length')
 56 | 			padded_sentence = sentence[0:sequence_length]
 57 | 		else:
 58 | 			padded_sentence = sentence + [padding_word] * num_padding
 59 | 		padded_sentences.append(padded_sentence)
 60 | 	return padded_sentences
 61 | 
 62 | def build_vocab(sentences):
 63 | 	word_counts = Counter(itertools.chain(*sentences))
 64 | 	vocabulary_inv = [word[0] for word in word_counts.most_common()]
 65 | 	vocabulary = {word: index for index, word in enumerate(vocabulary_inv)}
 66 | 	return vocabulary, vocabulary_inv
 67 | 
 68 | def batch_iter(data, batch_size, num_epochs, shuffle=True):
 69 | 	data = np.array(data)
 70 | 	data_size = len(data)
 71 | 	num_batches_per_epoch = int(data_size / batch_size) + 1
 72 | 
 73 | 	for epoch in range(num_epochs):
 74 | 		if shuffle:
 75 | 			shuffle_indices = np.random.permutation(np.arange(data_size))
 76 | 			shuffled_data = data[shuffle_indices]
 77 | 		else:
 78 | 			shuffled_data = data
 79 | 
 80 | 		for batch_num in range(num_batches_per_epoch):
 81 | 			start_index = batch_num * batch_size
 82 | 			end_index = min((batch_num + 1) * batch_size, data_size)
 83 | 			yield shuffled_data[start_index:end_index]
 84 | 
 85 | def load_data(filename):
 86 | 	df = pd.read_csv(filename, compression='zip')
 87 | 	selected = ['Category', 'Descript']
 88 | 	non_selected = list(set(df.columns) - set(selected))
 89 | 
 90 | 	df = df.drop(non_selected, axis=1)
 91 | 	df = df.dropna(axis=0, how='any', subset=selected)
 92 | 	df = df.reindex(np.random.permutation(df.index))
 93 | 
 94 | 	labels = sorted(list(set(df[selected[0]].tolist())))
 95 | 	num_labels = len(labels)
 96 | 	one_hot = np.zeros((num_labels, num_labels), int)
 97 | 	np.fill_diagonal(one_hot, 1)
 98 | 	label_dict = dict(zip(labels, one_hot))
 99 | 
100 | 	x_raw= df[selected[1]].apply(lambda x: clean_str(x).split(' ')).tolist()
101 | 	y_raw = df[selected[0]].apply(lambda y: label_dict[y]).tolist()
102 | 
103 | 	x_raw = pad_sentences(x_raw)
104 | 	vocabulary, vocabulary_inv = build_vocab(x_raw)
105 | 
106 | 	x = np.array([[vocabulary[word] for word in sentence] for sentence in x_raw])
107 | 	y = np.array(y_raw)
108 | 	return x, y, vocabulary, vocabulary_inv, df, labels
109 | 
if __name__ == "__main__":
	# Run as a script: load the training archive once; the result is discarded.
	load_data('./data/train.csv.zip')
113 | 


--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import json
  4 | import shutil
  5 | import pickle
  6 | import logging
  7 | import data_helper
  8 | import numpy as np
  9 | import pandas as pd
 10 | import tensorflow as tf
 11 | from text_cnn_rnn import TextCNNRNN
 12 | 
# Lower the root logger threshold so INFO-level messages are emitted during prediction.
logging.getLogger().setLevel(logging.INFO)
 14 | 
 15 | def load_trained_params(trained_dir):
 16 | 	params = json.loads(open(trained_dir + 'trained_parameters.json').read())
 17 | 	words_index = json.loads(open(trained_dir + 'words_index.json').read())
 18 | 	labels = json.loads(open(trained_dir + 'labels.json').read())
 19 | 
 20 | 	with open(trained_dir + 'embeddings.pickle', 'rb') as input_file:
 21 | 		fetched_embedding = pickle.load(input_file)
 22 | 	embedding_mat = np.array(fetched_embedding, dtype = np.float32)
 23 | 	return params, words_index, labels, embedding_mat
 24 | 
 25 | def load_test_data(test_file, labels):
 26 | 	df = pd.read_csv(test_file, sep='|')
 27 | 	select = ['Descript']
 28 | 
 29 | 	df = df.dropna(axis=0, how='any', subset=select)
 30 | 	test_examples = df[select[0]].apply(lambda x: data_helper.clean_str(x).split(' ')).tolist()
 31 | 
 32 | 	num_labels = len(labels)
 33 | 	one_hot = np.zeros((num_labels, num_labels), int)
 34 | 	np.fill_diagonal(one_hot, 1)
 35 | 	label_dict = dict(zip(labels, one_hot))
 36 | 
 37 | 	y_ = None
 38 | 	if 'Category' in df.columns:
 39 | 		select.append('Category')
 40 | 		y_ = df[select[1]].apply(lambda x: label_dict[x]).tolist()
 41 | 
 42 | 	not_select = list(set(df.columns) - set(select))
 43 | 	df = df.drop(not_select, axis=1)
 44 | 	return test_examples, y_, df
 45 | 
 46 | def map_word_to_index(examples, words_index):
 47 | 	x_ = []
 48 | 	for example in examples:
 49 | 		temp = []
 50 | 		for word in example:
 51 | 			if word in words_index:
 52 | 				temp.append(words_index[word])
 53 | 			else:
 54 | 				temp.append(0)
 55 | 		x_.append(temp)
 56 | 	return x_
 57 | 
 58 | def predict_unseen_data():
 59 | 	trained_dir = sys.argv[1]
 60 | 	if not trained_dir.endswith('/'):
 61 | 		trained_dir += '/'
 62 | 	test_file = sys.argv[2]
 63 | 
 64 | 	params, words_index, labels, embedding_mat = load_trained_params(trained_dir)
 65 | 	x_, y_, df = load_test_data(test_file, labels)
 66 | 	x_ = data_helper.pad_sentences(x_, forced_sequence_length=params['sequence_length'])
 67 | 	x_ = map_word_to_index(x_, words_index)
 68 | 
 69 | 	x_test, y_test = np.asarray(x_), None
 70 | 	if y_ is not None:
 71 | 		y_test = np.asarray(y_)
 72 | 
 73 | 	timestamp = trained_dir.split('/')[-2].split('_')[-1]
 74 | 	predicted_dir = './predicted_results_' + timestamp + '/'
 75 | 	if os.path.exists(predicted_dir):
 76 | 		shutil.rmtree(predicted_dir)
 77 | 	os.makedirs(predicted_dir)
 78 | 
 79 | 	with tf.Graph().as_default():
 80 | 		session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
 81 | 		sess = tf.Session(config=session_conf)
 82 | 		with sess.as_default():
 83 | 			cnn_rnn = TextCNNRNN(
 84 | 				embedding_mat = embedding_mat,
 85 | 				non_static = params['non_static'],
 86 | 				hidden_unit = params['hidden_unit'],
 87 | 				sequence_length = len(x_test[0]),
 88 | 				max_pool_size = params['max_pool_size'],
 89 | 				filter_sizes = map(int, params['filter_sizes'].split(",")),
 90 | 				num_filters = params['num_filters'],
 91 | 				num_classes = len(labels),
 92 | 				embedding_size = params['embedding_dim'],
 93 | 				l2_reg_lambda = params['l2_reg_lambda'])
 94 | 
 95 | 			def real_len(batches):
 96 | 				return [np.ceil(np.argmin(batch + [0]) * 1.0 / params['max_pool_size']) for batch in batches]
 97 | 
 98 | 			def predict_step(x_batch):
 99 | 				feed_dict = {
100 | 					cnn_rnn.input_x: x_batch,
101 | 					cnn_rnn.dropout_keep_prob: 1.0,
102 | 					cnn_rnn.batch_size: len(x_batch),
103 | 					cnn_rnn.pad: np.zeros([len(x_batch), 1, params['embedding_dim'], 1]),
104 | 					cnn_rnn.real_len: real_len(x_batch),
105 | 				}
106 | 				predictions = sess.run([cnn_rnn.predictions], feed_dict)
107 | 				return predictions
108 | 
109 | 			checkpoint_file = trained_dir + 'best_model.ckpt'
110 | 			saver = tf.train.Saver(tf.all_variables())
111 | 			saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
112 | 			saver.restore(sess, checkpoint_file)
113 | 			logging.critical('{} has been loaded'.format(checkpoint_file))
114 | 
115 | 			batches = data_helper.batch_iter(list(x_test), params['batch_size'], 1, shuffle=False)
116 | 
117 | 			predictions, predict_labels = [], []
118 | 			for x_batch in batches:
119 | 				batch_predictions = predict_step(x_batch)[0]
120 | 				for batch_prediction in batch_predictions:
121 | 					predictions.append(batch_prediction)
122 | 					predict_labels.append(labels[batch_prediction])
123 | 
124 | 			# Save the predictions back to file
125 | 			df['NEW_PREDICTED'] = predict_labels
126 | 			columns = sorted(df.columns, reverse=True)
127 | 			df.to_csv(predicted_dir + 'predictions_all.csv', index=False, columns=columns, sep='|')
128 | 
129 | 			if y_test is not None:
130 | 				y_test = np.array(np.argmax(y_test, axis=1))
131 | 				accuracy = sum(np.array(predictions) == y_test) / float(len(y_test))
132 | 				logging.critical('The prediction accuracy is: {}'.format(accuracy))
133 | 
134 | 			logging.critical('Prediction is complete, all files have been saved: {}'.format(predicted_dir))
135 | 
if __name__ == '__main__':
	# Usage:   python3 predict.py <trained_results_dir> <test_file>
	# Example: python3 predict.py ./trained_results_1516404693/ ./data/small_samples.csv
	predict_unseen_data()
139 | 


--------------------------------------------------------------------------------
/text_cnn_rnn.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | 
 4 | class TextCNNRNN(object):
 5 | 	def __init__(self, embedding_mat, non_static, hidden_unit, sequence_length, max_pool_size,
 6 | 		num_classes, embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
 7 | 
 8 | 		self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name='input_x')
 9 | 		self.input_y = tf.placeholder(tf.float32, [None, num_classes], name='input_y')
10 | 		self.dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
11 | 		self.batch_size = tf.placeholder(tf.int32, [])
12 | 		self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1], name='pad')
13 | 		self.real_len = tf.placeholder(tf.int32, [None], name='real_len')
14 | 
15 | 		l2_loss = tf.constant(0.0)
16 | 
17 | 		with tf.device('/cpu:0'), tf.name_scope('embedding'):
18 | 			if not non_static:
19 | 				W = tf.constant(embedding_mat, name='W')
20 | 			else:
21 | 				W = tf.Variable(embedding_mat, name='W')
22 | 			self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
23 | 			emb = tf.expand_dims(self.embedded_chars, -1)
24 | 
25 | 		pooled_concat = []
26 | 		reduced = np.int32(np.ceil((sequence_length) * 1.0 / max_pool_size))
27 | 		
28 | 		for i, filter_size in enumerate(filter_sizes):
29 | 			with tf.name_scope('conv-maxpool-%s' % filter_size):
30 | 
31 | 				# Zero paddings so that the convolution output have dimension batch x sequence_length x emb_size x channel
32 | 				num_prio = (filter_size-1) // 2
33 | 				num_post = (filter_size-1) - num_prio
34 | 				pad_prio = tf.concat([self.pad] * num_prio,1)
35 | 				pad_post = tf.concat([self.pad] * num_post,1)
36 | 				emb_pad = tf.concat([pad_prio, emb, pad_post],1)
37 | 
38 | 				filter_shape = [filter_size, embedding_size, 1, num_filters]
39 | 				W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name='W')
40 | 				b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name='b')
41 | 				conv = tf.nn.conv2d(emb_pad, W, strides=[1, 1, 1, 1], padding='VALID', name='conv')
42 | 
43 | 				h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
44 | 
45 | 				# Maxpooling over the outputs
46 | 				pooled = tf.nn.max_pool(h, ksize=[1, max_pool_size, 1, 1], strides=[1, max_pool_size, 1, 1], padding='SAME', name='pool')
47 | 				pooled = tf.reshape(pooled, [-1, reduced, num_filters])
48 | 				pooled_concat.append(pooled)
49 | 
50 | 		pooled_concat = tf.concat(pooled_concat,2)
51 | 		pooled_concat = tf.nn.dropout(pooled_concat, self.dropout_keep_prob)
52 | 
53 | 		# lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=hidden_unit)
54 | 
55 | 		#lstm_cell = tf.nn.rnn_cell.GRUCell(num_units=hidden_unit)
56 | 		lstm_cell = tf.contrib.rnn.GRUCell(num_units=hidden_unit)
57 | 
58 | 		#lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=self.dropout_keep_prob)
59 | 		lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=self.dropout_keep_prob)
60 | 		
61 | 
62 | 		self._initial_state = lstm_cell.zero_state(self.batch_size, tf.float32)
63 | 		#inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, reduced, pooled_concat)]
64 | 		inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(pooled_concat,num_or_size_splits=int(reduced),axis=1)]
65 | 		#outputs, state = tf.nn.rnn(lstm_cell, inputs, initial_state=self._initial_state, sequence_length=self.real_len)
66 | 		outputs, state = tf.contrib.rnn.static_rnn(lstm_cell, inputs, initial_state=self._initial_state, sequence_length=self.real_len)
67 | 
68 | 		# Collect the appropriate last words into variable output (dimension = batch x embedding_size)
69 | 		output = outputs[0]
70 | 		with tf.variable_scope('Output'):
71 | 			tf.get_variable_scope().reuse_variables()
72 | 			one = tf.ones([1, hidden_unit], tf.float32)
73 | 			for i in range(1,len(outputs)):
74 | 				ind = self.real_len < (i+1)
75 | 				ind = tf.to_float(ind)
76 | 				ind = tf.expand_dims(ind, -1)
77 | 				mat = tf.matmul(ind, one)
78 | 				output = tf.add(tf.multiply(output, mat),tf.multiply(outputs[i], 1.0 - mat))
79 | 
80 | 		with tf.name_scope('output'):
81 | 			self.W = tf.Variable(tf.truncated_normal([hidden_unit, num_classes], stddev=0.1), name='W')
82 | 			b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name='b')
83 | 			l2_loss += tf.nn.l2_loss(W)
84 | 			l2_loss += tf.nn.l2_loss(b)
85 | 			self.scores = tf.nn.xw_plus_b(output, self.W, b, name='scores')
86 | 			self.predictions = tf.argmax(self.scores, 1, name='predictions')
87 | 
88 | 		with tf.name_scope('loss'):
89 | 			losses = tf.nn.softmax_cross_entropy_with_logits(labels = self.input_y, logits = self.scores) #  only named arguments accepted            
90 | 			self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss
91 | 
92 | 		with tf.name_scope('accuracy'):
93 | 			correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
94 | 			self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name='accuracy')
95 | 
96 | 		with tf.name_scope('num_correct'):
97 | 			correct = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
98 | 			self.num_correct = tf.reduce_sum(tf.cast(correct, 'float'))
99 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import json
  4 | import time
  5 | import shutil
  6 | import pickle
  7 | import logging
  8 | import data_helper
  9 | import numpy as np
 10 | import pandas as pd
 11 | import tensorflow as tf
 12 | from text_cnn_rnn import TextCNNRNN
 13 | from sklearn.model_selection import train_test_split
 14 | 
# Lower the root logger threshold so INFO-level messages are emitted during training.
logging.getLogger().setLevel(logging.INFO)
 16 | 
 17 | def train_cnn_rnn():
 18 | 	input_file = sys.argv[1]
 19 | 	x_, y_, vocabulary, vocabulary_inv, df, labels = data_helper.load_data(input_file)
 20 | 
 21 | 	training_config = sys.argv[2]
 22 | 	params = json.loads(open(training_config).read())
 23 | 
 24 | 	# Assign a 300 dimension vector to each word
 25 | 	word_embeddings = data_helper.load_embeddings(vocabulary)
 26 | 	embedding_mat = [word_embeddings[word] for index, word in enumerate(vocabulary_inv)]
 27 | 	embedding_mat = np.array(embedding_mat, dtype = np.float32)
 28 | 
 29 | 	# Split the original dataset into train set and test set
 30 | 	x, x_test, y, y_test = train_test_split(x_, y_, test_size=0.1)
 31 | 
 32 | 	# Split the train set into train set and dev set
 33 | 	x_train, x_dev, y_train, y_dev = train_test_split(x, y, test_size=0.1)
 34 | 
 35 | 	logging.info('x_train: {}, x_dev: {}, x_test: {}'.format(len(x_train), len(x_dev), len(x_test)))
 36 | 	logging.info('y_train: {}, y_dev: {}, y_test: {}'.format(len(y_train), len(y_dev), len(y_test)))
 37 | 
 38 | 	# Create a directory, everything related to the training will be saved in this directory
 39 | 	timestamp = str(int(time.time()))
 40 | 	trained_dir = './trained_results_' + timestamp + '/'
 41 | 	if os.path.exists(trained_dir):
 42 | 		shutil.rmtree(trained_dir)
 43 | 	os.makedirs(trained_dir)
 44 | 
 45 | 	graph = tf.Graph()
 46 | 	with graph.as_default():
 47 | 		session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
 48 | 		sess = tf.Session(config=session_conf)
 49 | 		with sess.as_default():
 50 | 			cnn_rnn = TextCNNRNN(
 51 | 				embedding_mat=embedding_mat,
 52 | 				sequence_length=x_train.shape[1],
 53 | 				num_classes = y_train.shape[1],
 54 | 				non_static=params['non_static'],
 55 | 				hidden_unit=params['hidden_unit'],
 56 | 				max_pool_size=params['max_pool_size'],
 57 | 				filter_sizes=map(int, params['filter_sizes'].split(",")),
 58 | 				num_filters = params['num_filters'],
 59 | 				embedding_size = params['embedding_dim'],
 60 | 				l2_reg_lambda = params['l2_reg_lambda'])
 61 | 
 62 | 			global_step = tf.Variable(0, name='global_step', trainable=False)
 63 | 			optimizer = tf.train.RMSPropOptimizer(1e-3, decay=0.9)
 64 | 			grads_and_vars = optimizer.compute_gradients(cnn_rnn.loss)
 65 | 			train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
 66 | 
 67 | 			# Checkpoint files will be saved in this directory during training
 68 | 			checkpoint_dir = './checkpoints_' + timestamp + '/'
 69 | 			if os.path.exists(checkpoint_dir):
 70 | 				shutil.rmtree(checkpoint_dir)
 71 | 			os.makedirs(checkpoint_dir)
 72 | 			checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
 73 | 
 74 | 			def real_len(batches):
 75 | 				return [np.ceil(np.argmin(batch + [0]) * 1.0 / params['max_pool_size']) for batch in batches]
 76 | 
 77 | 			def train_step(x_batch, y_batch):
 78 | 				feed_dict = {
 79 | 					cnn_rnn.input_x: x_batch,
 80 | 					cnn_rnn.input_y: y_batch,
 81 | 					cnn_rnn.dropout_keep_prob: params['dropout_keep_prob'],
 82 | 					cnn_rnn.batch_size: len(x_batch),
 83 | 					cnn_rnn.pad: np.zeros([len(x_batch), 1, params['embedding_dim'], 1]),
 84 | 					cnn_rnn.real_len: real_len(x_batch),
 85 | 				}
 86 | 				_, step, loss, accuracy = sess.run([train_op, global_step, cnn_rnn.loss, cnn_rnn.accuracy], feed_dict)
 87 | 
 88 | 			def dev_step(x_batch, y_batch):
 89 | 				feed_dict = {
 90 | 					cnn_rnn.input_x: x_batch,
 91 | 					cnn_rnn.input_y: y_batch,
 92 | 					cnn_rnn.dropout_keep_prob: 1.0,
 93 | 					cnn_rnn.batch_size: len(x_batch),
 94 | 					cnn_rnn.pad: np.zeros([len(x_batch), 1, params['embedding_dim'], 1]),
 95 | 					cnn_rnn.real_len: real_len(x_batch),
 96 | 				}
 97 | 				step, loss, accuracy, num_correct, predictions = sess.run(
 98 | 					[global_step, cnn_rnn.loss, cnn_rnn.accuracy, cnn_rnn.num_correct, cnn_rnn.predictions], feed_dict)
 99 | 				return accuracy, loss, num_correct, predictions
100 | 
101 | 			saver = tf.train.Saver()
102 | 			sess.run(tf.global_variables_initializer())
103 | 
104 | 			# Training starts here
105 | 			train_batches = data_helper.batch_iter(list(zip(x_train, y_train)), params['batch_size'], params['num_epochs'])
106 | 			best_accuracy, best_at_step = 0, 0
107 | 
108 | 			# Train the model with x_train and y_train
109 | 			for train_batch in train_batches:
110 | 				x_train_batch, y_train_batch = zip(*train_batch)
111 | 				train_step(x_train_batch, y_train_batch)
112 | 				current_step = tf.train.global_step(sess, global_step)
113 | 
114 | 				# Evaluate the model with x_dev and y_dev
115 | 				if current_step % params['evaluate_every'] == 0:
116 | 					dev_batches = data_helper.batch_iter(list(zip(x_dev, y_dev)), params['batch_size'], 1)
117 | 
118 | 					total_dev_correct = 0
119 | 					for dev_batch in dev_batches:
120 | 						x_dev_batch, y_dev_batch = zip(*dev_batch)
121 | 						acc, loss, num_dev_correct, predictions = dev_step(x_dev_batch, y_dev_batch)
122 | 						total_dev_correct += num_dev_correct
123 | 					accuracy = float(total_dev_correct) / len(y_dev)
124 | 					logging.info('Accuracy on dev set: {}'.format(accuracy))
125 | 
126 | 					if accuracy >= best_accuracy:
127 | 						best_accuracy, best_at_step = accuracy, current_step
128 | 						path = saver.save(sess, checkpoint_prefix, global_step=current_step)
129 | 						logging.critical('Saved model {} at step {}'.format(path, best_at_step))
130 | 						logging.critical('Best accuracy {} at step {}'.format(best_accuracy, best_at_step))
131 | 			logging.critical('Training is complete, testing the best model on x_test and y_test')
132 | 
133 | 			# Save the model files to trained_dir. predict.py needs trained model files. 
134 | 			saver.save(sess, trained_dir + "best_model.ckpt")
135 | 
136 | 			# Evaluate x_test and y_test
137 | 			saver.restore(sess, checkpoint_prefix + '-' + str(best_at_step))
138 | 			test_batches = data_helper.batch_iter(list(zip(x_test, y_test)), params['batch_size'], 1, shuffle=False)
139 | 			total_test_correct = 0
140 | 			for test_batch in test_batches:
141 | 				x_test_batch, y_test_batch = zip(*test_batch)
142 | 				acc, loss, num_test_correct, predictions = dev_step(x_test_batch, y_test_batch)
143 | 				total_test_correct += int(num_test_correct)
144 | 			logging.critical('Accuracy on test set: {}'.format(float(total_test_correct) / len(y_test)))
145 | 
146 | 	# Save trained parameters and files since predict.py needs them
147 | 	with open(trained_dir + 'words_index.json', 'w') as outfile:
148 | 		json.dump(vocabulary, outfile, indent=4, ensure_ascii=False)
149 | 	with open(trained_dir + 'embeddings.pickle', 'wb') as outfile:
150 | 		pickle.dump(embedding_mat, outfile, pickle.HIGHEST_PROTOCOL)
151 | 	with open(trained_dir + 'labels.json', 'w') as outfile:
152 | 		json.dump(labels, outfile, indent=4, ensure_ascii=False)
153 | 
154 | 	params['sequence_length'] = x_train.shape[1]
155 | 	with open(trained_dir + 'trained_parameters.json', 'w') as outfile:
156 | 		json.dump(params, outfile, indent=4, sort_keys=True, ensure_ascii=False)
157 | 
if __name__ == '__main__':
	# python3 train.py ./data/train.csv.zip ./training_config.json
	# Fail fast with a usage message instead of an IndexError raised from inside train_cnn_rnn()
	if len(sys.argv) < 3:
		sys.exit('Usage: python3 train.py <input_file> <training_config.json>')
	train_cnn_rnn()
161 | 


--------------------------------------------------------------------------------
/trained_results_1516404693/best_model.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiegzhan/multi-class-text-classification-cnn-rnn/5db803b7c156655ead270cb9788148dc8c7f6afd/trained_results_1516404693/best_model.ckpt.data-00000-of-00001


--------------------------------------------------------------------------------
/trained_results_1516404693/best_model.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiegzhan/multi-class-text-classification-cnn-rnn/5db803b7c156655ead270cb9788148dc8c7f6afd/trained_results_1516404693/best_model.ckpt.index


--------------------------------------------------------------------------------
/trained_results_1516404693/best_model.ckpt.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiegzhan/multi-class-text-classification-cnn-rnn/5db803b7c156655ead270cb9788148dc8c7f6afd/trained_results_1516404693/best_model.ckpt.meta


--------------------------------------------------------------------------------
/trained_results_1516404693/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "best_model.ckpt"
2 | all_model_checkpoint_paths: "../checkpoints_1516404693/model-4200"
3 | all_model_checkpoint_paths: "../checkpoints_1516404693/model-5000"
4 | all_model_checkpoint_paths: "../checkpoints_1516404693/model-5200"
5 | all_model_checkpoint_paths: "../checkpoints_1516404693/model-5400"
6 | all_model_checkpoint_paths: "best_model.ckpt"
7 | 


--------------------------------------------------------------------------------
/trained_results_1516404693/embeddings.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jiegzhan/multi-class-text-classification-cnn-rnn/5db803b7c156655ead270cb9788148dc8c7f6afd/trained_results_1516404693/embeddings.pickle


--------------------------------------------------------------------------------
/trained_results_1516404693/labels.json:
--------------------------------------------------------------------------------
 1 | [
 2 |     "ARSON",
 3 |     "ASSAULT",
 4 |     "BAD CHECKS",
 5 |     "BRIBERY",
 6 |     "BURGLARY",
 7 |     "DISORDERLY CONDUCT",
 8 |     "DRIVING UNDER THE INFLUENCE",
 9 |     "DRUG/NARCOTIC",
10 |     "DRUNKENNESS",
11 |     "EMBEZZLEMENT",
12 |     "EXTORTION",
13 |     "FAMILY OFFENSES",
14 |     "FORGERY/COUNTERFEITING",
15 |     "FRAUD",
16 |     "GAMBLING",
17 |     "KIDNAPPING",
18 |     "LARCENY/THEFT",
19 |     "LIQUOR LAWS",
20 |     "LOITERING",
21 |     "MISSING PERSON",
22 |     "NON-CRIMINAL",
23 |     "OTHER OFFENSES",
24 |     "PORNOGRAPHY/OBSCENE MAT",
25 |     "PROSTITUTION",
26 |     "RECOVERED VEHICLE",
27 |     "ROBBERY",
28 |     "RUNAWAY",
29 |     "SECONDARY CODES",
30 |     "SEX OFFENSES FORCIBLE",
31 |     "SEX OFFENSES NON FORCIBLE",
32 |     "STOLEN PROPERTY",
33 |     "SUICIDE",
34 |     "SUSPICIOUS OCC",
35 |     "TREA",
36 |     "TRESPASS",
37 |     "VANDALISM",
38 |     "VEHICLE THEFT",
39 |     "WARRANTS",
40 |     "WEAPON LAWS"
41 | ]


--------------------------------------------------------------------------------
/trained_results_1516404693/trained_parameters.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "batch_size": 128,
 3 |     "dropout_keep_prob": 0.5,
 4 |     "embedding_dim": 300,
 5 |     "evaluate_every": 200,
 6 |     "filter_sizes": "3,4,5",
 7 |     "hidden_unit": 300,
 8 |     "l2_reg_lambda": 0.0,
 9 |     "max_pool_size": 4,
10 |     "non_static": false,
11 |     "num_epochs": 1,
12 |     "num_filters": 32,
13 |     "sequence_length": 14
14 | }


--------------------------------------------------------------------------------
/trained_results_1516404693/words_index.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "forced": 484,
  3 |     "written": 775,
  4 |     "smoked": 526,
  5 |     "hazardous": 633,
  6 |     "prowl": 168,
  7 |     "number": 530,
  8 |     "warehouse": 357,
  9 |     "dangerous": 213,
 10 |     "carjacking": 324,
 11 |     "ammunition": 463,
 12 |     "electrical": 691,
 13 |     "manslaughter": 693,
 14 |     "red": 551,
 15 |     "unlawful": 44,
 16 |     "administering": 549,
 17 |     "insured": 830,
 18 |     "sidewalks": 158,
 19 |     "gambling": 452,
 20 |     "crimes": 399,
 21 |     "assisting": 728,
 22 |     "restraining": 112,
 23 |     "commit": 422,
 24 |     "battery": 24,
 25 |     "advertising": 603,
 26 |     "encourage": 801,
 27 |     "making": 845,
 28 |     "police": 124,
 29 |     "box": 692,
 30 |     "rape": 218,
 31 |     "furnishing": 434,
 32 |     "intercourse": 461,
 33 |     "buses": 380,
 34 |     "aggravated": 55,
 35 |     "threating": 252,
 36 |     "fare": 575,
 37 |     "suspended": 31,
 38 |     "house": 72,
 39 |     "failure": 264,
 40 |     "counterfeit": 527,
 41 |     "windows": 121,
 42 |     "investigation": 93,
 43 |     "alcohol": 95,
 44 |     "disguise": 814,
 45 |     "incl": 682,
 46 |     "permit": 219,
 47 |     "heating": 786,
 48 |     "danger": 327,
 49 |     "control": 376,
 50 |     "physical": 216,
 51 |     "unlocked": 109,
 52 |     "identify": 901,
 53 |     "made": 284,
 54 |     "auction": 784,
 55 |     "under": 65,
 56 |     "motorcycle": 159,
 57 |     "religious": 802,
 58 |     "community": 502,
 59 |     "occurence": 427,
 60 |     "apparel": 902,
 61 |     "hate": 776,
 62 |     "methadone": 443,
 63 |     "dispute": 729,
 64 |     "services": 578,
 65 |     "traffic": 37,
 66 |     "near": 760,
 67 |     "lodging": 314,
 68 |     "fraudulent": 149,
 69 |     "representation": 903,
 70 |     "incident": 285,
 71 |     "unauthorized": 624,
 72 |     "justice": 629,
 73 |     "bodily": 63,
 74 |     "accepting": 650,
 75 |     "commercial": 241,
 76 |     "mental": 40,
 77 |     "violation": 13,
 78 |     "take": 763,
 79 |     "withhold": 720,
 80 |     "supervision": 503,
 81 |     "telegraph": 351,
 82 |     "possessing": 491,
 83 |     "elder": 299,
 84 |     "lights": 816,
 85 |     "offenses": 581,
 86 |     "sewers": 904,
 87 |     "collision": 390,
 88 |     "statute": 559,
 89 |     "matter": 509,
 90 |     "explosive": 466,
 91 |     "scene": 740,
 92 |     "wife": 777,
 93 |     "resulting": 456,
 94 |     "involved": 273,
 95 |     "order": 86,
 96 |     "where": 278,
 97 |     "post": 504,
 98 |     "rescuing": 787,
 99 |     "causing": 488,
100 |     "business": 683,
101 |     "exhibiting": 247,
102 |     "sidewalk": 547,
103 |     "paraphernalia": 82,
104 |     "flat": 209,
105 |     "ingestion": 548,
106 |     "hit": 391,
107 |     "obtaining": 233,
108 |     "meters": 741,
109 |     "alarm": 472,
110 |     "imprisonment": 196,
111 |     "complaint": 400,
112 |     "theft": 2,
113 |     "oil": 905,
114 |     "in": 89,
115 |     "plays": 846,
116 |     "resisting": 84,
117 |     "abortion": 788,
118 |     "common": 750,
119 |     "evidence": 220,
120 |     "silencer": 847,
121 |     "access": 350,
122 |     "operated": 453,
123 |     "\\)": 74,
124 |     "manufacture": 499,
125 |     "tire": 366,
126 |     "entry": 22,
127 |     "heed": 552,
128 |     "care": 474,
129 |     "bring": 471,
130 |     "adult": 79,
131 |     "explosives": 640,
132 |     "threats": 60,
133 |     "fraudulently": 510,
134 |     "damaging": 593,
135 |     "911": 778,
136 |     "disperse": 654,
137 |     "on": 41,
138 |     "estate": 684,
139 |     "stray": 597,
140 |     "claims": 583,
141 |     "are": 279,
142 |     "entering": 867,
143 |     "copying": 584,
144 |     "electrically": 869,
145 |     "corrections": 151,
146 |     "switch": 870,
147 |     "run": 392,
148 |     "proper": 475,
149 |     "instruments": 370,
150 |     "barbituates": 694,
151 |     "fire": 238,
152 |     "schoolyard": 643,
153 |     "no": 598,
154 |     "hole": 817,
155 |     "plate": 108,
156 |     "violence": 99,
157 |     "hallucinogenic": 379,
158 |     "youth": 698,
159 |     "posing": 906,
160 |     "lewd": 374,
161 |     "dismissal": 871,
162 |     "amphetamine": 119,
163 |     "attempting": 765,
164 |     "street": 66,
165 |     "riot": 531,
166 |     "possibly": 432,
167 |     "sufficient": 337,
168 |     "alter": 464,
169 |     "grand": 5,
170 |     "dancehall": 794,
171 |     "misconduct": 907,
172 |     "racing": 942,
173 |     "destitute": 848,
174 |     "inflicting": 457,
175 |     "bribery": 451,
176 |     "inmate": 615,
177 |     "distributors": 604,
178 |     "unusual": 428,
179 |     "keeper": 614,
180 |     "affixing": 849,
181 |     "camper": 536,
182 |     "disturbance": 492,
183 |     "change": 599,
184 |     "committing": 217,
185 |     "placing": 540,
186 |     "permission": 317,
187 |     "identification": 215,
188 |     "tear": 557,
189 |     "perjury": 590,
190 |     "alcoholic": 908,
191 |     "freight": 751,
192 |     "tab": 467,
193 |     "coins": 207,
194 |     "passenger": 752,
195 |     "p": 695,
196 |     "trespass": 630,
197 |     "bar": 803,
198 |     "victim": 232,
199 |     "object": 477,
200 |     "asphyxiation": 605,
201 |     "fireworks": 818,
202 |     "consent": 626,
203 |     "courtesy": 256,
204 |     "insurer": 819,
205 |     "invasion": 713,
206 |     "game": 245,
207 |     "during": 550,
208 |     "bldg": 283,
209 |     "embezzled": 429,
210 |     "industrial": 790,
211 |     "gov't": 909,
212 |     "grounds": 439,
213 |     "devices": 642,
214 |     "federal": 560,
215 |     "warrant": 34,
216 |     "return": 781,
217 |     "hotel": 255,
218 |     "gas": 524,
219 |     "procuring": 910,
220 |     "performances": 850,
221 |     "narcotics": 76,
222 |     "trafficking": 482,
223 |     "documents": 672,
224 |     "station": 445,
225 |     "switchblade": 500,
226 |     "code": 130,
227 |     "motor": 534,
228 |     "injunction": 696,
229 |     "store": 91,
230 |     "knife": 131,
231 |     "injurious": 730,
232 |     "dv": 178,
233 |     "roll": 566,
234 |     "marker": 721,
235 |     "animals": 411,
236 |     "truant": 436,
237 |     "noxious": 843,
238 |     "lost": 17,
239 |     "escapee": 655,
240 |     "chemicals": 341,
241 |     "general": 206,
242 |     "cheat": 460,
243 |     "calls": 125,
244 |     "with": 20,
245 |     "falsification": 851,
246 |     "soliciting": 423,
247 |     "view": 239,
248 |     "disturbing": 192,
249 |     "licensed": 742,
250 |     "selling": 470,
251 |     "id": 154,
252 |     "30": 773,
253 |     "offensive": 528,
254 |     "known": 645,
255 |     "tickets": 804,
256 |     "juvenile": 83,
257 |     "stealing": 378,
258 |     "tobacco": 493,
259 |     "penetration": 485,
260 |     "telephone": 343,
261 |     "prejudice": 302,
262 |     "commission": 648,
263 |     "alien": 306,
264 |     "scope": 938,
265 |     "harbor": 636,
266 |     "school": 282,
267 |     "visiting": 529,
268 |     "plates": 820,
269 |     "conceal": 717,
270 |     "towards": 231,
271 |     "siren": 553,
272 |     "bigamy": 911,
273 |     "protective": 561,
274 |     "tools": 173,
275 |     "flammable": 641,
276 |     "comply": 876,
277 |     "privacy": 712,
278 |     "playground": 623,
279 |     "checks": 113,
280 |     "outside": 54,
281 |     "assembly": 567,
282 |     "by": 70,
283 |     "resist": 407,
284 |     "overcharging": 780,
285 |     "misuse": 821,
286 |     "release": 505,
287 |     "against": 57,
288 |     "juveniles": 872,
289 |     "robbery": 35,
290 |     "cloned": 571,
291 |     "drowning": 805,
292 |     "other": 160,
293 |     "metals": 789,
294 |     "escape": 585,
295 |     "use": 80,
296 |     "lines": 344,
297 |     "addict": 301,
298 |     "mail": 574,
299 |     "aerosol": 639,
300 |     "oral": 362,
301 |     "meth": 123,
302 |     "unlawfully": 612,
303 |     "transfer": 646,
304 |     "concealed": 203,
305 |     "dispensing": 616,
306 |     "turned": 322,
307 |     "facility": 913,
308 |     "injury": 96,
309 |     "vio": 647,
310 |     "transportation": 244,
311 |     "demonstration": 731,
312 |     "grossly": 435,
313 |     "related": 162,
314 |     "harassing": 186,
315 |     "discharge": 251,
316 |     "bridges": 873,
317 |     "occupant": 681,
318 |     "distribution": 852,
319 |     "atm": 359,
320 |     "misc": 732,
321 |     "peyote": 914,
322 |     "shots": 345,
323 |     "locked": 8,
324 |     "deceive": 915,
325 |     "aggressive": 430,
326 |     "conductive": 874,
327 |     "threat": 412,
328 |     "prescription": 478,
329 |     "operating": 389,
330 |     "receiving": 136,
331 |     "taxi": 329,
332 |     "pornography": 465,
333 |     "machine": 444,
334 |     "eavesdropping": 853,
335 |     "drives": 656,
336 |     "lasers": 822,
337 |     "trailer": 533,
338 |     "comm": 438,
339 |     "detention": 115,
340 |     "witnesses": 455,
341 |     ",": 1,
342 |     "occurrence": 36,
343 |     "restrictions": 657,
344 |     "female": 277,
345 |     "uttering": 371,
346 |     "assault": 50,
347 |     "leading": 328,
348 |     "constrt": 297,
349 |     "construction": 126,
350 |     "age": 764,
351 |     "rights": 783,
352 |     "bank": 354,
353 |     "att": 224,
354 |     "teachers": 447,
355 |     "mischief": 15,
356 |     "habitual": 437,
357 |     "dwelling": 229,
358 |     "movies": 806,
359 |     "worship": 916,
360 |     "molest": 449,
361 |     "thoroughfare": 416,
362 |     "incomplete": 586,
363 |     "depositing": 815,
364 |     "extortion": 394,
365 |     "telecommunication": 753,
366 |     "kidnapper": 917,
367 |     "wearing": 743,
368 |     "not": 320,
369 |     "immoral": 325,
370 |     "peeping": 580,
371 |     "suspicious": 33,
372 |     "posted": 791,
373 |     "marshall": 424,
374 |     "attending": 918,
375 |     "breaking": 122,
376 |     "planting": 382,
377 |     "coin": 454,
378 |     "trick": 200,
379 |     "broadcast": 875,
380 |     "illegal": 807,
381 |     "firearms": 543,
382 |     "for": 49,
383 |     "department": 150,
384 |     "minor": 234,
385 |     "weapons": 714,
386 |     "city": 365,
387 |     "suicide": 321,
388 |     "embezzlement": 197,
389 |     "money": 143,
390 |     "u": 425,
391 |     "after": 266,
392 |     "transit": 330,
393 |     "occupied": 369,
394 |     "lawful": 587,
395 |     "decoding": 919,
396 |     "drag": 920,
397 |     "lynching": 715,
398 |     "jurisdiction": 69,
399 |     "firearm": 139,
400 |     "cabaret": 754,
401 |     "copulation": 363,
402 |     "dissuading": 413,
403 |     "mobile": 733,
404 |     "required": 631,
405 |     "semi": 823,
406 |     "trespassing": 107,
407 |     "sodomy": 408,
408 |     "barking": 658,
409 |     "megan's": 744,
410 |     "message": 808,
411 |     "owner": 685,
412 |     "shooting": 387,
413 |     "utility": 592,
414 |     "derivative": 576,
415 |     "cellular": 541,
416 |     "slashing": 367,
417 |     "chemical": 617,
418 |     "\\(": 75,
419 |     "materials": 644,
420 |     "200": 627,
421 |     "abandonment": 420,
422 |     "into": 386,
423 |     "prisoner": 755,
424 |     "sell": 525,
425 |     "s": 146,
426 |     "simple": 358,
427 |     "records": 313,
428 |     "child": 155,
429 |     "beverage": 921,
430 |     "firecrackers": 495,
431 |     "product": 756,
432 |     "witness": 414,
433 |     "real": 686,
434 |     "attempted": 77,
435 |     "service": 243,
436 |     "estab": 490,
437 |     "sending": 809,
438 |     "delinquency": 404,
439 |     "neglect": 501,
440 |     "building": 42,
441 |     "jail": 668,
442 |     "residence": 97,
443 |     "desertion": 468,
444 |     "trash": 558,
445 |     "solicits": 117,
446 |     "through": 824,
447 |     "innkeeper": 296,
448 |     "impersonating": 483,
449 |     "arson": 205,
450 |     "judicial": 673,
451 |     "mask": 757,
452 |     "display": 722,
453 |     "spousal": 687,
454 |     "officer": 111,
455 |     "purchase": 634,
456 |     "register": 287,
457 |     "driving": 167,
458 |     "driver": 419,
459 |     "caustic": 331,
460 |     "loitering": 157,
461 |     "trains": 718,
462 |     "jumping": 512,
463 |     "shoplifting": 68,
464 |     "death": 120,
465 |     "meeting": 674,
466 |     "engaging": 402,
467 |     "procurement": 922,
468 |     "bribe": 782,
469 |     "emergency": 469,
470 |     "felon": 259,
471 |     "100": 562,
472 |     "hire": 766,
473 |     "chain": 311,
474 |     "area": 825,
475 |     "carrying": 199,
476 |     "prohibited": 274,
477 |     "throwing": 448,
478 |     "destruction": 268,
479 |     "medical": 854,
480 |     "injuries": 275,
481 |     "acts": 356,
482 |     "loudspeaker": 881,
483 |     "nature": 923,
484 |     "system": 594,
485 |     "obstructions": 291,
486 |     "refusing": 659,
487 |     "sick": 395,
488 |     "fighting": 323,
489 |     "meetings": 826,
490 |     "device": 263,
491 |     "<PAD/>": 0,
492 |     "bomb": 368,
493 |     "grabbing": 745,
494 |     "prostitution": 102,
495 |     "tarasoff": 304,
496 |     "an": 204,
497 |     "member": 792,
498 |     "reformatory": 735,
499 |     "disabled": 572,
500 |     "located": 271,
501 |     "dies": 827,
502 |     "interfering": 660,
503 |     "life": 56,
504 |     "stolen": 10,
505 |     "credit": 71,
506 |     "forge": 479,
507 |     "removal": 707,
508 |     "computers": 309,
509 |     "recovered": 45,
510 |     "prior": 142,
511 |     "speeding": 480,
512 |     "push": 924,
513 |     "convicted": 262,
514 |     "upon": 651,
515 |     "stay": 174,
516 |     "glue": 877,
517 |     "campus": 697,
518 |     "syringe": 519,
519 |     "maintaining": 202,
520 |     "disorderly": 855,
521 |     "intercepting": 708,
522 |     "secondhand": 925,
523 |     "clone": 758,
524 |     "park": 332,
525 |     "offender": 295,
526 |     "heroin": 147,
527 |     "dump": 828,
528 |     "days": 774,
529 |     "vicious": 595,
530 |     "injured": 227,
531 |     "contributing": 405,
532 |     "opiates": 406,
533 |     "base": 58,
534 |     "over": 878,
535 |     "home": 516,
536 |     "obstructing": 415,
537 |     "furnish": 736,
538 |     "booking": 588,
539 |     "computer": 515,
540 |     "cards": 383,
541 |     "drug": 544,
542 |     "articles": 573,
543 |     "cohabitee": 104,
544 |     "strangulation": 517,
545 |     "restricted": 879,
546 |     "railroads": 880,
547 |     "guide": 829,
548 |     "parental": 348,
549 |     "pursesnatch": 280,
550 |     "pimping": 496,
551 |     "executive": 417,
552 |     "cocaine": 48,
553 |     "hypodermic": 520,
554 |     "a": 11,
555 |     "agents": 608,
556 |     "remaining": 704,
557 |     "investigative": 116,
558 |     "based": 303,
559 |     "pickpocket": 118,
560 |     "property": 7,
561 |     "threatening": 228,
562 |     "of": 3,
563 |     "violator": 737,
564 |     "limits": 364,
565 |     "arrest": 19,
566 |     "the": 53,
567 |     "registration": 222,
568 |     "track": 661,
569 |     "automated": 211,
570 |     "fired": 346,
571 |     "report": 94,
572 |     "accident": 326,
573 |     "apt": 446,
574 |     "around": 675,
575 |     "air": 336,
576 |     "satellite": 767,
577 |     "patient": 723,
578 |     "pass": 187,
579 |     "armed": 183,
580 |     "amount": 288,
581 |     "dumping": 514,
582 |     "found": 38,
583 |     "load": 882,
584 |     "locatn": 793,
585 |     "within": 352,
586 |     "private": 709,
587 |     "entertainment": 618,
588 |     "roadway": 856,
589 |     "spill": 810,
590 |     "legal": 372,
591 |     "impounded": 360,
592 |     "possible": 347,
593 |     "incest": 768,
594 |     "forgery": 110,
595 |     "vehicles": 47,
596 |     "murder": 926,
597 |     "misdemeanor": 179,
598 |     "poss": 145,
599 |     "lab": 927,
600 |     "stalking": 257,
601 |     "at": 270,
602 |     "parents": 883,
603 |     "drunk": 532,
604 |     "cars": 381,
605 |     "labor": 494,
606 |     "enroute": 46,
607 |     "misplaced": 537,
608 |     "cable": 929,
609 |     "make": 194,
610 |     "taking": 664,
611 |     "calif": 930,
612 |     "communications": 705,
613 |     "notes": 181,
614 |     "causes": 236,
615 |     "car": 535,
616 |     "natural": 235,
617 |     "safe": 403,
618 |     "judge": 706,
619 |     "court": 195,
620 |     "conduct": 441,
621 |     "influence": 106,
622 |     "fight": 932,
623 |     "auto": 6,
624 |     "400": 568,
625 |     "felony": 156,
626 |     "dating": 164,
627 |     "mayhem": 426,
628 |     "accessing": 596,
629 |     "malicious": 16,
630 |     "inflict": 105,
631 |     "peddling": 305,
632 |     "contraband": 738,
633 |     "seals": 622,
634 |     "strongarm": 103,
635 |     "spray": 726,
636 |     "light": 554,
637 |     "personation": 144,
638 |     "boat": 759,
639 |     "intoxicated": 487,
640 |     "negligent": 421,
641 |     "carrier": 746,
642 |     "vehicle": 27,
643 |     "state": 188,
644 |     "50": 513,
645 |     "abuse": 171,
646 |     "non": 189,
647 |     "aid": 635,
648 |     "substance": 129,
649 |     "apparatus": 609,
650 |     "bus": 747,
651 |     "seizing": 748,
652 |     "beyond": 377,
653 |     "armor": 885,
654 |     "toilet": 676,
655 |     "citizenship": 811,
656 |     "begging": 333,
657 |     "sniping": 933,
658 |     "sale": 62,
659 |     "cat": 857,
660 |     "act": 134,
661 |     "massage": 831,
662 |     "cultivating": 384,
663 |     "command": 653,
664 |     "political": 934,
665 |     "citation": 223,
666 |     "defrauding": 253,
667 |     "loaded": 184,
668 |     "law": 375,
669 |     "corporate": 621,
670 |     "poles": 858,
671 |     "force": 61,
672 |     "miscellaneous": 73,
673 |     "missing": 52,
674 |     "willful": 476,
675 |     "vacant": 607,
676 |     "funds": 338,
677 |     "falsifying": 677,
678 |     "visit": 176,
679 |     "leased": 663,
680 |     "card": 64,
681 |     "hot": 169,
682 |     "employee": 319,
683 |     "opium": 511,
684 |     "collector": 859,
685 |     "sex": 258,
686 |     "putting": 795,
687 |     "homicide": 312,
688 |     "unemployment": 935,
689 |     "needle": 521,
690 |     "peddler": 936,
691 |     "phone": 114,
692 |     "food": 832,
693 |     "face": 289,
694 |     "hospitalized": 860,
695 |     "w": 272,
696 |     "away": 175,
697 |     "products": 506,
698 |     "rob": 886,
699 |     "their": 522,
700 |     "room": 261,
701 |     "provide": 579,
702 |     "parole": 101,
703 |     "officers": 937,
704 |     "b": 699,
705 |     "consuming": 240,
706 |     "controlled": 127,
707 |     "contact": 796,
708 |     "minors": 861,
709 |     "loiters": 678,
710 |     "drivers": 28,
711 |     "penetrating": 887,
712 |     "ft": 563,
713 |     "open": 486,
714 |     "disrupts": 410,
715 |     "booth": 637,
716 |     "information": 361,
717 |     "public": 85,
718 |     "looking": 833,
719 |     "condition": 458,
720 |     "paint": 724,
721 |     "civil": 182,
722 |     "commotion": 431,
723 |     "loud": 834,
724 |     "receive": 177,
725 |     "pens": 725,
726 |     "teller": 212,
727 |     "place": 132,
728 |     "former": 161,
729 |     "revenue": 888,
730 |     "defraud": 835,
731 |     "automobile": 30,
732 |     "pretenses": 250,
733 |     "tom": 582,
734 |     "serious": 260,
735 |     "solicitations": 889,
736 |     "water": 862,
737 |     "cart": 939,
738 |     "bite": 214,
739 |     "infractions": 489,
740 |     "closure": 180,
741 |     "vin": 679,
742 |     "relationship": 165,
743 |     "brookers": 610,
744 |     "government": 440,
745 |     "throw": 836,
746 |     "owning": 931,
747 |     "prohibi": 619,
748 |     "concealment": 711,
749 |     "slugs": 797,
750 |     "disturbed": 39,
751 |     "possess": 442,
752 |     "cordless": 710,
753 |     "bookmaking": 890,
754 |     "library": 334,
755 |     "cash": 620,
756 |     "zone": 662,
757 |     "short": 600,
758 |     "opposite": 940,
759 |     "rock": 59,
760 |     "terrorizing": 863,
761 |     "sound": 739,
762 |     "exposure": 293,
763 |     "interferring": 497,
764 |     "marking": 941,
765 |     "video": 652,
766 |     "telling": 703,
767 |     "inciting": 602,
768 |     "location": 837,
769 |     "any": 864,
770 |     "prostitute": 591,
771 |     "sf": 133,
772 |     "nuisance": 172,
773 |     "purpose": 221,
774 |     "traumatic": 459,
775 |     "kidnapping": 307,
776 |     "presenting": 589,
777 |     "license": 21,
778 |     "municipal": 137,
779 |     "burns": 891,
780 |     "case": 25,
781 |     "peace": 148,
782 |     "publication": 770,
783 |     "counterfeiting": 201,
784 |     "children": 401,
785 |     "changing": 290,
786 |     "call": 249,
787 |     "bb": 388,
788 |     "authority": 254,
789 |     "drugs": 318,
790 |     "aided": 26,
791 |     "fraud": 688,
792 |     "12": 670,
793 |     "obscene": 170,
794 |     "application": 237,
795 |     "crowd": 838,
796 |     "dependent": 300,
797 |     "payment": 565,
798 |     "activities": 409,
799 |     "4390": 701,
800 |     "electronics": 577,
801 |     "suspension": 892,
802 |     "repairman": 749,
803 |     "streets": 292,
804 |     "agent": 538,
805 |     "offense": 689,
806 |     "priors": 473,
807 |     "sniper": 943,
808 |     "balloons": 893,
809 |     "agricultural": 899,
810 |     "deadly": 92,
811 |     "strip": 210,
812 |     "placard": 570,
813 |     "transportaion": 507,
814 |     "reckless": 342,
815 |     "cruelty": 398,
816 |     "burglary": 18,
817 |     "along": 665,
818 |     "fiduciary": 669,
819 |     "liqour": 385,
820 |     "damage": 396,
821 |     "scalping": 812,
822 |     "manner": 225,
823 |     "bathroom": 839,
824 |     "regulations": 894,
825 |     "recordings": 761,
826 |     "insurance": 944,
827 |     "as": 286,
828 |     "spouse": 166,
829 |     "suspect": 353,
830 |     "sold": 298,
831 |     "premise": 340,
832 |     "cause": 138,
833 |     "while": 163,
834 |     "substances": 418,
835 |     "poisoning": 702,
836 |     "from": 4,
837 |     "serial": 546,
838 |     "hospital": 945,
839 |     "crime": 339,
840 |     "health": 946,
841 |     "valet": 895,
842 |     "about": 625,
843 |     "trade": 840,
844 |     "possession": 12,
845 |     "secrets": 841,
846 |     "merchant": 335,
847 |     "escapes": 896,
848 |     "person": 43,
849 |     "and": 152,
850 |     "used": 281,
851 |     "using": 897,
852 |     "destroying": 865,
853 |     "inhabited": 230,
854 |     "student": 898,
855 |     "railroad": 523,
856 |     "encouraging": 716,
857 |     "disaster": 947,
858 |     "altered": 555,
859 |     "fictitious": 193,
860 |     "false": 78,
861 |     "search": 269,
862 |     "human": 518,
863 |     "recklessly": 393,
864 |     "unknown": 135,
865 |     "forged": 373,
866 |     "connection": 948,
867 |     "remove": 539,
868 |     "probation": 90,
869 |     "to": 29,
870 |     "attention": 771,
871 |     "without": 198,
872 |     "container": 481,
873 |     "cell": 308,
874 |     "sales": 128,
875 |     "fortune": 700,
876 |     "instrument": 498,
877 |     "knowledge": 190,
878 |     "revoked": 32,
879 |     "or": 14,
880 |     "marks": 601,
881 |     "truck": 88,
882 |     "warning": 248,
883 |     "animal": 680,
884 |     "petty": 9,
885 |     "swearing": 433,
886 |     "goods": 632,
887 |     "domestic": 100,
888 |     "discharging": 397,
889 |     "financial": 798,
890 |     "sharp": 508,
891 |     "interception": 900,
892 |     "charitable": 884,
893 |     "marijuana": 81,
894 |     "tampering": 246,
895 |     "accidental": 606,
896 |     "pandering": 545,
897 |     "bicycle": 141,
898 |     "annoy": 450,
899 |     "machines": 611,
900 |     "shelter": 349,
901 |     "caretaker": 462,
902 |     "apartment": 87,
903 |     "stamps": 842,
904 |     "credible": 772,
905 |     "returns": 315,
906 |     "custody": 734,
907 |     "laceration": 556,
908 |     "advertisments": 866,
909 |     "spitting": 628,
910 |     "weapon": 67,
911 |     "statements": 799,
912 |     "indecent": 294,
913 |     "parking": 727,
914 |     "package": 638,
915 |     "activity": 267,
916 |     "juror": 785,
917 |     "exploitation": 569,
918 |     "months": 671,
919 |     "sexual": 185,
920 |     "intent": 276,
921 |     "specific": 690,
922 |     "runaway": 191,
923 |     "establishment": 242,
924 |     "conspiracy": 153,
925 |     "aircraft": 564,
926 |     "graffiti": 140,
927 |     "fireman": 800,
928 |     "gang": 265,
929 |     "rental": 779,
930 |     "tv": 949,
931 |     "hours": 666,
932 |     "speakers": 844,
933 |     "destructive": 950,
934 |     "refusal": 951,
935 |     "phones": 310,
936 |     "notification": 226,
937 |     "entries": 316,
938 |     "highway": 542,
939 |     "presence": 649,
940 |     "purchasing": 667,
941 |     "official": 719,
942 |     "document": 813,
943 |     "full": 912,
944 |     "forcible": 51,
945 |     "audiovisual": 762,
946 |     "vandalism": 23,
947 |     "dog": 208,
948 |     "conviction": 613,
949 |     "gun": 98,
950 |     "keeping": 868,
951 |     "evading": 355,
952 |     "dealer": 928,
953 |     "curfew": 769
954 | }


--------------------------------------------------------------------------------
/training_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "batch_size": 128,
 3 |     "dropout_keep_prob": 0.5,
 4 |     "embedding_dim": 300,
 5 |     "evaluate_every": 200,
 6 |     "filter_sizes": "3,4,5",
 7 |     "hidden_unit": 300,
 8 |     "l2_reg_lambda": 0.0,
 9 |     "max_pool_size": 4,
10 |     "non_static": false,
11 |     "num_epochs": 1,
12 |     "num_filters": 32 
13 | }
14 | 


--------------------------------------------------------------------------------