├── .github ├── Alipay.jpeg ├── FUNDING.yml └── Wechat.jpeg ├── .gitignore ├── .travis.yml ├── HARNN ├── test_harnn.py ├── text_harnn.py ├── train_harnn.py └── visualization.py ├── LICENSE ├── README.md ├── Usage.md ├── data ├── Test_sample.json ├── Train_sample.json └── Validation_sample.json ├── requirements.txt └── utils ├── checkmate.py ├── data_helpers.py └── param_parser.py /.github/Alipay.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RandolphVI/Hierarchical-Multi-Label-Text-Classification/e3195986ef56d115fcb054a0b8cc0be2af6977df/.github/Alipay.jpeg -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: ["https://github.com/RandolphVI/Hierarchical-Multi-Label-Text-Classification/blob/master/.github/Wechat.jpeg", "https://github.com/RandolphVI/Hierarchical-Multi-Label-Text-Classification/blob/master/.github/Alipay.jpeg"] 13 | -------------------------------------------------------------------------------- /.github/Wechat.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RandolphVI/Hierarchical-Multi-Label-Text-Classification/e3195986ef56d115fcb054a0b8cc0be2af6977df/.github/Wechat.jpeg -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Compiled source ### 2 | *.com 3 | *.class 4 | *.dll 5 | *.exe 6 | *.o 7 | *.so 8 | 9 | ### Packages ### 10 | # it's better to unpack these files and commit the raw source 11 | # git has its own built in compression methods 12 | *.7z 13 | *.dmg 14 | *.gz 15 | *.iso 16 | *.jar 17 | *.rar 18 | *.tar 19 | *.zip 20 | 21 | ### Logs and databases ### 22 | *.log 23 | *.sql 24 | *.sqlite 25 | 26 | ### Mac OS generated files ### 27 | .DS_Store 28 | .DS_Store? 29 | ._* 30 | .Spotlight-V100 31 | .Trashes 32 | ehthumbs.db 33 | Thumbs.db 34 | 35 | ### JetBrain config files ### 36 | .idea 37 | 38 | ### Python ### 39 | # Byte-compiled / optimized / DLL files 40 | *.npy 41 | __pycache__/ 42 | *.py[cod] 43 | *$py.class 44 | 45 | # Distribution / packaging 46 | .Python 47 | env/ 48 | build/ 49 | develop-eggs/ 50 | dist/ 51 | downloads/ 52 | eggs/ 53 | .eggs/ 54 | lib/ 55 | lib64/ 56 | parts/ 57 | sdist/ 58 | var/ 59 | *.egg-info/ 60 | .installed.cfg 61 | *.egg 62 | 63 | # PyInstaller 64 | # Usually these files are written by a python script from a template 65 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
66 | *.manifest 67 | *.spec 68 | 69 | # Installer logs 70 | pip-log.txt 71 | pip-delete-this-directory.txt 72 | 73 | # Unit test / coverage reports 74 | htmlcov/ 75 | .tox/ 76 | .coverage 77 | .coverage.* 78 | .cache 79 | nosetests.xml 80 | coverage.xml 81 | *,cover 82 | 83 | # Translations 84 | *.mo 85 | *.pot 86 | 87 | # Sphinx documentation 88 | docs/_build/ 89 | 90 | # PyBuilder 91 | target/ 92 | 93 | ### IPythonNotebook ### 94 | # Temporary data 95 | .ipynb_checkpoints/ 96 | 97 | ### Current Project ### 98 | # Data File 99 | *.txt 100 | *.tsv 101 | *.csv 102 | *.json 103 | *.jpg 104 | *.png 105 | *.html 106 | *.pickle 107 | *.kv 108 | *.pdf 109 | !/data 110 | !/data/train_sample.json 111 | !/data/validation_sample.json 112 | !/data/test_sample.json 113 | 114 | # Project File 115 | /HMC-LMLP 116 | /HMCN 117 | /SVM 118 | 119 | # Model File 120 | *.model 121 | *.pb 122 | runs/ 123 | graph/ 124 | 125 | # Analysis File 126 | Data Analysis.md 127 | 128 | # Log File 129 | logs/ 130 | 131 | # Related Code 132 | temp.py 133 | 134 | ### Else ### 135 | randolph/ 136 | Icon? 
def create_input_data(data: dict):
    """
    Bundle the per-sample fields of a loaded test set into one iterator.

    Args:
        data: Mapping that holds, under the keys listed below, equally ordered
              per-sample sequences.
    Returns:
        A zip iterator yielding one tuple per sample, in the order
        (pad_seqs, section, subsection, group, subgroup, onehot_labels, labels).
    Raises:
        KeyError: If one of the required keys is missing from `data`.
    """
    field_names = ('pad_seqs', 'section', 'subsection', 'group',
                   'subgroup', 'onehot_labels', 'labels')
    # Look the columns up in the documented order, then walk them in lockstep.
    columns = [data[field] for field in field_names]
    return zip(*columns)
processing...") 44 | test_data = dh.load_data_and_labels(args, args.test_file, word2idx) 45 | 46 | # Load harnn model 47 | OPTION = dh._option(pattern=1) 48 | if OPTION == 'B': 49 | logger.info("Loading best model...") 50 | checkpoint_file = cm.get_best_checkpoint(BEST_CPT_DIR, select_maximum_value=True) 51 | else: 52 | logger.info("Loading latest model...") 53 | checkpoint_file = tf.train.latest_checkpoint(CPT_DIR) 54 | logger.info(checkpoint_file) 55 | 56 | graph = tf.Graph() 57 | with graph.as_default(): 58 | session_conf = tf.ConfigProto( 59 | allow_soft_placement=args.allow_soft_placement, 60 | log_device_placement=args.log_device_placement) 61 | session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth 62 | sess = tf.Session(config=session_conf) 63 | with sess.as_default(): 64 | # Load the saved meta graph and restore variables 65 | saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file)) 66 | saver.restore(sess, checkpoint_file) 67 | 68 | # Get the placeholders from the graph by name 69 | input_x = graph.get_operation_by_name("input_x").outputs[0] 70 | input_y_first = graph.get_operation_by_name("input_y_first").outputs[0] 71 | input_y_second = graph.get_operation_by_name("input_y_second").outputs[0] 72 | input_y_third = graph.get_operation_by_name("input_y_third").outputs[0] 73 | input_y_fourth = graph.get_operation_by_name("input_y_fourth").outputs[0] 74 | input_y = graph.get_operation_by_name("input_y").outputs[0] 75 | dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0] 76 | alpha = graph.get_operation_by_name("alpha").outputs[0] 77 | is_training = graph.get_operation_by_name("is_training").outputs[0] 78 | 79 | # Tensors we want to evaluate 80 | first_scores = graph.get_operation_by_name("first-output/scores").outputs[0] 81 | second_scores = graph.get_operation_by_name("second-output/scores").outputs[0] 82 | third_scores = graph.get_operation_by_name("third-output/scores").outputs[0] 83 | fourth_scores 
= graph.get_operation_by_name("fourth-output/scores").outputs[0] 84 | scores = graph.get_operation_by_name("output/scores").outputs[0] 85 | 86 | # Split the output nodes name by '|' if you have several output nodes 87 | output_node_names = "first-output/scores|second-output/scores|third-output/scores|fourth-output/scores|output/scores" 88 | 89 | # Save the .pb model file 90 | output_graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, 91 | output_node_names.split("|")) 92 | tf.train.write_graph(output_graph_def, "graph", "graph-harnn-{0}.pb".format(MODEL), as_text=False) 93 | 94 | # Generate batches for one epoch 95 | batches = dh.batch_iter(list(create_input_data(test_data)), args.batch_size, 1, shuffle=False) 96 | 97 | # Collect the predictions here 98 | true_labels = [] 99 | predicted_labels = [] 100 | predicted_scores = [] 101 | 102 | # Collect for calculating metrics 103 | true_onehot_labels = [[], [], [], [], []] 104 | predicted_onehot_scores = [[], [], [], [], []] 105 | predicted_onehot_labels = [[], [], [], [], []] 106 | 107 | for batch_test in batches: 108 | x, sec, subsec, group, subgroup, y_onehot, y = zip(*batch_test) 109 | 110 | y_batch_test_list = [y_onehot, sec, subsec, group, subgroup] 111 | 112 | feed_dict = { 113 | input_x: x, 114 | input_y_first: sec, 115 | input_y_second: subsec, 116 | input_y_third: group, 117 | input_y_fourth: subgroup, 118 | input_y: y_onehot, 119 | dropout_keep_prob: 1.0, 120 | alpha: args.alpha, 121 | is_training: False 122 | } 123 | batch_global_scores, batch_first_scores, batch_second_scores, batch_third_scores, batch_fourth_scores = \ 124 | sess.run([scores, first_scores, second_scores, third_scores, fourth_scores], feed_dict) 125 | 126 | batch_scores = [batch_global_scores, batch_first_scores, batch_second_scores, 127 | batch_third_scores, batch_fourth_scores] 128 | 129 | # Get the predicted labels by threshold 130 | batch_predicted_labels_ts, batch_predicted_scores_ts = \ 131 | 
dh.get_label_threshold(scores=batch_scores[0], threshold=args.threshold) 132 | 133 | # Add results to collection 134 | for labels in y: 135 | true_labels.append(labels) 136 | for labels in batch_predicted_labels_ts: 137 | predicted_labels.append(labels) 138 | for values in batch_predicted_scores_ts: 139 | predicted_scores.append(values) 140 | 141 | for index in range(len(predicted_onehot_scores)): 142 | for onehot_labels in y_batch_test_list[index]: 143 | true_onehot_labels[index].append(onehot_labels) 144 | for onehot_scores in batch_scores[index]: 145 | predicted_onehot_scores[index].append(onehot_scores) 146 | # Get one-hot prediction by threshold 147 | predicted_onehot_labels_ts = \ 148 | dh.get_onehot_label_threshold(scores=batch_scores[index], threshold=args.threshold) 149 | for onehot_labels in predicted_onehot_labels_ts: 150 | predicted_onehot_labels[index].append(onehot_labels) 151 | 152 | # Calculate Precision & Recall & F1 153 | for index in range(len(predicted_onehot_scores)): 154 | test_pre = precision_score(y_true=np.array(true_onehot_labels[index]), 155 | y_pred=np.array(predicted_onehot_labels[index]), average='micro') 156 | test_rec = recall_score(y_true=np.array(true_onehot_labels[index]), 157 | y_pred=np.array(predicted_onehot_labels[index]), average='micro') 158 | test_F1 = f1_score(y_true=np.array(true_onehot_labels[index]), 159 | y_pred=np.array(predicted_onehot_labels[index]), average='micro') 160 | test_auc = roc_auc_score(y_true=np.array(true_onehot_labels[index]), 161 | y_score=np.array(predicted_onehot_scores[index]), average='micro') 162 | test_prc = average_precision_score(y_true=np.array(true_onehot_labels[index]), 163 | y_score=np.array(predicted_onehot_scores[index]), average="micro") 164 | if index == 0: 165 | logger.info("[Global] Predict by threshold: Precision {0:g}, Recall {1:g}, " 166 | "F1 {2:g}, AUC {3:g}, AUPRC {4:g}" 167 | .format(test_pre, test_rec, test_F1, test_auc, test_prc)) 168 | else: 169 | logger.info("[Local] 
class TextHARNN(object):
    """
    A HARNN for text classification.

    Constructing an instance adds the whole model to the current default
    TensorFlow graph: input placeholders, an embedding layer, a Bi-LSTM encoder,
    four chained attention / local-output levels, a global output head, the
    blended final scores and the training loss. The tensors are exposed as
    attributes (e.g. `input_x`, `scores`, `loss`, `global_step`).
    """

    def __init__(
            self, sequence_length, vocab_size, embedding_type, embedding_size, lstm_hidden_size, attention_unit_size,
            fc_hidden_size, num_classes_list, total_classes, l2_reg_lambda=0.0, pretrained_embedding=None):
        """
        Build the HARNN graph.

        Args:
            sequence_length: Length of every (padded) input token sequence.
            vocab_size: Number of rows of the randomly initialised embedding
                        (only used when `pretrained_embedding` is None).
            embedding_type: Only consulted when `pretrained_embedding` is given:
                            0 stores it as a `tf.constant`, 1 as a trainable `tf.Variable`.
            embedding_size: Width of the randomly initialised embedding.
            lstm_hidden_size: Number of units of each LSTM direction.
            attention_unit_size: Inner size of the attention projection.
            fc_hidden_size: Output size of every fully connected layer.
            num_classes_list: Indexable; entries 0..3 are the class counts of the four levels.
            total_classes: Width of `input_y` and of the global output. The element-wise
                           blend in the "output" scope expects the four level sizes to
                           add up to this value.
            l2_reg_lambda: Factor applied to the summed L2 loss of all trainable variables.
            pretrained_embedding: Optional pretrained embedding matrix.
        Raises:
            ValueError: If `pretrained_embedding` is given and `embedding_type` is not 0 or 1.
        """
        # Fail fast, before any graph node is created. Previously an unsupported
        # value left `self.embedding` unset and surfaced later as an AttributeError.
        if pretrained_embedding is not None and embedding_type not in (0, 1):
            raise ValueError("embedding_type must be 0 (static) or 1 (trainable) when "
                             "pretrained_embedding is given: {0}".format(embedding_type))

        # Placeholders for input, output, dropout_prob and training_tag
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y_first = tf.placeholder(tf.float32, [None, num_classes_list[0]], name="input_y_first")
        self.input_y_second = tf.placeholder(tf.float32, [None, num_classes_list[1]], name="input_y_second")
        self.input_y_third = tf.placeholder(tf.float32, [None, num_classes_list[2]], name="input_y_third")
        self.input_y_fourth = tf.placeholder(tf.float32, [None, num_classes_list[3]], name="input_y_fourth")
        self.input_y = tf.placeholder(tf.float32, [None, total_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.alpha = tf.placeholder(tf.float32, name="alpha")
        self.is_training = tf.placeholder(tf.bool, name="is_training")

        self.global_step = tf.Variable(0, trainable=False, name="Global_Step")

        def _attention(input_x, num_classes, name=""):
            """
            Attention Layer.

            Args:
                input_x: [batch_size, sequence_length, lstm_hidden_size * 2]
                num_classes: The number of i th level classes.
                name: Scope name.
            Returns:
                attention_weight: [batch_size, num_classes, sequence_length]
                attention_out: [batch_size, lstm_hidden_size * 2]
            """
            num_units = input_x.get_shape().as_list()[-1]
            with tf.name_scope(name + "attention"):
                W_s1 = tf.Variable(tf.truncated_normal(shape=[attention_unit_size, num_units],
                                                       stddev=0.1, dtype=tf.float32), name="W_s1")
                W_s2 = tf.Variable(tf.truncated_normal(shape=[num_classes, attention_unit_size],
                                                       stddev=0.1, dtype=tf.float32), name="W_s2")
                # attention_matrix: [batch_size, num_classes, sequence_length]
                # Computed per sample as W_s2 * tanh(W_s1 * x^T).
                attention_matrix = tf.map_fn(
                    fn=lambda x: tf.matmul(W_s2, x),
                    elems=tf.tanh(
                        tf.map_fn(
                            fn=lambda x: tf.matmul(W_s1, tf.transpose(x)),
                            elems=input_x,
                            dtype=tf.float32
                        )
                    )
                )
                # Softmax runs over the last axis, i.e. over the sequence positions.
                attention_weight = tf.nn.softmax(attention_matrix, name="attention")
                attention_out = tf.matmul(attention_weight, input_x)
                # Average the per-class context vectors into one vector per sample.
                attention_out = tf.reduce_mean(attention_out, axis=1)
            return attention_weight, attention_out

        def _fc_layer(input_x, name=""):
            """
            Fully Connected Layer.

            Args:
                input_x: [batch_size, *]
                name: Scope name.
            Returns:
                fc_out: [batch_size, fc_hidden_size]
            """
            with tf.name_scope(name + "fc"):
                num_units = input_x.get_shape().as_list()[-1]
                W = tf.Variable(tf.truncated_normal(shape=[num_units, fc_hidden_size],
                                                    stddev=0.1, dtype=tf.float32), name="W")
                b = tf.Variable(tf.constant(value=0.1, shape=[fc_hidden_size], dtype=tf.float32), name="b")
                fc = tf.nn.xw_plus_b(input_x, W, b)
                fc_out = tf.nn.relu(fc)
            return fc_out

        def _local_layer(input_x, input_att_weight, num_classes, name=""):
            """
            Local Layer.

            Args:
                input_x: [batch_size, fc_hidden_size]
                input_att_weight: [batch_size, num_classes, sequence_length]
                num_classes: Number of classes.
                name: Scope name.
            Returns:
                logits: [batch_size, num_classes]
                scores: [batch_size, num_classes]
                visual: [batch_size, sequence_length]
            """
            with tf.name_scope(name + "output"):
                num_units = input_x.get_shape().as_list()[-1]
                W = tf.Variable(tf.truncated_normal(shape=[num_units, num_classes],
                                                    stddev=0.1, dtype=tf.float32), name="W")
                b = tf.Variable(tf.constant(value=0.1, shape=[num_classes], dtype=tf.float32), name="b")
                logits = tf.nn.xw_plus_b(input_x, W, b, name="logits")
                scores = tf.sigmoid(logits, name="scores")

                # Weight every class' attention row by that class' score, then
                # average over the classes.
                # shape of visual: [batch_size, sequence_length]
                visual = tf.multiply(input_att_weight, tf.expand_dims(scores, -1))
                visual = tf.nn.softmax(visual)
                visual = tf.reduce_mean(visual, axis=1, name="visual")
            return logits, scores, visual

        def _linear(input_, output_size, initializer=None, scope="SimpleLinear"):
            """
            Linear map: output[k] = sum_i(Matrix[k, i] * args[i] ) + Bias[k].

            Args:
                input_: a 2D Tensor of shape batch x n.
                output_size: int, second dimension of W.
                initializer: The initializer used for the bias `b`.
                scope: VariableScope for the created subgraph; defaults to "SimpleLinear".
            Returns:
                A 2D Tensor with shape [batch x output_size] equal to input_ * W + b,
                where W and b are newly created variables.
            Raises:
                ValueError: if `input_` is not 2D or its second dimension is unknown.
            """

            shape = input_.get_shape().as_list()
            if len(shape) != 2:
                raise ValueError("Linear is expecting 2D arguments: {0}".format(str(shape)))
            if not shape[1]:
                raise ValueError("Linear expects shape[1] of arguments: {0}".format(str(shape)))
            input_size = shape[1]

            # Now the computation.
            with tf.variable_scope(scope):
                W = tf.get_variable("W", [input_size, output_size], dtype=input_.dtype)
                b = tf.get_variable("b", [output_size], dtype=input_.dtype, initializer=initializer)

            return tf.nn.xw_plus_b(input_, W, b)

        def _highway_layer(input_, size, num_layers=1, bias=-2.0):
            """
            Highway Network (cf. http://arxiv.org/abs/1505.00387).
            t = sigmoid(Wx + b); h = relu(W'x + b')
            z = t * h + (1 - t) * x
            where t is transform gate, and (1 - t) is carry gate.

            Args:
                input_: 2D input Tensor.
                size: Output size of both linear maps of every layer.
                num_layers: Number of stacked highway layers.
                bias: Constant used to initialise the transform gate bias.
            Returns:
                The output Tensor of the last highway layer.
            """

            for idx in range(num_layers):
                h = tf.nn.relu(_linear(input_, size, scope=("highway_h_{0}".format(idx))))
                t = tf.sigmoid(_linear(input_, size, initializer=tf.constant_initializer(bias),
                                       scope=("highway_t_{0}".format(idx))))
                output = t * h + (1. - t) * input_
                input_ = output

            return output

        # Embedding Layer
        with tf.device("/cpu:0"), tf.name_scope("embedding"):
            # Use randomly generated word vectors by default
            # Can also be obtained through our own word vectors trained by our corpus
            if pretrained_embedding is None:
                self.embedding = tf.Variable(tf.random_uniform([vocab_size, embedding_size], minval=-1.0, maxval=1.0,
                                                               dtype=tf.float32), trainable=True, name="embedding")
            else:
                # embedding_type was validated at the top: it is 0 or 1 here.
                if embedding_type == 0:
                    self.embedding = tf.constant(pretrained_embedding, dtype=tf.float32, name="embedding")
                else:
                    self.embedding = tf.Variable(pretrained_embedding, trainable=True,
                                                 dtype=tf.float32, name="embedding")
            self.embedded_sentence = tf.nn.embedding_lookup(self.embedding, self.input_x)
            # Average Vectors
            # [batch_size, embedding_size]
            self.embedded_sentence_average = tf.reduce_mean(self.embedded_sentence, axis=1)

        # Bi-LSTM Layer
        with tf.name_scope("Bi-lstm"):
            lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(lstm_hidden_size)  # forward direction cell
            lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(lstm_hidden_size)  # backward direction cell
            # dropout_keep_prob is always a placeholder here, so the dropout
            # wrappers are applied unconditionally.
            lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_fw_cell, output_keep_prob=self.dropout_keep_prob)
            lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_bw_cell, output_keep_prob=self.dropout_keep_prob)

            # Creates a dynamic bidirectional recurrent neural network
            # shape of `outputs`: tuple -> (outputs_fw, outputs_bw)
            # shape of `outputs_fw`: [batch_size, sequence_length, lstm_hidden_size]

            # shape of `state`: tuple -> (outputs_state_fw, output_state_bw)
            # shape of `outputs_state_fw`: tuple -> (c, h) c: memory cell; h: hidden state
            outputs, state = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell,
                                                             self.embedded_sentence, dtype=tf.float32)
            # Concat output
            self.lstm_out = tf.concat(outputs, axis=2)  # [batch_size, sequence_length, lstm_hidden_size * 2]
            self.lstm_out_pool = tf.reduce_mean(self.lstm_out, axis=1)  # [batch_size, lstm_hidden_size * 2]

        # First Level
        self.first_att_weight, self.first_att_out = _attention(self.lstm_out, num_classes_list[0], name="first-")
        self.first_local_input = tf.concat([self.lstm_out_pool, self.first_att_out], axis=1)
        self.first_local_fc_out = _fc_layer(self.first_local_input, name="first-local-")
        self.first_logits, self.first_scores, self.first_visual = _local_layer(
            self.first_local_fc_out, self.first_att_weight, num_classes_list[0], name="first-")

        # Second Level
        # From here on every level attends over the LSTM output re-weighted by
        # the previous level's `visual` vector.
        self.second_att_input = tf.multiply(self.lstm_out, tf.expand_dims(self.first_visual, -1))
        self.second_att_weight, self.second_att_out = _attention(
            self.second_att_input, num_classes_list[1], name="second-")
        self.second_local_input = tf.concat([self.lstm_out_pool, self.second_att_out], axis=1)
        self.second_local_fc_out = _fc_layer(self.second_local_input, name="second-local-")
        self.second_logits, self.second_scores, self.second_visual = _local_layer(
            self.second_local_fc_out, self.second_att_weight, num_classes_list[1], name="second-")

        # Third Level
        self.third_att_input = tf.multiply(self.lstm_out, tf.expand_dims(self.second_visual, -1))
        self.third_att_weight, self.third_att_out = _attention(
            self.third_att_input, num_classes_list[2], name="third-")
        self.third_local_input = tf.concat([self.lstm_out_pool, self.third_att_out], axis=1)
        self.third_local_fc_out = _fc_layer(self.third_local_input, name="third-local-")
        self.third_logits, self.third_scores, self.third_visual = _local_layer(
            self.third_local_fc_out, self.third_att_weight, num_classes_list[2], name="third-")

        # Fourth Level
        self.fourth_att_input = tf.multiply(self.lstm_out, tf.expand_dims(self.third_visual, -1))
        self.fourth_att_weight, self.fourth_att_out = _attention(
            self.fourth_att_input, num_classes_list[3], name="fourth-")
        self.fourth_local_input = tf.concat([self.lstm_out_pool, self.fourth_att_out], axis=1)
        self.fourth_local_fc_out = _fc_layer(self.fourth_local_input, name="fourth-local-")
        self.fourth_logits, self.fourth_scores, self.fourth_visual = _local_layer(
            self.fourth_local_fc_out, self.fourth_att_weight, num_classes_list[3], name="fourth-")

        # Concat
        # shape of ham_out: [batch_size, fc_hidden_size * 4]
        self.ham_out = tf.concat([self.first_local_fc_out, self.second_local_fc_out,
                                  self.third_local_fc_out, self.fourth_local_fc_out], axis=1)

        # Fully Connected Layer
        self.fc_out = _fc_layer(self.ham_out)

        # Highway Layer
        with tf.name_scope("highway"):
            self.highway = _highway_layer(self.fc_out, self.fc_out.get_shape()[1], num_layers=1, bias=0)

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.highway, self.dropout_keep_prob)

        # Global scores
        with tf.name_scope("global-output"):
            num_units = self.h_drop.get_shape().as_list()[-1]
            W = tf.Variable(tf.truncated_normal(shape=[num_units, total_classes],
                                                stddev=0.1, dtype=tf.float32), name="W")
            b = tf.Variable(tf.constant(value=0.1, shape=[total_classes], dtype=tf.float32), name="b")
            self.global_logits = tf.nn.xw_plus_b(self.h_drop, W, b, name="logits")
            self.global_scores = tf.sigmoid(self.global_logits, name="scores")

        # Final scores: alpha-weighted blend of the global scores and the
        # concatenated local scores of the four levels.
        with tf.name_scope("output"):
            self.local_scores = tf.concat([self.first_scores, self.second_scores,
                                           self.third_scores, self.fourth_scores], axis=1)
            self.scores = tf.add(self.alpha * self.global_scores, (1 - self.alpha) * self.local_scores, name="scores")

        # Calculate mean cross-entropy loss, L2 loss
        with tf.name_scope("loss"):
            def cal_loss(labels, logits, name):
                """Sigmoid cross-entropy summed over the classes and averaged over the batch."""
                losses = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
                losses = tf.reduce_mean(tf.reduce_sum(losses, axis=1), name=name + "losses")
                return losses

            # Local Loss
            losses_1 = cal_loss(labels=self.input_y_first, logits=self.first_logits, name="first_")
            losses_2 = cal_loss(labels=self.input_y_second, logits=self.second_logits, name="second_")
            losses_3 = cal_loss(labels=self.input_y_third, logits=self.third_logits, name="third_")
            losses_4 = cal_loss(labels=self.input_y_fourth, logits=self.fourth_logits, name="fourth_")
            local_losses = tf.add_n([losses_1, losses_2, losses_3, losses_4], name="local_losses")

            # Global Loss
            global_losses = cal_loss(labels=self.input_y, logits=self.global_logits, name="global_")

            # L2 Loss
            l2_losses = tf.add_n([tf.nn.l2_loss(tf.cast(v, tf.float32)) for v in tf.trainable_variables()],
                                 name="l2_losses") * l2_reg_lambda
            self.loss = tf.add_n([local_losses, global_losses, l2_losses], name="loss")
"logs/{0}-{1}.log".format('Train' if OPTION == 'T' else 'Restore', time.asctime())) 23 | 24 | 25 | def create_input_data(data: dict): 26 | return zip(data['pad_seqs'], data['section'], data['subsection'], 27 | data['group'], data['subgroup'], data['onehot_labels']) 28 | 29 | 30 | def train_harnn(): 31 | """Training HARNN model.""" 32 | # Print parameters used for the model 33 | dh.tab_printer(args, logger) 34 | 35 | # Load word2vec model 36 | word2idx, embedding_matrix = dh.load_word2vec_matrix(args.word2vec_file) 37 | 38 | # Load sentences, labels, and training parameters 39 | logger.info("Loading data...") 40 | logger.info("Data processing...") 41 | train_data = dh.load_data_and_labels(args, args.train_file, word2idx) 42 | val_data = dh.load_data_and_labels(args, args.validation_file, word2idx) 43 | 44 | # Build a graph and harnn object 45 | with tf.Graph().as_default(): 46 | session_conf = tf.ConfigProto( 47 | allow_soft_placement=args.allow_soft_placement, 48 | log_device_placement=args.log_device_placement) 49 | session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth 50 | sess = tf.Session(config=session_conf) 51 | with sess.as_default(): 52 | harnn = TextHARNN( 53 | sequence_length=args.pad_seq_len, 54 | vocab_size=len(word2idx), 55 | embedding_type=args.embedding_type, 56 | embedding_size=args.embedding_dim, 57 | lstm_hidden_size=args.lstm_dim, 58 | attention_unit_size=args.attention_dim, 59 | fc_hidden_size=args.fc_dim, 60 | num_classes_list=args.num_classes_list, 61 | total_classes=args.total_classes, 62 | l2_reg_lambda=args.l2_lambda, 63 | pretrained_embedding=embedding_matrix) 64 | 65 | # Define training procedure 66 | with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)): 67 | learning_rate = tf.train.exponential_decay(learning_rate=args.learning_rate, 68 | global_step=harnn.global_step, 69 | decay_steps=args.decay_steps, 70 | decay_rate=args.decay_rate, 71 | staircase=True) 72 | optimizer = 
tf.train.AdamOptimizer(learning_rate) 73 | grads, vars = zip(*optimizer.compute_gradients(harnn.loss)) 74 | grads, _ = tf.clip_by_global_norm(grads, clip_norm=args.norm_ratio) 75 | train_op = optimizer.apply_gradients(zip(grads, vars), global_step=harnn.global_step, name="train_op") 76 | 77 | # Keep track of gradient values and sparsity (optional) 78 | grad_summaries = [] 79 | for g, v in zip(grads, vars): 80 | if g is not None: 81 | grad_hist_summary = tf.summary.histogram("{0}/grad/hist".format(v.name), g) 82 | sparsity_summary = tf.summary.scalar("{0}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g)) 83 | grad_summaries.append(grad_hist_summary) 84 | grad_summaries.append(sparsity_summary) 85 | grad_summaries_merged = tf.summary.merge(grad_summaries) 86 | 87 | # Output directory for models and summaries 88 | out_dir = dh.get_out_dir(OPTION, logger) 89 | checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) 90 | best_checkpoint_dir = os.path.abspath(os.path.join(out_dir, "bestcheckpoints")) 91 | 92 | # Summaries for loss 93 | loss_summary = tf.summary.scalar("loss", harnn.loss) 94 | 95 | # Train summaries 96 | train_summary_op = tf.summary.merge([loss_summary, grad_summaries_merged]) 97 | train_summary_dir = os.path.join(out_dir, "summaries", "train") 98 | train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) 99 | 100 | # Validation summaries 101 | validation_summary_op = tf.summary.merge([loss_summary]) 102 | validation_summary_dir = os.path.join(out_dir, "summaries", "validation") 103 | validation_summary_writer = tf.summary.FileWriter(validation_summary_dir, sess.graph) 104 | 105 | saver = tf.train.Saver(tf.global_variables(), max_to_keep=args.num_checkpoints) 106 | best_saver = cm.BestCheckpointSaver(save_dir=best_checkpoint_dir, num_to_keep=3, maximize=True) 107 | 108 | if OPTION == 'R': 109 | # Load harnn model 110 | logger.info("Loading model...") 111 | checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir) 
112 | logger.info(checkpoint_file) 113 | 114 | # Load the saved meta graph and restore variables 115 | saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file)) 116 | saver.restore(sess, checkpoint_file) 117 | if OPTION == 'T': 118 | if not os.path.exists(checkpoint_dir): 119 | os.makedirs(checkpoint_dir) 120 | sess.run(tf.global_variables_initializer()) 121 | sess.run(tf.local_variables_initializer()) 122 | 123 | # Save the embedding visualization 124 | saver.save(sess, os.path.join(out_dir, "embedding", "embedding.ckpt")) 125 | 126 | current_step = sess.run(harnn.global_step) 127 | 128 | def train_step(batch_data): 129 | """A single training step.""" 130 | x, sec, subsec, group, subgroup, y_onehot = zip(*batch_data) 131 | 132 | feed_dict = { 133 | harnn.input_x: x, 134 | harnn.input_y_first: sec, 135 | harnn.input_y_second: subsec, 136 | harnn.input_y_third: group, 137 | harnn.input_y_fourth: subgroup, 138 | harnn.input_y: y_onehot, 139 | harnn.dropout_keep_prob: args.dropout_rate, 140 | harnn.alpha: args.alpha, 141 | harnn.is_training: True 142 | } 143 | _, step, summaries, loss = sess.run( 144 | [train_op, harnn.global_step, train_summary_op, harnn.loss], feed_dict) 145 | logger.info("step {0}: loss {1:g}".format(step, loss)) 146 | train_summary_writer.add_summary(summaries, step) 147 | 148 | def validation_step(val_loader, writer=None): 149 | """Evaluates model on a validation set.""" 150 | batches_validation = dh.batch_iter(list(create_input_data(val_loader)), args.batch_size, 1) 151 | 152 | # Predict classes by threshold or topk ('ts': threshold; 'tk': topk) 153 | eval_counter, eval_loss = 0, 0.0 154 | eval_pre_tk = [0.0] * args.topK 155 | eval_rec_tk = [0.0] * args.topK 156 | eval_F1_tk = [0.0] * args.topK 157 | 158 | true_onehot_labels = [] 159 | predicted_onehot_scores = [] 160 | predicted_onehot_labels_ts = [] 161 | predicted_onehot_labels_tk = [[] for _ in range(args.topK)] 162 | 163 | for batch_validation in batches_validation: 164 | x, 
sec, subsec, group, subgroup, y_onehot = zip(*batch_validation) 165 | feed_dict = { 166 | harnn.input_x: x, 167 | harnn.input_y_first: sec, 168 | harnn.input_y_second: subsec, 169 | harnn.input_y_third: group, 170 | harnn.input_y_fourth: subgroup, 171 | harnn.input_y: y_onehot, 172 | harnn.dropout_keep_prob: 1.0, 173 | harnn.alpha: args.alpha, 174 | harnn.is_training: False 175 | } 176 | step, summaries, scores, cur_loss = sess.run( 177 | [harnn.global_step, validation_summary_op, harnn.scores, harnn.loss], feed_dict) 178 | 179 | # Prepare for calculating metrics 180 | for i in y_onehot: 181 | true_onehot_labels.append(i) 182 | for j in scores: 183 | predicted_onehot_scores.append(j) 184 | 185 | # Predict by threshold 186 | batch_predicted_onehot_labels_ts = \ 187 | dh.get_onehot_label_threshold(scores=scores, threshold=args.threshold) 188 | for k in batch_predicted_onehot_labels_ts: 189 | predicted_onehot_labels_ts.append(k) 190 | 191 | # Predict by topK 192 | for top_num in range(args.topK): 193 | batch_predicted_onehot_labels_tk = dh.get_onehot_label_topk(scores=scores, top_num=top_num+1) 194 | for i in batch_predicted_onehot_labels_tk: 195 | predicted_onehot_labels_tk[top_num].append(i) 196 | 197 | eval_loss = eval_loss + cur_loss 198 | eval_counter = eval_counter + 1 199 | 200 | if writer: 201 | writer.add_summary(summaries, step) 202 | 203 | eval_loss = float(eval_loss / eval_counter) 204 | 205 | # Calculate Precision & Recall & F1 206 | eval_pre_ts = precision_score(y_true=np.array(true_onehot_labels), 207 | y_pred=np.array(predicted_onehot_labels_ts), average='micro') 208 | eval_rec_ts = recall_score(y_true=np.array(true_onehot_labels), 209 | y_pred=np.array(predicted_onehot_labels_ts), average='micro') 210 | eval_F1_ts = f1_score(y_true=np.array(true_onehot_labels), 211 | y_pred=np.array(predicted_onehot_labels_ts), average='micro') 212 | 213 | for top_num in range(args.topK): 214 | eval_pre_tk[top_num] = 
precision_score(y_true=np.array(true_onehot_labels), 215 | y_pred=np.array(predicted_onehot_labels_tk[top_num]), 216 | average='micro') 217 | eval_rec_tk[top_num] = recall_score(y_true=np.array(true_onehot_labels), 218 | y_pred=np.array(predicted_onehot_labels_tk[top_num]), 219 | average='micro') 220 | eval_F1_tk[top_num] = f1_score(y_true=np.array(true_onehot_labels), 221 | y_pred=np.array(predicted_onehot_labels_tk[top_num]), 222 | average='micro') 223 | 224 | # Calculate the average AUC 225 | eval_auc = roc_auc_score(y_true=np.array(true_onehot_labels), 226 | y_score=np.array(predicted_onehot_scores), average='micro') 227 | # Calculate the average PR 228 | eval_prc = average_precision_score(y_true=np.array(true_onehot_labels), 229 | y_score=np.array(predicted_onehot_scores), average='micro') 230 | 231 | return eval_loss, eval_auc, eval_prc, eval_pre_ts, eval_rec_ts, eval_F1_ts, \ 232 | eval_pre_tk, eval_rec_tk, eval_F1_tk 233 | 234 | # Generate batches 235 | batches_train = dh.batch_iter(list(create_input_data(train_data)), args.batch_size, args.epochs) 236 | num_batches_per_epoch = int((len(train_data['pad_seqs']) - 1) / args.batch_size) + 1 237 | 238 | # Training loop. For each batch... 
239 | for batch_train in batches_train: 240 | train_step(batch_train) 241 | current_step = tf.train.global_step(sess, harnn.global_step) 242 | 243 | if current_step % args.evaluate_steps == 0: 244 | logger.info("\nEvaluation:") 245 | eval_loss, eval_auc, eval_prc, \ 246 | eval_pre_ts, eval_rec_ts, eval_F1_ts, eval_pre_tk, eval_rec_tk, eval_F1_tk = \ 247 | validation_step(val_data, writer=validation_summary_writer) 248 | logger.info("All Validation set: Loss {0:g} | AUC {1:g} | AUPRC {2:g}" 249 | .format(eval_loss, eval_auc, eval_prc)) 250 | # Predict by threshold 251 | logger.info("Predict by threshold: Precision {0:g}, Recall {1:g}, F1 {2:g}" 252 | .format(eval_pre_ts, eval_rec_ts, eval_F1_ts)) 253 | # Predict by topK 254 | logger.info("Predict by topK:") 255 | for top_num in range(args.topK): 256 | logger.info("Top{0}: Precision {1:g}, Recall {2:g}, F1 {3:g}" 257 | .format(top_num+1, eval_pre_tk[top_num], eval_rec_tk[top_num], eval_F1_tk[top_num])) 258 | best_saver.handle(eval_prc, sess, current_step) 259 | if current_step % args.checkpoint_steps == 0: 260 | checkpoint_prefix = os.path.join(checkpoint_dir, "model") 261 | path = saver.save(sess, checkpoint_prefix, global_step=current_step) 262 | logger.info("Saved model checkpoint to {0}\n".format(path)) 263 | if current_step % num_batches_per_epoch == 0: 264 | current_epoch = current_step // num_batches_per_epoch 265 | logger.info("Epoch {0} has finished!".format(current_epoch)) 266 | 267 | logger.info("All Done.") 268 | 269 | 270 | if __name__ == '__main__': 271 | train_harnn() -------------------------------------------------------------------------------- /HARNN/visualization.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'Randolph' 3 | 4 | import sys 5 | import time 6 | import logging 7 | 8 | sys.path.append('../') 9 | logging.getLogger('tensorflow').disabled = True 10 | 11 | import tensorflow as tf 12 | from utils import 
checkmate as cm 13 | from utils import data_helpers as dh 14 | from utils import param_parser as parser 15 | 16 | args = parser.parameter_parser() 17 | MODEL = dh.get_model_name() 18 | logger = dh.logger_fn("tflog", "logs/Test-{0}.log".format(time.asctime())) 19 | 20 | CPT_DIR = 'runs/' + MODEL + '/checkpoints/' 21 | BEST_CPT_DIR = 'runs/' + MODEL + '/bestcheckpoints/' 22 | SAVE_DIR = 'output/' + MODEL 23 | 24 | 25 | def create_input_data(data: dict): 26 | return zip(data['pad_seqs'], data['content'], data['section'], data['subsection'], data['group'], 27 | data['subgroup'], data['onehot_labels']) 28 | 29 | 30 | def normalization(visual_list, visual_len, epsilon=1e-12): 31 | min_weight = min(visual_list[:visual_len]) 32 | max_weight = max(visual_list[:visual_len]) 33 | margin = max_weight - min_weight 34 | 35 | result = [] 36 | for i in range(visual_len): 37 | value = (visual_list[i] - min_weight) / (margin + epsilon) 38 | result.append(value) 39 | return result 40 | 41 | 42 | def create_visual_file(input_x, visual_list: list, seq_len): 43 | f = open('attention.html', 'w') 44 | f.write('\n') 45 | f.write('
\n') 46 | for visual in visual_list: 47 | f.write('

\n') 48 | for i in range(seq_len): 49 | alpha = "{:.2f}".format(visual[i]) 50 | word = input_x[0][i] 51 | f.write('\t{1}\n' 52 | .format(alpha, word)) 53 | f.write('

\n') 54 | f.write('
\n') 55 | f.write('') 56 | f.close() 57 | 58 | 59 | def visualize(): 60 | """Visualize HARNN model.""" 61 | 62 | # Load word2vec model 63 | word2idx, embedding_matrix = dh.load_word2vec_matrix(args.word2vec_file) 64 | 65 | # Load data 66 | logger.info("Loading data...") 67 | logger.info("Data processing...") 68 | test_data = dh.load_data_and_labels(args, args.test_file, word2idx) 69 | 70 | # Load harnn model 71 | OPTION = dh._option(pattern=1) 72 | if OPTION == 'B': 73 | logger.info("Loading best model...") 74 | checkpoint_file = cm.get_best_checkpoint(BEST_CPT_DIR, select_maximum_value=True) 75 | else: 76 | logger.info("Loading latest model...") 77 | checkpoint_file = tf.train.latest_checkpoint(CPT_DIR) 78 | logger.info(checkpoint_file) 79 | 80 | graph = tf.Graph() 81 | with graph.as_default(): 82 | session_conf = tf.ConfigProto( 83 | allow_soft_placement=args.allow_soft_placement, 84 | log_device_placement=args.log_device_placement) 85 | session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth 86 | sess = tf.Session(config=session_conf) 87 | with sess.as_default(): 88 | # Load the saved meta graph and restore variables 89 | saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file)) 90 | saver.restore(sess, checkpoint_file) 91 | 92 | # Get the placeholders from the graph by name 93 | input_x = graph.get_operation_by_name("input_x").outputs[0] 94 | input_y_first = graph.get_operation_by_name("input_y_first").outputs[0] 95 | input_y_second = graph.get_operation_by_name("input_y_second").outputs[0] 96 | input_y_third = graph.get_operation_by_name("input_y_third").outputs[0] 97 | input_y_fourth = graph.get_operation_by_name("input_y_fourth").outputs[0] 98 | input_y = graph.get_operation_by_name("input_y").outputs[0] 99 | dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0] 100 | alpha = graph.get_operation_by_name("alpha").outputs[0] 101 | is_training = graph.get_operation_by_name("is_training").outputs[0] 102 | 103 
| # Tensors we want to evaluate 104 | first_visual = graph.get_operation_by_name("first-output/visual").outputs[0] 105 | second_visual = graph.get_operation_by_name("second-output/visual").outputs[0] 106 | third_visual = graph.get_operation_by_name("third-output/visual").outputs[0] 107 | fourth_visual = graph.get_operation_by_name("fourth-output/visual").outputs[0] 108 | 109 | # Split the output nodes name by '|' if you have several output nodes 110 | output_node_names = "first-output/visual|second-output/visual|third-output/visual|fourth-output/visual|output/scores" 111 | 112 | # Save the .pb model file 113 | output_graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, 114 | output_node_names.split("|")) 115 | tf.train.write_graph(output_graph_def, "graph", "graph-harnn-{0}.pb".format(MODEL), as_text=False) 116 | 117 | # Generate batches for one epoch 118 | batches = dh.batch_iter(list(create_input_data(test_data)), args.batch_size, 1, shuffle=False) 119 | 120 | for batch_test in batches: 121 | x, x_content, sec, subsec, group, subgroup, y_onehot = zip(*batch_test) 122 | 123 | feed_dict = { 124 | input_x: x, 125 | input_y_first: sec, 126 | input_y_second: subsec, 127 | input_y_third: group, 128 | input_y_fourth: subgroup, 129 | input_y: y_onehot, 130 | dropout_keep_prob: 1.0, 131 | alpha: args.alpha, 132 | is_training: False 133 | } 134 | batch_first_visual, batch_second_visual, batch_third_visual, batch_fourth_visual = \ 135 | sess.run([first_visual, second_visual, third_visual, fourth_visual], feed_dict) 136 | 137 | batch_visual = [batch_first_visual, batch_second_visual, batch_third_visual, batch_fourth_visual] 138 | 139 | seq_len = len(x_content[0]) 140 | pad_len = len(batch_first_visual[0]) 141 | length = (pad_len if seq_len >= pad_len else seq_len) 142 | visual_list = [] 143 | 144 | for visual in batch_visual: 145 | visual_list.append(normalization(visual[0].tolist(), length)) 146 | 147 | create_visual_file(x_content, visual_list, 
seq_len) 148 | logger.info("Done.") 149 | 150 | 151 | if __name__ == '__main__': 152 | visualize() 153 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hierarchical Multi-Label Text Classification 2 | 3 | [![Python Version](https://img.shields.io/badge/language-python3.6-blue.svg)](https://www.python.org/downloads/) [![Build Status](https://travis-ci.org/RandolphVI/Hierarchical-Multi-Label-Text-Classification.svg?branch=master)](https://travis-ci.org/RandolphVI/Hierarchical-Multi-Label-Text-Classification)[![Codacy Badge](https://api.codacy.com/project/badge/Grade/80fe0da5f16146219a5d0a66f8c8ed70)](https://www.codacy.com/manual/chinawolfman/Hierarchical-Multi-Label-Text-Classification?utm_source=github.com&utm_medium=referral&utm_content=RandolphVI/Hierarchical-Multi-Label-Text-Classification&utm_campaign=Badge_Grade)[![License](https://img.shields.io/github/license/RandolphVI/Hierarchical-Multi-Label-Text-Classification.svg)](https://www.apache.org/licenses/LICENSE-2.0) 4 | 5 | This repository is my research project, which has been accepted by CIKM'19. 
The [paper](https://dl.acm.org/citation.cfm?id=3357384.3357885) is already published. 6 | 7 | The main objective of the project is to solve the hierarchical multi-label text classification (**HMTC**) problem. Unlike flat multi-label text classification, HMTC assigns each instance (object) to multiple categories that are organized in a hierarchical structure; it is a fundamental but challenging task in numerous applications. 8 | 9 | ## Requirements 10 | 11 | - Python 3.6 12 | - Tensorflow 1.15.0 13 | - Tensorboard 1.15.0 14 | - Sklearn 0.19.1 15 | - Numpy 1.16.2 16 | - Gensim 3.8.3 17 | - Tqdm 4.49.0 18 | 19 | ## Introduction 20 | 21 | Many real-world applications organize data in a hierarchical structure, where classes are specialized into subclasses or grouped into superclasses. For example, an electronic document (e.g. web-pages, digital libraries, patents and e-mails) is associated with multiple categories and all these categories are stored hierarchically in a **tree** or **Directed Acyclic Graph (DAG)**. 22 | 23 | It provides an elegant way to show the characteristics of data and a multi-dimensional perspective to tackle the classification problem via hierarchy structure. 24 | 25 | ![](https://farm8.staticflickr.com/7806/31717892987_e2e851eaaf_o.png) 26 | 27 | The Figure shows an example of predefined labels in hierarchical multi-label classification of documents in patent texts. 28 | 29 | - Documents are shown as colored rectangles, labels as rounded rectangles. 30 | - Circles in the rounded rectangles indicate that the corresponding document has been assigned the label. 31 | - Arrows indicate a hierarchical structure between labels. 32 | 33 | ## Project 34 | 35 | The project structure is below: 36 | 37 | ```text 38 | . 
39 | ├── HARNN 40 | │   ├── train.py 41 | │   ├── layers.py 42 | │   ├── ham.py 43 | │   ├── test.py 44 | │   └── visualization.py 45 | ├── utils 46 | │   ├── checkmate.py 47 | │   ├── param_parser.py 48 | │ └── data_helpers.py 49 | ├── data 50 | │   ├── word2vec_100.model.* [Need Download] 51 | │   ├── Test_sample.json 52 | │   ├── Train_sample.json 53 | │   └── Validation_sample.json 54 | ├── LICENSE 55 | ├── README.md 56 | └── requirements.txt 57 | ``` 58 | 59 | ## Data 60 | 61 | You can download the [Patent Dataset](https://drive.google.com/open?id=1So3unr5p_vlYq31gE0Ly07Z2XTvD5QlM) used in the paper. And the [Word2vec model file](https://drive.google.com/file/d/1tZ9WPXkoJmWwtcnOU8S_KGPMp8wnYohR/view?usp=sharing) (dim=100) is also uploaded. **Make sure they are under the `/data` folder.** 62 | 63 | :warning: As for **Education Dataset**, they may be subject to copyright protection under Chinese law. Thus, detailed information is not provided. 64 | 65 | ### :octocat: Text Segment 66 | 67 | 1. You can use `nltk` package if you are going to deal with the English text data. 68 | 69 | 2. You can use `jieba` package if you are going to deal with the Chinese text data. 70 | 71 | ### :octocat: Data Format 72 | 73 | See data format in `/data` folder which including the data sample files. For example: 74 | 75 | ``` 76 | {"id": "3930316", 77 | "title": ["sighting", "firearm"], 78 | "abstract": ["rear", "sight", "firearm", "ha", "peephole", "device", "formed", "hollow", "tube", "end", ...], 79 | "section": [5], "subsection": [104], "group": [512], "subgroup": [6535], 80 | "labels": [5, 113, 649, 7333]} 81 | ``` 82 | 83 | - `id`: just the id. 84 | - `title` & `abstract`: it's the word segment (after cleaning stopwords). 85 | - `section` / `subsection` / `group` / `subgroup`: it's the first / second / third / fourth level category index. 86 | - `labels`: it's the total category which add the index offset. 
(I will explain that later) 87 | 88 | ### :octocat: How to construct the data? 89 | 90 | Use the sample of the Patent Dataset as an example. I will explain how to construct the label index. 91 | For the Patent dataset, the number of classes at each level is: [9, 128, 661, 8364]. 92 | 93 | **Step 1:** For the first level, the Patent dataset has 9 classes. You should index these 9 classes first, like: 94 | 95 | ``` 96 | {"Chemistry": 0, "Physics": 1, "Electricity": 2, "XXX": 3, ..., "XXX": 8} 97 | ``` 98 | 99 | **Step 2**: Next, you index the next level (total **128** classes), like: 100 | 101 | ``` 102 | {"Inorganic Chemistry": 0, "Organic Chemistry": 1, "Nuclear Physics": 2, "XXX": 3, ..., "XXX": 127} 103 | ``` 104 | 105 | **Step 3**: Then, you index the third level (total **661** classes), like: 106 | 107 | ``` 108 | {"Steroids": 0, "Peptides": 1, "Heterocyclic Compounds": 2, ..., "XXX": 660} 109 | ``` 110 | 111 | **Step 4**: If you have a fourth or deeper level, index it in the same way. 112 | 113 | **Step 5**: Now suppose you have one record (**id: 3930316** mentioned before): 114 | 115 | ``` 116 | {"id": "3930316", 117 | "title": ["sighting", "firearm"], 118 | "abstract": ["rear", "sight", "firearm", "ha", "peephole", "device", "formed", "hollow", "tube", "end", ...], 119 | "section": [5], "subsection": [104], "group": [512], "subgroup": [6535], 120 | "labels": [5, 104+9, 512+9+128, 6535+9+128+661]} 121 | ``` 122 | 123 | Thus, the record should be constructed as follows: 124 | 125 | ``` 126 | {"id": "3930316", 127 | "title": ["sighting", "firearm"], 128 | "abstract": ["rear", "sight", "firearm", "ha", "peephole", "device", "formed", "hollow", "tube", "end", ...], 129 | "section": [5], "subsection": [104], "group": [512], "subgroup": [6535], 130 | "labels": [5, 113, 649, 7333]} 131 | ``` 132 | 133 | This repository can be used with other datasets (text classification) in two ways: 134 | 1. 
Convert your datasets into the same format as [the sample](https://github.com/RandolphVI/Hierarchical-Multi-Label-Text-Classification/tree/master/data). 135 | 2. Modify the data preprocessing code in `data_helpers.py`. 136 | 137 | Which option is better depends on your data and task. 138 | 139 | ### :octocat: Pre-trained Word Vectors 140 | 141 | You can pre-train your word vectors (based on your corpus) in many ways: 142 | - Use the `gensim` package to pre-train data. 143 | - Use the `glove` tools to pre-train data. 144 | - You can even use `bert` to pre-train data. 145 | 146 | ## Usage 147 | 148 | See [Usage](https://github.com/RandolphVI/Hierarchical-Multi-Label-Text-Classification/blob/master/Usage.md). 149 | 150 | ## Network Structure 151 | 152 | ![](https://live.staticflickr.com/65535/48647692206_2e5e6e7f13_o.png) 153 | 154 | ## Reference 155 | 156 | **If you want to follow the paper or utilize the code, please cite the following in your work:** 157 | 158 | ```bibtex 159 | @inproceedings{huang2019hierarchical, 160 | author = {Wei Huang and 161 | Enhong Chen and 162 | Qi Liu and 163 | Yuying Chen and 164 | Zai Huang and 165 | Yang Liu and 166 | Zhou Zhao and 167 | Dan Zhang and 168 | Shijin Wang}, 169 | title = {Hierarchical Multi-label Text Classification: An Attention-based Recurrent Network Approach}, 170 | booktitle = {Proceedings of the 28th {ACM} {CIKM} International Conference on Information and Knowledge Management, {CIKM} 2019, Beijing, CHINA, Nov 3-7, 2019}, 171 | pages = {1051--1060}, 172 | year = {2019}, 173 | } 174 | ``` 175 | --- 176 | 177 | ## About Me 178 | 179 | 黄威,Randolph 180 | 181 | SCU SE Bachelor; USTC CS Ph.D. 
182 | 183 | Email: chinawolfman@hotmail.com 184 | 185 | My Blog: [randolph.pro](http://randolph.pro) 186 | 187 | LinkedIn: [randolph's linkedin](https://www.linkedin.com/in/randolph-%E9%BB%84%E5%A8%81/) 188 | -------------------------------------------------------------------------------- /Usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | ## Options 4 | 5 | ### Input and output options 6 | 7 | ``` 8 | --train-file STR Training file. Default is `data/Train_sample.json`. 9 | --validation-file STR Validation file. Default is `data/Validation_sample.json`. 10 | --test-file STR Testing file. Default is `data/Test_sample.json`. 11 | --word2vec-file STR Word2vec model file. Default is `data/word2vec_100.model`. 12 | ``` 13 | 14 | ### Model option 15 | 16 | ``` 17 | --pad-seq-len INT Padding Sequence length of data. Depends on data. 18 | --embedding-type INT The embedding type. Default is 1. 19 | --embedding-dim INT Dim of character embedding. Default is 100. 20 | --lstm-dim INT Dim of LSTM neurons. Default is 256. 21 | --lstm-layers INT Number of LSTM layers. Default is 1. 22 | --attention-dim INT Dim of Attention neurons. Default is 200. 23 | --attention-penalization BOOL Use attention penalization or not. Default is True. 24 | --fc-dim INT Dim of FC neurons. Default is 512. 25 | --dropout-rate FLOAT Dropout keep probability. Default is 0.5. 26 | --alpha FLOAT Weight of global part in loss calculation. Default is 0.5. 27 | --num-classes-list LIST Number of labels at each level of the hierarchical structure. Depends on data. 28 | --total-classes INT Total number of labels. Depends on data. 29 | --topK INT Number of top K prediction classes. Default is 5. 30 | --threshold FLOAT Threshold for prediction classes. Default is 0.5. 31 | ``` 32 | 33 | ### Training option 34 | 35 | ``` 36 | --epochs INT Number of epochs. Default is 20. 37 | --batch-size INT Batch size. Default is 32. 38 | --learning-rate FLOAT Adam learning rate. 
Default is 0.001. 39 | --decay-rate FLOAT Rate of decay for learning rate. Default is 0.95. 40 | --decay-steps INT How many steps before decaying the learning rate. Default is 500. 41 | --evaluate-steps INT How many steps between evaluations on the validation set. Default is 50. 42 | --l2-lambda FLOAT L2 regularization lambda. Default is 0.0. 43 | --checkpoint-steps INT How many steps between model checkpoints. Default is 50. 44 | --num-checkpoints INT Number of checkpoints to store. Default is 10. 45 | ``` 46 | 47 | ## Training 48 | 49 | The following command trains the model. 50 | 51 | ```bash 52 | $ python3 train_harnn.py 53 | ``` 54 | 55 | To train a model for 30 epochs with the batch size set to 256: 56 | 57 | ```bash 58 | $ python3 train_harnn.py --epochs 30 --batch-size 256 59 | ``` 60 | 61 | At startup, the program shows: 62 | 63 | ![](https://live.staticflickr.com/65535/49737484641_a1fca341c6_o.png) 64 | 65 | **You need to choose Training or Restore. (T for Training and R for Restore)** 66 | 67 | After training, you will get the `/logs` and `/runs` folders. 68 | 69 | - The `/logs` folder stores the log files. 70 | - The `/runs` folder stores the checkpoints. 71 | 72 | The layout should look like this: 73 | 74 | ```text 75 | . 
76 | ├── logs 77 | ├── runs 78 | │   └── 1586077936 [a 10-digit format] 79 | │   ├── bestcheckpoints 80 | │   ├── checkpoints 81 | │   ├── embedding 82 | │   └── summaries 83 | ├── test_harnn.py 84 | ├── text_harnn.py 85 | └── train_harnn.py 86 | ``` 87 | 88 | **The program names and identifies the model using a Unix timestamp (a 10-digit number, like 1586077936).** 89 | 90 | ## Restore 91 | 92 | When your model stops training for some reason and you want to resume training, proceed as follows. 93 | 94 | At startup, the program shows: 95 | 96 | ![](https://live.staticflickr.com/65535/49737999667_b6cd3e0f94_o.png) 97 | 98 | **You need to input R for Restore.** 99 | 100 | Then you will be asked to give the model name (a 10-digit format, like 1586077936): 101 | 102 | ![](https://live.staticflickr.com/65535/49737156823_a5945fa958_o.png) 103 | 104 | The model will then continue training from where it last stopped. 105 | 106 | ## Test 107 | 108 | The following command tests the model. 109 | 110 | ```bash 111 | $ python3 test_harnn.py 112 | ``` 113 | 114 | Then you will be asked to give the model name (a 10-digit format, like 1586077936): 115 | 116 | ![](https://live.staticflickr.com/65535/49737165843_56b8a25363_o.png) 117 | 118 | You can then choose to use the best model or the latest model **(B for Best, L for Latest)**: 119 | 120 | ![](https://live.staticflickr.com/65535/49737168723_08a512aea8_o.png) 121 | 122 | Finally, you will get the `predictions.json` file under the `/output` folder. The layout should look like this: 123 | 124 | ```text 125 | . 
126 | ├── graph 127 | ├── logs 128 | ├── output 129 | │   └── 1586077936 130 | │   └── predictions.json 131 | ├── runs 132 | │   └── 1586077936 133 | │   ├── bestcheckpoints 134 | │   ├── checkpoints 135 | │   ├── embedding 136 | │   └── summaries 137 | ├── test_harnn.py 138 | ├── text_harnn.py 139 | └── train_harnn.py 140 | ``` 141 | 142 | -------------------------------------------------------------------------------- /data/Train_sample.json: -------------------------------------------------------------------------------- 1 | {"id": "3930316", "title": ["sighting", "firearm"], "abstract": ["rear", "sight", "firearm", "ha", "peephole", "device", "formed", "hollow", "tube", "end", "closed", "peephole", "peephole", "ha", "central", "orifice", "orifice", "peephole", "rear", "side", "ha", "larger", "diameter", "orifice", "peephole", "front", "sight", "side", "peephole", "pivotally", "mounted", "cooperates", "elastic", "member", "hold", "peephole", "tube-opening", "tube-closing", "position", "embodiment", "peephole", "provided", "end", "tube"], "section": [5], "subsection": [104], "group": [512], "subgroup": [6535], "labels": [5, 113, 649, 7333]} 2 | {"id": "3930329", "title": ["bait", "molding", "device"], "abstract": ["bait", "molding", "device", "forming", "securing", "moldable", "bait", "material", "bread", "dough", "fishhook", "mold", "formed", "cup", "shaped", "mold", "section", "secured", "pliers-like", "device", "opening", "closing", "mold", "section", "mold", "elongate", "configuration", "accommodate", "entire", "fishhook", "moldable", "bait", "material", "surrounding", "fishhook", "mold", "section", "mold", "section", "includes", "groove", "edge", "mold", "permit", "fishing", "line", "attached", "hook", "pa", "mold", "closed"], "section": [0], "subsection": [0], "group": [7], "subgroup": [155], "labels": [0, 9, 144, 953]} 3 | {"id": "3930333", "title": ["coupling", "member", "toy", "vehicle", "drive", "system"], "abstract": ["coupling", "member", "toy", 
"vehicle", "drive", "system", "employed", "play", "situation", "coupling", "member", "generally", "comprised", "exaggerated", "triangularly-shaped", "portion", "funnel-shaped", "portion", "exaggerated", "triangularly-shaped", "portion", "toy", "vehicle", "driven", "forward", "backward", "complete", "u-turn", "disengaged", "drive", "chain", "drive", "system", "funnel-shaped", "portion", "ensures", "toy", "vehicle", "engaged", "driven", "chain", "child", "desire", "drive", "toy", "vehicle", "forward", "direction"], "section": [0], "subsection": [14], "group": [82], "subgroup": [1090, 1086], "labels": [0, 23, 219, 1888, 1884]} 4 | {"id": "3930351", "title": ["method", "apparatus", "transferring", "yarn", "package", "doffed", "textile", "machine", "container"], "abstract": ["present", "invention", "relates", "method", "apparatus", "transferring", "yarn", "package", "doffed", "textile", "machine", "provided", "conveyer", "belt", "disposed", "longitudinal", "direction", "thereof", "container", "yarn", "package", "carried", "end", "conveyer", "dropped", "container", "positioned", "receiving", "position", "end", "conveyer", "dropping", "distance", "end", "portion", "carrying", "surface", "conveyer", "surface", "receiving", "yarn", "package", "container", "maintained", "substantially", "predetermined", "distance", "larger", "width", "package", "smaller", "width", "package"], "section": [8, 1], "subsection": [127, 46], "group": [235, 659, 240], "subgroup": [8245, 2945, 3140, 3119], "labels": [8, 1, 136, 55, 372, 796, 377, 9043, 3743, 3938, 3917]} 5 | {"id": "3930440", "title": ["device", "conveying", "rolled", "food"], "abstract": ["device", "conveying", "transferring", "rolled", "food", "semi-finished", "material", "chain", "procedural", "step", "step", "comb-shaped", "cradle", "secured", "stage", "processing", "device", "travelling", "comb-shaped", "cradle", "pa", "fixed", "cradle", "upwardly", "material", "fixed", "cradle", "moved", "travelling", "cradle", "move", 
"fixed", "cradle", "stage", "pa", "fixed", "cradle", "downwardly", "semi-finished", "material", "put", "fixed", "cradle", "stage"], "section": [0], "subsection": [1], "group": [12], "subgroup": [215], "labels": [0, 10, 149, 1013]} 6 | {"id": "3930463", "title": ["vapor", "deposition", "apparatus", "including", "three-compartment", "evaporator"], "abstract": ["evaporation", "metal", "production", "alloy", "deposition", "component", "vapour", "phase", "carried", "controllably", "heated", "source", "comprising", "melting", "compartment", "operation", "metal", "melted", "mixing", "compartment", "constriction", "passage", "melting", "mixing", "compartment", "minimise", "back", "mixing", "molten", "metal", "evaporation", "compartment", "supply", "metal", "mixing", "compartment", "evaporation", "compartment", "surface", "molten", "metal", "heating", "preferably", "electron", "beam", "heating"], "section": [2], "subsection": [68], "group": [334], "subgroup": [4548], "labels": [2, 77, 471, 5346]} 7 | {"id": "3930582", "title": ["system", "testing", "paper", "money"], "abstract": ["test", "genuineness", "condition", "dollar", "bill", "substantially", "identical", "paper", "thickness", "gauge", "positioned", "path", "paper", "determine", "deviation", "thickness", "passing", "specimen", "predetermined", "thickness", "reference", "specimen", "scanned", "concurrently", "therewith", "measured", "deviation", "fed", "processor", "count", "positive", "negative", "deviation", "multiplicity", "incremental", "period", "determines", "nature", "irregularity", "count", "deviation", "sign", "gauge", "measuring", "thickness", "parallel", "track", "system", "discriminate", "irregularity", "gap", "overlapping", "adhesive", "tape", "dog-eared", "corner"], "section": [6], "subsection": [112], "group": [568], "subgroup": [7139], "labels": [6, 121, 705, 7937]} 8 | {"id": "3930737", "title": ["stud", "assembly"], "abstract": ["stud", "assembly", "produced", "holding", "fixed", "position", "head", 
"portion", "gripping", "surface", "extending", "base", "deformable", "washer", "extended", "base", "stud", "member", "provided", "receptacle", "end", "portion", "positioned", "respect", "base", "head", "interengaging", "cooperating", "surface", "receptacle", "base", "aligned", "provide", "locking", "aperture", "receiving", "washer", "finally", "washer", "deformed", "extends", "locking", "aperture", "locking", "stud", "member", "head", "forming", "unitary", "stud", "assembly"], "section": [8, 1], "subsection": [26, 127], "group": [660, 125], "subgroup": [1688, 8350], "labels": [8, 1, 35, 136, 797, 262, 2486, 9148]} 9 | {"id": "3930764", "title": ["air", "tool", "overspeed", "shutoff", "device"], "abstract": ["overspeed", "shutoff", "device", "rotary", "pneumatic", "tool", "disclosed", "device", "operable", "shut", "air", "supply", "motor", "failure", "governor", "function", "properly", "preventing", "overspeeding", "motor", "device", "includes", "valve", "closing", "plate", "positioned", "path", "air", "flow", "pneumatic", "motor", "upstream", "inlet", "port", "passage", "air", "motor", "normal", "operation", "tool", "valve", "plate", "rotates", "motor", "drive", "shaft", "retained", "position", "spaced", "air", "inlet", "port", "locking", "mechanism", "engaging", "drive", "shaft", "tool", "prevent", "axial", "movement", "therealong", "plate", "locking", "device", "comprises", "cantilever", "mounted", "spring", "wire", "engagement", "groove", "drive", "shaft", "centrifugally", "responsive", "weight", "operably", "connected", "disengage", "wire", "groove", "response", "attainment", "predetermined", "rotary", "speed", "failure", "main", "governor", "tool", "consequent", "acceleration", "motor", "predetermined", "speed", "wire", "disengaged", "shaft", "groove", "air", "pressure", "drop", "valve", "inlet", "port", "closure", "plate", "move", "cover", "inlet", "port", "stopping", "flow", "air", "motor"], "section": [8, 5], "subsection": [127, 88], "group": [416, 660, 
415], "subgroup": [5270, 8340, 5280], "labels": [8, 5, 136, 97, 553, 797, 552, 6068, 9138, 6078]} 10 | {"id": "3930775", "title": ["testing", "correcting", "metering", "accuracy", "multihole", "spinnerets"], "abstract": ["method", "apparatus", "off-line", "testing", "correcting", "metering", "accuracy", "split", "multihole", "spinneret", "fed", "single", "metered", "stream", "determining", "end", "end", "variation", "flow", "rate", "reworking", "number", "hole", "lower", "flow", "rate", "side", "split", "spinneret", "calculated", "multiplying", "average", "test", "percent", "bias", "split", "spinneret", "predetermined", "constant", "bring", "end", "end", "variation", "flow", "rate", "required", "accuracy"], "section": [6, 3], "subsection": [106, 72], "group": [346, 518], "subgroup": [4630, 6587], "labels": [6, 3, 115, 81, 483, 655, 5428, 7385]} 11 | {"id": "3930811", "title": ["reactor", "pressure", "gasification", "coal"], "abstract": ["reactor", "continuous", "gasification", "coal", "superatmospheric", "pressure", "elevated", "temperature", "gaseous", "gasifying", "agent", "free", "oxygen", "oxygen-free", "gasifying", "agent", "steam", "carbon", "dioxide", "disclosed", "reactor", "includes", "substantially", "conical", "rotary", "grate", "rotatably", "mounted", "lower", "portion", "reactor", "housing", "rotary", "grate", "feed", "gasifying", "agent", "discharge", "gasification", "residue", "notwithstanding", "inside", "diameter", "reactor", "housing", "clearance", "rotary", "grate", "housing", "millimeter", "height", "annular", "rim", "rotary", "grate", "millimeter", "vertical", "distance", "rotary", "grate", "housing", "bottom", "millimeter"], "section": [2], "subsection": [61], "group": [304], "subgroup": [4224, 4225], "labels": [2, 70, 441, 5022, 5023]} 12 | {"id": "3930926", "title": ["apparatus", "forming", "tubular", "fibrous", "insulatory", "article"], "abstract": ["apparatus", "forming", "edge", "mineral", "fiber", "blanket", "advance", "processing", 
"path", "guide", "surface", "shaped", "continuously", "direct", "feathered", "ragged", "longitudinal", "edge", "uncured", "mat", "mineral", "fiber", "introduced", "forming", "apparatus", "gathering", "fiber", "control", "uniformity", "density", "edge", "portion", "mass", "guide", "comprise", "surface", "extending", "radially", "outward", "effective", "end", "pressure", "roll", "cooperating", "rotatable", "mandrel", "formation", "tube", "fiber", "mat", "surface", "encountered", "advancing", "blanket", "raise", "feathered", "edge", "raised", "edge", "turned", "major", "body", "portion", "blanket", "surface", "surface", "parallel", "path", "advance", "establish", "desired", "margin", "maintain", "margin", "blanket", "compacted", "major", "face", "surface", "provided", "form", "shape", "marginal", "edge", "mass", "mineral", "fiber", "obtain", "desired", "configuration", "square", "chamfered", "edge"], "section": [3], "subsection": [75], "group": [360], "subgroup": [4712], "labels": [3, 84, 497, 5510]} 13 | {"id": "3931027", "title": ["cellulose", "material", "treated", "thermosetting", "resin", "improved", "physical", "property", "elevated", "temperature"], "abstract": ["cellulose", "material", "improved", "resistance", "thermal", "deterioration", "application", "insulation", "material", "electrical", "apparatus", "cellulose", "material", "treated", "aqueous", "dispersion", "liquid", "uncured", "crosslinkable", "thermosetting", "resin", "epoxy", "resin", "water", "soluble", "nitrogen-containing", "compound", "curing", "resin", "cellulose", "molecule", "believed", "enter", "crosslinking", "reaction", "provide", "treated", "cellulosic", "product", "capable", "withstanding", "deteriorating", "action", "heat", "extended", "period", "time", "protein", "material", "casein", "isolated", "soy", "protein", "added", "treating", "liquid", "protein", "contributes", "additional", "nitrogen", "group", "treating", "medium", "increase", "thermal", "stability", "cellulose", "act", 
"film", "thermal", "stability", "cellulose", "material", "improved", "addition", "organic", "amine", "melamine", "treating", "liquid"], "section": [7], "subsection": [120], "group": [602, 600], "subgroup": [7402, 7376], "labels": [7, 129, 739, 737, 8200, 8174]} 14 | {"id": "3931047", "title": ["catalyst", "composition", "room", "temperature", "vulcanizing", "silicone", "composition", "catalyzed", "composition", "therefrom"], "abstract": ["stable", "catalyst", "composition", "providing", "faster", "cure", "time", "room", "temperature", "vulcanizing", "organopolysiloxane", "composition", "comprise", "stannous", "salt", "branched", "chain", "alkyl", "carboxylic", "acid", "carbon", "atom", "stabilizing", "carrier", "therefor", "methyl", "alkyl", "polysiloxane", "fluid", "hydroxy", "aryl", "substituents", "catalyst", "composition", "uniquely", "adapted", "provide", "injectable", "composition", "curable", "low", "density", "high", "compressive", "strength", "syntactic", "foam", "custom", "fitting", "footwear", "wearer"], "section": [2], "subsection": [59], "group": [290, 286], "subgroup": [3858, 3993, 3964], "labels": [2, 68, 427, 423, 4656, 4791, 4762]} 15 | {"id": "3931124", "title": ["fluoroelastomer", "composition"], "abstract": ["fluoroelastomer", "composition", "fluoroelastomer", "member", "selected", "group", "consisting", "bivalent", "metal", "oxide", "bivalent", "metal", "hydroxide", "mixture", "bivalent", "metal", "oxide", "metal", "hydroxide", "metal", "salt", "weak", "acid", "aromatic", "polyhydroxy", "compound", "quaternary", "ammonium", "compound", "-", "-", "diaza-bicyclo", "-", "fluoro-rubber", "low", "compression", "set", "excellent", "elastic", "property", "fluoroelastomer", "composition", "handled", "processed", "safety", "cured", "good", "cure", "rate", "ha", "excellent", "storage", "property", "cure", "rate", "composition", "accelerated", "addition", "water", "metal", "compound", "produce", "water", "reacting", "hydrogen", "fluoride"], "section": 
[2], "subsection": [59], "group": [289, 290], "subgroup": [3938, 3942, 3965], "labels": [2, 68, 426, 427, 4736, 4740, 4763]} 16 | {"id": "3931140", "title": ["h-gly-gly-tyr-ala", "-", "somatostatin"], "abstract": ["growth", "hormone", "release", "inhibiting", "compound", "protamine", "zinc", "protamine", "aluminum", "non-toxic", "acid", "addition", "salt", "thereof", "linear", "heptadecapeptide", "intermediate"], "section": [2, 8, 0], "subsection": [58, 12, 127], "group": [659, 282, 68], "subgroup": [8287, 843, 8335, 3713], "labels": [2, 8, 0, 67, 21, 136, 796, 419, 205, 9085, 1641, 9133, 4511]} 17 | {"id": "3931330", "title": ["process", "production", "benzaldehyde"], "abstract": ["liquid", "phase", "co-oxidation", "process", "production", "benzaldehyde", "compound", "structural", "formula", "selected", "group", "radical", "consisting", "hydrogen", "halogen", "methyl", "methoxy", "comprising", "admixing", "toluene", "compound", "structural", "formula", "defined", "aliphatic", "saturated", "aldehyde", "carbon", "atom", "molar", "ratio", "toluene", "compound", "aldehyde", "preferably", "mol", "toluene", "compound", "mol", "aldehyde", "oxygen", "gas", "consisting", "carbon", "hydrogen", "oxygen", "atom", "temperature", "range", "degree", "degree"], "section": [2], "subsection": [58], "group": [276], "subgroup": [3551, 3548], "labels": [2, 67, 413, 4349, 4346]} 18 | {"id": "3931389", "title": ["process", "desulfurizing", "hot", "gas"], "abstract": ["gas", "produced", "reacting", "fuel", "oxygen", "gas", "water", "vapor", "pressure", "desulfurized", "scrubbing", "concentrated", "solution", "alkali", "salt", "weak", "inorganic", "acid", "temperature", "atmospheric-pressure", "boiling", "point", "solution", "column", "maintaining", "exchange", "ratio", "cubic", "meter", "concentrated", "solution", "standard", "cubic", "meter", "hydrogen", "sulfide", "gas", "purified"], "section": [2, 1], "subsection": [61, 15], "group": [305, 86], "subgroup": [4226, 1165], "labels": [2, 
1, 70, 24, 442, 223, 5024, 1963]} 19 | {"id": "3931401", "title": ["-", "", "-", "adenosine", "carboxamides", "increasing", "coronary", "sinus", "partial", "pressure", "oxygen"], "abstract": ["amide", "-", "", "-", "adensine", "carboxylic", "acid", "represented", "formula", "hydrogen", "loweralkyl", "loweralkenyl", "loweralkynyl", "cycloalkyl", "hydrogen", "acyl", "form", "isopropylidene", "benzylidene", "moiety", "pharmaceutically", "acceptable", "acid", "addition", "salt", "thereof", "compound", "hydrogen", "treating", "cardiovascular", "disorder", "anti-anginal", "anti-hypertensive", "agent", "compound", "acyl", "form", "isopropylidene", "benzylidene", "moiety", "intermediate", "preparation", "final", "product", "=", "hydrogen"], "section": [2], "subsection": [58], "group": [280], "subgroup": [3686], "labels": [2, 67, 417, 4484]} 20 | {"id": "3931405", "title": ["penicillin", "ester", "method", "composition", "treating", "infectious", "disease"], "abstract": ["penicillin", "ester", "formula", "alkyl", "carbon", "atom", "phenyl", "thienyl", "furyl", "phenyl", "substituted", "member", "group", "consisting", "halogen", "hydroxy", "amino", "selected", "group", "consisting", "--", "tetrazolyl", "--", "nhso", "selected", "group", "consisting", "radical", "-", "defined", "specification", "ester", "resorbed", "oral", "administration"], "section": [2, 8], "subsection": [125, 58], "group": [278, 277, 655], "subgroup": [8088, 3658, 3676], "labels": [2, 8, 134, 67, 415, 414, 792, 8886, 4456, 4474]} 21 | {"id": "3931465", "title": ["blooming", "control", "charge", "coupled", "imager"], "abstract": ["improved", "operational", "blooming", "control", "circuit", "charge", "coupled", "device", "image", "sensing", "array", "accumulated", "", "region", "substrate", "driven", "depletion", "", "end", "integration", "time", "prior", "transfer", "content", "register", "found", "improve", "resolution", "reproduced", "image"], "section": [7], "subsection": [123, 120], "group": [607, 
639], "subgroup": [7557, 7948], "labels": [7, 132, 129, 744, 776, 8355, 8746]} 22 | {"id": "3931487", "title": ["electric", "momentary", "action", "push-button", "switch"], "abstract": ["electric", "switch", "comprising", "casing", "base", "central", "terminal", "lateral", "terminal", "mounted", "thereon", "pushbutton", "slidably", "mounted", "opening", "casing", "opposite", "base", "metal", "switching", "member", "sliding", "contact", "terminal", "brought", "contact", "terminal", "spring", "mounted", "switching", "member", "pushbutton", "urge", "push-button", "released", "position", "push", "switching", "member", "contact", "terminal", "terminal", "actuator", "integral", "push-button", "depressed", "tilt", "switching", "member", "terminal", "move", "switching", "member", "terminal", "contact", "terminal"], "section": [7], "subsection": [120], "group": [604], "subgroup": [7421], "labels": [7, 129, 741, 8219]} 23 | {"id": "3931544", "title": ["fast", "warm", "electronic", "ballast", "circuit", "high", "pressure", "discharge", "lamp"], "abstract": ["electronic", "ballast", "circuit", "reducing", "warm", "time", "high", "intensity", "discharge", "hid", "lamp", "lamp", "current", "flow", "abruptly", "reduced", "switching", "responsive", "load", "voltage", "variation", "attainment", "power", "amount", "sufficient", "activate", "hid", "lamp"], "section": [8, 7], "subsection": [124, 127], "group": [644, 659], "subgroup": [8212, 8032], "labels": [8, 7, 133, 136, 781, 796, 9010, 8830]} 24 | {"id": "3931566", "title": ["temperature", "compensated", "current", "sensing", "circuit", "power", "supply"], "abstract": ["converter", "power", "supply", "provided", "current", "regulating", "circuit", "magnetoresistive", "element", "disposed", "flux", "coupling", "proximity", "output", "inductor", "power", "supply", "providing", "control", "signal", "output", "current", "thereof", "element", "mounted", "air", "gap", "split-loop", "core", "inductor"], "section": [7], "subsection": 
[121], "group": [619], "subgroup": [7722, 7718], "labels": [7, 130, 756, 8520, 8516]} 25 | {"id": "3931604", "title": ["sampling", "automatic", "equalizer"], "abstract": ["automatic", "transversal", "equalizer", "provided", "input", "signal", "sampled", "stored", "series", "capacitor", "capacitor", "voltage", "sequentially", "recalled", "application", "analog", "multiplier", "receives", "sequence", "coefficient", "voltage", "stored", "series", "capacitor", "single", "analog", "multiplier", "performs", "function", "plural", "voltage", "controlled", "attenuator", "prior", "art", "equalizer"], "section": [7], "subsection": [123], "group": [637], "subgroup": [7901], "labels": [7, 132, 774, 8699]} 26 | {"id": "3931623", "title": ["reliable", "earth", "terminal", "satellite", "communication"], "abstract": ["reliable", "earth", "terminal", "satellite", "communication", "system", "capable", "unattended", "operation", "extended", "period", "time", "disclosed", "terminal", "includes", "antenna", "single", "fixed", "reflector", "provide", "multiple", "beam", "positioned", "small", "feed", "motion", "transmitter", "modular", "construction", "low", "power", "traveling", "wave", "tube", "power", "amplifier", "increment", "operating", "band", "transmitting", "chain", "designed", "carry", "voice", "data", "television", "signal", "satellite", "include", "modulator", "amplifier", "band-limiting", "filter", "frequency", "converter", "power", "amplifier", "amplifier", "capable", "operating", "full", "operating", "band", "operation", "amplifier", "limited", "assigned", "increment", "band-limiting", "filter", "single", "redundant", "high", "power", "amplifier", "provided", "remotely", "switched", "transmitting", "chain", "event", "failure", "amplifier", "output", "connected", "antenna", "directional", "filter", "multiplexer", "receiver", "includes", "low", "noise", "preamplifier", "featuring", "modular", "fail", "-", "soft", "", "design", "receiving", "chain", "low", "noise", 
"preamplifier", "channelized", "band", "increment", "transmitting", "chain", "separate", "converter", "demodulator", "module", "carrier", "subsystem", "broadband", "channel", "bandwidth", "determined", "intermediate", "frequency", "band", "pa", "filter", "prime", "power", "low", "voltage", "battery", "bank", "constantly", "recharged", "commercial", "power", "source", "terminal", "operated", "limited", "period", "time", "solely", "battery", "band", "commercial", "outage", "back-up", "motor", "generator", "recharging", "power", "extended", "period", "commercial", "outage", "terminal", "monitored", "controlled", "central", "control", "point", "terminal", "automatic", "self-protecting", "remote", "control", "limited", "parameter", "adjustment", "required", "normal", "operation", "antenna", "feed", "positioning", "change", "transmitter", "power", "switching", "spare", "power", "amplifier", "turning", "carrier"], "section": [8, 7], "subsection": [123, 125], "group": [633, 653], "subgroup": [8079, 7869, 7855], "labels": [8, 7, 132, 134, 770, 790, 8877, 8667, 8653]} 27 | {"id": "3931667", "title": ["interlocking", "attachment", "device"], "abstract": ["attachment", "device", "comprising", "filament", "laterally", "oriented", "bar", "end", "hollow", "body", "member", "end", "wall", "body", "member", "opening", "therethrough", "large", "receive", "filament", "bar", "parallel", "orientation", "width", "smaller", "length", "bar", "prevent", "withdrawal", "bar", "hollow", "interior", "subsequent", "insertion", "self-contained", "interlocked", "attachment", "obtained"], "section": [6, 8], "subsection": [127, 114], "group": [660, 577], "subgroup": [8345, 8348, 7219], "labels": [6, 8, 136, 123, 797, 714, 9143, 9146, 8017]} 28 | {"id": "3931730", "title": ["ramp", "current", "apparatus", "method", "sensitivity", "testing"], "abstract": ["ramp", "current", "method", "sensitivity", "testing", "dynamic", "record", "btained", "current", "voltage", "energy", "resistance", 
"instantaneous", "power", "fire", "electroexplosive", "device", "method", "valuable", "information", "gained", "firing", "minimum", "sampling", "utilized", "ramp", "method", "defective", "item", "detected", "recorded", "le", "sensitive", "device", "erroneous", "data", "point", "contributing", "accurate", "firing", "data"], "section": [6], "subsection": [106], "group": [528], "subgroup": [6738], "labels": [6, 115, 665, 7536]} 29 | {"id": "3931732", "title": ["sharp", "edge", "tester"], "abstract": ["hand", "held", "tool", "testing", "sharpness", "edge", "determine", "presence", "absence", "safety", "hazard", "rotatable", "mandrel", "driven", "torque", "spring", "velocity", "single", "rotation", "mandrel", "carrying", "covering", "testing", "material", "engaged", "edge", "tested", "automatically", "driving", "mandrel", "presence", "predetermined", "contact", "force", "pressure", "test", "material", "test", "edge", "included", "adjustably", "regulating", "mandrel", "speed", "rotation", "contacting", "pressure", "requisite", "effect", "testing", "operation"], "section": [6], "subsection": [106], "group": [528], "subgroup": [6711], "labels": [6, 115, 665, 7509]} 30 | {"id": "3931738", "title": ["device", "monitoring", "fluid", "pressure", "mechanism", "hydrostatic", "fluid", "bearing"], "abstract": ["pressurized", "operating", "fluid", "oil", "hydrostatic", "fluid", "bearing", "directed", "chamber", "plunger", "urging", "plunger", "position", "monitoring", "pressure", "force", "slightly", "le", "force", "operating", "fluid", "operating", "fluid", "minimum", "working", "pressure", "exerted", "plunger", "tending", "move", "plunger", "position", "pressure", "operating", "fluid", "drop", "minimum", "working", "pressure", "plunger", "moved", "position", "movement", "sensed", "sensing", "malfunction"], "section": [6, 7, 5], "subsection": [93, 106, 120], "group": [526, 604, 444], "subgroup": [6683, 5696, 5690, 7475], "labels": [6, 7, 5, 102, 115, 129, 663, 741, 581, 7481, 
6494, 6488, 8273]} 31 | {"id": "3931908", "title": ["insulated", "tank"], "abstract": ["invention", "relates", "improved", "corner", "construction", "cryogenic", "tank", "generally", "rectilinear", "cross", "section", "internal", "surface", "tank", "insulated", "foamed", "material", "direct", "contact", "cryogenic", "liquid"], "section": [8, 1, 5], "subsection": [127, 46, 95], "group": [462, 237, 659], "subgroup": [6020, 8170, 6026, 2987, 6024, 3033, 6027, 6015, 6019, 6016], "labels": [8, 1, 5, 136, 55, 104, 599, 374, 796, 6818, 8968, 6824, 3785, 6822, 3831, 6825, 6813, 6817, 6814]} 32 | {"id": "3931912", "title": ["two-part", "hair", "dye", "hair", "bleach", "package"], "abstract": ["pressurized", "package", "conventional", "pressure", "propellant", "divided", "compartment", "arranged", "mixing", "content", "simultaneously", "dispensing", "compartment", "peroxide", "solution", "hair", "treating", "composition", "including", "selected", "compound", "quantity", "sufficient", "prevent", "development", "unsafe", "pressure", "peroxide", "decompose", "package"], "section": [8, 0], "subsection": [12, 127], "group": [73, 659, 68], "subgroup": [919, 838, 8253, 852], "labels": [8, 0, 21, 136, 210, 796, 205, 1717, 1636, 9051, 1650]} 33 | {"id": "3931915", "title": ["liquid-containing", "cartridge", "device", "dispensing", "measured", "amount", "liquid", "cartridge"], "abstract": ["invention", "concerned", "container", "receiving", "liquid", "material", "fitting", "end", "adapted", "provide", "discharge", "port", "liquid", "container", "end", "plunger", "piston", "adapted", "moved", "inside", "wall", "container", "discharge", "predetermined", "aliquot", "", "dos", "", "liquid", "container", "port", "invention", "comprises", "device", "adapted", "receive", "liquid-containing", "cartridge", "package", "type", "mentioned", "coacting", "package", "move", "plunger", "container", "dose-dispensing", "-", "discharge", "manner", "gradually", "accelerate", "movement", "plunger", 
"position", "rest", "container", "maximum", "rate", "motion", "gradually", "decelerate", "motion", "plunger", "final", "position", "end", "dose-discharge", "device", "comprises", "accurately", "predetermining", "measuring", "", "amount", "liquid", "discharged", "individual", "movement", "plunger", "device", "comprise", "repeatedly", "actuating", "piston", "accelerating-decelerating", "movement", "discharge", "plurality", "measured", "sample", "repeatedly", "non-spurt", "non-splash", "condition"], "section": [6, 0], "subsection": [106, 12], "group": [70, 521], "subgroup": [904, 6625], "labels": [6, 0, 115, 21, 207, 658, 1702, 7423]} 34 | {"id": "3931984", "title": ["anti-loading", "tray", "shopping", "cart"], "abstract": ["disclosed", "anti-loading", "tray", "assembly", "mounting", "lower", "frame", "conventional", "nestable", "shopping", "cart", "position", "basket", "prevent", "pilferage", "loading", "article", "cart", "basket", "tray", "includes", "plurality", "interconnecting", "elongated", "strut", "cross", "member", "size", "shape", "arrangement", "define", "forwardly-and-downwardly", "sloping", "plane-like", "area", "lateral", "wing", "prevent", "loading", "article", "tray", "lower", "frame", "cart", "simplified", "connection", "tray", "existing", "structure", "cart", "frame", "anti-loading", "tray", "economical", "add-on", "feature", "arrangement", "simple", "inexpensive", "tray", "easily", "manufactured", "readily", "installed", "factory", "existing", "cart", "field"], "section": [1], "subsection": [43], "group": [217], "subgroup": [2671], "labels": [1, 52, 354, 3469]} 35 | {"id": "3932026", "title": ["liquid", "crystal", "display", "assembly", "dielectric", "coated", "electrode"], "abstract": ["display", "assembly", "nematic", "liquid", "crystal", "sandwiched", "supporting", "substrate", "substrate", "ha", "layer", "conductive", "coating", "inside", "surface", "overcoated", "dielectric", "film", "layer", "separate", "conductive", "layer", "liquid", 
"crystal", "material", "embodiment", "include", "varied", "thickness", "dielectric", "association", "image", "lead", "portion", "conductive", "coating"], "section": [6], "subsection": [107], "group": [538], "subgroup": [6846], "labels": [6, 116, 675, 7644]} 36 | {"id": "3932045", "title": ["rolling", "contact", "joint"], "abstract": ["apparatus", "disclosed", "rolling", "contact", "joint", "prosthetic", "joint", "knee", "joint", "application", "requiring", "movable", "section", "mechanical", "joint", "joint", "variety", "form", "depending", "situation", "essence", "includes", "body", "surface", "portion", "contact", "body", "movable", "relative", "constrained", "movement", "nature", "surface", "contact", "flexible", "strap", "positioned", "contact", "body", "basic", "configuration", "including", "pair", "cylinder", "utilized", "flexible", "strap", "wrapped", "completely", "partially", "cylinder", "provide", "joint", "substantially", "restraint", "motion", "low", "friction", "due", "rolling", "contact", "contacting", "surface", "cylinder", "addition", "body", "pair", "cylindrical", "surface", "diameter", "respect", "body", "contact", "cylindrical", "surface", "flexible", "strap", "wrapped", "cylindrical", "surface", "cylindrical", "surface", "concentric", "respect", "diameter", "proper", "ratio", "substantially", "resistance", "motion", "rolling", "contact", "friction", "low", "cylindrical", "surface", "concentric", "flexible", "strap", "strained", "rotation", "spring", "action", "provided", "device", "shape", "body", "positioning", "flexible", "strap", "determines", "type", "motion", "combination", "addition", "embodiment", "rolling", "contact", "joint", "prosthetic", "knee", "joint"], "section": [8, 0, 5], "subsection": [94, 12, 127], "group": [660, 448, 64], "subgroup": [5738, 754, 8361, 8350], "labels": [8, 0, 5, 103, 21, 136, 797, 585, 201, 6536, 1552, 9159, 9148]} 37 | {"id": "3932082", "title": ["forming", "reinforced", "concrete", "module"], "abstract": 
["apparatus", "constructing", "reinforced", "concrete", "modular", "construction", "unit", "comprising", "longitudinally", "extending", "multi-sided", "construction", "unit", "reinforcing", "bar", "mesh", "formed", "cage", "desired", "size", "shape", "unit", "cage", "mounted", "rotatable", "shaft", "raised", "lowered", "relative", "horizontal", "bed", "side", "modular", "construction", "unit", "turn", "positioned", "perimeter", "form", "bed", "concrete", "poured", "finished", "flat", "form", "side", "modular", "unit", "side", "ha", "cured", "sufficient", "time", "insure", "structural", "integrity", "cage", "raised", "rotated", "align", "side", "cage", "horizontal", "bed", "cage", "lowered", "position", "adjacent", "bed", "suitable", "form", "concrete", "poured", "reinforcing", "structure", "side", "cage", "form", "side", "wall", "building", "successive", "side", "similarly", "formed", "longitudinally", "extending", "hollow", "construction", "unit", "ha", "completed", "end", "wall", "formed", "tubular", "member", "enclose", "modular", "building", "unit"], "section": [1], "subsection": [31], "group": [151], "subgroup": [1908, 1905], "labels": [1, 40, 288, 2706, 2703]} 38 | {"id": "3932087", "title": ["arrangement", "moulding", "press", "parted", "press", "tool", "production", "hot-pressed", "plastic", "material", "product", "grammophone", "record"], "abstract": ["moulding", "press", "parted", "press", "tool", "producing", "hot-pressed", "product", "plastic", "material", "grammophone", "record", "arrangement", "holding", "handling", "product", "movable", "pressing", "area", "stripping", "apparatus", "situated", "area", "arrangement", "incorporating", "holder", "provided", "anchoring", "pressed", "product", "utilizing", "excess", "material", "exuding", "pressing", "tool", "half"], "section": [8, 1], "subsection": [32, 127], "group": [158, 659, 155], "subgroup": [8254, 2064, 1947], "labels": [8, 1, 41, 136, 295, 796, 292, 9052, 2862, 2745]} 39 | {"id": "3932245", 
"title": ["mechanical", "embossing", "foamed", "sheet", "material"], "abstract": ["decorative", "sheet", "material", "foamed", "vinyl", "floor", "covering", "comprising", "preferably", "base", "substrate", "asbestos", "felt", "layer", "foam", "cellular", "resin", "material", "base", "portion", "thickness", "providing", "relief", "pattern", "foam", "land", "large", "cell", "foam", "valley", "crushed", "smaller", "cell", "cell", "wall", "bonded", "layer", "non-cellular", "transparent", "resin", "material", "overlying", "land", "valley", "area", "relief", "pattern", "printed", "color", "pattern", "design", "provided", "foam", "resin", "layer", "transparent", "resin", "layer", "colored", "area", "pattern", "design", "accurate", "registration", "predetermined", "relation", "crushed", "valley", "area", "foam", "layer", "addition", "relief", "color", "pattern", "product", "pattern", "effect", "registration", "relief", "color", "pattern", "pattern", "light", "reflective", "characteristic", "exposed", "surface", "transparent", "layer", "apparatus", "method", "producing", "covering", "material", "invention", "disclosed"], "section": [3, 8, 1], "subsection": [32, 40, 127, 77], "group": [660, 372, 186, 155], "subgroup": [1956, 8354, 1948, 8342, 4856, 2360], "labels": [3, 8, 1, 41, 49, 136, 86, 797, 509, 323, 292, 2754, 9152, 2746, 9140, 5654, 3158]} 40 | {"id": "3932261", "title": ["electrode", "assembly", "electrolytic", "cell"], "abstract": ["electrode", "provided", "electrolytic", "cell", "employing", "metal", "electrode", "electrode", "comprises", "electrode", "surface", "positioned", "parallel", "space", "conductive", "support", "conductive", "support", "separately", "attached", "electrode", "surface", "positioned", "space", "electrode", "surface", "conductive", "support", "attached", "substantially", "perpendicular", "electrode", "plate", "electrode", "assembly", "employed", "electrolytic", "cell", "producing", "chlorine", "caustic", "soda", "oxychlorine", "compound", 
"electrolkysis", "alkali", "metal", "chloride", "solution"], "section": [2], "subsection": [69], "group": [338], "subgroup": [4574, 4575], "labels": [2, 78, 475, 5372, 5373]} 41 | {"id": "3932281", "title": ["leaf", "trap", "kit", "swimming", "pool"], "abstract": ["leaf", "trap", "kit", "swimming", "pool", "includes", "inverted", "perforate", "basket", "fitted", "main", "drain", "outlet", "pool", "dome-like", "housing", "open", "underside", "lateral", "opening", "admit", "leaf", "space", "housing", "inverted", "basket", "top", "opening", "leaf", "removed", "vacuum", "cleaner", "head", "fittable", "housing", "remove", "leaf", "top", "opening", "housing"], "section": [4], "subsection": [84], "group": [402], "subgroup": [5106], "labels": [4, 93, 539, 5904]} 42 | {"id": "3932340", "title": ["nylon", "coating", "composition"], "abstract": ["coating", "composition", "disclosed", "producing", "alcohol-insoluble", "film", "comprises", "mixture", "alcohol-soluble", "nylon", "copolymer", "alcohol-soluble", "alkoxymethylated", "nylon", "acid", "catalyst"], "section": [2], "subsection": [60, 59], "group": [293, 290], "subgroup": [4069, 3964], "labels": [2, 69, 68, 430, 427, 4867, 4762]} 43 | {"id": "3932378", "title": ["sulfonated", "disazo", "dyestuff", "ether", "group"], "abstract": ["disazo", "compound", "formula", "represents", "sulphobenzene", "sulphonaphthalene", "radical", "represents", "hydrogen", "atom", "low", "molecular", "alkyl", "alkoxy", "radical", "represents", "low", "molecular", "alkylene", "radical", "represents", "functional", "radical", "--", "--", "ortho", "-", "para-position", "azo", "bridge", "process", "preparation", "dyestuff", "dyestuff", "provide", "natural", "synthetic", "polyamide", "wool", "nylon", "dyeing", "excellent", "general", "fastness", "property"], "section": [2, 8], "subsection": [127, 60], "group": [291, 659], "subgroup": [4011, 8327, 4015], "labels": [2, 8, 136, 69, 428, 796, 4809, 9125, 4813]} 44 | {"id": "3932404", "title": 
["process", "making", "pyrazines", "-", "butadiene"], "abstract": ["process", "preparation", "amino-pyrazine", "falling", "formula", "represent", "hydrogen", "atom", "alkyl", "aryl", "group", "carbon", "atom", "pyrazine", "ring", "form", "cyclic", "hydrocarbon", "carbon", "atom", "represents", "cyano", "carboxy", "carbonamido", "alkoxy-carbonyl", "aryloxycarbonyl", "group", "comprises", "subjecting", "-", "butadiene", "general", "formula", "zi", "represent", "alkyl", "group", "nitrogen", "atom", "form", "heterocyclic", "compound", "possibly", "hetero", "atom", "action", "ammonia", "reacting", "-", "butadiene", "formula", "obtained", "basic", "agent", "compound", "formula", "viii", "represent", "alkyl", "aryl", "group", "carbon", "atom", "pyrazine", "ring", "form", "cyclic", "hydrocarbon", "carbon", "atom", "intermediate", "compound", "formula", "meaning", "", "represent", "alkyl", "group", "nitrogen", "atom", "form", "heterocyclic", "compound", "possibly", "hetero-atom", "intermediate", "compound", "formula", "meaning"], "section": [2], "subsection": [58], "group": [277], "subgroup": [3590], "labels": [2, 67, 414, 4388]} 45 | {"id": "3932429", "title": ["azabicyclo", "octane", "derivative", "process", "preparing"], "abstract": ["compound", "formula", "hydrogen", "phenyl", "alkyl", "carbon", "atom", "cycloalkyl", "carbon", "atom", "alkyl", "carbon", "atom", "substituent", "selected", "group", "consisting", "phenyl", "benzoyl", "hydrogen", "alkyl", "carbon", "atom", "hydrogen", "alkanoyl", "carbon", "atom", "benzoyl", "nicotinoyl", "disclosed", "method", "preparing", "compound", "disclosed", "compound", "pharmaceutically", "acceptable", "acid", "addition", "salt", "thereof", "analgesic", "agent"], "section": [2], "subsection": [58], "group": [277], "subgroup": [3575], "labels": [2, 67, 414, 4373]} 46 | {"id": "3932447", "title": ["benzimidazoles"], "abstract": ["benzimidazole", "derivative", "formula", "alkyl", "carbon", "atom", "selected", "group", "consisting", 
"tetrahydrofurfuryl", "saturated", "unsaturated", "oxygen", "heterocycle", "carbon", "atom", "unsaturated", "hydrocarbon", "radical", "carbon", "atom", "halogen", "atom", "posse", "fungicidal", "activity"], "section": [2], "subsection": [58], "group": [276, 277], "subgroup": [3587, 3503, 3616, 3615], "labels": [2, 67, 413, 414, 4385, 4301, 4414, 4413]} 47 | {"id": "3932475", "title": ["process", "producing", "trimethyl-p-benzoquinone"], "abstract": ["process", "producing", "trimethyl-p-benzoquinone", "halogenating", "-", "-", "trimethylphenol", "oxidizing", "resulting", "-", "-", "disclosed", "resulting", "compound", "readily", "converted", "trimethylhydroquinone", "starting", "material", "producing", "vitamin"], "section": [2], "subsection": [58], "group": [276], "subgroup": [3549, 3537, 3553, 3535], "labels": [2, 67, 413, 4347, 4335, 4351, 4333]} 48 | {"id": "3932485", "title": ["improved", "preparation", "wittig", "salt", "vinyl", "beta", "-", "ionol"], "abstract": ["improved", "preparation", "wittig", "salt", "alpha", "beta", "-", "unsaturated", "alcohol", "treating", "alcohol", "phosphine", "basic", "medium", "presence", "salt", "weak", "organic", "base", "strong", "acid", "wittig", "salt", "reacted", "unsaturated", "aldehyde", "form", "polyene", "compound"], "section": [2], "subsection": [58], "group": [276, 278], "subgroup": [3542, 3676, 3497], "labels": [2, 67, 413, 415, 4340, 4474, 4295]} 49 | {"id": "3932488", "title": ["etherification", "bark", "extract", "condensed", "tannin"], "abstract": ["polyphenolic", "extract", "coniferous", "tree", "bark", "condensed", "tannin", "wood", "quebracho", "extract", "etherified", "reaction", "elevated", "temperature", "presence", "alkaline", "catalyst", "olefin", "double", "bond-activated", "carbonyl", "group", "structure", "effective", "olefin", "acrolein", "reaction", "product", "produced", "high", "yield", "water", "alkali", "soluble", "act", "good", "dispersants"], "section": [2], "subsection": [60, 59], "group": 
[287, 298], "subgroup": [4162, 3867], "labels": [2, 69, 68, 424, 435, 4960, 4665]} 50 | {"id": "3932491", "title": ["process", "optical", "resolution", "racemic", "lysine", "sulphanilate"], "abstract": ["optical", "resolution", "lysine", "form", "racemic", "lysine", "sulphanilate", "enhanced", "addition", "supersaturated", "solution", "racemic", "lysine", "sulphanilate", "substance", "suppress", "formation", "seed", "racemic", "lysine", "sulphanilate", "supersatured", "solution", "seed-suppressing", "substance", "added", "lysine", "lysine", "acetate", "lysine", "carbonate", "amino", "acetic", "acid", "glycerol", "yield", "optically", "active", "lysine", "sulphanilate", "improved", "disclosed", "process"], "section": [2], "subsection": [58], "group": [275], "subgroup": [3445], "labels": [2, 67, 412, 4243]} 51 | {"id": "3932575", "title": ["method", "making", "multilayered", "packaging", "tray", "deep-drawing"], "abstract": ["packaging", "tray", "provided", "bottom", "side", "wall", "outwardly", "directed", "flange", "surrounding", "periphery", "side", "wall", "tray", "consists", "outer", "layer", "moldable", "stretchable", "synthetic", "plastic", "material", "intermediate", "layer", "liquid-absorbing", "material", "insignificant", "stretchability", "molding", "tray", "side", "wall", "thereof", "formed", "stretching", "plastic", "material", "softening", "operation", "adhesive", "bond", "realized", "outer", "layer", "stretched", "tray", "side", "wall", "present", "partially", "ruptured", "intermediate", "layer"], "section": [1], "subsection": [32, 46], "group": [237, 155], "subgroup": [1952, 2966, 3033, 2967], "labels": [1, 41, 55, 374, 292, 2750, 3764, 3831, 3765]} 52 | {"id": "3932615", "title": ["process", "preparation", "granule"], "abstract": ["granule", "prepared", "subjecting", "crystalline", "sugar", "basis", "adjuvant", "binder-containing", "solution", "mixing", "apparatus", "crushing", "drying", "conventional", "technique", "resultant", "product", 
"characterized", "uniform", "granular", "size", "good", "disintegrating", "property", "great", "apparent", "density", "abrasion", "resistance", "basis", "comprises", "member", "selected", "group", "penicillin", "tetracycline", "movobiocin", "kanamycin", "paromomycin", "midecamycin"], "section": [2, 0, 1], "subsection": [64, 12, 15], "group": [324, 88, 68], "subgroup": [1200, 4454, 853], "labels": [2, 0, 1, 73, 21, 24, 461, 225, 205, 1998, 5252, 1651]} 53 | {"id": "3932635", "title": ["cyclic", "progestogen-interrupted", "estrogen", "oral", "contraceptive", "regimen"], "abstract": ["invention", "relates", "method", "fertility", "control", "cyclic", "progestogen-interrupted", "estrogen", "oral", "contraceptive", "regimen", "day", "menstrual", "flow", "day", "day", "medication", "administration", "cycle", "combined", "formulation", "estrogen", "progestogen", "substance", "administered", "day", "cycle", "day", "including", "day", "cycle", "formulation", "progestogen", "substance", "active", "component", "administered", "day", "cycle", "day", "combination", "formulation", "administered", "including", "day", "cycle", "regimen", "combination", "estrogen", "progestogen", "administered", "starting", "day", "cycle", "continuing", "day", "day", "cycle", "starting", "day", "cycle", "continuing", "day", "day", "cycle", "progestogen", "administered", "remaining", "day", "dosage-free", "regimen", "completed", "placebo", "nonhormonal", "supplement", "dispensing", "package", "holding", "unit", "dosage", "form", "oral", "ingestion", "unit", "dosage", "form", "daily", "sequence", "single", "cycle", "medication", "administration"], "section": [8, 0], "subsection": [12, 127], "group": [659, 68], "subgroup": [8287, 839, 837], "labels": [8, 0, 21, 136, 796, 205, 9085, 1637, 1635]} 54 | {"id": "3932790", "title": ["ground", "fault", "interrupter", "reversed", "line", "polarity", "lamp", "indicator"], "abstract": ["ground", "fault", "interrupter", "gfi", "provided", "reversed", "line", 
"polarity", "lamp", "indicator", "proper", "installation", "gfi", "reversed", "line", "polarity", "lamp", "indicator", "includes", "push", "button", "lamp", "connected", "series", "line", "conductor", "gfi", "ground", "wiring", "system", "ground", "conductor", "series", "connection", "line", "ground", "conductor", "case", "reversed", "line", "polarity", "lamp", "indicator", "check", "open", "circuit", "ground", "conductor"], "section": [6, 7], "subsection": [106, 121], "group": [531, 616], "subgroup": [7661, 6782], "labels": [6, 7, 115, 130, 668, 753, 8459, 7580]} 55 | {"id": "3932792", "title": ["sealed", "pump", "drive", "circuit", "therefor"], "abstract": ["completely", "sealed", "magnetically", "driven", "pump", "piston", "armature", "driven", "electrical", "winding", "unique", "electrical", "driving", "circuit", "provided", "pump", "embodying", "feedback", "winding", "magnetically", "coupled", "driving", "winding", "pump", "controlling", "reciprocation", "drive", "circuit", "facilitate", "driving", "rate", "embodying", "solid", "state", "bistable", "flip-flop", "component", "adaptable", "embodied", "computer", "low", "power", "logic", "device"], "section": [7], "subsection": [121], "group": [618], "subgroup": [7702], "labels": [7, 130, 755, 8500]} 56 | {"id": "3932802", "title": ["controlled", "power", "transferring", "device", "method", "utilizing", "reactance", "controlled", "development", "opposing", "magnetic", "flux"], "abstract": ["power", "transferring", "method", "device", "controlled", "reactance", "type", "designed", "regulate", "control", "application", "alternating", "current", "electric", "power", "load", "reactance", "controlled", "signal", "controlled", "develop", "controlled", "magnetic", "flux", "opposition", "reactive", "magnetic", "flux", "resulting", "flux", "cancellation", "effectively", "eliminating", "reactance", "device", "includes", "reactance", "core", "coil", "core", "connected", "circuit", "power", "transfer", "controlled", 
"opposing", "magnetic", "flux", "core", "developed", "coil", "core", "end", "connected", "end", "coil", "controllable", "scr", "connect", "end", "coil", "end", "coil", "place", "coil", "parallel", "coil", "arranged", "reactor", "core", "parallel", "current", "coil", "produce", "opposing", "magnetic", "flux", "core", "selective", "operation", "controllable", "reactance", "device", "varied", "wide", "range", "efficient", "power", "transfer"], "section": [6], "subsection": [110], "group": [553], "subgroup": [7015], "labels": [6, 119, 690, 7813]} 57 | {"id": "3932806", "title": ["surge", "comparison", "type", "coil", "tester"], "abstract": ["low", "voltage", "pulse", "applied", "pulse", "transformer", "produce", "high", "voltage", "pulse", "turn", "applied", "capacitor", "diode", "coil", "test", "standard", "reference", "coil", "waveform", "resulting", "surge", "current", "test", "reference", "coil", "superimposed", "cathode", "ray", "tube", "test", "coil", "judged", "defective", "waveform", "substantially", "resistor", "connected", "parallel", "capacitor", "discharge", "capacitor", "pulse", "diode", "prevents", "capacitor", "discharging", "coil", "high", "voltage", "pulse", "applied", "alternately", "coil", "waveform", "displayed", "single", "beam", "cathode", "ray", "tube", "simultaneously", "waveform", "displayed", "dual", "beam", "cathode", "ray", "tube"], "section": [6], "subsection": [106], "group": [531], "subgroup": [6779, 6782], "labels": [6, 115, 668, 7577, 7580]} 58 | {"id": "3932911", "title": ["structure", "mounting", "air", "moving", "vacuum", "cleaner"], "abstract": ["structure", "mounting", "air", "moving", "apparatus", "vacuum", "cleaner", "including", "support", "shoulder", "formed", "integral", "housing", "portion", "vacuum", "cleaner", "spring", "bracket", "removably", "supporting", "air", "moving", "apparatus", "shoulder", "air", "flow", "passage", "bracket", "embrace", "air", "moving", "apparatus", "defines", "opposite", "end", "portion", 
"resting", "support", "shoulder"], "section": [0], "subsection": [11], "group": [60], "subgroup": [696], "labels": [0, 20, 197, 1494]} 59 | {"id": "3932969", "title": ["ferrocement", "structure", "method"], "abstract": ["ferrocement", "structure", "method", "producing", "comprising", "providing", "load-bearing", "framework", "covering", "framework", "strong", "flexible", "sheet-like", "material", "flexible", "metal", "reinforcing", "material", "applying", "cement", "mortar", "thereover", "cover", "reinforcing", "material", "framework", "made", "easily", "fabricated", "wooden", "rib"], "section": [4], "subsection": [84], "group": [397], "subgroup": [5040, 5033], "labels": [4, 93, 534, 5838, 5831]} 60 | {"id": "3933034", "title": ["hydrostatic", "stress", "gauge", "system"], "abstract": ["hydrostatic", "stress", "gage", "including", "sphere", "incompressible", "fluid", "positioned", "inside", "drum", "structure", "pair", "interconnected", "flat", "spiral", "coil", "forming", "self-resonant", "tuned", "circuit", "change", "pressure", "sphere", "variation", "distance", "coil", "changing", "resonant", "frequency", "measured", "device", "stress", "measured"], "section": [6], "subsection": [106], "group": [526], "subgroup": [6681], "labels": [6, 115, 663, 7479]} 61 | {"id": "3933066", "title": ["dual", "speed", "stacker", "paddle", "assembly"], "abstract": ["dual", "speed", "stacker", "assembly", "connection", "high", "speed", "machine", "slicing", "stacking", "weighing", "food", "product", "stacker", "ha", "mating", "paddle", "move", "slow", "speed", "collection", "required", "number", "slice", "stack", "rotated", "high", "speed", "drop", "stack", "slice", "conveyor", "bring", "blade", "position", "receive", "collection", "slice", "paddle", "rotated", "low", "inertia", "motor", "connected", "timing", "belt", "bevel", "gear", "arrangement"], "section": [8, 1], "subsection": [127, 29], "group": [660, 138], "subgroup": [1844, 8363], "labels": [8, 1, 136, 38, 797, 275, 2642, 
9161]} 62 | {"id": "3933190", "title": ["method", "fabricating", "shell", "mold", "production", "superalloy", "casting"], "abstract": ["method", "producing", "shell", "mold", "investment", "casting", "subsequent", "directional", "solidification", "nickel", "cobalt", "based", "superalloys", "shell", "mold", "composed", "high", "purity", "alumina", "characterized", "presence", "silica", "trace", "form", "shell", "mold", "present", "invention", "nonreactive", "molten", "nickel", "cobalt", "base", "superalloys", "exposure", "hour", "additionally", "alumina", "shell", "mold", "present", "invention", "ha", "unique", "combination", "mechanical", "strength", "stability", "elevated", "temperature"], "section": [1], "subsection": [25], "group": [115], "subgroup": [1521, 1509], "labels": [1, 34, 252, 2319, 2307]} 63 | {"id": "3933241", "title": ["package", "construction"], "abstract": ["package", "construction", "provided", "includes", "collapsible", "tee", "member", "supporting", "ball", "upright", "manner", "hit", "bat", "tee", "member", "erected", "bat", "ball", "held", "tee", "member", "form", "therewith", "self-contained", "package", "construction", "tee", "member", "held", "collapsed", "condition"], "section": [1], "subsection": [46], "group": [237], "subgroup": [3030, 3033], "labels": [1, 55, 374, 3828, 3831]} 64 | {"id": "3933294", "title": ["file", "folder", "rigid", "spine"], "abstract": ["one-piece", "file", "folder", "vertical", "lateral", "rotary", "similar", "file", "expandable", "pocket", "inside", "paper", "substantially", "rigid", "spine", "closed", "end", "folded", "edge", "folder", "indexing", "identifying", "paper", "folder", "file", "folder", "filed", "visible", "rigid", "spine", "vertically", "horizontally", "file"], "section": [1], "subsection": [38], "group": [180], "subgroup": [2319], "labels": [1, 47, 317, 3117]} 65 | {"id": "3933374", "title": ["tandem", "trailer", "system"], "abstract": ["intermediate", "semi-trailer", "unit", "towed", "highway", 
"tractor", "tow", "standard", "cargo", "semi-trailer", "standard", "trailer", "attached", "intermediate", "trailer", "unit", "wheel", "mounted", "portion", "chassis", "permanently", "extends", "rearwardly", "cargo", "container", "intermediate", "trailer", "unit", "wheel", "positioned", "ahead", "rearmost", "bogie", "intermediate", "trailer", "unit", "provided", "form", "temporarily", "horizontal", "platform", "wheel", "loading", "unloading", "cargo", "container"], "section": [1], "subsection": [41, 43], "group": [200, 219], "subgroup": [2698, 2504], "labels": [1, 50, 52, 337, 356, 3496, 3302]} 66 | {"id": "3933392", "title": ["wheel", "rim"], "abstract": ["safety", "wheel", "rim", "pneumatic", "tire", "ha", "removable", "band", "securable", "obstruct", "mouth", "receive", "bead", "tire", "fitting", "tire", "rim", "band", "locked", "radial", "expansion", "position", "tire", "bead", "bead", "accidentally", "enter", "event", "deflation", "tire", "travelling", "offset", "median", "plane", "rim", "spaced", "bead-retaining", "flange", "securing", "band", "shown", "inflation", "valve", "stem", "screw-threaded", "outer", "overlapping", "end", "band", "inflated", "band", "secured", "screw", "arranged", "tighten", "band", "circumferentially"], "section": [8, 1], "subsection": [127, 41], "group": [660, 189, 190], "subgroup": [2398, 2413, 8341, 2376], "labels": [8, 1, 136, 50, 797, 326, 327, 3196, 3211, 9139, 3174]} 67 | {"id": "3933404", "title": ["strain", "limiting", "mechanism"], "abstract": ["electrical", "connector", "assembly", "incorporating", "limiting", "cable", "tension", "predetermined", "precluding", "mechanical", "failure", "cable"], "section": [8, 7], "subsection": [120, 127], "group": [611, 659], "subgroup": [7610, 8264], "labels": [8, 7, 129, 136, 748, 796, 8408, 9062]} 68 | {"id": "3933440", "title": ["chemical", "reaction", "vessel"], "abstract": ["gas-tight", "chemical", "reaction", "vessel", "chemical", "analysis", "intractable", "material", "glass", 
"reaction", "vessel", "comprises", "body", "concentric", "chamber", "adapted", "reagent", "adapted", "receive", "sample", "cap", "turn", "adapted", "receive", "sample", "reaction", "vessel", "ha", "sealing", "member", "sealing", "chamber", "body", "cap", "securing", "sealing", "member", "body", "body", "sealing", "member", "sample", "cap", "made", "material", "desirably", "polytetrafluoroethylene", "chemically", "inert", "reagent"], "section": [6, 1], "subsection": [106, 15], "group": [528, 89], "subgroup": [1237, 6748], "labels": [6, 1, 115, 24, 665, 226, 2035, 7546]} 69 | {"id": "3933442", "title": ["laminated", "body"], "abstract": ["porous", "seal", "element", "usable", "blade", "tip", "seal", "turbomachine", "element", "labyrinth", "seal", "made", "large", "number", "strip", "disposed", "edgewise", "sealing", "face", "element", "extending", "direction", "relative", "movement", "seal", "element", "strip", "groove", "extending", "strip", "discharge", "cooling", "fluid", "air", "presence", "groove", "low", "density", "structure", "seal", "face", "seal", "element", "abraded", "rubbing", "contact", "metering", "coolant", "rear", "face", "seal", "element", "seal", "element", "fabricated", "etching", "sheet", "sheet", "defines", "number", "parallel", "strip", "joined", "weak", "tie", "groove", "extending", "strip", "stacking", "sheet", "bonding", "separating", "bonded", "structure", "weak", "tie", "stack", "strip", "defines", "seal", "element"], "section": [8, 1, 5], "subsection": [26, 125, 92, 127, 88], "group": [660, 656, 443, 120, 417, 125], "subgroup": [8354, 1611, 1689, 8097, 5681, 5285], "labels": [8, 1, 5, 35, 134, 101, 136, 97, 797, 793, 580, 257, 554, 262, 9152, 2409, 2487, 8895, 6479, 6083]} 70 | {"id": "3933462", "title": ["mixture", "substituted", "benzothiadiazinones", "benzonitriles", "herbicide"], "abstract": ["herbicide", "mixture", "compound", "formula", "denotes", "lower", "alkyl", "maximum", "carbon", "atom", "salt", "alkali", "metal", "alkaline", 
"earth", "metal", "ammonium", "hydroxyalkylammonium", "alkylammonium", "hydrazine", "salt", "salt", "sodium", "lithium", "potassium", "calcium", "iron", "methylammonium", "trimethylammonium", "ethylammonium", "diethanolammonium", "ethanolammonium", "dimethylamine", "dimethylethanolamine", "hydrazine", "phenylhydrazine", "compound", "formula", "denotes", "hydroxy", "radical", "denotes", "halogen", "denotes", "integer"], "section": [2, 0], "subsection": [0, 58], "group": [276, 10], "subgroup": [191, 3563, 186, 192, 190], "labels": [2, 0, 9, 67, 413, 147, 989, 4361, 984, 990, 988]} 71 | {"id": "3933569", "title": ["tool", "welding", "plastic", "film", "wrapping", "object"], "abstract": ["tool", "interconnecting", "welding", "web", "plastic", "film", "enclosing", "package", "good", "tool", "comprising", "section", "arranged", "pressed", "securely", "holding", "welding", "film", "web", "separate", "web", "welding", "tool", "comprises", "arranged", "tighten", "film", "good", "maintaining", "stretch", "film", "web", "extending", "welding", "point", "web", "supply", "roll", "tension-free", "condition", "welding", "operation", "proper", "ensure", "high-quality", "durable", "welding", "seam"], "section": [8, 1], "subsection": [32, 127, 46], "group": [660, 235, 155], "subgroup": [8342, 1960, 2946, 1961], "labels": [8, 1, 41, 136, 55, 797, 372, 292, 9140, 2758, 3744, 2759]} 72 | {"id": "3933721", "title": ["flame", "retardant", "plasticized", "composition"], "abstract": ["resinous", "polymer", "vinyl", "chloride", "plasticized", "-", "dibromoterephthalate", "ester"], "section": [2], "subsection": [59], "group": [289, 290], "subgroup": [3942, 3965], "labels": [2, 68, 426, 427, 4740, 4763]} 73 | {"id": "3933731", "title": ["plastic", "composition", "liberating", "reduced", "amount", "toxic", "gas"], "abstract": ["composite", "disclosed", "comprises", "specific", "metal-compound", "additive", "plastic", "material", "gypsum", "calcium", "sulfite", "composite", "improved", "emits", 
"substantially", "toxic", "gas", "sulfur", "dioxide", "hydrogen", "sulfide", "burned", "ash", "thereof", "contacted", "water"], "section": [2], "subsection": [59], "group": [289, 290], "subgroup": [3941, 3946], "labels": [2, 68, 426, 427, 4739, 4744]} 74 | {"id": "3933753", "title": ["alkenylaromatic", "polymer", "alpha", "-", "ketoalhydic", "group"], "abstract": ["alkenylaromatic", "polymer", "provided", "derived", "mol", "monomer", "general", "formula", "", "represents", "hydrogen", "atom", "methyl", "radical", "", "represents", "hydrogen", "atom", "methyl", "ethyl", "radical", "mol", "non-aromatic", "ethylenically", "unsaturated", "monomer", "optionally", "crosslinked", "mol", "relative", "monomer", "formula", "polyvinyl", "monomer", "alpha", "-", "ketoaldehyde", "group", "formula", "equ", "--", "--", "cho", "ii", "present", "aromatic", "ring", "polymer", "polymer", "find", "utility", "extracting", "sulphur", "nitrogen-containing", "compound", "solution", "urea", "solution", "resulting", "dialysis", "ultrafiltration", "human", "blood"], "section": [2, 0, 1], "subsection": [12, 15, 59], "group": [285, 88, 70], "subgroup": [3745, 3817, 873, 1201], "labels": [2, 0, 1, 21, 24, 68, 422, 225, 207, 4543, 4615, 1671, 1999]} 75 | {"id": "3933774", "title": ["modified", "polymer", "diisopropenylbenzene"], "abstract": ["modified", "copolymer", "vinyl", "aromatic", "compound", "diene", "hydrocarbon", "diisopropenylbenzene", "polymerized", "unit", "group", "general", "formula", "copolymer", "intermediate", "manufacture", "block", "copolymer", "graft", "copolymer", "suitable", "impact-resistant", "plastic", "polymeric", "antistatic", "agent"], "section": [2], "subsection": [59], "group": [285, 284], "subgroup": [3796, 3772, 3817, 3786, 3737, 3742], "labels": [2, 68, 422, 421, 4594, 4570, 4615, 4584, 4535, 4540]} 76 | {"id": "3933835", "title": ["aliphatically", "substituted", "aryl-chalcogeno-hydrocarbon", "derivative"], "abstract": ["compound", "formula", "equ", "--", "ph", 
"--", "--", "alk", "--", "adamantyl", "ph", "phenylene", "optionally", "substituted", "amino", "nitro", "lower", "alkyl", "lower", "alkoxy", "halogen", "trifluoromethyl", "oxy", "thio", "alk", "alkylene", "atom", "alkenylene", "atom", "free", "esterfied", "amidised", "carboxyl", "sulpho", "sulphonamido", "therapeutically", "acceptable", "salt", "thereof", "anti-allergic", "hypolipidaemic", "agent"], "section": [2], "subsection": [58], "group": [276], "subgroup": [3558, 3518, 3526, 3524, 3515], "labels": [2, 67, 413, 4356, 4316, 4324, 4322, 4313]} 77 | {"id": "3933860", "title": ["-", "n-acyl-n-arylamino", "lactones"], "abstract": ["-", "n-acyl-n-arylamino", "-", "gamma", "-", "lactones", "delta", "-", "lactones", "gamma", "-", "lactams", "delta", "-", "lactams", "fungicidal", "activity"], "section": [2], "subsection": [58], "group": [277], "subgroup": [3574, 3615], "labels": [2, 67, 414, 4372, 4413]} 78 | {"id": "3933890", "title": ["-", "trihydroxyprostanoic", "acid"], "abstract": ["cyclopentane", "derivative", "formula", "symbol", "represent", "hydrogen", "alkyl", "represents", "symbol", "combination", "represents", "methylene", "represents", "hydroxymethylene", "represents", "ethylene", "represents", "hydroxymethylene", "represents", "carbonyl", "represents", "methylene", "represents", "ethylene", "trans-vinylene", "represents", "hydroxymethylene", "carbonyl", "represents", "hydroxymethylene", "represents", "methylene", "represents", "ethylene", "trans-vinylene", "represents", "hydroxymethylene", "compound", "possessing", "pharmacological", "property", "production", "hypotension", "bronchodilatation", "inhibition", "gastric", "acid", "secretion", "stimulation", "uterine", "contraction"], "section": [2], "subsection": [58], "group": [276, 278], "subgroup": [3676, 3543], "labels": [2, 67, 413, 415, 4474, 4341]} 79 | {"id": "3933894", "title": ["n-arylsulfonyl", "carbamate"], "abstract": ["present", "invention", "relates", "amine", "alkali", "metal", "alkaline", 
"earth", "metal", "salt", "n-benzene", "sulfonyl", "carbamic", "acid", "ester", "lower", "alkenyl", "n-benzene", "sulfonyl", "carbamate", "compound", "herbicide"], "section": [2, 0], "subsection": [0, 58], "group": [276, 10], "subgroup": [3520, 192], "labels": [2, 0, 9, 67, 413, 147, 4318, 990]} 80 | {"id": "3933899", "title": ["pge", "-", "oxa-phenylene", "compound"], "abstract": ["invention", "group", "pge", "-", "type", "oxa-phenylene", "compound", "process", "making", "compound", "variety", "pharmacological", "purpose", "including", "anti-ulcer", "inhibition", "platelet", "aggregation", "increase", "nasal", "patency", "labor", "inducement", "term", "wound", "healing"], "section": [2], "subsection": [58], "group": [276, 277], "subgroup": [3537, 3558, 3534, 3543, 3555, 3620, 3535], "labels": [2, 67, 413, 414, 4335, 4356, 4332, 4341, 4353, 4418, 4333]} 81 | {"id": "3933925", "title": ["hydrolysis", "toluene", "diamine", "produce", "methyl", "resorcinol"], "abstract": ["methyl", "resorcinol", "produced", "hydrolysis", "toluene", "diamine", "aqueous", "excess", "ammonium", "bisulfate", "reactant", "contacted", "elevated", "temperature", "period", "time", "sufficient", "hydrolyze", "toluene", "diamine", "methyl", "resorcinol", "methyl", "resorcinol", "produced", "separated", "reaction", "mixture", "ammonium", "sulfate", "regenerated", "ammonium", "bisulfate", "removing", "water", "thermally", "decomposing", "by-product", "ammonium", "sulfate", "elevated", "temperature"], "section": [2, 0], "subsection": [58, 12, 52], "group": [276, 257, 73, 68], "subgroup": [916, 3537, 852, 3262, 917, 919, 3535], "labels": [2, 0, 67, 21, 61, 413, 394, 210, 205, 1714, 4335, 1650, 4060, 1715, 1717, 4333]} 82 | {"id": "3933929", "title": ["process", "purification", "p-nitrophenol"], "abstract": ["purification", "process", "p-nitrophenol", "obtained", "nitration", "phenol", "separation", "crude", "nitrophenols", "steam", "distillation", "remove", "o-nitrophenol", "cooling", "broth", 
"obtained", "sodium", "bisulphite", "ph", "deposit", "crystal", "p-nitrophenol", "improvement", "consists", "stirring", "crystal", "water", "degree", "-", "degree", "give", "mixture", "excess", "p-nitrophenol", "solubility", "temperature", "separating", "upper", "layer", "p-nitrophenol", "water", "obtained", "cooling", "degree", "-", "degree", "separating", "layer", "water", "p-nitrophenol", "cooling", "degree", "collecting", "crystal", "deposited"], "section": [2], "subsection": [58], "group": [276], "subgroup": [3458, 3456], "labels": [2, 67, 413, 4256, 4254]} 83 | {"id": "3933936", "title": ["rapid", "setting", "adhesive", "compound"], "abstract": ["mixture", "curable", "phenolic", "resin", "organic", "diaziridine", "strong", "adhesive", "curable", "short", "time", "moderate", "temperature", "make", "strong", "bond", "wood", "metal", "plastic"], "section": [2, 8], "subsection": [127, 60, 59], "group": [660, 297, 289, 290], "subgroup": [3942, 8354, 3981, 3989, 4108], "labels": [2, 8, 136, 69, 68, 797, 434, 426, 427, 4740, 9152, 4779, 4787, 4906]} 84 | {"id": "3933960", "title": ["method", "extruding", "fiber", "reinforced", "plural", "layered", "plastic", "tube"], "abstract": ["specification", "discloses", "method", "making", "reinforced", "tube", "comprising", "continuously", "extruding", "viscous", "material", "reinforcing", "fibre", "concentric", "set", "discrete", "passage", "producing", "laminar", "flow", "passage", "causing", "material", "accelerate", "entry", "passage", "preventing", "deceleration", "thereof", "passage", "fibre", "orientate", "material", "lengthwise", "passage", "bringing", "extruded", "material", "respective", "passage", "form", "layer", "material", "fibre", "lying", "helix", "opposite", "hand", "simultaneously", "hauling-off", "extruded", "layer", "regulated", "rate", "control", "angle", "helix", "fibre", "lie", "allowing", "layer", "consolidate", "single", "tube"], "section": [1], "subsection": [32], "group": [157, 155], "subgroup": 
[1950, 2016, 2043], "labels": [1, 41, 294, 292, 2748, 2814, 2841]} 85 | {"id": "3934010", "title": ["insecticidal", "composition", "method", "utilizing", "phosphoric", "acid", "phenylsulphonamide", "ester"], "abstract": ["insecticidal", "acaricidal", "composition", "method", "combating", "insect", "acaricide", "provided", "active", "insecticidal", "ingredient", "phosphoric", "acid", "phenylsulphonamide", "ester", "formula", "represents", "oxygen", "sulphur", "represents", "alkyl", "carbon", "atom", "represents", "alkyl", "carbon", "atom", "alkenyl", "alkinyl", "carbon", "atom", "alkoxyalkyl", "carbon", "atom", "moiety", "alkylthioalkyl", "carbon", "atom", "moiety", "haloalkyl", "carbon", "atom", "represents", "hydrogen", "alkyl", "carbon", "atom", "alkenyl", "carbon", "atom", "represents", "hydrogen", "represents", "halogen", "alkyl", "carbon", "atom", "represents", "alkyl", "carbon", "atom", "represents", "hydrogen", "halogen", "alkyl", "carbon", "atom"], "section": [2, 8], "subsection": [58, 127], "group": [278, 659], "subgroup": [3676, 8253], "labels": [2, 8, 67, 136, 415, 796, 4474, 9051]} 86 | {"id": "3934042", "title": ["method", "apparatus", "irradiative", "treatment", "beverage"], "abstract": ["method", "apparatus", "irradiative", "treatment", "beverage", "milk", "beer", "wine", "fruit", "juice", "sterilize", "pasteurize", "beverage", "pumped", "system", "contact", "air", "entering", "beverage", "heat", "exchanged", "exiting", "beverage", "subjected", "ultra-violet", "irradiation", "heat", "exchange", "exiting", "beverage", "case", "milk", "homogenization", "place", "heat", "exchange", "returning", "beverage", "heating", "beverage", "infra-red", "irradiation", "elevated", "temperature", "infra-red", "heating", "beverage", "held", "elevated", "temperature", "insulated", "conduit", "return", "heat", "exchange", "entering", "beverage", "irradiation", "beverage", "performed", "passing", "beverage", "transparent", "conduit", "fused", "quartz", "improved", 
"taste", "shortened", "cycle", "time", "lower", "treatment", "temperature", "prolonged", "shelf", "life", "beverage", "obtained"], "section": [0], "subsection": [3], "group": [17, 23], "subgroup": [252, 286], "labels": [0, 12, 154, 160, 1050, 1084]} 87 | {"id": "3934109", "title": ["latch", "pivot", "latch", "needle"], "abstract": ["knitting", "machine", "latch", "needle", "latch", "pivot", "formed", "displacing", "portion", "wall", "slot", "displaced", "portion", "extend", "pivot", "hole", "latch", "displaced", "portion", "wall", "fused", "high", "energy", "heat", "source", "emitting", "sufficient", "energy", "drill", "hole", "displaced", "portion", "melt", "displaced", "portion"], "section": [3, 1], "subsection": [75, 26], "group": [124, 356], "subgroup": [1678, 4702], "labels": [3, 1, 84, 35, 261, 493, 2476, 5500]} 88 | {"id": "3934190", "title": ["signal", "compressor", "expanders"], "abstract": ["compressor", "expanders", "effecting", "dynamic", "range", "modification", "constructed", "connecting", "reactive", "network", "series", "voltage", "dividing", "action", "parallel", "current", "dividing", "action", "output", "signal", "derived", "voltage", "current", "network", "includes", "series", "parallel", "variable", "resistance", "variable", "resistance", "controlled", "dependence", "voltage", "thereacross", "sense", "required", "achieve", "compression", "expansion", "case", "resistance", "change", "shift", "turnover", "frequency", "circuit", "exclude", "large", "amplitude", "component", "amplitude", "increase", "reduction", "applied", "low", "level", "component", "restricted", "frequency", "band", "create", "compressor", "expander", "action"], "section": [7], "subsection": [123, 122], "group": [627, 633], "subgroup": [7800, 7855], "labels": [7, 132, 131, 764, 770, 8598, 8653]} 89 | {"id": "3934194", "title": ["solid", "state", "flyback", "transformer", "checker"], "abstract": ["instrument", "providing", "information", "condition", "horizontal", "output", 
"transformer", "television", "set"], "section": [6, 7], "subsection": [106, 123], "group": [531, 639], "subgroup": [7938, 6782], "labels": [6, 7, 115, 132, 668, 776, 8736, 7580]} 90 | {"id": "3934245", "title": ["alphanumeric", "display", "computer-linked", "typewriter", "console"], "abstract": ["present", "invention", "relates", "alphanumeric", "display", "unit", "improving", "identification", "quantity", "selected", "key", "button", "computer-linked", "typewriter", "console", "usage", "light-emitting", "identification", "indicia", "attached", "adjacent", "selected", "key", "button", "light-emitting", "display", "matrix", "format", "consisting", "set", "fiber", "optic", "member", "arranged", "orthogonal", "row", "column", "encoding", "stencil", "tab", "inserted", "path", "light", "generated", "incandescent", "bulb", "operational", "alignment", "set", "fiber", "optic", "member"], "section": [6, 7], "subsection": [111, 122], "group": [630, 558, 632], "subgroup": [7058, 7830, 7849], "labels": [6, 7, 120, 131, 767, 695, 769, 7856, 8628, 8647]} 91 | {"id": "3934259", "title": ["all-sky", "camera", "apparatus", "time-resolved", "lightning", "photography"], "abstract": ["pair", "all-sky", "camera", "equipped", "degree", "fisheye-nikkor", "disposed", "lens", "pointing", "vertically", "camera", "rotated", "axis", "passing", "zenith", "maintained", "stationary", "disposition", "desired", "counter-rotated", "relative", "rotational", "movement", "film", "camera", "measure", "displacement", "image", "formed", "respective", "film", "angular", "deviation", "produced", "displacement", "measured", "determine", "time", "development", "lightning", "discharge"], "section": [6], "subsection": [108, 107], "group": [539, 536], "subgroup": [6877, 6820], "labels": [6, 117, 116, 676, 673, 7675, 7618]} 92 | {"id": "3934302", "title": ["portable", "multi-purpose", "rechargeable", "cigarette", "lighter"], "abstract": ["multi-purpose", "cigarette", "lighter", "rechargeable", "ni-cd", 
"battery", "comprises", "heated", "coil", "cigarette", "lighter", "general", "smoking", "purpose", "incorporates", "built-in", "vacuum", "cleaner", "electric", "lamp"], "section": [8, 0, 5], "subsection": [98, 127, 11], "group": [60, 484, 659], "subgroup": [694, 8135, 6214], "labels": [8, 0, 5, 107, 136, 20, 197, 621, 796, 1492, 8933, 7012]} 93 | {"id": "3934427", "title": ["dispensing", "machine"], "abstract": ["machine", "dispenses", "ready-made", "milk", "shake", "freezing", "chamber", "ha", "dispensing", "valve", "porting", "valve", "element", "arranged", "flow", "semi-frozen", "comestible", "flavoring", "material", "occurs", "simultaneously", "beater", "mix", "dispensing", "conduit", "flavoring", "material", "ha", "unique", "coupling", "valve", "block", "quick-release", "coupling", "intermediate", "end", "release", "manually", "operable", "sampling", "valve", "connected", "conduit", "selectively", "draw", "sample", "flavoring", "pump", "provided", "sucking", "liquid", "comestible", "gas", "preselected", "proportion", "delivering", "bottom", "freezing", "chamber", "vent", "located", "valve", "block", "vent", "air", "freezing", "chamber", "start-up", "machine", "vent", "ha", "inlet", "located", "level", "liquid", "gas", "volume", "chamber", "equal", "respective", "proportion", "pumped"], "section": [0], "subsection": [3], "group": [20], "subgroup": [265], "labels": [0, 12, 157, 1063]} 94 | {"id": "3934473", "title": ["fluid", "flow", "meter", "counter", "rotating", "turbine", "impeller"], "abstract": ["fluid", "flow", "meter", "independantly", "counter", "rotating", "turbine", "impeller", "disclosed", "fluid", "characteristic", "upstream", "flow", "disturbance", "minimal", "variation", "volume", "flow", "rate", "measurement", "meter", "result", "fluidynamic", "interaction", "impeller", "angular", "velocity", "impeller", "sensed", "conventional", "manner", "velocity", "signal", "added", "total", "volume", "thruput", "rate", "flow", "optionally", "compared", 
"occurance", "mechanical", "electronic", "degradation"], "section": [6], "subsection": [106], "group": [521], "subgroup": [6624], "labels": [6, 115, 658, 7422]} 95 | {"id": "3934489", "title": ["rear", "view", "mirror", "vehicle"], "abstract": ["remote", "control", "exterior", "rear", "view", "mirror", "vehicle", "mirror", "adjusted", "stationary", "housing", "rotation", "single", "control", "member", "coupled", "mirror", "head", "adjustment", "head", "plane"], "section": [8, 1], "subsection": [127, 41], "group": [202, 660], "subgroup": [2517, 8361], "labels": [8, 1, 136, 50, 339, 797, 3315, 9159]} 96 | {"id": "3934629", "title": ["screw", "driver"], "abstract": ["screw", "driver", "comprising", "torque", "responsive", "clutch", "determining", "final", "tightening", "torque", "output", "spindle", "connected", "forwardly", "extending", "screw", "bit", "spindle", "axially", "displaceable", "forward", "rest", "position", "intermediate", "tightening", "position", "rear", "position", "dog", "spindle", "engage", "dog", "driving", "part", "clutch", "inactivation", "clutch", "spindle", "spring-biased", "normal", "tightening", "rest", "position", "permitted", "reoccupy", "normal", "tightening", "position", "final", "tightening", "sequence", "stud", "element", "screw", "driver", "housing", "arranged", "abut", "screw", "landing", "surface", "automatically", "preventing", "screw", "driver", "housing", "clutch", "spindle", "screw", "bit", "final", "position", "ensuring", "reactivation", "clutch"], "section": [1], "subsection": [28], "group": [130], "subgroup": [1769], "labels": [1, 37, 267, 2567]} 97 | {"id": "3934680", "title": ["safety", "latch", "automotive", "hoist"], "abstract": ["disclosed", "safety", "latch", "operable", "automotive", "hoist", "rack", "gear", "connected", "movable", "hoist", "piston", "piston", "carry", "hoist", "superstructure", "disposed", "cylinder", "piston", "telescope", "raise", "lower", "hoist", "response", "hydraulic", "pneumatic", "pressure", 
"acting", "thereon", "pinion", "gear", "mounted", "latch", "engagement", "rack", "slip", "clutch", "mechanism", "drive", "operating", "lever", "connected", "latch", "dog", "securely", "engaging", "rack", "prevent", "movement", "rack", "piston", "downwardly", "respect", "cylinder", "latch", "operating", "mechanism", "operated", "manually", "completely", "disengage", "latch", "dog", "rack", "hoist", "lowered", "slip", "clutch", "mechanism", "provided", "lost", "motion", "coupling", "member", "latch", "operating", "member", "latch", "dog", "latch", "dog", "remains", "engagement", "rack", "system", "raising", "hoist", "inoperative"], "section": [1], "subsection": [47], "group": [244], "subgroup": [3186], "labels": [1, 56, 381, 3984]} 98 | {"id": "3934690", "title": ["magnetic", "spring", "clutch"], "abstract": ["helical", "clutch", "spring", "ha", "end", "thereof", "fixed", "continuously", "rotating", "input", "member", "clutch", "turn", "spring", "partially", "envelop", "floating", "magnetic", "ring", "secured", "rotatable", "output", "member", "clutch", "input", "output", "member", "made", "nonmagnetic", "material", "clutch", "coil", "energized", "magnetic", "flux", "path", "passing", "clutch", "spring", "magnetic", "ring", "clutch", "spring", "tighten", "magnetic", "ring", "provide", "driving", "connection", "input", "output", "member"], "section": [5], "subsection": [94], "group": [449], "subgroup": [5808, 5809, 5821, 5791], "labels": [5, 103, 586, 6606, 6607, 6619, 6589]} 99 | {"id": "3934898", "title": ["passenger", "restraint", "device"], "abstract": ["passenger", "restraint", "device", "automotive", "vehicle", "end", "passenger", "restraint", "arm", "pivoted", "member", "slidably", "displaceable", "door", "motion", "transmitting", "strap", "interconnects", "restraint", "arm", "member", "slidable", "door", "member", "carry", "latching", "locking", "element", "automatically", "connected", "operative", "relation", "arm", "ha", "swung", "door", "passenger", 
"restraint", "position", "latch", "manually", "releasable", "passenger"], "section": [1], "subsection": [41], "group": [202], "subgroup": [2530], "labels": [1, 50, 339, 3328]} 100 | {"id": "3934974", "title": ["solution", "ethylauramine", "hydrochloride", "thiodiglycol"], "abstract": ["solution", "ethylauramine", "hydrochloride", "thiodiglycol", "process", "preparation", "solution", "dry", "ethylauramine", "hydrochloride", "water", "crystallisation", "dissolved", "thiodiglycol", "process", "colouration", "colouring", "agent", "solution"], "section": [2], "subsection": [60], "group": [291], "subgroup": [4030], "labels": [2, 69, 428, 4828]} 101 | {"id": "3934991", "title": ["nitric", "oxide", "analysis", "scrubber", "therefor"], "abstract": ["method", "analyzing", "nitric", "oxide", "gas", "stream", "nitrogen", "dioxide", "scrubber", "apparatus", "selectively", "removing", "nitrogen", "dioxide", "gas", "stream", "nitric", "oxide", "scrubber", "apparatus", "comprises", "container", "inlet", "port", "gas", "stream", "outlet", "port", "scrubber", "material", "container", "includes", "silver", "carbonate", "scrubber", "ha", "efficiency", "capacity", "part", "million", "hour", "nitrogen", "dioxide", "removal", "gram", "silver", "carbonate", "method", "involves", "passing", "gas", "stream", "scrubber", "material", "silver", "carbonate", "remove", "nitrogen", "dioxide", "gas", "stream", "passing", "nitric", "oxide", "unattenuated", "conveying", "gas", "stream", "scrubber", "material", "analyzer", "nitric", "oxide", "gas", "stream", "analyzed", "analyzer", "determine", "nitric", "oxide", "concentration"], "section": [6, 8], "subsection": [106, 127], "group": [660, 528], "subgroup": [8355, 6747, 6706], "labels": [6, 8, 115, 136, 797, 665, 9153, 7545, 7504]} 102 | {"id": "3935020", "title": ["faraday", "rotation", "glass"], "abstract": ["faraday", "rotation", "glass", "exhibiting", "high", "verdet", "constant", "low", "susceptability", "devitrification", "formed", 
"introducing", "high", "quantity", "rare", "earth", "oxide", "borate", "glass", "base", "glass", "melted", "standard", "environmental", "condition", "made", "large", "scale"], "section": [2], "subsection": [54], "group": [264], "subgroup": [3367, 3368], "labels": [2, 63, 401, 4165, 4166]} 103 | {"id": "3935052", "title": ["acid", "engraving", "machine", "device"], "abstract": ["acid", "engraving", "device", "reservoir", "acid", "valve", "meter", "acid", "flow", "reservoir", "ball", "point", "etching", "pen", "apply", "acid", "work", "surface"], "section": [2], "subsection": [68], "group": [336], "subgroup": [4563], "labels": [2, 77, 473, 5361]} 104 | {"id": "3935076", "title": ["stage", "separation", "system"], "abstract": ["invention", "improvement", "hot", "water", "process", "recovering", "bitumen", "tar", "sand", "aqueous", "slurry", "tar", "sand", "introduced", "vessel", "termed", "sand", "separation", "cell", "body", "hot", "water", "coarse", "sand", "settle", "discharged", "tailing", "top", "product", "comprising", "bitumen", "water", "fine", "sand", "transferred", "vessel", "termed", "froth", "formation", "cell", "body", "hot", "water", "cell", "bitumen", "form", "froth", "recovered", "fine", "solid", "water", "recycled", "lower", "end", "sand", "separation", "cell", "coarse", "sand", "ha", "previously", "removed", "sand", "separation", "cell", "good", "distribution", "feed", "cross-sectional", "area", "froth", "formation", "cell", "achieved", "lead", "good", "recovery", "froth", "quality", "recycling", "fine", "froth", "formation", "cell", "vicinity", "tailing", "outlet", "sand", "separation", "cell", "fine", "eliminated", "system", "middling", "dragstream", "required", "prior", "art"], "section": [2, 1], "subsection": [61, 15], "group": [300, 302, 86], "subgroup": [1112, 4184, 4188], "labels": [2, 1, 70, 24, 437, 439, 223, 1910, 4982, 4986]} 105 | {"id": "3935170", "title": ["trivalent", "antimony", "catalyst"], "abstract": ["antimony", "catalyst", 
"polyester", "condensation", "reaction", "comprising", "trivalent", "antimony", "valence", "occupied", "dianion", "radical", "-", "diol", "anion", "radical", "organic", "carboxylic", "acid", "molar", "ratio", "antimony", "dianion", "radical", "-", "diol", "anion", "radical", "organic", "carboxylic", "acid", "antimony", "compound", "prepared", "reacting", "mixture", "-", "diol", "trivalent", "antimony", "reactant", "represented", "formula", "sb", "antimony", "anion", "radical", "organic", "carboxylic", "acid", "selected", "group", "consisting", "anion", "alcohol", "anion", "organic", "carboxylic", "acid", "mixture", "thereof"], "section": [2, 8, 1], "subsection": [125, 58, 15, 59], "group": [286, 278, 88, 655], "subgroup": [1213, 3676, 8088, 1218, 3850], "labels": [2, 8, 1, 134, 67, 24, 68, 423, 415, 225, 792, 2011, 4474, 8886, 2016, 4648]} 106 | {"id": "3935292", "title": ["cast", "polymerization", "process", "improved", "mold", "release"], "abstract": ["process", "producing", "cast", "polymer", "carried", "mold", "treated", "step", "coating", "glass", "surface", "mold", "polysiloxane", "heating", "coated", "glass", "surface", "temperature", "range", "degree", "temperature", "coated", "polysiloxane", "hardened", "wiping", "coated", "surface", "baked", "polysiloxane", "easy", "mold", "release", "attained", "repeated", "mold"], "section": [1], "subsection": [32], "group": [158, 155], "subgroup": [1942, 2060], "labels": [1, 41, 295, 292, 2740, 2858]} 107 | {"id": "3935306", "title": ["toothpaste", "formulation"], "abstract": ["toothpaste", "formulation", "dispersed", "plurality", "agglomerated", "particle", "dental", "polishing", "agent", "visible", "palpable", "substantially", "insoluble", "toothpaste", "disclosed", "agglomerate", "comprise", "individually", "impalpable", "particle", "water", "insoluble", "dental", "polishing", "agent", "include", "agglomerating", "agent", "reduced", "smaller", "sized", "particle", "dental", "polishing", "agent", "subjected", "mild", 
"mechanical", "agitation", "toothbrushing", "agglomerate", "suited", "incorporation", "transparent", "gel", "dental", "vehicle", "provide", "special", "effect", "supplemental", "cleaning", "polishing", "characteristic", "adversely", "affecting", "visual", "clarity", "finished", "toothpaste"], "section": [0], "subsection": [12], "group": [73, 68], "subgroup": [913, 852], "labels": [0, 21, 210, 205, 1711, 1650]} 108 | {"id": "3935313", "title": ["pharmaceutical", "composition", "-", "-", "process", "treatment", "hypertension", "therewith"], "abstract": ["invention", "relates", "process", "treatment", "hypertension", "disorder", "derived", "therefrom", "comprising", "administering", "patient", "suffering", "hypertension", "therapeutically", "effective", "amount", "compound", "selected", "-", "-", "acid", "addition", "salt", "therapeutically", "acceptable", "acid"], "section": [2], "subsection": [58], "group": [277], "subgroup": [3577], "labels": [2, 67, 414, 4375]} 109 | {"id": "3935316", "title": ["method", "formulation"], "abstract": ["method", "controlling", "parasitic", "worm", "animal", "method", "employ", "chemical", "compound", "chloride", "formulation", "compound", "diluent", "carrier", "adjuvant", "discussed", "exemplified", "synthesis", "compound"], "section": [2], "subsection": [58], "group": [276], "subgroup": [3486], "labels": [2, 67, 413, 4284]} 110 | {"id": "3935322", "title": ["chip", "separating", "fried", "ribbon"], "abstract": ["method", "apparatus", "preparing", "chip-type", "snack", "disclosed", "dough", "prepared", "sheeted", "elongated", "shaped", "ribbon", "connected", "dough", "piece", "cut", "dough", "sheet", "ribbon", "passed", "deep", "fat", "fryer", "severed", "individual", "chip"], "section": [8, 0], "subsection": [3, 127, 11, 1], "group": [58, 11, 23, 659], "subgroup": [205, 8255, 279, 662, 290], "labels": [8, 0, 12, 136, 20, 10, 195, 148, 160, 796, 1003, 9053, 1077, 1460, 1088]} 111 | {"id": "3935358", "title": ["process", "preparing", 
"hollow", "rib-reinforced", "laminated", "structure"], "abstract": ["invention", "process", "preparing", "hollow", "rib-reinforced", "laminated", "article", "placing", "sheet", "opposing", "mold", "platen", "sheet", "aligned", "sheet", "surface", "oppose", "sheet", "thermoplastic", "material", "heated", "thermoforming", "temperature", "sheet", "provided", "groove", "integral", "projection", "form", "fluid", "passageway", "mold", "platen", "provided", "mold", "caivty", "form", "shaped", "article", "rib", "closing", "mold", "platen", "contact", "sheet", "introducing", "fluid", "fluid", "passageway", "distend", "thermoplastic", "sheet", "mold", "cavity", "forming", "shaped", "article", "rib", "sheet", "maintain", "contact", "nondistended", "area"], "section": [8, 1], "subsection": [32, 127, 35], "group": [660, 155, 164], "subgroup": [8354, 2192, 2188, 2136, 1951, 1936, 2140, 2190], "labels": [8, 1, 41, 136, 44, 797, 292, 301, 9152, 2990, 2986, 2934, 2749, 2734, 2938, 2988]} 112 | {"id": "3935384", "title": ["network", "generating", "crt", "control", "signal", "enhancing", "edge", "television", "image"], "abstract": ["low", "cost", "network", "television", "receiver", "receiving", "video", "signal", "generating", "therefrom", "control", "signal", "modulating", "scan", "velocity", "crt", "electron", "beam", "delayed", "video", "signal", "intensity", "modulating", "crt", "electron", "beam", "picture", "information", "network", "delay", "line", "generating", "control", "signal", "delayed", "video", "signal", "video", "signal", "applied", "impedance", "delay", "coupled", "reflecting", "termination", "impedance", "impedance", "substantially", "equal", "characteristic", "impedance", "delay", "output", "terminal", "control", "signal", "included", "point", "impedance", "delay", "delayed", "video", "signal", "generated", "responsive", "signal", "received", "reflecting", "termination", "delay", "network", "generate", "preshoot", "overshoot", "peaking", "component", "peaking", 
"delayed", "video", "signal"], "section": [7], "subsection": [123], "group": [639], "subgroup": [7949], "labels": [7, 132, 776, 8747]} 113 | {"id": "3935494", "title": ["single", "substrate", "plasma", "discharge", "cell"], "abstract": ["gaseous", "display", "device", "memory", "disclosed", "requires", "single", "dielectric", "substrate", "layer", "orthogonal", "conductor", "laid", "thereon", "layer", "separated", "dielectric", "layer", "substrate", "layer", "thereon", "enclosed", "gaseous", "environment", "conductor", "brought", "envelope", "facilitate", "application", "signal", "dielectric", "barrier", "conveniently", "established", "substrate", "control", "shape", "individual", "discharge", "prevent", "crosstalk"], "section": [7], "subsection": [120], "group": [605], "subgroup": [7502], "labels": [7, 129, 742, 8300]} 114 | {"id": "3935547", "title": ["high", "pressure", "gas", "laser", "uniform", "field", "electrode", "configuration", "irradiation", "corona", "discharge"], "abstract": ["molecular", "gas", "laser", "capable", "operating", "atmospheric", "pressure", "electrical", "energy", "coupled", "active", "molecular", "gas", "medium", "comprising", "molecule", "vibrational", "rotational", "energy", "level", "electric", "field", "transverse", "lasing", "axis", "applying", "impulse", "voltage", "electrode", "configuration", "high", "current", "glow", "discharge", "created", "pulse", "discharge", "place", "electrode", "parallel", "planar", "surface", "facing", "lateral", "edge", "face", "suitably", "profiled", "avoid", "field", "concentration", "provide", "diffused", "glow", "discharge", "uniform", "electric", "field", "transverse", "lasing", "axis", "initiatory", "electron", "required", "produce", "high", "current", "diffused", "glow", "provided", "generating", "intense", "burst", "corona", "gap", "spacer", "member", "high", "dielectric", "constant", "interposed", "electrode", "specifically", "voltage", "pulse", "applied", "gap", "spacer", "element", "high", 
"field", "appears", "interface", "generates", "intense", "burst", "corona", "ultraviolet", "irradiation", "cathode", "resulting", "emission", "electron"], "section": [7], "subsection": [120], "group": [612], "subgroup": [7634], "labels": [7, 129, 749, 8432]} 115 | {"id": "3935551", "title": ["filter", "arrangement", "converter", "circuit"], "abstract": ["improved", "filtering", "arrangement", "converter", "circuit", "highpass", "filter", "tuned", "cutoff", "frequency", "le", "lowest", "harmonic", "filtered", "provided", "customary", "plurality", "filter", "tuned", "individual", "harmonic", "current", "pulse", "converter", "circuit", "highpass", "filter", "tuned", "filter", "harmonic", "starting", "harmonic", "ha", "resonant", "frequency", "harmonic"], "section": [7], "subsection": [121], "group": [619], "subgroup": [7716], "labels": [7, 130, 756, 8514]} 116 | {"id": "3935607", "title": ["inflatable", "boat"], "abstract": ["inflatable", "boat", "comprising", "inflatable", "tube", "outer", "tube", "tube", "outer", "tube", "fabricated", "flat", "sheet", "stock", "method", "involving", "stitching", "bottom", "seam", "outer", "tube", "outer", "tube", "place", "inflated", "tube"], "section": [1], "subsection": [44], "group": [225], "subgroup": [2802], "labels": [1, 53, 362, 3600]} 117 | {"id": "3935636", "title": ["method", "making", "pressure", "transducer"], "abstract": ["low", "cost", "pressure", "transducer", "manufactured", "assembled", "automatic", "semiautomatic", "production", "line", "technique", "transducer", "includes", "pressure", "fitting", "diaphragm", "strain", "gage", "comprising", "bridge", "circuit", "tab", "lead", "bridge", "circuit", "termination", "board", "contained", "case", "end", "case", "swaged", "flange", "fitting", "order", "sealingly", "clamp", "diaphragm", "shoulder", "case", "fitting", "flange", "bridge", "portion", "strain", "gage", "adhesively", "secured", "diaphragm", "pressure", "distribution", "member", "bridge", "pressure", "applied", 
"assembly", "heat-curing", "adhesive", "secures", "gage", "diaphragm", "termination", "board", "ha", "terminal", "number", "conductive", "strip", "positioned", "case", "conductive", "strip", "pressed", "electrical", "contact", "lead", "tab", "gage", "end", "case", "swaged", "termination", "board", "lock", "place"], "section": [6, 8, 7], "subsection": [106, 120, 127], "group": [660, 526, 601], "subgroup": [7381, 8347, 6694, 8342], "labels": [6, 8, 7, 115, 129, 136, 797, 663, 738, 8179, 9145, 7492, 9140]} 118 | {"id": "3935745", "title": ["pore", "water", "pressure", "measuring", "device"], "abstract": ["pore", "water", "pressure", "metering", "device", "incorporating", "pressure", "meter", "force", "meter", "influenced", "pressure", "meter", "device", "includes", "power", "member", "arranged", "control", "pressure", "exerted", "pressure", "meter", "force", "meter", "applying", "overriding", "force", "pressure", "meter", "stop", "influence", "force", "meter", "removing", "overriding", "force", "pressure", "meter", "influence", "force", "meter", "resumed"], "section": [6, 4], "subsection": [106, 87], "group": [526, 534, 411], "subgroup": [6816, 6694, 5234], "labels": [6, 4, 115, 96, 663, 671, 548, 7614, 7492, 6032]} 119 | {"id": "3935861", "title": ["protective", "breathing", "mask", "compressed", "air", "supply", "breathing"], "abstract": ["protective", "breathing", "mask", "comprises", "face", "encircling", "mask", "body", "connected", "compressed", "gas", "line", "supply", "compressed", "gas", "thereto", "mask", "includes", "encircling", "rim", "portion", "defines", "air", "seal", "cavity", "provided", "passage", "body", "separated", "body", "connected", "compressed", "gas", "line", "supplying", "gas", "seal", "cavity", "rim", "includes", "lip", "engages", "face", "wearer", "gas", "circulated", "cavity", "escape", "lip", "face", "mask", "interior", "gas", "conduit", "advantageously", "includes", "lung", "demand", "inlet", "valve", "open", "front", "side", "mask", 
"ha", "pa", "passage", "throttle", "seal", "cavity", "seal", "cavity", "advantageously", "defined", "outwardly", "formed", "annular", "bead", "annular", "tubular", "member", "opening", "periphery", "directing", "compressed", "gas", "cavity", "mask", "interior"], "section": [0], "subsection": [13], "group": [74], "subgroup": [926, 937], "labels": [0, 22, 211, 1724, 1735]} 120 | {"id": "3935906", "title": ["adjustable", "height", "soil", "conditioner", "frame", "extending", "rearwardly", "cultivating", "implement"], "abstract": ["soil", "conditioner", "combination", "cultivating", "implement", "drawn", "tractor", "break", "soil", "leave", "prepared", "seedbed", "single", "operation", "conditioner", "mounted", "framework", "extending", "rearwardly", "cultivator", "comprises", "set", "reel", "mounted", "framework", "free", "rotation", "transverse", "axis", "set", "axial", "blade", "member", "provided", "reel", "blade", "equiangularly", "spaced", "axis", "arranged", "blade", "set", "angularly", "spaced", "midway", "adjacent", "blade", "set", "set", "blade", "include", "ground-engaging", "edge", "spaced", "radius", "axis", "edge", "remaining", "set", "blade", "spaced", "radius", "axis", "le", "radius", "radius", "set", "blade", "enable", "conditioner", "towed", "field", "freely-rotating", "blade", "breaking", "clod", "dirt", "left", "cultivator", "blade", "clogged", "dirt", "conditioner", "includes", "adjustment", "structure", "elevationally", "adjusting", "blade", "edge", "relative", "cultivator", "adjustment", "structure", "includes", "adjustment", "bracket", "connecting", "portion", "soil", "conditioner", "frame", "bracket", "provided", "plurality", "aperture", "aperture", "selectively", "aligned", "set", "aperture", "portion", "soil", "conditioner", "frame", "placing", "bolt", "aligned", "aperture", "elevation", "blade", "edge", "selectively", "adjusted"], "section": [0], "subsection": [0], "group": [0], "subgroup": [10, 15], "labels": [0, 9, 137, 808, 813]} 121 | 
{"id": "3935924", "title": ["vibratory", "material", "paper", "pulp", "carbon", "fiber"], "abstract": ["vibratory", "plate", "pulp", "chopped", "carbon", "fiber", "mixed", "uniformly", "paper", "pulp", "beaten", "degree", "higher", "cc", "canadian", "standard", "freeness"], "section": [6, 3, 7], "subsection": [123, 80, 115], "group": [641, 382, 585], "subgroup": [4904, 7263, 7991], "labels": [6, 3, 7, 132, 89, 124, 778, 519, 722, 5702, 8061, 8789]} 122 | {"id": "3935957", "title": ["insulation", "double", "walled", "cryogenic", "storage", "tank"], "abstract": ["thermal", "insulation", "material", "affixed", "outer", "surface", "sidewall", "double", "walled", "storage", "tank", "spaced", "outer", "sidewall", "form", "gaseous", "space", "therebetween", "blackish", "wall", "radially", "outer", "face", "insulating", "material", "face", "tank", "outer", "sidewall"], "section": [8, 5], "subsection": [125, 127, 95], "group": [654, 462, 659], "subgroup": [8085, 6020, 8170, 6024, 6027, 6015, 6019, 6016], "labels": [8, 5, 134, 136, 104, 791, 599, 796, 8883, 6818, 8968, 6822, 6825, 6813, 6817, 6814]} 123 | {"id": "3935963", "title": ["cap", "locking", "member"], "abstract": ["cap", "locking", "member", "locking", "cap", "closed", "position", "relative", "neck", "spout", "container", "prevent", "removal", "unscrewing", "cap", "child", "providing", "safety", "factor", "adult", "unscrew", "cap", "hand", "cap", "locking", "member", "secured", "top", "container", "ha", "upwardly", "extending", "portion", "ha", "notch", "recess", "transversely", "extending", "front", "edge", "thereof", "engaging", "underside", "cap", "front", "edge", "imbedded", "underside", "cap", "manner", "prevent", "counterrotation", "unscrewing", "cap", "container", "held", "hand", "finger", "hand", "applying", "sufficient", "manual", "pressure", "depress", "manually", "engageable", "portion", "locking", "effect", "disengagement", "cap", "simultaneously", "rotating", "cap", "counterclockwise", "direction", 
"hand", "unscrew", "cap", "locking", "member", "formed", "essentially", "spring", "metal", "integrally", "formed", "inexpensive", "produce"], "section": [1], "subsection": [46], "group": [237], "subgroup": [3018], "labels": [1, 55, 374, 3816]} 124 | {"id": "3935985", "title": ["support", "welding", "head", "carriage"], "abstract": ["support", "carriage", "welding", "head", "intended", "automatic", "welding", "metal", "plate", "comprising", "plane", "area", "framed", "corrugation", "running", "perpendicular", "direction", "intersecting", "form", "abutment", "surface", "end", "corrugation", "support", "comprises", "base", "equipped", "fixing", "base", "plane", "area", "centering", "base", "respect", "corrugation", "surrounding", "plane", "area", "contact", "plurality", "abutment", "surface", "adjustable", "stop", "defining", "distance", "base", "plane", "area", "contact", "plurality", "corrugation", "base", "including", "mounting", "base", "guide", "bar", "designed", "receive", "carriage", "translationally"], "section": [1], "subsection": [26], "group": [124], "subgroup": [1685, 1687], "labels": [1, 35, 261, 2483, 2485]} 125 | {"id": "3935995", "title": ["swinging", "bucket", "centrifuge", "rotor"], "abstract": ["swinging", "bucket", "centrifuge", "rotor", "ha", "plurality", "peripheral", "cavity", "adapted", "seat", "swinging", "bucket", "cavity", "ha", "hanger", "slideably", "positioned", "receptacle", "rear", "cavity", "receptacle", "prevents", "rotation", "hanger", "path", "movement", "extremity", "hanger", "form", "hook", "adapted", "support", "cross-pin", "located", "swinging", "bucket", "cap", "cross-pin", "positioned", "bucket", "hang", "properly", "hook", "manner", "hook", "ha", "outwardly", "downwardly", "sloping", "entrance", "opening", "aid", "properly", "hanging", "bucket"], "section": [1], "subsection": [18], "group": [95], "subgroup": [1292], "labels": [1, 27, 232, 2090]} 126 | {"id": "3936046", "title": ["front", "side", "sheet", "registering", 
"apparatus"], "abstract": ["sheet", "feeding", "device", "adapted", "separate", "single", "sheet", "stack", "sheet", "forward", "separated", "sheet", "stack", "subsequent", "processing", "device", "adapted", "side-register", "sheet", "drawn", "stack", "front-register", "sheet", "sheet-forwarding", "mechanism", "sheet", "feeding", "device"], "section": [1], "subsection": [46], "group": [240], "subgroup": [3122, 3149, 3131, 3100], "labels": [1, 55, 377, 3920, 3947, 3929, 3898]} 127 | {"id": "3936055", "title": ["golf", "practice", "device"], "abstract": ["golf", "shot", "practice", "stage", "comprising", "frame", "side", "panel", "define", "green", "", "fairway", "", "playing", "surface", "panel", "positionable", "angle", "horizontal", "supported", "angle", "defining", "wedge", "enable", "practice", "ball", "lie", "stage", "foldable", "compact", "form", "storage", "wedge", "enclosed", "folded-up", "stage"], "section": [0], "subsection": [14], "group": [77], "subgroup": [1028, 992], "labels": [0, 23, 214, 1826, 1790]} 128 | {"id": "3936204", "title": ["tape", "clamp"], "abstract": ["clamp", "securing", "loop", "end", "flat", "metallic", "tape", "type", "transmitting", "linear", "motion", "remote", "control", "master-slave", "manipulator", "securing", "clamp", "manipulator", "element", "transmits", "motion", "tape", "moved", "force", "transmitted", "tape", "clamp", "characterized", "independence", "tape", "loop", "variation", "tensile", "load", "disclosed", "clamp", "includes", "elongated", "tubular", "frame", "intermediate", "open", "section", "anchoring", "pin", "roller", "mounted", "free", "end", "tape", "secured", "introduced", "end", "frame", "passed", "pin", "roller", "partial", "peripheral", "engagement", "therewith", "brought", "back", "end", "frame", "tape", "clamping", "mechanism", "adjacent", "frame", "end", "force", "abutting", "tape", "surface", "inside", "surface", "frame", "rigidly", "secure", "tape", "loop", "clamp", "join", "tape", "end", "connect", 
"tape", "cable", "anchor", "tape", "manipulator", "part"], "section": [8, 1, 5], "subsection": [28, 127, 94], "group": [660, 136, 451], "subgroup": [5849, 1815, 8350], "labels": [8, 1, 5, 37, 136, 103, 797, 273, 588, 6647, 2613, 9148]} -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow==1.15.0 2 | tensorflow_tensorboard==1.5.1 3 | tensorboard==1.15.0 4 | matplotlib==2.2.3 5 | tflearn==0.3.2 6 | gensim==3.8.3 7 | numpy==1.16.2 8 | Pillow==5.4.1 9 | python_gflags==3.1.2 10 | scikit_learn==0.19.1 11 | texttable==1.6.3 12 | tqdm==4.49.0 13 | google-compute-engine==2.8.13 -------------------------------------------------------------------------------- /utils/checkmate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import json 4 | import numpy as np 5 | import tensorflow as tf 6 | 7 | 8 | class BestCheckpointSaver(object): 9 | """Maintains a directory containing only the best n checkpoints. 10 | Inside the directory is a best_checkpoints JSON file containing a dictionary 11 | mapping of the best checkpoint filepaths to the values by which the checkpoints 12 | are compared. Only the best n checkpoints are contained in the directory and JSON file. 13 | This is a light-weight wrapper class only intended to work in simple, 14 | non-distributed settings. It is not intended to work with the tf.Estimator 15 | framework. 16 | """ 17 | def __init__(self, save_dir, num_to_keep=1, maximize=True, saver=None): 18 | """Creates a `BestCheckpointSaver`. 19 | `BestCheckpointSaver` acts as a wrapper class around a `tf.train.Saver`. 20 | 21 | Args: 22 | save_dir: The directory in which the checkpoint files will be saved. 23 | num_to_keep: The number of best checkpoint files to retain. 24 | maximize: Define 'best' values to be the highest values. 
class BestCheckpointSaver(object):
    """Maintain a directory that contains only the best n checkpoints.

    Inside the directory is a ``best_checkpoints`` JSON file containing a
    dictionary that maps each kept checkpoint name (``model-<global_step>``)
    to the value by which checkpoints are compared. Only the best
    ``num_to_keep`` checkpoints are kept in the directory and the JSON file.

    This is a light-weight wrapper class only intended to work in simple,
    non-distributed settings. It is not intended to work with the
    tf.Estimator framework.
    """

    def __init__(self, save_dir, num_to_keep=1, maximize=True, saver=None):
        """Create a `BestCheckpointSaver` wrapping a `tf.train.Saver`.

        Args:
            save_dir: The directory in which the checkpoint files will be saved.
                It is created if it does not exist yet.
            num_to_keep: The number of best checkpoint files to retain.
            maximize: Define 'best' values to be the highest values. Set this
                to True when selecting for the highest accuracy, or to False
                when selecting for the lowest error rate.
            saver: A `tf.train.Saver` to use for saving checkpoints. A default
                `tf.train.Saver` is created if none is provided.
        """
        self._num_to_keep = num_to_keep
        self._save_dir = save_dir
        self._save_path = os.path.join(save_dir, 'model')
        self._maximize = maximize
        self._saver = saver if saver else tf.train.Saver(
            max_to_keep=None,
            save_relative_paths=True
        )

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(save_dir, exist_ok=True)
        self.best_checkpoints_file = os.path.join(save_dir, 'best_checkpoints')

    def handle(self, value, sess, global_step):
        """Update the set of best checkpoints based on the given result.

        The checkpoint is saved when the index file does not exist yet, when
        fewer than ``num_to_keep`` checkpoints are recorded, or when `value`
        beats at least one recorded value (in which case the worst recorded
        checkpoint is deleted first). Otherwise nothing is written.

        Args:
            value: The value by which to rank the checkpoint.
            sess: A tf.Session to use to save the checkpoint.
            global_step: The global step.
        """
        current_ckpt = 'model-{}'.format(global_step)
        value = float(value)

        if not os.path.exists(self.best_checkpoints_file):
            self._save_and_record({current_ckpt: value}, sess, global_step)
            return

        best_checkpoints = self._load_best_checkpoints_file()

        if len(best_checkpoints) < self._num_to_keep:
            best_checkpoints[current_ckpt] = value
            self._save_and_record(best_checkpoints, sess, global_step)
            return

        if self._maximize:
            should_save = not all(current_best >= value
                                  for current_best in best_checkpoints.values())
        else:
            should_save = not all(current_best <= value
                                  for current_best in best_checkpoints.values())
        if not should_save:
            return

        # Evict the worst recorded checkpoint (last after best-first sorting).
        best_checkpoint_list = self._sort(best_checkpoints)
        worst_checkpoint = os.path.join(self._save_dir,
                                        best_checkpoint_list.pop(-1)[0])
        self._remove_outdated_checkpoint_files(worst_checkpoint)
        self._update_internal_saver_state(best_checkpoint_list)

        best_checkpoints = dict(best_checkpoint_list)
        best_checkpoints[current_ckpt] = value
        self._save_and_record(best_checkpoints, sess, global_step)

    def _save_and_record(self, best_checkpoints, sess, global_step):
        """Save the checkpoint, then persist the updated index.

        Saving first means a failed save cannot leave the index pointing at a
        checkpoint that was never written.
        """
        self._saver.save(sess, self._save_path, global_step)
        self._save_best_checkpoints_file(best_checkpoints)

    def _save_best_checkpoints_file(self, updated_best_checkpoints):
        """Write the name -> value mapping to the JSON index file."""
        with open(self.best_checkpoints_file, 'w') as f:
            json.dump(updated_best_checkpoints, f, indent=3)

    def _remove_outdated_checkpoint_files(self, worst_checkpoint):
        """Delete the saver state file and every file of `worst_checkpoint`.

        Args:
            worst_checkpoint: Path prefix of the checkpoint to delete; all
                files matching ``<prefix>.*`` are removed.
        """
        state_file = os.path.join(self._save_dir, 'checkpoint')
        # The state file is absent when the saver did not write one; that is
        # not a reason to abort the eviction.
        if os.path.exists(state_file):
            os.remove(state_file)
        for ckpt_file in glob.glob(worst_checkpoint + '.*'):
            os.remove(ckpt_file)

    def _update_internal_saver_state(self, best_checkpoint_list):
        """Tell the wrapped saver which checkpoints are still alive."""
        best_checkpoint_files = [
            (ckpt[0], np.inf)  # TODO: Try to use actual file timestamp
            for ckpt in best_checkpoint_list
        ]
        self._saver.set_last_checkpoints_with_time(best_checkpoint_files)

    def _load_best_checkpoints_file(self):
        """Read and return the name -> value mapping from the JSON index file."""
        with open(self.best_checkpoints_file, 'r') as f:
            best_checkpoints = json.load(f)
        return best_checkpoints

    def _sort(self, best_checkpoints):
        """Return ``(name, value)`` pairs ordered from best to worst."""
        best_checkpoints = [
            (ckpt, best_checkpoints[ckpt])
            for ckpt in sorted(best_checkpoints,
                               key=best_checkpoints.get,
                               reverse=self._maximize)
        ]
        return best_checkpoints


def get_best_checkpoint(best_checkpoint_dir, select_maximum_value=True):
    """Return the filepath of the best checkpoint.

    Reads the ``best_checkpoints`` JSON file in `best_checkpoint_dir` and picks
    the entry with the highest value if `select_maximum_value` is True, or the
    entry with the lowest value otherwise. On ties the entry listed first in
    the file wins.

    Args:
        best_checkpoint_dir: Directory containing the best_checkpoints JSON file.
        select_maximum_value: If True, select the filepath associated
            with the highest value. Otherwise, select the filepath associated
            with the lowest value.
    Returns:
        The full (absolute) path to the best checkpoint file.
    Raises:
        FileNotFoundError: If the best_checkpoints file does not exist.
        ValueError: If the best_checkpoints file lists no checkpoint.
    """
    best_checkpoints_file = os.path.join(best_checkpoint_dir, 'best_checkpoints')
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not os.path.exists(best_checkpoints_file):
        raise FileNotFoundError(
            "best_checkpoints file not found: {}".format(best_checkpoints_file))
    with open(best_checkpoints_file, 'r') as f:
        best_checkpoints = json.load(f)
    if not best_checkpoints:
        raise ValueError(
            "No checkpoint is recorded in {}".format(best_checkpoints_file))
    pick = max if select_maximum_value else min
    best_name = pick(best_checkpoints, key=best_checkpoints.get)
    return os.path.join(os.path.abspath(best_checkpoint_dir), best_name)
132 | """ 133 | best_checkpoints_file = os.path.join(best_checkpoint_dir, 'best_checkpoints') 134 | assert os.path.exists(best_checkpoints_file) 135 | with open(best_checkpoints_file, 'r') as f: 136 | best_checkpoints = json.load(f) 137 | best_checkpoints = [ 138 | ckpt for ckpt in sorted(best_checkpoints, 139 | key=best_checkpoints.get, 140 | reverse=select_maximum_value) 141 | ] 142 | return os.path.join(os.path.abspath(best_checkpoint_dir), best_checkpoints[0]) 143 | -------------------------------------------------------------------------------- /utils/data_helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | __author__ = 'Randolph' 3 | 4 | import os 5 | import time 6 | import heapq 7 | import gensim 8 | import logging 9 | import json 10 | import numpy as np 11 | from collections import OrderedDict 12 | from pylab import * 13 | from texttable import Texttable 14 | from gensim.models import KeyedVectors 15 | from tflearn.data_utils import pad_sequences 16 | 17 | 18 | def _option(pattern): 19 | """ 20 | Get the option according to the pattern. 21 | pattern 0: Choose training or restore. 22 | pattern 1: Choose best or latest checkpoint. 23 | 24 | Args: 25 | pattern: 0 for training step. 1 for testing step. 26 | Returns: 27 | The OPTION. 28 | """ 29 | if pattern == 0: 30 | OPTION = input("[Input] Train or Restore? (T/R): ") 31 | while not (OPTION.upper() in ['T', 'R']): 32 | OPTION = input("[Warning] The format of your input is illegal, please re-input: ") 33 | if pattern == 1: 34 | OPTION = input("Load Best or Latest Model? (B/L): ") 35 | while not (OPTION.isalpha() and OPTION.upper() in ['B', 'L']): 36 | OPTION = input("[Warning] The format of your input is illegal, please re-input: ") 37 | return OPTION.upper() 38 | 39 | 40 | def logger_fn(name, input_file, level=logging.INFO): 41 | """ 42 | The Logger. 43 | 44 | Args: 45 | name: The name of the logger. 46 | input_file: The logger file path. 
def logger_fn(name, input_file, level=logging.INFO):
    """
    Build a logger that writes to both a log file and the console.

    Note: handlers are added on every call, so calling this twice with the
    same `name` attaches a second pair of handlers to the same logger.

    Args:
        name: The name of the logger.
        input_file: The logger file path. Missing parent directories are created;
            the file is opened in 'w' mode.
        level: The logger level.
    Returns:
        The logger.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)
    log_dir = os.path.dirname(input_file)
    # dirname() is '' for a bare file name, and os.makedirs('') raises.
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

    # File Handler
    fh = logging.FileHandler(input_file, mode='w')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # stream Handler
    sh = logging.StreamHandler()
    sh.setFormatter(formatter)
    sh.setLevel(logging.INFO)
    logger.addHandler(sh)
    return logger


def tab_printer(args, logger):
    """
    Function to print the logs in a nice tabular format.

    Args:
        args: Parameters used for the model (anything `vars()` accepts, e.g. an argparse Namespace).
        logger: The logger.
    """
    params = vars(args)
    rows = [[key.replace("_", " ").capitalize(), params[key]] for key in sorted(params)]
    table = Texttable()
    # Texttable.add_rows() treats the first row of each call as the header, so the
    # header must lead a single call; two calls silently drop the first parameter.
    table.add_rows([["Parameter", "Value"]] + rows)
    logger.info('\n' + table.draw())
def get_out_dir(option, logger):
    """
    Get the out dir for saving model checkpoints.

    For 'T' the directory name is the current Unix timestamp; for 'R' the user is
    prompted for the 10-digit name of an existing run. The directory itself is not created here.

    Args:
        option: Train or Restore ('T' or 'R').
        logger: The logger.
    Returns:
        The output dir (absolute path under ./runs) for model checkpoints.
    Raises:
        ValueError: If option is neither 'T' nor 'R'.
    """
    if option == 'T':
        run_name = str(int(time.time()))
    elif option == 'R':
        # The model you want to restore
        run_name = input("[Input] Please input the checkpoints model you want to restore, "
                         "it should be like (1490175368): ")
        while not (run_name.isdigit() and len(run_name) == 10):
            run_name = input("[Warning] The format of your input is illegal, please re-input: ")
    else:
        # Previously `out_dir` stayed unbound and the return raised UnboundLocalError.
        raise ValueError("[Error] Unknown option: {0!r} (expected 'T' or 'R').".format(option))

    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", run_name))
    logger.info("Writing to {0}\n".format(out_dir))
    return out_dir


def get_model_name():
    """
    Get the model name used for test.

    Prompts on the console until a 10-digit string is entered.

    Returns:
        The model name.
    """
    model = input("[Input] Please input the model file you want to test, it should be like (1490175368): ")
    while not (model.isdigit() and len(model) == 10):
        model = input("[Warning] The format of your input is illegal, "
                      "it should be like (1490175368), please re-input: ")
    return model
def create_prediction_file(output_file, data_id, true_labels, predict_labels, predict_scores):
    """
    Create the prediction file (one JSON object per line).

    Args:
        output_file: The path of the prediction file to write (must end with '.json').
        data_id: The data record id info provided by dict.
        true_labels: The all true labels.
        predict_labels: The all predict labels by threshold.
        predict_scores: The all predict scores by threshold.
    Raises:
        IOError: If the prediction file is not a .json file.
    """
    if not output_file.endswith('.json'):
        raise IOError("[Error] The prediction file is not a json file. "
                      "Please make sure the prediction data is a json file.")
    # ensure_ascii=False emits raw non-ASCII text, so the encoding is pinned.
    with open(output_file, 'w', encoding='utf-8') as fout:
        for idx, labels in enumerate(predict_labels):
            data_record = OrderedDict([
                ('id', data_id[idx]),
                ('labels', [int(label) for label in true_labels[idx]]),
                ('predict_labels', [int(label) for label in labels]),
                # float() first: round() on a numpy scalar returns a numpy
                # scalar, which json cannot serialize.
                ('predict_scores', [round(float(score), 4) for score in predict_scores[idx]])
            ])
            fout.write(json.dumps(data_record, ensure_ascii=False) + '\n')


def get_onehot_label_threshold(scores, threshold=0.5):
    """
    Get the predicted one-hot labels based on the threshold.
    If there is no predict score greater than or equal to the threshold,
    then choose the label which has the max predict score.

    Args:
        scores: The all classes predicted scores provided by network
            (2-D array-like: one row of class scores per record).
        threshold: The threshold (default: 0.5).
    Returns:
        predicted_onehot_labels: The predicted labels (one-hot), as a list of lists of 0/1.
    """
    predicted_onehot_labels = []
    # np.asarray accepts plain nested lists as well as ndarrays.
    for score in np.asarray(scores).tolist():
        onehot_labels_list = [1 if predict_score >= threshold else 0 for predict_score in score]
        if 1 not in onehot_labels_list:
            onehot_labels_list[score.index(max(score))] = 1
        predicted_onehot_labels.append(onehot_labels_list)
    return predicted_onehot_labels
def get_onehot_label_topk(scores, top_num=1):
    """
    Get the predicted one-hot labels based on the topK.

    Exactly min(top_num, number of classes) labels are set per record. Tied scores
    are resolved in favour of the lower class index.

    Args:
        scores: The all classes predicted scores provided by network
            (2-D array-like: one row of class scores per record).
        top_num: The max topK number (default: 1).
    Returns:
        predicted_onehot_labels: The predicted labels (one-hot), as a list of lists of 0/1.
    """
    predicted_onehot_labels = []
    for score in np.asarray(scores).tolist():
        onehot_labels_list = [0] * len(score)
        # Rank the indices, not the values: looking values up with score.index()
        # maps tied scores to the same index and sets fewer than top_num labels.
        for index in heapq.nlargest(top_num, range(len(score)), key=score.__getitem__):
            onehot_labels_list[index] = 1
        predicted_onehot_labels.append(onehot_labels_list)
    return predicted_onehot_labels


def get_label_threshold(scores, threshold=0.5):
    """
    Get the predicted labels based on the threshold.
    If there is no predict score greater than or equal to the threshold,
    then choose the label which has the max predict score.
    Note: Only Used in `test_model.py`

    Args:
        scores: The all classes predicted scores provided by network
            (2-D array-like: one row of class scores per record).
        threshold: The threshold (default: 0.5).
    Returns:
        predicted_labels: The predicted labels (class indices per record).
        predicted_scores: The predicted scores (matching the labels position by position).
    """
    predicted_labels = []
    predicted_scores = []
    for score in np.asarray(scores).tolist():
        selected = [(index, predict_score) for index, predict_score in enumerate(score)
                    if predict_score >= threshold]
        if not selected:
            best_score = max(score)
            selected = [(score.index(best_score), best_score)]
        predicted_labels.append([index for index, _ in selected])
        predicted_scores.append([predict_score for _, predict_score in selected])
    return predicted_labels, predicted_scores
def get_label_topk(scores, top_num=1):
    """
    Get the predicted labels based on the topK.
    Note: Only Used in `test_model.py`

    Args:
        scores: The all classes predicted scores provided by network
            (2-D array-like: one row of class scores per record).
        top_num: The max topK number (default: 1). Values <= 0 select nothing.
    Returns:
        predicted_labels: The predicted labels (class indices, highest score first).
        predicted_scores: The predicted scores (matching the labels position by position).
    """
    predicted_labels = []
    predicted_scores = []
    for score in np.asarray(scores).tolist():
        if top_num <= 0:
            # Guard: a slice of [-0:] would return every index, not none.
            index_list = []
        else:
            index_list = np.argsort(score)[-top_num:][::-1].tolist()
        predicted_labels.append(index_list)
        predicted_scores.append([score[index] for index in index_list])
    return predicted_labels, predicted_scores


def create_metadata_file(word2vec_file, output_file):
    """
    Create the metadata file based on the corpus file (Used for the Embedding Visualization later).
    One vocabulary word is written per line, ordered by its index in the word2vec model.

    Args:
        word2vec_file: The word2vec file.
        output_file: The metadata file path.
    Raises:
        IOError: If word2vec model file doesn't exist.
    """
    if not os.path.isfile(word2vec_file):
        raise IOError("[Error] The word2vec file doesn't exist.")

    wv = KeyedVectors.load(word2vec_file, mmap='r')
    words_by_index = sorted(wv.vocab, key=lambda word: wv.vocab[word].index)

    with open(output_file, 'w+') as fout:
        for word in words_by_index:
            if word is None:
                print("[Warning] Empty Line, should replaced by any thing else, or will cause a bug of tensorboard")
                fout.write('' + '\n')
            else:
                fout.write(word + '\n')


def load_word2vec_matrix(word2vec_file):
    """
    Get the word2idx dict and embedding matrix.
    Index 0 is reserved for "_UNK" (an all-zero vector); every vocabulary word gets its
    word2vec index shifted by one.

    Args:
        word2vec_file: The word2vec file.
    Returns:
        word2idx: The word2idx dict.
        embedding_matrix: The word2vec model matrix.
    Raises:
        IOError: If word2vec model file doesn't exist.
    """
    if not os.path.isfile(word2vec_file):
        raise IOError("[Error] The word2vec file doesn't exist. ")

    wv = KeyedVectors.load(word2vec_file, mmap='r')

    word2idx = OrderedDict({"_UNK": 0})
    for word, entry in wv.vocab.items():
        word2idx[word] = entry.index + 1

    embedding_matrix = np.zeros([len(word2idx), wv.vector_size])
    for word, row in word2idx.items():
        # The "_UNK" row keeps its zero initialisation.
        if word != "_UNK":
            embedding_matrix[row] = wv[word]
    return word2idx, embedding_matrix


def load_data_and_labels(args, input_file, word2idx: dict):
    """
    Load research data from files, padding sentences and generate one-hot labels.

    Args:
        args: The arguments (reads `num_classes_list`, `total_classes` and `pad_seq_len`).
        input_file: The research record (a file with one JSON object per line).
        word2idx: The word2idx dict (must contain '_UNK' for out-of-vocabulary tokens).
    Returns:
        The dict (includes the record tokenindex and record labels)
    Raises:
        IOError: If the research record is not a .json file.
    """
    if not input_file.endswith('.json'):
        raise IOError("[Error] The research record is not a json file. "
                      "Please preprocess the research record into the json file.")

    def _token_to_index(x: list):
        # Out-of-vocabulary tokens map to the '_UNK' index.
        return [word2idx[token] if token in word2idx else word2idx['_UNK'] for token in x]

    def _create_onehot_labels(labels_index, num_labels):
        label = [0] * num_labels
        for item in labels_index:
            label[int(item)] = 1
        return label

    Data = {key: [] for key in ('id', 'content_index', 'content', 'section', 'subsection',
                                'group', 'subgroup', 'onehot_labels', 'labels')}
    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(input_file, encoding='utf-8') as fin:
        for eachline in fin:
            if not eachline.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            record = json.loads(eachline)
            content = record['abstract']
            labels = record['labels']

            Data['id'].append(record['id'])
            Data['content_index'].append(_token_to_index(content))
            Data['content'].append(content)
            Data['section'].append(_create_onehot_labels(record['section'], args.num_classes_list[0]))
            Data['subsection'].append(_create_onehot_labels(record['subsection'], args.num_classes_list[1]))
            Data['group'].append(_create_onehot_labels(record['group'], args.num_classes_list[2]))
            Data['subgroup'].append(_create_onehot_labels(record['subgroup'], args.num_classes_list[3]))
            Data['onehot_labels'].append(_create_onehot_labels(labels, args.total_classes))
            Data['labels'].append(labels)
    Data['pad_seqs'] = pad_sequences(Data['content_index'], maxlen=args.pad_seq_len, value=0.)
    return Data
def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """
    Generate mini-batches of the data (this is a generator function, since it uses `yield`).

    The data is iterated for `num_epochs` epochs. In each epoch, if shuffle=True, the data is
    re-shuffled, and then it is yielded batch by batch. Each batch holds at most `batch_size`
    items, so there are ceil(len(data) / batch_size) batches per epoch and the last batch of an
    epoch may be smaller. Empty data yields no batch at all.

    Args:
        data: The data.
        batch_size: The size of the data batch.
        num_epochs: The number of epochs.
        shuffle: Shuffle or not (default: True).
    Returns:
        A batch iterator for data set.
    """
    data = np.array(data)
    data_size = len(data)
    # Integer ceil division; gives 0 (not 1) batches for empty data.
    num_batches_per_epoch = (data_size + batch_size - 1) // batch_size
    for _ in range(num_epochs):
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            epoch_data = data[shuffle_indices]
        else:
            epoch_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min(start_index + batch_size, data_size)
            yield epoch_data[start_index:end_index]
def _str2bool(value):
    """Parse a command-line boolean such as 'true' / 'False' / '1' / 'no'."""
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected, got {0!r}.".format(value))


def parameter_parser():
    """
    A method to parse up command line parameters.
    The default hyperparameters give good results without cross-validation.

    Boolean options take an explicit value (e.g. `--attention-penalization False`);
    `--num-classes-list` takes space-separated integers (e.g. `--num-classes-list 9 128 661 8364`).

    Returns:
        The parsed arguments (argparse Namespace), read from the process command line.
    """
    parser = argparse.ArgumentParser(description="Run HARNN.")

    # Data Parameters
    parser.add_argument("--train-file", nargs="?", default="../data/Train_sample.json", help="Training data.")
    parser.add_argument("--validation-file", nargs="?", default="../data/Validation_sample.json",
                        help="Validation data.")
    parser.add_argument("--test-file", nargs="?", default="../data/Test_sample.json", help="Testing data.")
    parser.add_argument("--metadata-file", nargs="?", default="../data/metadata.tsv",
                        help="Metadata file for embedding visualization.")
    parser.add_argument("--word2vec-file", nargs="?", default="../data/word2vec_100.kv",
                        help="Word2vec file for embedding characters (the dim need to be the same as embedding dim).")

    # Model Hyperparameters
    parser.add_argument("--pad-seq-len", type=int, default=150, help="Padding sequence length. (depends on the data)")
    parser.add_argument("--embedding-type", type=int, default=1, help="The embedding type.")
    parser.add_argument("--embedding-dim", type=int, default=100, help="Dimensionality of character embedding.")
    parser.add_argument("--lstm-dim", type=int, default=256, help="Dimensionality of LSTM neurons.")
    parser.add_argument("--lstm-layers", type=int, default=1, help="Number of LSTM layers.")
    parser.add_argument("--attention-dim", type=int, default=200, help="Dimensionality of Attention neurons.")
    # type=bool would turn every non-empty string (including "False") into True.
    parser.add_argument("--attention-penalization", type=_str2bool, default=True,
                        help="Use attention penalization or not.")
    parser.add_argument("--fc-dim", type=int, default=512, help="Dimensionality for FC neurons.")
    parser.add_argument("--dropout-rate", type=float, default=0.5, help="Dropout keep probability.")
    parser.add_argument("--alpha", type=float, default=0.5, help="Weight of global part in scores cal.")
    # type=list would split the argument string into single characters.
    parser.add_argument("--num-classes-list", type=int, nargs="+", default=[9, 128, 661, 8364],
                        help="Each number of labels in hierarchical structure. (depends on the task)")
    parser.add_argument("--total-classes", type=int, default=9162, help="Total number of labels. (depends on the task)")
    parser.add_argument("--topK", type=int, default=5, help="Number of top K prediction classes.")
    parser.add_argument("--threshold", type=float, default=0.5, help="Threshold for prediction classes.")

    # Training Parameters
    parser.add_argument("--epochs", type=int, default=20, help="Number of training epochs.")
    parser.add_argument("--batch-size", type=int, default=32, help="Batch Size.")
    parser.add_argument("--learning-rate", type=float, default=0.001, help="Learning rate.")
    parser.add_argument("--decay-rate", type=float, default=0.95, help="Rate of decay for learning rate.")
    parser.add_argument("--decay-steps", type=int, default=500, help="How many steps before decay learning rate.")
    parser.add_argument("--evaluate-steps", type=int, default=10,
                        help="Evaluate model on val set after how many steps.")
    parser.add_argument("--norm-ratio", type=float, default=1.25,
                        help="The ratio of the sum of gradients norms of trainable variable.")
    parser.add_argument("--l2-lambda", type=float, default=0.0, help="L2 regularization lambda.")
    parser.add_argument("--checkpoint-steps", type=int, default=10, help="Save model after how many steps.")
    parser.add_argument("--num-checkpoints", type=int, default=5, help="Number of checkpoints to store.")

    # Misc Parameters
    parser.add_argument("--allow-soft-placement", type=_str2bool, default=True,
                        help="Allow device soft device placement.")
    parser.add_argument("--log-device-placement", type=_str2bool, default=False,
                        help="Log placement of ops on devices.")
    parser.add_argument("--gpu-options-allow-growth", type=_str2bool, default=True,
                        help="Allow gpu options growth.")

    return parser.parse_args()