├── .github
├── Alipay.jpeg
├── FUNDING.yml
└── Wechat.jpeg
├── .gitignore
├── .travis.yml
├── HARNN
├── test_harnn.py
├── text_harnn.py
├── train_harnn.py
└── visualization.py
├── LICENSE
├── README.md
├── Usage.md
├── data
├── Test_sample.json
├── Train_sample.json
└── Validation_sample.json
├── requirements.txt
└── utils
├── checkmate.py
├── data_helpers.py
└── param_parser.py
/.github/Alipay.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RandolphVI/Hierarchical-Multi-Label-Text-Classification/e3195986ef56d115fcb054a0b8cc0be2af6977df/.github/Alipay.jpeg
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: ["https://github.com/RandolphVI/Hierarchical-Multi-Label-Text-Classification/blob/master/.github/Wechat.jpeg", "https://github.com/RandolphVI/Hierarchical-Multi-Label-Text-Classification/blob/master/.github/Alipay.jpeg"]
13 |
--------------------------------------------------------------------------------
/.github/Wechat.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RandolphVI/Hierarchical-Multi-Label-Text-Classification/e3195986ef56d115fcb054a0b8cc0be2af6977df/.github/Wechat.jpeg
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ### Compiled source ###
2 | *.com
3 | *.class
4 | *.dll
5 | *.exe
6 | *.o
7 | *.so
8 |
9 | ### Packages ###
10 | # it's better to unpack these files and commit the raw source
11 | # git has its own built in compression methods
12 | *.7z
13 | *.dmg
14 | *.gz
15 | *.iso
16 | *.jar
17 | *.rar
18 | *.tar
19 | *.zip
20 |
21 | ### Logs and databases ###
22 | *.log
23 | *.sql
24 | *.sqlite
25 |
26 | ### Mac OS generated files ###
27 | .DS_Store
28 | .DS_Store?
29 | ._*
30 | .Spotlight-V100
31 | .Trashes
32 | ehthumbs.db
33 | Thumbs.db
34 |
35 | ### JetBrains config files ###
36 | .idea
37 |
38 | ### Python ###
39 | # Byte-compiled / optimized / DLL files
40 | *.npy
41 | __pycache__/
42 | *.py[cod]
43 | *$py.class
44 |
45 | # Distribution / packaging
46 | .Python
47 | env/
48 | build/
49 | develop-eggs/
50 | dist/
51 | downloads/
52 | eggs/
53 | .eggs/
54 | lib/
55 | lib64/
56 | parts/
57 | sdist/
58 | var/
59 | *.egg-info/
60 | .installed.cfg
61 | *.egg
62 |
63 | # PyInstaller
64 | # Usually these files are written by a python script from a template
65 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
66 | *.manifest
67 | *.spec
68 |
69 | # Installer logs
70 | pip-log.txt
71 | pip-delete-this-directory.txt
72 |
73 | # Unit test / coverage reports
74 | htmlcov/
75 | .tox/
76 | .coverage
77 | .coverage.*
78 | .cache
79 | nosetests.xml
80 | coverage.xml
81 | *,cover
82 |
83 | # Translations
84 | *.mo
85 | *.pot
86 |
87 | # Sphinx documentation
88 | docs/_build/
89 |
90 | # PyBuilder
91 | target/
92 |
93 | ### IPythonNotebook ###
94 | # Temporary data
95 | .ipynb_checkpoints/
96 |
97 | ### Current Project ###
98 | # Data File
99 | *.txt
100 | *.tsv
101 | *.csv
102 | *.json
103 | *.jpg
104 | *.png
105 | *.html
106 | *.pickle
107 | *.kv
108 | *.pdf
109 | !/data
110 | !/data/Train_sample.json
111 | !/data/Validation_sample.json
112 | !/data/Test_sample.json
113 |
114 | # Project File
115 | /HMC-LMLP
116 | /HMCN
117 | /SVM
118 |
119 | # Model File
120 | *.model
121 | *.pb
122 | runs/
123 | graph/
124 |
125 | # Analysis File
126 | Data Analysis.md
127 |
128 | # Log File
129 | logs/
130 |
131 | # Related Code
132 | temp.py
133 |
134 | ### Else ###
135 | randolph/
136 | Icon?
137 | *.graffle
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | matrix:
4 | include:
5 | - python: 3.6
6 |
7 | install:
8 | - pip install -r requirements.txt
9 | - pip install coveralls
10 |
11 | before_script:
12 | - export PYTHONPATH=$PWD
13 |
14 | script:
15 | - true # add other tests here
16 | - coveralls
--------------------------------------------------------------------------------
/HARNN/test_harnn.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'Randolph'
3 |
4 | import os
5 | import sys
6 | import time
7 | import logging
8 | import numpy as np
9 |
10 | sys.path.append('../')
11 | logging.getLogger('tensorflow').disabled = True
12 |
13 | import tensorflow as tf
14 | from utils import checkmate as cm
15 | from utils import data_helpers as dh
16 | from utils import param_parser as parser
17 | from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, average_precision_score
18 |
# Parsed run configuration; the option definitions live in utils/param_parser.
args = parser.parameter_parser()
# Name of the run under `runs/` whose checkpoints are evaluated.
# NOTE(review): presumably obtained interactively by data_helpers — confirm.
MODEL = dh.get_model_name()
# NOTE(review): time.asctime() yields e.g. 'Sun Jun 20 23:21:05 1993', so the log
# file name contains spaces and colons (colons are not valid in Windows file names).
logger = dh.logger_fn("tflog", "logs/Test-{0}.log".format(time.asctime()))

# All paths below are relative to the current working directory.
CPT_DIR = 'runs/' + MODEL + '/checkpoints/'  # latest checkpoints
BEST_CPT_DIR = 'runs/' + MODEL + '/bestcheckpoints/'  # best checkpoints
SAVE_DIR = 'output/' + MODEL  # where predictions.json is written
26 |
27 |
def create_input_data(data: dict):
    """
    Combine the per-sample columns of `data` into one iterator of tuples.

    Args:
        data: Mapping holding equally ordered sequences under the keys
            'pad_seqs', 'section', 'subsection', 'group', 'subgroup',
            'onehot_labels' and 'labels'.
    Returns:
        A zip iterator yielding one 7-tuple per sample, in the key order above.
    """
    field_order = ('pad_seqs', 'section', 'subsection', 'group',
                   'subgroup', 'onehot_labels', 'labels')
    columns = [data[field] for field in field_order]
    return zip(*columns)
31 |
32 |
def test_harnn():
    """
    Test HARNN model.

    Restores a trained checkpoint (the best one when `dh._option` returns 'B',
    otherwise the latest one), exports the frozen graph as a `.pb` file under
    `graph/`, runs the test set through the network for one epoch, logs the
    micro-averaged metrics of the global output and of each local level, and
    writes the predictions to `SAVE_DIR/predictions.json`.

    Raises:
        FileNotFoundError: if no checkpoint could be located.
    """
    # Print parameters used for the model
    dh.tab_printer(args, logger)

    # Load word2vec model (only the vocabulary is needed; weights come from the checkpoint)
    word2idx, _ = dh.load_word2vec_matrix(args.word2vec_file)

    # Load data
    logger.info("Loading data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args, args.test_file, word2idx)

    # Load harnn model
    OPTION = dh._option(pattern=1)
    if OPTION == 'B':
        logger.info("Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(BEST_CPT_DIR, select_maximum_value=True)
    else:
        logger.info("Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(CPT_DIR)
    logger.info(checkpoint_file)
    if checkpoint_file is None:
        # Fail early with a clear message instead of trying to import "None.meta".
        raise FileNotFoundError("No checkpoint found for model '{0}'.".format(MODEL))

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=args.allow_soft_placement,
            log_device_placement=args.log_device_placement)
        session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth
        # Using the session as a context manager makes it the default session
        # and guarantees it is closed (resources released) on exit.
        with tf.Session(config=session_conf) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            input_y_first = graph.get_operation_by_name("input_y_first").outputs[0]
            input_y_second = graph.get_operation_by_name("input_y_second").outputs[0]
            input_y_third = graph.get_operation_by_name("input_y_third").outputs[0]
            input_y_fourth = graph.get_operation_by_name("input_y_fourth").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            alpha = graph.get_operation_by_name("alpha").outputs[0]
            is_training = graph.get_operation_by_name("is_training").outputs[0]

            # Tensors we want to evaluate
            first_scores = graph.get_operation_by_name("first-output/scores").outputs[0]
            second_scores = graph.get_operation_by_name("second-output/scores").outputs[0]
            third_scores = graph.get_operation_by_name("third-output/scores").outputs[0]
            fourth_scores = graph.get_operation_by_name("fourth-output/scores").outputs[0]
            scores = graph.get_operation_by_name("output/scores").outputs[0]

            # Split the output nodes name by '|' if you have several output nodes
            output_node_names = "first-output/scores|second-output/scores|third-output/scores|fourth-output/scores|output/scores"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def,
                                                                            output_node_names.split("|"))
            tf.train.write_graph(output_graph_def, "graph", "graph-harnn-{0}.pb".format(MODEL), as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(list(create_input_data(test_data)), args.batch_size, 1, shuffle=False)

            # Collect the predictions here
            true_labels = []
            predicted_labels = []
            predicted_scores = []

            # Collect for calculating metrics
            # Index 0 holds the global output, indices 1-4 the four local levels.
            true_onehot_labels = [[], [], [], [], []]
            predicted_onehot_scores = [[], [], [], [], []]
            predicted_onehot_labels = [[], [], [], [], []]

            for batch_test in batches:
                x, sec, subsec, group, subgroup, y_onehot, y = zip(*batch_test)

                y_batch_test_list = [y_onehot, sec, subsec, group, subgroup]

                feed_dict = {
                    input_x: x,
                    input_y_first: sec,
                    input_y_second: subsec,
                    input_y_third: group,
                    input_y_fourth: subgroup,
                    input_y: y_onehot,
                    dropout_keep_prob: 1.0,
                    alpha: args.alpha,
                    is_training: False
                }
                batch_global_scores, batch_first_scores, batch_second_scores, batch_third_scores, batch_fourth_scores = \
                    sess.run([scores, first_scores, second_scores, third_scores, fourth_scores], feed_dict)

                # Same ordering as `y_batch_test_list`: global first, then the local levels.
                batch_scores = [batch_global_scores, batch_first_scores, batch_second_scores,
                                batch_third_scores, batch_fourth_scores]

                # Get the predicted labels by threshold
                batch_predicted_labels_ts, batch_predicted_scores_ts = \
                    dh.get_label_threshold(scores=batch_scores[0], threshold=args.threshold)

                # Add results to collection
                true_labels.extend(y)
                predicted_labels.extend(batch_predicted_labels_ts)
                predicted_scores.extend(batch_predicted_scores_ts)

                for index, level_scores in enumerate(batch_scores):
                    true_onehot_labels[index].extend(y_batch_test_list[index])
                    predicted_onehot_scores[index].extend(level_scores)
                    # Get one-hot prediction by threshold
                    predicted_onehot_labels[index].extend(
                        dh.get_onehot_label_threshold(scores=level_scores, threshold=args.threshold))

            # Calculate Precision & Recall & F1
            for index, (level_true, level_pred, level_scores) in enumerate(
                    zip(true_onehot_labels, predicted_onehot_labels, predicted_onehot_scores)):
                y_true = np.array(level_true)
                y_pred = np.array(level_pred)
                y_score = np.array(level_scores)

                test_pre = precision_score(y_true=y_true, y_pred=y_pred, average='micro')
                test_rec = recall_score(y_true=y_true, y_pred=y_pred, average='micro')
                test_F1 = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
                test_prc = average_precision_score(y_true=y_true, y_score=y_score, average="micro")
                if index == 0:
                    # ROC-AUC is only reported for the global output, so only compute it here.
                    test_auc = roc_auc_score(y_true=y_true, y_score=y_score, average='micro')
                    logger.info("[Global] Predict by threshold: Precision {0:g}, Recall {1:g}, "
                                "F1 {2:g}, AUC {3:g}, AUPRC {4:g}"
                                .format(test_pre, test_rec, test_F1, test_auc, test_prc))
                else:
                    logger.info("[Local] Predict by threshold in Level-{0}: Precision {1:g}, Recall {2:g}, "
                                "F1 {3:g}, AUPRC {4:g}".format(index, test_pre, test_rec, test_F1, test_prc))

            # Save the prediction result
            os.makedirs(SAVE_DIR, exist_ok=True)
            dh.create_prediction_file(output_file=SAVE_DIR + "/predictions.json", data_id=test_data['id'],
                                      true_labels=true_labels, predict_labels=predicted_labels,
                                      predict_scores=predicted_scores)
    logger.info("All Done.")
179 |
180 |
181 | if __name__ == '__main__':
182 | test_harnn()
183 |
--------------------------------------------------------------------------------
/HARNN/text_harnn.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'Randolph'
3 |
4 | import tensorflow as tf
5 |
6 |
class TextHARNN(object):
    """
    A HARNN for text classification.

    Builds the whole TensorFlow graph in the constructor: an embedding lookup,
    a Bi-LSTM encoder, four chained attention/local-prediction levels, a global
    prediction head, the blended final scores and the training loss.

    Args:
        sequence_length: Length of the (padded) input token sequences.
        vocab_size: Vocabulary size; used only for the randomly initialised embedding.
        embedding_type: With a pretrained embedding, 0 keeps it constant and 1 makes it trainable.
        embedding_size: Dimension of the randomly initialised embedding.
        lstm_hidden_size: Hidden size of each LSTM direction.
        attention_unit_size: Size of the attention hidden projection.
        fc_hidden_size: Output size of the fully connected layers.
        num_classes_list: Number of classes of the first four hierarchy levels.
        total_classes: Number of classes of the global output.
        l2_reg_lambda: Weight of the L2 penalty over all trainable variables.
        pretrained_embedding: Optional pretrained embedding matrix; random embedding if None.
    Raises:
        ValueError: if a pretrained embedding is given and `embedding_type` is neither 0 nor 1.
    """

    def __init__(
            self, sequence_length, vocab_size, embedding_type, embedding_size, lstm_hidden_size, attention_unit_size,
            fc_hidden_size, num_classes_list, total_classes, l2_reg_lambda=0.0, pretrained_embedding=None):

        # Placeholders for input, output, dropout_prob and training_tag
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y_first = tf.placeholder(tf.float32, [None, num_classes_list[0]], name="input_y_first")
        self.input_y_second = tf.placeholder(tf.float32, [None, num_classes_list[1]], name="input_y_second")
        self.input_y_third = tf.placeholder(tf.float32, [None, num_classes_list[2]], name="input_y_third")
        self.input_y_fourth = tf.placeholder(tf.float32, [None, num_classes_list[3]], name="input_y_fourth")
        self.input_y = tf.placeholder(tf.float32, [None, total_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.alpha = tf.placeholder(tf.float32, name="alpha")
        self.is_training = tf.placeholder(tf.bool, name="is_training")

        self.global_step = tf.Variable(0, trainable=False, name="Global_Step")

        def _attention(input_x, num_classes, name=""):
            """
            Attention Layer.

            Args:
                input_x: [batch_size, sequence_length, lstm_hidden_size * 2]
                num_classes: The number of i th level classes.
                name: Scope name.
            Returns:
                attention_weight: [batch_size, num_classes, sequence_length]
                attention_out: [batch_size, lstm_hidden_size * 2]
            """
            num_units = input_x.get_shape().as_list()[-1]
            with tf.name_scope(name + "attention"):
                W_s1 = tf.Variable(tf.truncated_normal(shape=[attention_unit_size, num_units],
                                                       stddev=0.1, dtype=tf.float32), name="W_s1")
                W_s2 = tf.Variable(tf.truncated_normal(shape=[num_classes, attention_unit_size],
                                                       stddev=0.1, dtype=tf.float32), name="W_s2")
                # attention_matrix: [batch_size, num_classes, sequence_length]
                # Computed per sample as W_s2 . tanh(W_s1 . x^T)
                attention_matrix = tf.map_fn(
                    fn=lambda x: tf.matmul(W_s2, x),
                    elems=tf.tanh(
                        tf.map_fn(
                            fn=lambda x: tf.matmul(W_s1, tf.transpose(x)),
                            elems=input_x,
                            dtype=tf.float32
                        )
                    )
                )
                # Softmax is taken over the last axis
                attention_weight = tf.nn.softmax(attention_matrix, name="attention")
                attention_out = tf.matmul(attention_weight, input_x)
                # Average the per-class attended vectors
                attention_out = tf.reduce_mean(attention_out, axis=1)
            return attention_weight, attention_out

        def _fc_layer(input_x, name=""):
            """
            Fully Connected Layer.

            Args:
                input_x: [batch_size, *]
                name: Scope name.
            Returns:
                fc_out: [batch_size, fc_hidden_size]
            """
            with tf.name_scope(name + "fc"):
                num_units = input_x.get_shape().as_list()[-1]
                W = tf.Variable(tf.truncated_normal(shape=[num_units, fc_hidden_size],
                                                    stddev=0.1, dtype=tf.float32), name="W")
                b = tf.Variable(tf.constant(value=0.1, shape=[fc_hidden_size], dtype=tf.float32), name="b")
                fc = tf.nn.xw_plus_b(input_x, W, b)
                fc_out = tf.nn.relu(fc)
            return fc_out

        def _local_layer(input_x, input_att_weight, num_classes, name=""):
            """
            Local Layer.

            Args:
                input_x: [batch_size, fc_hidden_size]
                input_att_weight: [batch_size, num_classes, sequence_length]
                num_classes: Number of classes.
                name: Scope name.
            Returns:
                logits: [batch_size, num_classes]
                scores: [batch_size, num_classes]
                visual: [batch_size, sequence_length]
            """
            with tf.name_scope(name + "output"):
                num_units = input_x.get_shape().as_list()[-1]
                W = tf.Variable(tf.truncated_normal(shape=[num_units, num_classes],
                                                    stddev=0.1, dtype=tf.float32), name="W")
                b = tf.Variable(tf.constant(value=0.1, shape=[num_classes], dtype=tf.float32), name="b")
                logits = tf.nn.xw_plus_b(input_x, W, b, name="logits")
                scores = tf.sigmoid(logits, name="scores")

                # shape of visual: [batch_size, sequence_length]
                # Attention weights are scaled by the class scores, re-normalised and averaged over classes
                visual = tf.multiply(input_att_weight, tf.expand_dims(scores, -1))
                visual = tf.nn.softmax(visual)
                visual = tf.reduce_mean(visual, axis=1, name="visual")
            return logits, scores, visual

        def _linear(input_, output_size, initializer=None, scope="SimpleLinear"):
            """
            Linear map: output[k] = sum_i(Matrix[k, i] * args[i] ) + Bias[k].

            Args:
                input_: a tensor or a list of 2D, batch x n, Tensors.
                output_size: int, second dimension of W[i].
                initializer: The initializer.
                scope: VariableScope for the created subgraph; defaults to "SimpleLinear".
            Returns:
                A 2D Tensor with shape [batch x output_size] equal to
                sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
            Raises:
                ValueError: if some of the arguments has unspecified or wrong shape.
            """

            shape = input_.get_shape().as_list()
            if len(shape) != 2:
                raise ValueError("Linear is expecting 2D arguments: {0}".format(str(shape)))
            if not shape[1]:
                raise ValueError("Linear expects shape[1] of arguments: {0}".format(str(shape)))
            input_size = shape[1]

            # Now the computation.
            with tf.variable_scope(scope):
                # NOTE: `initializer` is applied to the bias only; W uses the scope's default initializer.
                W = tf.get_variable("W", [input_size, output_size], dtype=input_.dtype)
                b = tf.get_variable("b", [output_size], dtype=input_.dtype, initializer=initializer)

            return tf.nn.xw_plus_b(input_, W, b)

        def _highway_layer(input_, size, num_layers=1, bias=-2.0):
            """
            Highway Network (cf. http://arxiv.org/abs/1505.00387).
            t = sigmoid(Wx + b); h = relu(W'x + b')
            z = t * h + (1 - t) * x
            where t is transform gate, and (1 - t) is carry gate.
            """

            for idx in range(num_layers):
                h = tf.nn.relu(_linear(input_, size, scope=("highway_h_{0}".format(idx))))
                t = tf.sigmoid(_linear(input_, size, initializer=tf.constant_initializer(bias),
                                       scope=("highway_t_{0}".format(idx))))
                output = t * h + (1. - t) * input_
                input_ = output

            return output

        # Embedding Layer
        with tf.device("/cpu:0"), tf.name_scope("embedding"):
            # Use random generated the word vector by default
            # Can also be obtained through our own word vectors trained by our corpus
            if pretrained_embedding is None:
                self.embedding = tf.Variable(tf.random_uniform([vocab_size, embedding_size], minval=-1.0, maxval=1.0,
                                                               dtype=tf.float32), trainable=True, name="embedding")
            else:
                if embedding_type == 0:
                    self.embedding = tf.constant(pretrained_embedding, dtype=tf.float32, name="embedding")
                elif embedding_type == 1:
                    self.embedding = tf.Variable(pretrained_embedding, trainable=True,
                                                 dtype=tf.float32, name="embedding")
                else:
                    # Without this guard `self.embedding` would stay undefined and the
                    # lookup below would fail with an unrelated AttributeError.
                    raise ValueError("embedding_type must be 0 or 1 when a pretrained embedding is given: {0!r}"
                                     .format(embedding_type))
            self.embedded_sentence = tf.nn.embedding_lookup(self.embedding, self.input_x)
            # Average Vectors
            # [batch_size, embedding_size]
            self.embedded_sentence_average = tf.reduce_mean(self.embedded_sentence, axis=1)

        # Bi-LSTM Layer
        with tf.name_scope("Bi-lstm"):
            lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(lstm_hidden_size)  # forward direction cell
            lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(lstm_hidden_size)  # backward direction cell
            # `dropout_keep_prob` is always a placeholder here, so the wrappers are always applied
            if self.dropout_keep_prob is not None:
                lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_fw_cell, output_keep_prob=self.dropout_keep_prob)
                lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_bw_cell, output_keep_prob=self.dropout_keep_prob)

            # Creates a dynamic bidirectional recurrent neural network
            # shape of `outputs`: tuple -> (outputs_fw, outputs_bw)
            # shape of `outputs_fw`: [batch_size, sequence_length, lstm_hidden_size]

            # shape of `state`: tuple -> (outputs_state_fw, output_state_bw)
            # shape of `outputs_state_fw`: tuple -> (c, h) c: memory cell; h: hidden state
            outputs, state = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell,
                                                             self.embedded_sentence, dtype=tf.float32)
            # Concat output
            self.lstm_out = tf.concat(outputs, axis=2)  # [batch_size, sequence_length, lstm_hidden_size * 2]
            self.lstm_out_pool = tf.reduce_mean(self.lstm_out, axis=1)  # [batch_size, lstm_hidden_size * 2]

        # First Level
        self.first_att_weight, self.first_att_out = _attention(self.lstm_out, num_classes_list[0], name="first-")
        self.first_local_input = tf.concat([self.lstm_out_pool, self.first_att_out], axis=1)
        self.first_local_fc_out = _fc_layer(self.first_local_input, name="first-local-")
        self.first_logits, self.first_scores, self.first_visual = _local_layer(
            self.first_local_fc_out, self.first_att_weight, num_classes_list[0], name="first-")

        # Second Level
        # Each following level re-weights the LSTM outputs with the previous level's `visual` vector
        self.second_att_input = tf.multiply(self.lstm_out, tf.expand_dims(self.first_visual, -1))
        self.second_att_weight, self.second_att_out = _attention(
            self.second_att_input, num_classes_list[1], name="second-")
        self.second_local_input = tf.concat([self.lstm_out_pool, self.second_att_out], axis=1)
        self.second_local_fc_out = _fc_layer(self.second_local_input, name="second-local-")
        self.second_logits, self.second_scores, self.second_visual = _local_layer(
            self.second_local_fc_out, self.second_att_weight, num_classes_list[1], name="second-")

        # Third Level
        self.third_att_input = tf.multiply(self.lstm_out, tf.expand_dims(self.second_visual, -1))
        self.third_att_weight, self.third_att_out = _attention(
            self.third_att_input, num_classes_list[2], name="third-")
        self.third_local_input = tf.concat([self.lstm_out_pool, self.third_att_out], axis=1)
        self.third_local_fc_out = _fc_layer(self.third_local_input, name="third-local-")
        self.third_logits, self.third_scores, self.third_visual = _local_layer(
            self.third_local_fc_out, self.third_att_weight, num_classes_list[2], name="third-")

        # Fourth Level
        self.fourth_att_input = tf.multiply(self.lstm_out, tf.expand_dims(self.third_visual, -1))
        self.fourth_att_weight, self.fourth_att_out = _attention(
            self.fourth_att_input, num_classes_list[3], name="fourth-")
        self.fourth_local_input = tf.concat([self.lstm_out_pool, self.fourth_att_out], axis=1)
        self.fourth_local_fc_out = _fc_layer(self.fourth_local_input, name="fourth-local-")
        self.fourth_logits, self.fourth_scores, self.fourth_visual = _local_layer(
            self.fourth_local_fc_out, self.fourth_att_weight, num_classes_list[3], name="fourth-")

        # Concat
        # shape of ham_out: [batch_size, fc_hidden_size * 4]
        self.ham_out = tf.concat([self.first_local_fc_out, self.second_local_fc_out,
                                  self.third_local_fc_out, self.fourth_local_fc_out], axis=1)

        # Fully Connected Layer
        self.fc_out = _fc_layer(self.ham_out)

        # Highway Layer
        with tf.name_scope("highway"):
            self.highway = _highway_layer(self.fc_out, self.fc_out.get_shape()[1], num_layers=1, bias=0)

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.highway, self.dropout_keep_prob)

        # Global scores
        with tf.name_scope("global-output"):
            num_units = self.h_drop.get_shape().as_list()[-1]
            W = tf.Variable(tf.truncated_normal(shape=[num_units, total_classes],
                                                stddev=0.1, dtype=tf.float32), name="W")
            b = tf.Variable(tf.constant(value=0.1, shape=[total_classes], dtype=tf.float32), name="b")
            self.global_logits = tf.nn.xw_plus_b(self.h_drop, W, b, name="logits")
            self.global_scores = tf.sigmoid(self.global_logits, name="scores")

        with tf.name_scope("output"):
            # The concatenated local scores are blended element-wise with the global scores
            self.local_scores = tf.concat([self.first_scores, self.second_scores,
                                           self.third_scores, self.fourth_scores], axis=1)
            self.scores = tf.add(self.alpha * self.global_scores, (1 - self.alpha) * self.local_scores, name="scores")

        # Calculate mean cross-entropy loss, L2 loss
        with tf.name_scope("loss"):
            def cal_loss(labels, logits, name):
                # Sum the per-class sigmoid cross-entropy, then average over the batch
                losses = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
                losses = tf.reduce_mean(tf.reduce_sum(losses, axis=1), name=name + "losses")
                return losses

            # Local Loss
            losses_1 = cal_loss(labels=self.input_y_first, logits=self.first_logits, name="first_")
            losses_2 = cal_loss(labels=self.input_y_second, logits=self.second_logits, name="second_")
            losses_3 = cal_loss(labels=self.input_y_third, logits=self.third_logits, name="third_")
            losses_4 = cal_loss(labels=self.input_y_fourth, logits=self.fourth_logits, name="fourth_")
            local_losses = tf.add_n([losses_1, losses_2, losses_3, losses_4], name="local_losses")

            # Global Loss
            global_losses = cal_loss(labels=self.input_y, logits=self.global_logits, name="global_")

            # L2 Loss
            l2_losses = tf.add_n([tf.nn.l2_loss(tf.cast(v, tf.float32)) for v in tf.trainable_variables()],
                                 name="l2_losses") * l2_reg_lambda
            self.loss = tf.add_n([local_losses, global_losses, l2_losses], name="loss")
--------------------------------------------------------------------------------
/HARNN/train_harnn.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'Randolph'
3 |
4 | import os
5 | import sys
6 | import time
7 | import logging
8 |
9 | sys.path.append('../')
10 | logging.getLogger('tensorflow').disabled = True
11 |
12 | import numpy as np
13 | import tensorflow as tf
14 | from text_harnn import TextHARNN
15 | from utils import checkmate as cm
16 | from utils import data_helpers as dh
17 | from utils import param_parser as parser
18 | from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, average_precision_score
19 |
# Parsed run configuration; the option definitions live in utils/param_parser.
args = parser.parameter_parser()
# Run mode: 'T' starts a fresh training run, 'R' restores an existing one.
# NOTE(review): presumably asked interactively by data_helpers — confirm.
OPTION = dh._option(pattern=0)
# NOTE(review): time.asctime() yields e.g. 'Sun Jun 20 23:21:05 1993', so the log
# file name contains spaces and colons (colons are not valid in Windows file names).
logger = dh.logger_fn("tflog", "logs/{0}-{1}.log".format('Train' if OPTION == 'T' else 'Restore', time.asctime()))
23 |
24 |
def create_input_data(data: dict):
    """
    Pair up the per-sample columns of `data` for batching.

    Args:
        data: Mapping holding equally ordered sequences under the keys
            'pad_seqs', 'section', 'subsection', 'group', 'subgroup'
            and 'onehot_labels'.
    Returns:
        A zip iterator yielding one 6-tuple per sample, in the key order above.
    """
    wanted_keys = ('pad_seqs', 'section', 'subsection',
                   'group', 'subgroup', 'onehot_labels')
    return zip(*(data[key] for key in wanted_keys))
28 |
29 |
30 | def train_harnn():
31 | """Training HARNN model."""
32 | # Print parameters used for the model
33 | dh.tab_printer(args, logger)
34 |
35 | # Load word2vec model
36 | word2idx, embedding_matrix = dh.load_word2vec_matrix(args.word2vec_file)
37 |
38 | # Load sentences, labels, and training parameters
39 | logger.info("Loading data...")
40 | logger.info("Data processing...")
41 | train_data = dh.load_data_and_labels(args, args.train_file, word2idx)
42 | val_data = dh.load_data_and_labels(args, args.validation_file, word2idx)
43 |
44 | # Build a graph and harnn object
45 | with tf.Graph().as_default():
46 | session_conf = tf.ConfigProto(
47 | allow_soft_placement=args.allow_soft_placement,
48 | log_device_placement=args.log_device_placement)
49 | session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth
50 | sess = tf.Session(config=session_conf)
51 | with sess.as_default():
52 | harnn = TextHARNN(
53 | sequence_length=args.pad_seq_len,
54 | vocab_size=len(word2idx),
55 | embedding_type=args.embedding_type,
56 | embedding_size=args.embedding_dim,
57 | lstm_hidden_size=args.lstm_dim,
58 | attention_unit_size=args.attention_dim,
59 | fc_hidden_size=args.fc_dim,
60 | num_classes_list=args.num_classes_list,
61 | total_classes=args.total_classes,
62 | l2_reg_lambda=args.l2_lambda,
63 | pretrained_embedding=embedding_matrix)
64 |
65 | # Define training procedure
66 | with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
67 | learning_rate = tf.train.exponential_decay(learning_rate=args.learning_rate,
68 | global_step=harnn.global_step,
69 | decay_steps=args.decay_steps,
70 | decay_rate=args.decay_rate,
71 | staircase=True)
72 | optimizer = tf.train.AdamOptimizer(learning_rate)
73 | grads, vars = zip(*optimizer.compute_gradients(harnn.loss))
74 | grads, _ = tf.clip_by_global_norm(grads, clip_norm=args.norm_ratio)
75 | train_op = optimizer.apply_gradients(zip(grads, vars), global_step=harnn.global_step, name="train_op")
76 |
77 | # Keep track of gradient values and sparsity (optional)
78 | grad_summaries = []
79 | for g, v in zip(grads, vars):
80 | if g is not None:
81 | grad_hist_summary = tf.summary.histogram("{0}/grad/hist".format(v.name), g)
82 | sparsity_summary = tf.summary.scalar("{0}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
83 | grad_summaries.append(grad_hist_summary)
84 | grad_summaries.append(sparsity_summary)
85 | grad_summaries_merged = tf.summary.merge(grad_summaries)
86 |
87 | # Output directory for models and summaries
88 | out_dir = dh.get_out_dir(OPTION, logger)
89 | checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
90 | best_checkpoint_dir = os.path.abspath(os.path.join(out_dir, "bestcheckpoints"))
91 |
92 | # Summaries for loss
93 | loss_summary = tf.summary.scalar("loss", harnn.loss)
94 |
95 | # Train summaries
96 | train_summary_op = tf.summary.merge([loss_summary, grad_summaries_merged])
97 | train_summary_dir = os.path.join(out_dir, "summaries", "train")
98 | train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
99 |
100 | # Validation summaries
101 | validation_summary_op = tf.summary.merge([loss_summary])
102 | validation_summary_dir = os.path.join(out_dir, "summaries", "validation")
103 | validation_summary_writer = tf.summary.FileWriter(validation_summary_dir, sess.graph)
104 |
105 | saver = tf.train.Saver(tf.global_variables(), max_to_keep=args.num_checkpoints)
106 | best_saver = cm.BestCheckpointSaver(save_dir=best_checkpoint_dir, num_to_keep=3, maximize=True)
107 |
108 | if OPTION == 'R':
109 | # Load harnn model
110 | logger.info("Loading model...")
111 | checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
112 | logger.info(checkpoint_file)
113 |
114 | # Load the saved meta graph and restore variables
115 | saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file))
116 | saver.restore(sess, checkpoint_file)
117 | if OPTION == 'T':
118 | if not os.path.exists(checkpoint_dir):
119 | os.makedirs(checkpoint_dir)
120 | sess.run(tf.global_variables_initializer())
121 | sess.run(tf.local_variables_initializer())
122 |
123 | # Save the embedding visualization
124 | saver.save(sess, os.path.join(out_dir, "embedding", "embedding.ckpt"))
125 |
126 | current_step = sess.run(harnn.global_step)
127 |
128 | def train_step(batch_data):
129 | """A single training step."""
130 | x, sec, subsec, group, subgroup, y_onehot = zip(*batch_data)
131 |
132 | feed_dict = {
133 | harnn.input_x: x,
134 | harnn.input_y_first: sec,
135 | harnn.input_y_second: subsec,
136 | harnn.input_y_third: group,
137 | harnn.input_y_fourth: subgroup,
138 | harnn.input_y: y_onehot,
139 | harnn.dropout_keep_prob: args.dropout_rate,
140 | harnn.alpha: args.alpha,
141 | harnn.is_training: True
142 | }
143 | _, step, summaries, loss = sess.run(
144 | [train_op, harnn.global_step, train_summary_op, harnn.loss], feed_dict)
145 | logger.info("step {0}: loss {1:g}".format(step, loss))
146 | train_summary_writer.add_summary(summaries, step)
147 |
148 | def validation_step(val_loader, writer=None):
149 | """Evaluates model on a validation set."""
150 | batches_validation = dh.batch_iter(list(create_input_data(val_loader)), args.batch_size, 1)
151 |
152 | # Predict classes by threshold or topk ('ts': threshold; 'tk': topk)
153 | eval_counter, eval_loss = 0, 0.0
154 | eval_pre_tk = [0.0] * args.topK
155 | eval_rec_tk = [0.0] * args.topK
156 | eval_F1_tk = [0.0] * args.topK
157 |
158 | true_onehot_labels = []
159 | predicted_onehot_scores = []
160 | predicted_onehot_labels_ts = []
161 | predicted_onehot_labels_tk = [[] for _ in range(args.topK)]
162 |
163 | for batch_validation in batches_validation:
164 | x, sec, subsec, group, subgroup, y_onehot = zip(*batch_validation)
165 | feed_dict = {
166 | harnn.input_x: x,
167 | harnn.input_y_first: sec,
168 | harnn.input_y_second: subsec,
169 | harnn.input_y_third: group,
170 | harnn.input_y_fourth: subgroup,
171 | harnn.input_y: y_onehot,
172 | harnn.dropout_keep_prob: 1.0,
173 | harnn.alpha: args.alpha,
174 | harnn.is_training: False
175 | }
176 | step, summaries, scores, cur_loss = sess.run(
177 | [harnn.global_step, validation_summary_op, harnn.scores, harnn.loss], feed_dict)
178 |
179 | # Prepare for calculating metrics
180 | for i in y_onehot:
181 | true_onehot_labels.append(i)
182 | for j in scores:
183 | predicted_onehot_scores.append(j)
184 |
185 | # Predict by threshold
186 | batch_predicted_onehot_labels_ts = \
187 | dh.get_onehot_label_threshold(scores=scores, threshold=args.threshold)
188 | for k in batch_predicted_onehot_labels_ts:
189 | predicted_onehot_labels_ts.append(k)
190 |
191 | # Predict by topK
192 | for top_num in range(args.topK):
193 | batch_predicted_onehot_labels_tk = dh.get_onehot_label_topk(scores=scores, top_num=top_num+1)
194 | for i in batch_predicted_onehot_labels_tk:
195 | predicted_onehot_labels_tk[top_num].append(i)
196 |
197 | eval_loss = eval_loss + cur_loss
198 | eval_counter = eval_counter + 1
199 |
200 | if writer:
201 | writer.add_summary(summaries, step)
202 |
203 | eval_loss = float(eval_loss / eval_counter)
204 |
205 | # Calculate Precision & Recall & F1
206 | eval_pre_ts = precision_score(y_true=np.array(true_onehot_labels),
207 | y_pred=np.array(predicted_onehot_labels_ts), average='micro')
208 | eval_rec_ts = recall_score(y_true=np.array(true_onehot_labels),
209 | y_pred=np.array(predicted_onehot_labels_ts), average='micro')
210 | eval_F1_ts = f1_score(y_true=np.array(true_onehot_labels),
211 | y_pred=np.array(predicted_onehot_labels_ts), average='micro')
212 |
213 | for top_num in range(args.topK):
214 | eval_pre_tk[top_num] = precision_score(y_true=np.array(true_onehot_labels),
215 | y_pred=np.array(predicted_onehot_labels_tk[top_num]),
216 | average='micro')
217 | eval_rec_tk[top_num] = recall_score(y_true=np.array(true_onehot_labels),
218 | y_pred=np.array(predicted_onehot_labels_tk[top_num]),
219 | average='micro')
220 | eval_F1_tk[top_num] = f1_score(y_true=np.array(true_onehot_labels),
221 | y_pred=np.array(predicted_onehot_labels_tk[top_num]),
222 | average='micro')
223 |
224 | # Calculate the average AUC
225 | eval_auc = roc_auc_score(y_true=np.array(true_onehot_labels),
226 | y_score=np.array(predicted_onehot_scores), average='micro')
227 | # Calculate the average PR
228 | eval_prc = average_precision_score(y_true=np.array(true_onehot_labels),
229 | y_score=np.array(predicted_onehot_scores), average='micro')
230 |
231 | return eval_loss, eval_auc, eval_prc, eval_pre_ts, eval_rec_ts, eval_F1_ts, \
232 | eval_pre_tk, eval_rec_tk, eval_F1_tk
233 |
234 | # Generate batches
235 | batches_train = dh.batch_iter(list(create_input_data(train_data)), args.batch_size, args.epochs)
236 | num_batches_per_epoch = int((len(train_data['pad_seqs']) - 1) / args.batch_size) + 1
237 |
238 | # Training loop. For each batch...
239 | for batch_train in batches_train:
240 | train_step(batch_train)
241 | current_step = tf.train.global_step(sess, harnn.global_step)
242 |
243 | if current_step % args.evaluate_steps == 0:
244 | logger.info("\nEvaluation:")
245 | eval_loss, eval_auc, eval_prc, \
246 | eval_pre_ts, eval_rec_ts, eval_F1_ts, eval_pre_tk, eval_rec_tk, eval_F1_tk = \
247 | validation_step(val_data, writer=validation_summary_writer)
248 | logger.info("All Validation set: Loss {0:g} | AUC {1:g} | AUPRC {2:g}"
249 | .format(eval_loss, eval_auc, eval_prc))
250 | # Predict by threshold
251 | logger.info("Predict by threshold: Precision {0:g}, Recall {1:g}, F1 {2:g}"
252 | .format(eval_pre_ts, eval_rec_ts, eval_F1_ts))
253 | # Predict by topK
254 | logger.info("Predict by topK:")
255 | for top_num in range(args.topK):
256 | logger.info("Top{0}: Precision {1:g}, Recall {2:g}, F1 {3:g}"
257 | .format(top_num+1, eval_pre_tk[top_num], eval_rec_tk[top_num], eval_F1_tk[top_num]))
258 | best_saver.handle(eval_prc, sess, current_step)
259 | if current_step % args.checkpoint_steps == 0:
260 | checkpoint_prefix = os.path.join(checkpoint_dir, "model")
261 | path = saver.save(sess, checkpoint_prefix, global_step=current_step)
262 | logger.info("Saved model checkpoint to {0}\n".format(path))
263 | if current_step % num_batches_per_epoch == 0:
264 | current_epoch = current_step // num_batches_per_epoch
265 | logger.info("Epoch {0} has finished!".format(current_epoch))
266 |
267 | logger.info("All Done.")
268 |
269 |
270 | if __name__ == '__main__':
271 | train_harnn()
--------------------------------------------------------------------------------
/HARNN/visualization.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'Randolph'
3 |
4 | import sys
5 | import time
6 | import logging
7 |
8 | sys.path.append('../')
9 | logging.getLogger('tensorflow').disabled = True
10 |
11 | import tensorflow as tf
12 | from utils import checkmate as cm
13 | from utils import data_helpers as dh
14 | from utils import param_parser as parser
15 |
16 | args = parser.parameter_parser()
17 | MODEL = dh.get_model_name()
18 | logger = dh.logger_fn("tflog", "logs/Test-{0}.log".format(time.asctime()))
19 |
20 | CPT_DIR = 'runs/' + MODEL + '/checkpoints/'
21 | BEST_CPT_DIR = 'runs/' + MODEL + '/bestcheckpoints/'
22 | SAVE_DIR = 'output/' + MODEL
23 |
24 |
def create_input_data(data: dict):
    """Bundle the per-sample fields of a loaded dataset into one iterator.

    Each yielded tuple holds, in order, the sample's entries from
    ``pad_seqs``, ``content``, ``section``, ``subsection``, ``group``,
    ``subgroup`` and ``onehot_labels``.  As with any ``zip``, iteration
    stops at the shortest of those fields.
    """
    field_order = ('pad_seqs', 'content', 'section', 'subsection', 'group',
                   'subgroup', 'onehot_labels')
    # Look every column up eagerly so a missing key fails at call time.
    columns = [data[field] for field in field_order]
    return zip(*columns)
28 |
29 |
def normalization(visual_list, visual_len, epsilon=1e-12):
    """Min-max scale the leading attention weights.

    Args:
        visual_list: Sequence of numeric weights.
        visual_len: How many leading weights to normalize.  Values larger
            than ``len(visual_list)`` are clamped to the available weights.
        epsilon: Small constant added to the denominator so a constant
            window (max == min) does not divide by zero.

    Returns:
        A list of ``(w - min) / (max - min + epsilon)`` for each weight in
        the window; an empty list when the window is empty.
    """
    if visual_len <= 0:
        return []

    window = visual_list[:visual_len]
    if not window:
        # Nothing to scale; min()/max() would raise on an empty sequence.
        return []

    min_weight = min(window)
    margin = max(window) - min_weight
    denominator = margin + epsilon
    return [(weight - min_weight) / denominator for weight in window]
40 |
41 |
def create_visual_file(input_x, visual_list: list, seq_len):
    """Write an HTML page that shades each word by its attention weight.

    The page is written to ``attention.html`` in the current working
    directory, replacing any existing file.  One paragraph is emitted per
    entry of ``visual_list``; inside it every word of ``input_x[0]`` is
    wrapped in a span whose red background opacity is the matching weight.

    Args:
        input_x: Sequence of token sequences; only ``input_x[0]`` is rendered.
        visual_list: List of weight sequences, one per paragraph.
        seq_len: Maximum number of words to render.  The count is also
            bounded by the number of weights and words actually available,
            so a shorter weight list no longer raises ``IndexError``.
    """
    import html  # local import keeps this block self-contained

    # NOTE(review): the HTML tags inside the original string literals were
    # lost in this copy of the file; the markup below is a reconstruction
    # (the opacity-by-weight styling follows from the "{:.2f}" alpha value)
    # -- confirm against the upstream file.
    words = input_x[0]
    limit = max(seq_len, 0)

    parts = [
        '<html style="margin:0;padding:0;">'
        '<head><meta charset="utf-8"></head>'
        '<body style="margin:0;padding:0;">\n',
        '<div style="margin:25px;">\n',
    ]
    for visual in visual_list:
        parts.append('<p style="margin:10px;">\n')
        # zip() stops at the shorter of weights/words, bounding the loop.
        for weight, word in zip(visual[:limit], words[:limit]):
            alpha = "{:.2f}".format(weight)
            # Words come from the dataset; escape them so they cannot break
            # or inject markup.
            parts.append(
                '\t<span style="margin-left:3px;'
                'background-color:rgba(255,0,0,{0})">{1}</span>\n'
                .format(alpha, html.escape(str(word))))
        parts.append('</p>\n')
    parts.append('</div>\n')
    parts.append('</body></html>')

    # Explicit UTF-8 so non-ASCII tokens are written identically on every
    # platform; the context manager closes the file even on error.
    with open('attention.html', 'w', encoding='utf-8') as f:
        f.write(''.join(parts))
58 |
def visualize():
    """Visualize HARNN model.

    Restores a trained checkpoint, exports a frozen ``.pb`` copy of the
    graph under ``graph/``, then runs the test set through the four
    ``*-output/visual`` tensors.  For every batch the attention weights of
    the batch's first sample are normalized and rendered with
    ``create_visual_file``.
    """

    # Load word2vec model (only the vocabulary index is needed here)
    word2idx, _ = dh.load_word2vec_matrix(args.word2vec_file)

    # Load data
    logger.info("Loading data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args, args.test_file, word2idx)

    # Load harnn model
    # NOTE(review): presumably dh._option asks which checkpoint to use
    # ('B' selects the best checkpoint) -- confirm against data_helpers.
    OPTION = dh._option(pattern=1)
    if OPTION == 'B':
        logger.info("Loading best model...")
        checkpoint_file = cm.get_best_checkpoint(BEST_CPT_DIR, select_maximum_value=True)
    else:
        logger.info("Loading latest model...")
        checkpoint_file = tf.train.latest_checkpoint(CPT_DIR)
    logger.info(checkpoint_file)

    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=args.allow_soft_placement,
            log_device_placement=args.log_device_placement)
        session_conf.gpu_options.allow_growth = args.gpu_options_allow_growth
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph("{0}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x = graph.get_operation_by_name("input_x").outputs[0]
            input_y_first = graph.get_operation_by_name("input_y_first").outputs[0]
            input_y_second = graph.get_operation_by_name("input_y_second").outputs[0]
            input_y_third = graph.get_operation_by_name("input_y_third").outputs[0]
            input_y_fourth = graph.get_operation_by_name("input_y_fourth").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
            alpha = graph.get_operation_by_name("alpha").outputs[0]
            is_training = graph.get_operation_by_name("is_training").outputs[0]

            # Tensors we want to evaluate
            first_visual = graph.get_operation_by_name("first-output/visual").outputs[0]
            second_visual = graph.get_operation_by_name("second-output/visual").outputs[0]
            third_visual = graph.get_operation_by_name("third-output/visual").outputs[0]
            fourth_visual = graph.get_operation_by_name("fourth-output/visual").outputs[0]

            # Split the output nodes name by '|' if you have several output nodes
            output_node_names = "first-output/visual|second-output/visual|third-output/visual|fourth-output/visual|output/scores"

            # Save the .pb model file
            output_graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def,
                                                                            output_node_names.split("|"))
            tf.train.write_graph(output_graph_def, "graph", "graph-harnn-{0}.pb".format(MODEL), as_text=False)

            # Generate batches for one epoch
            batches = dh.batch_iter(list(create_input_data(test_data)), args.batch_size, 1, shuffle=False)

            for batch_test in batches:
                x, x_content, sec, subsec, group, subgroup, y_onehot = zip(*batch_test)

                feed_dict = {
                    input_x: x,
                    input_y_first: sec,
                    input_y_second: subsec,
                    input_y_third: group,
                    input_y_fourth: subgroup,
                    input_y: y_onehot,
                    dropout_keep_prob: 1.0,
                    alpha: args.alpha,
                    is_training: False
                }
                batch_first_visual, batch_second_visual, batch_third_visual, batch_fourth_visual = \
                    sess.run([first_visual, second_visual, third_visual, fourth_visual], feed_dict)

                batch_visual = [batch_first_visual, batch_second_visual, batch_third_visual, batch_fourth_visual]

                # Only the first sample of the batch is rendered.  Its weights
                # cover at most pad_len positions, so cap the rendered length
                # at whichever is shorter: the raw token list or the weights.
                seq_len = len(x_content[0])
                pad_len = len(batch_first_visual[0])
                length = min(seq_len, pad_len)

                visual_list = [normalization(visual[0].tolist(), length) for visual in batch_visual]

                # Pass `length`, not `seq_len`: each normalized list holds only
                # `length` weights, so a longer raw document would otherwise
                # index past the end of the weights.
                create_visual_file(x_content, visual_list, length)
    logger.info("Done.")
149 |
150 |
151 | if __name__ == '__main__':
152 | visualize()
153 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Hierarchical Multi-Label Text Classification
2 |
3 | [Python Version](https://www.python.org/downloads/) [Build Status](https://travis-ci.org/RandolphVI/Hierarchical-Multi-Label-Text-Classification) [Codacy Badge](https://www.codacy.com/manual/chinawolfman/Hierarchical-Multi-Label-Text-Classification?utm_source=github.com&utm_medium=referral&utm_content=RandolphVI/Hierarchical-Multi-Label-Text-Classification&utm_campaign=Badge_Grade) [License](https://www.apache.org/licenses/LICENSE-2.0)
4 |
5 | This repository is my research project, which has been accepted by CIKM'19. The [paper](https://dl.acm.org/citation.cfm?id=3357384.3357885) is already published.
6 |
7 | The main objective of the project is to solve the hierarchical multi-label text classification (**HMTC**) problem. Unlike ordinary multi-label text classification, HMTC assigns each instance (object) to multiple categories that are organized in a hierarchical structure. It is a fundamental but challenging task in numerous applications.
8 |
9 | ## Requirements
10 |
11 | - Python 3.6
12 | - Tensorflow 1.15.0
13 | - Tensorboard 1.15.0
14 | - Sklearn 0.19.1
15 | - Numpy 1.16.2
16 | - Gensim 3.8.3
17 | - Tqdm 4.49.0
18 |
19 | ## Introduction
20 |
21 | Many real-world applications organize data in a hierarchical structure, where classes are specialized into subclasses or grouped into superclasses. For example, an electronic document (e.g. web pages, digital libraries, patents and e-mails) is associated with multiple categories, and all these categories are stored hierarchically in a **tree** or **Directed Acyclic Graph (DAG)**.
22 |
23 | It provides an elegant way to show the characteristics of data and a multi-dimensional perspective to tackle the classification problem via hierarchy structure.
24 |
25 | 
26 |
27 | The Figure shows an example of predefined labels in hierarchical multi-label classification of documents in patent texts.
28 |
29 | - Documents are shown as colored rectangles, labels as rounded rectangles.
30 | - Circles in the rounded rectangles indicate that the corresponding document has been assigned the label.
31 | - Arrows indicate a hierarchical structure between labels.
32 |
33 | ## Project
34 |
35 | The project structure is below:
36 |
37 | ```text
38 | .
39 | ├── HARNN
40 | │ ├── test_harnn.py
41 | │ ├── text_harnn.py
42 | │ ├── train_harnn.py
43 | │ └── visualization.py
45 | ├── utils
46 | │ ├── checkmate.py
47 | │ ├── param_parser.py
48 | │ └── data_helpers.py
49 | ├── data
50 | │ ├── word2vec_100.model.* [Need Download]
51 | │ ├── Test_sample.json
52 | │ ├── Train_sample.json
53 | │ └── Validation_sample.json
54 | ├── LICENSE
55 | ├── README.md
56 | └── requirements.txt
57 | ```
58 |
59 | ## Data
60 |
61 | You can download the [Patent Dataset](https://drive.google.com/open?id=1So3unr5p_vlYq31gE0Ly07Z2XTvD5QlM) used in the paper. And the [Word2vec model file](https://drive.google.com/file/d/1tZ9WPXkoJmWwtcnOU8S_KGPMp8wnYohR/view?usp=sharing) (dim=100) is also uploaded. **Make sure they are under the `/data` folder.**
62 |
63 | :warning: As for the **Education Dataset**, it may be subject to copyright protection under Chinese law. Thus, detailed information is not provided.
64 |
65 | ### :octocat: Text Segment
66 |
67 | 1. You can use `nltk` package if you are going to deal with the English text data.
68 |
69 | 2. You can use `jieba` package if you are going to deal with the Chinese text data.
70 |
71 | ### :octocat: Data Format
72 |
73 | See the data format in the `/data` folder, which includes the data sample files. For example:
74 |
75 | ```
76 | {"id": "3930316",
77 | "title": ["sighting", "firearm"],
78 | "abstract": ["rear", "sight", "firearm", "ha", "peephole", "device", "formed", "hollow", "tube", "end", ...],
79 | "section": [5], "subsection": [104], "group": [512], "subgroup": [6535],
80 | "labels": [5, 113, 649, 7333]}
81 | ```
82 |
83 | - `id`: just the id.
84 | - `title` & `abstract`: it's the word segment (after cleaning stopwords).
85 | - `section` / `subsection` / `group` / `subgroup`: it's the first / second / third / fourth level category index.
86 | - `labels`: the category indices of all levels combined, with a per-level index offset added (explained below).
87 |
88 | ### :octocat: How to construct the data?
89 |
90 | Use the sample of the Patent Dataset as an example. I will explain how to construct the label index.
91 | For patent dataset, the class number for each level is: [9, 128, 661, 8364].
92 |
93 | **Step 1:** For the first level, Patent dataset has 9 classes. You should index these 9 classes first, like:
94 |
95 | ```
96 | {"Chemistry": 0, "Physics": 1, "Electricity": 2, "XXX": 3, ..., "XXX": 8}
97 | ```
98 |
99 | **Step 2**: Next, you index the next level (total **128** classes), like:
100 |
101 | ```
102 | {"Inorganic Chemistry": 0, "Organic Chemistry": 1, "Nuclear Physics": 2, "XXX": 3, ..., "XXX": 127}
103 | ```
104 |
105 | **Step 3**: Then, you index the third level (total **661** classes), like:
106 |
107 | ```
108 | {"Steroids": 0, "Peptides": 1, "Heterocyclic Compounds": 2, ..., "XXX": 660}
109 | ```
110 |
111 | **Step 4**: If you have the fourth level or deeper level, index them.
112 |
113 | **Step 5**: Now suppose you have one record (**id: 3930316** mentioned before):
114 |
115 | ```
116 | {"id": "3930316",
117 | "title": ["sighting", "firearm"],
118 | "abstract": ["rear", "sight", "firearm", "ha", "peephole", "device", "formed", "hollow", "tube", "end", ...],
119 | "section": [5], "subsection": [104], "group": [512], "subgroup": [6535],
120 | "labels": [5, 104+9, 512+9+128, 6535+9+128+661]}
121 | ```
122 |
123 | Thus, the record should be constructed as follows:
124 |
125 | ```
126 | {"id": "3930316",
127 | "title": ["sighting", "firearm"],
128 | "abstract": ["rear", "sight", "firearm", "ha", "peephole", "device", "formed", "hollow", "tube", "end", ...],
129 | "section": [5], "subsection": [104], "group": [512], "subgroup": [6535],
130 | "labels": [5, 113, 649, 7333]}
131 | ```
132 |
133 | This repository can be used in other datasets (text classification) in two ways:
134 | 1. Modify your datasets into the same format of [the sample](https://github.com/RandolphVI/Hierarchical-Multi-Label-Text-Classification/tree/master/data).
135 | 2. Modify the data preprocess code in `data_helpers.py`.
136 |
137 | Anyway, it should depend on what your data and task are.
138 |
139 | ### :octocat: Pre-trained Word Vectors
140 |
141 | You can pre-train your word vectors (based on your corpus) in many ways:
142 | - Use `gensim` package to pre-train data.
143 | - Use `glove` tools to pre-train data.
144 | - You can even use `bert` to pre-train data.
145 |
146 | ## Usage
147 |
148 | See [Usage](https://github.com/RandolphVI/Hierarchical-Multi-Label-Text-Classification/blob/master/Usage.md).
149 |
150 | ## Network Structure
151 |
152 | 
153 |
154 | ## Reference
155 |
156 | **If you want to follow the paper or utilize the code, please note the following info in your work:**
157 |
158 | ```bibtex
159 | @inproceedings{huang2019hierarchical,
160 | author = {Wei Huang and
161 | Enhong Chen and
162 | Qi Liu and
163 | Yuying Chen and
164 | Zai Huang and
165 | Yang Liu and
166 | Zhou Zhao and
167 | Dan Zhang and
168 | Shijin Wang},
169 | title = {Hierarchical Multi-label Text Classification: An Attention-based Recurrent Network Approach},
170 | booktitle = {Proceedings of the 28th {ACM} {CIKM} International Conference on Information and Knowledge Management, {CIKM} 2019, Beijing, CHINA, Nov 3-7, 2019},
171 | pages = {1051--1060},
172 | year = {2019},
173 | }
174 | ```
175 | ---
176 |
177 | ## About Me
178 |
179 | 黄威,Randolph
180 |
181 | SCU SE Bachelor; USTC CS Ph.D.
182 |
183 | Email: chinawolfman@hotmail.com
184 |
185 | My Blog: [randolph.pro](http://randolph.pro)
186 |
187 | LinkedIn: [randolph's linkedin](https://www.linkedin.com/in/randolph-%E9%BB%84%E5%A8%81/)
188 |
--------------------------------------------------------------------------------
/Usage.md:
--------------------------------------------------------------------------------
1 | # Usage
2 |
3 | ## Options
4 |
5 | ### Input and output options
6 |
7 | ```
8 | --train-file STR Training file. Default is `data/Train_sample.json`.
9 | --validation-file STR Validation file. Default is `data/Validation_sample.json`.
10 | --test-file STR Testing file. Default is `data/Test_sample.json`.
11 | --word2vec-file STR Word2vec model file. Default is `data/word2vec_100.model`.
12 | ```
13 |
14 | ### Model option
15 |
16 | ```
17 | --pad-seq-len INT Padding Sequence length of data. Depends on data.
18 | --embedding-type INT The embedding type. Default is 1.
19 | --embedding-dim INT Dim of character embedding. Default is 100.
20 | --lstm-dim INT Dim of LSTM neurons. Default is 256.
21 | --lstm-layers INT Number of LSTM layers. Default is 1.
22 | --attention-dim INT Dim of Attention neurons. Default is 200.
23 | --attention-penalization BOOL Use attention penalization or not. Default is True.
24 | --fc-dim INT Dim of FC neurons. Default is 512.
25 | --dropout-rate FLOAT Dropout keep probability. Default is 0.5.
26 | --alpha FLOAT Weight of global part in loss cal. Default is 0.5.
27 | --num-classes-list LIST Each number of labels in hierarchical structure. Depends on data.
28 | --total-classes INT Total number of labels. Depends on data.
29 | --topK INT Number of top K prediction classes. Default is 5.
30 | --threshold FLOAT Threshold for prediction classes. Default is 0.5.
31 | ```
32 |
33 | ### Training option
34 |
35 | ```
36 | --epochs INT Number of epochs. Default is 20.
37 | --batch-size INT Batch size. Default is 32.
38 | --learning-rate FLOAT Adam learning rate. Default is 0.001.
39 | --decay-rate FLOAT Rate of decay for learning rate. Default is 0.95.
40 | --decay-steps INT How many steps before decay lr. Default is 500.
41 | --evaluate-steps INT How many steps to evaluate val set. Default is 50.
42 | --l2-lambda FLOAT L2 regularization lambda. Default is 0.0.
43 | --checkpoint-steps INT How many steps to save model. Default is 50.
44 | --num-checkpoints INT Number of checkpoints to store. Default is 10.
45 | ```
46 |
47 | ## Training
48 |
49 | The following commands train the model.
50 |
51 | ```bash
52 | $ python3 train_harnn.py
53 | ```
54 |
55 | Train a model for 30 epochs with the batch size set to 256.
56 |
57 | ```bash
58 | $ python3 train_harnn.py --epochs 30 --batch-size 256
59 | ```
60 |
61 | When the program starts, it shows the following prompt:
62 |
63 | 
64 |
65 | **You need to choose Training or Restore. (T for Training and R for Restore)**
66 |
67 | After training, you will get the `/logs` and `/runs` folders.
68 |
69 | - The `/logs` folder saves the log info file.
70 | - The `/runs` folder saves the checkpoints.
71 |
72 | It should be like this:
73 |
74 | ```text
75 | .
76 | ├── logs
77 | ├── runs
78 | │ └── 1586077936 [a 10-digital format]
79 | │ ├── bestcheckpoints
80 | │ ├── checkpoints
81 | │ ├── embedding
82 | │ └── summaries
83 | ├── test_harnn.py
84 | ├── text_harnn.py
85 | └── train_harnn.py
86 | ```
87 |
88 | **The program names and identifies the model by a timestamp (it should be a 10-digit number, like 1586077936).**
89 |
90 | ## Restore
91 |
92 | When your model stops training for some reason and you want to resume training, do the following.
93 |
94 | When the program starts, it shows the following prompt:
95 |
96 | 
97 |
98 | **And you need to input R for restore.**
99 |
100 | Then you will be asked to give the model name (a 10-digit format, like 1586077936):
101 |
102 | 
103 |
104 | The model will then continue training from where it left off.
105 |
106 | ## Test
107 |
108 | The following commands test the model.
109 |
110 | ```bash
111 | $ python3 test_harnn.py
112 | ```
113 |
114 | Then you will be asked to give the model name (a 10-digit format, like 1586077936):
115 |
116 | 
117 |
118 | And you can choose to use the best model or the latest model **(B for Best, L for Latest)**:
119 |
120 | 
121 |
122 | Finally, you can get the `predictions.json` file under the `/output` folder. It should look like this:
123 |
124 | ```text
125 | .
126 | ├── graph
127 | ├── logs
128 | ├── output
129 | │ └── 1586077936
130 | │ └── predictions.json
131 | ├── runs
132 | │ └── 1586077936
133 | │ ├── bestcheckpoints
134 | │ ├── checkpoints
135 | │ ├── embedding
136 | │ └── summaries
137 | ├── test_harnn.py
138 | ├── text_harnn.py
139 | └── train_harnn.py
140 | ```
141 |
142 |
--------------------------------------------------------------------------------
/data/Train_sample.json:
--------------------------------------------------------------------------------
1 | {"id": "3930316", "title": ["sighting", "firearm"], "abstract": ["rear", "sight", "firearm", "ha", "peephole", "device", "formed", "hollow", "tube", "end", "closed", "peephole", "peephole", "ha", "central", "orifice", "orifice", "peephole", "rear", "side", "ha", "larger", "diameter", "orifice", "peephole", "front", "sight", "side", "peephole", "pivotally", "mounted", "cooperates", "elastic", "member", "hold", "peephole", "tube-opening", "tube-closing", "position", "embodiment", "peephole", "provided", "end", "tube"], "section": [5], "subsection": [104], "group": [512], "subgroup": [6535], "labels": [5, 113, 649, 7333]}
2 | {"id": "3930329", "title": ["bait", "molding", "device"], "abstract": ["bait", "molding", "device", "forming", "securing", "moldable", "bait", "material", "bread", "dough", "fishhook", "mold", "formed", "cup", "shaped", "mold", "section", "secured", "pliers-like", "device", "opening", "closing", "mold", "section", "mold", "elongate", "configuration", "accommodate", "entire", "fishhook", "moldable", "bait", "material", "surrounding", "fishhook", "mold", "section", "mold", "section", "includes", "groove", "edge", "mold", "permit", "fishing", "line", "attached", "hook", "pa", "mold", "closed"], "section": [0], "subsection": [0], "group": [7], "subgroup": [155], "labels": [0, 9, 144, 953]}
3 | {"id": "3930333", "title": ["coupling", "member", "toy", "vehicle", "drive", "system"], "abstract": ["coupling", "member", "toy", "vehicle", "drive", "system", "employed", "play", "situation", "coupling", "member", "generally", "comprised", "exaggerated", "triangularly-shaped", "portion", "funnel-shaped", "portion", "exaggerated", "triangularly-shaped", "portion", "toy", "vehicle", "driven", "forward", "backward", "complete", "u-turn", "disengaged", "drive", "chain", "drive", "system", "funnel-shaped", "portion", "ensures", "toy", "vehicle", "engaged", "driven", "chain", "child", "desire", "drive", "toy", "vehicle", "forward", "direction"], "section": [0], "subsection": [14], "group": [82], "subgroup": [1090, 1086], "labels": [0, 23, 219, 1888, 1884]}
4 | {"id": "3930351", "title": ["method", "apparatus", "transferring", "yarn", "package", "doffed", "textile", "machine", "container"], "abstract": ["present", "invention", "relates", "method", "apparatus", "transferring", "yarn", "package", "doffed", "textile", "machine", "provided", "conveyer", "belt", "disposed", "longitudinal", "direction", "thereof", "container", "yarn", "package", "carried", "end", "conveyer", "dropped", "container", "positioned", "receiving", "position", "end", "conveyer", "dropping", "distance", "end", "portion", "carrying", "surface", "conveyer", "surface", "receiving", "yarn", "package", "container", "maintained", "substantially", "predetermined", "distance", "larger", "width", "package", "smaller", "width", "package"], "section": [8, 1], "subsection": [127, 46], "group": [235, 659, 240], "subgroup": [8245, 2945, 3140, 3119], "labels": [8, 1, 136, 55, 372, 796, 377, 9043, 3743, 3938, 3917]}
5 | {"id": "3930440", "title": ["device", "conveying", "rolled", "food"], "abstract": ["device", "conveying", "transferring", "rolled", "food", "semi-finished", "material", "chain", "procedural", "step", "step", "comb-shaped", "cradle", "secured", "stage", "processing", "device", "travelling", "comb-shaped", "cradle", "pa", "fixed", "cradle", "upwardly", "material", "fixed", "cradle", "moved", "travelling", "cradle", "move", "fixed", "cradle", "stage", "pa", "fixed", "cradle", "downwardly", "semi-finished", "material", "put", "fixed", "cradle", "stage"], "section": [0], "subsection": [1], "group": [12], "subgroup": [215], "labels": [0, 10, 149, 1013]}
6 | {"id": "3930463", "title": ["vapor", "deposition", "apparatus", "including", "three-compartment", "evaporator"], "abstract": ["evaporation", "metal", "production", "alloy", "deposition", "component", "vapour", "phase", "carried", "controllably", "heated", "source", "comprising", "melting", "compartment", "operation", "metal", "melted", "mixing", "compartment", "constriction", "passage", "melting", "mixing", "compartment", "minimise", "back", "mixing", "molten", "metal", "evaporation", "compartment", "supply", "metal", "mixing", "compartment", "evaporation", "compartment", "surface", "molten", "metal", "heating", "preferably", "electron", "beam", "heating"], "section": [2], "subsection": [68], "group": [334], "subgroup": [4548], "labels": [2, 77, 471, 5346]}
7 | {"id": "3930582", "title": ["system", "testing", "paper", "money"], "abstract": ["test", "genuineness", "condition", "dollar", "bill", "substantially", "identical", "paper", "thickness", "gauge", "positioned", "path", "paper", "determine", "deviation", "thickness", "passing", "specimen", "predetermined", "thickness", "reference", "specimen", "scanned", "concurrently", "therewith", "measured", "deviation", "fed", "processor", "count", "positive", "negative", "deviation", "multiplicity", "incremental", "period", "determines", "nature", "irregularity", "count", "deviation", "sign", "gauge", "measuring", "thickness", "parallel", "track", "system", "discriminate", "irregularity", "gap", "overlapping", "adhesive", "tape", "dog-eared", "corner"], "section": [6], "subsection": [112], "group": [568], "subgroup": [7139], "labels": [6, 121, 705, 7937]}
8 | {"id": "3930737", "title": ["stud", "assembly"], "abstract": ["stud", "assembly", "produced", "holding", "fixed", "position", "head", "portion", "gripping", "surface", "extending", "base", "deformable", "washer", "extended", "base", "stud", "member", "provided", "receptacle", "end", "portion", "positioned", "respect", "base", "head", "interengaging", "cooperating", "surface", "receptacle", "base", "aligned", "provide", "locking", "aperture", "receiving", "washer", "finally", "washer", "deformed", "extends", "locking", "aperture", "locking", "stud", "member", "head", "forming", "unitary", "stud", "assembly"], "section": [8, 1], "subsection": [26, 127], "group": [660, 125], "subgroup": [1688, 8350], "labels": [8, 1, 35, 136, 797, 262, 2486, 9148]}
9 | {"id": "3930764", "title": ["air", "tool", "overspeed", "shutoff", "device"], "abstract": ["overspeed", "shutoff", "device", "rotary", "pneumatic", "tool", "disclosed", "device", "operable", "shut", "air", "supply", "motor", "failure", "governor", "function", "properly", "preventing", "overspeeding", "motor", "device", "includes", "valve", "closing", "plate", "positioned", "path", "air", "flow", "pneumatic", "motor", "upstream", "inlet", "port", "passage", "air", "motor", "normal", "operation", "tool", "valve", "plate", "rotates", "motor", "drive", "shaft", "retained", "position", "spaced", "air", "inlet", "port", "locking", "mechanism", "engaging", "drive", "shaft", "tool", "prevent", "axial", "movement", "therealong", "plate", "locking", "device", "comprises", "cantilever", "mounted", "spring", "wire", "engagement", "groove", "drive", "shaft", "centrifugally", "responsive", "weight", "operably", "connected", "disengage", "wire", "groove", "response", "attainment", "predetermined", "rotary", "speed", "failure", "main", "governor", "tool", "consequent", "acceleration", "motor", "predetermined", "speed", "wire", "disengaged", "shaft", "groove", "air", "pressure", "drop", "valve", "inlet", "port", "closure", "plate", "move", "cover", "inlet", "port", "stopping", "flow", "air", "motor"], "section": [8, 5], "subsection": [127, 88], "group": [416, 660, 415], "subgroup": [5270, 8340, 5280], "labels": [8, 5, 136, 97, 553, 797, 552, 6068, 9138, 6078]}
10 | {"id": "3930775", "title": ["testing", "correcting", "metering", "accuracy", "multihole", "spinnerets"], "abstract": ["method", "apparatus", "off-line", "testing", "correcting", "metering", "accuracy", "split", "multihole", "spinneret", "fed", "single", "metered", "stream", "determining", "end", "end", "variation", "flow", "rate", "reworking", "number", "hole", "lower", "flow", "rate", "side", "split", "spinneret", "calculated", "multiplying", "average", "test", "percent", "bias", "split", "spinneret", "predetermined", "constant", "bring", "end", "end", "variation", "flow", "rate", "required", "accuracy"], "section": [6, 3], "subsection": [106, 72], "group": [346, 518], "subgroup": [4630, 6587], "labels": [6, 3, 115, 81, 483, 655, 5428, 7385]}
11 | {"id": "3930811", "title": ["reactor", "pressure", "gasification", "coal"], "abstract": ["reactor", "continuous", "gasification", "coal", "superatmospheric", "pressure", "elevated", "temperature", "gaseous", "gasifying", "agent", "free", "oxygen", "oxygen-free", "gasifying", "agent", "steam", "carbon", "dioxide", "disclosed", "reactor", "includes", "substantially", "conical", "rotary", "grate", "rotatably", "mounted", "lower", "portion", "reactor", "housing", "rotary", "grate", "feed", "gasifying", "agent", "discharge", "gasification", "residue", "notwithstanding", "inside", "diameter", "reactor", "housing", "clearance", "rotary", "grate", "housing", "millimeter", "height", "annular", "rim", "rotary", "grate", "millimeter", "vertical", "distance", "rotary", "grate", "housing", "bottom", "millimeter"], "section": [2], "subsection": [61], "group": [304], "subgroup": [4224, 4225], "labels": [2, 70, 441, 5022, 5023]}
12 | {"id": "3930926", "title": ["apparatus", "forming", "tubular", "fibrous", "insulatory", "article"], "abstract": ["apparatus", "forming", "edge", "mineral", "fiber", "blanket", "advance", "processing", "path", "guide", "surface", "shaped", "continuously", "direct", "feathered", "ragged", "longitudinal", "edge", "uncured", "mat", "mineral", "fiber", "introduced", "forming", "apparatus", "gathering", "fiber", "control", "uniformity", "density", "edge", "portion", "mass", "guide", "comprise", "surface", "extending", "radially", "outward", "effective", "end", "pressure", "roll", "cooperating", "rotatable", "mandrel", "formation", "tube", "fiber", "mat", "surface", "encountered", "advancing", "blanket", "raise", "feathered", "edge", "raised", "edge", "turned", "major", "body", "portion", "blanket", "surface", "surface", "parallel", "path", "advance", "establish", "desired", "margin", "maintain", "margin", "blanket", "compacted", "major", "face", "surface", "provided", "form", "shape", "marginal", "edge", "mass", "mineral", "fiber", "obtain", "desired", "configuration", "square", "chamfered", "edge"], "section": [3], "subsection": [75], "group": [360], "subgroup": [4712], "labels": [3, 84, 497, 5510]}
13 | {"id": "3931027", "title": ["cellulose", "material", "treated", "thermosetting", "resin", "improved", "physical", "property", "elevated", "temperature"], "abstract": ["cellulose", "material", "improved", "resistance", "thermal", "deterioration", "application", "insulation", "material", "electrical", "apparatus", "cellulose", "material", "treated", "aqueous", "dispersion", "liquid", "uncured", "crosslinkable", "thermosetting", "resin", "epoxy", "resin", "water", "soluble", "nitrogen-containing", "compound", "curing", "resin", "cellulose", "molecule", "believed", "enter", "crosslinking", "reaction", "provide", "treated", "cellulosic", "product", "capable", "withstanding", "deteriorating", "action", "heat", "extended", "period", "time", "protein", "material", "casein", "isolated", "soy", "protein", "added", "treating", "liquid", "protein", "contributes", "additional", "nitrogen", "group", "treating", "medium", "increase", "thermal", "stability", "cellulose", "act", "film", "thermal", "stability", "cellulose", "material", "improved", "addition", "organic", "amine", "melamine", "treating", "liquid"], "section": [7], "subsection": [120], "group": [602, 600], "subgroup": [7402, 7376], "labels": [7, 129, 739, 737, 8200, 8174]}
14 | {"id": "3931047", "title": ["catalyst", "composition", "room", "temperature", "vulcanizing", "silicone", "composition", "catalyzed", "composition", "therefrom"], "abstract": ["stable", "catalyst", "composition", "providing", "faster", "cure", "time", "room", "temperature", "vulcanizing", "organopolysiloxane", "composition", "comprise", "stannous", "salt", "branched", "chain", "alkyl", "carboxylic", "acid", "carbon", "atom", "stabilizing", "carrier", "therefor", "methyl", "alkyl", "polysiloxane", "fluid", "hydroxy", "aryl", "substituents", "catalyst", "composition", "uniquely", "adapted", "provide", "injectable", "composition", "curable", "low", "density", "high", "compressive", "strength", "syntactic", "foam", "custom", "fitting", "footwear", "wearer"], "section": [2], "subsection": [59], "group": [290, 286], "subgroup": [3858, 3993, 3964], "labels": [2, 68, 427, 423, 4656, 4791, 4762]}
15 | {"id": "3931124", "title": ["fluoroelastomer", "composition"], "abstract": ["fluoroelastomer", "composition", "fluoroelastomer", "member", "selected", "group", "consisting", "bivalent", "metal", "oxide", "bivalent", "metal", "hydroxide", "mixture", "bivalent", "metal", "oxide", "metal", "hydroxide", "metal", "salt", "weak", "acid", "aromatic", "polyhydroxy", "compound", "quaternary", "ammonium", "compound", "-", "-", "diaza-bicyclo", "-", "fluoro-rubber", "low", "compression", "set", "excellent", "elastic", "property", "fluoroelastomer", "composition", "handled", "processed", "safety", "cured", "good", "cure", "rate", "ha", "excellent", "storage", "property", "cure", "rate", "composition", "accelerated", "addition", "water", "metal", "compound", "produce", "water", "reacting", "hydrogen", "fluoride"], "section": [2], "subsection": [59], "group": [289, 290], "subgroup": [3938, 3942, 3965], "labels": [2, 68, 426, 427, 4736, 4740, 4763]}
16 | {"id": "3931140", "title": ["h-gly-gly-tyr-ala", "-", "somatostatin"], "abstract": ["growth", "hormone", "release", "inhibiting", "compound", "protamine", "zinc", "protamine", "aluminum", "non-toxic", "acid", "addition", "salt", "thereof", "linear", "heptadecapeptide", "intermediate"], "section": [2, 8, 0], "subsection": [58, 12, 127], "group": [659, 282, 68], "subgroup": [8287, 843, 8335, 3713], "labels": [2, 8, 0, 67, 21, 136, 796, 419, 205, 9085, 1641, 9133, 4511]}
17 | {"id": "3931330", "title": ["process", "production", "benzaldehyde"], "abstract": ["liquid", "phase", "co-oxidation", "process", "production", "benzaldehyde", "compound", "structural", "formula", "selected", "group", "radical", "consisting", "hydrogen", "halogen", "methyl", "methoxy", "comprising", "admixing", "toluene", "compound", "structural", "formula", "defined", "aliphatic", "saturated", "aldehyde", "carbon", "atom", "molar", "ratio", "toluene", "compound", "aldehyde", "preferably", "mol", "toluene", "compound", "mol", "aldehyde", "oxygen", "gas", "consisting", "carbon", "hydrogen", "oxygen", "atom", "temperature", "range", "degree", "degree"], "section": [2], "subsection": [58], "group": [276], "subgroup": [3551, 3548], "labels": [2, 67, 413, 4349, 4346]}
18 | {"id": "3931389", "title": ["process", "desulfurizing", "hot", "gas"], "abstract": ["gas", "produced", "reacting", "fuel", "oxygen", "gas", "water", "vapor", "pressure", "desulfurized", "scrubbing", "concentrated", "solution", "alkali", "salt", "weak", "inorganic", "acid", "temperature", "atmospheric-pressure", "boiling", "point", "solution", "column", "maintaining", "exchange", "ratio", "cubic", "meter", "concentrated", "solution", "standard", "cubic", "meter", "hydrogen", "sulfide", "gas", "purified"], "section": [2, 1], "subsection": [61, 15], "group": [305, 86], "subgroup": [4226, 1165], "labels": [2, 1, 70, 24, 442, 223, 5024, 1963]}
19 | {"id": "3931401", "title": ["-", "", "-", "adenosine", "carboxamides", "increasing", "coronary", "sinus", "partial", "pressure", "oxygen"], "abstract": ["amide", "-", "", "-", "adensine", "carboxylic", "acid", "represented", "formula", "hydrogen", "loweralkyl", "loweralkenyl", "loweralkynyl", "cycloalkyl", "hydrogen", "acyl", "form", "isopropylidene", "benzylidene", "moiety", "pharmaceutically", "acceptable", "acid", "addition", "salt", "thereof", "compound", "hydrogen", "treating", "cardiovascular", "disorder", "anti-anginal", "anti-hypertensive", "agent", "compound", "acyl", "form", "isopropylidene", "benzylidene", "moiety", "intermediate", "preparation", "final", "product", "=", "hydrogen"], "section": [2], "subsection": [58], "group": [280], "subgroup": [3686], "labels": [2, 67, 417, 4484]}
20 | {"id": "3931405", "title": ["penicillin", "ester", "method", "composition", "treating", "infectious", "disease"], "abstract": ["penicillin", "ester", "formula", "alkyl", "carbon", "atom", "phenyl", "thienyl", "furyl", "phenyl", "substituted", "member", "group", "consisting", "halogen", "hydroxy", "amino", "selected", "group", "consisting", "--", "tetrazolyl", "--", "nhso", "selected", "group", "consisting", "radical", "-", "defined", "specification", "ester", "resorbed", "oral", "administration"], "section": [2, 8], "subsection": [125, 58], "group": [278, 277, 655], "subgroup": [8088, 3658, 3676], "labels": [2, 8, 134, 67, 415, 414, 792, 8886, 4456, 4474]}
21 | {"id": "3931465", "title": ["blooming", "control", "charge", "coupled", "imager"], "abstract": ["improved", "operational", "blooming", "control", "circuit", "charge", "coupled", "device", "image", "sensing", "array", "accumulated", "", "region", "substrate", "driven", "depletion", "", "end", "integration", "time", "prior", "transfer", "content", "register", "found", "improve", "resolution", "reproduced", "image"], "section": [7], "subsection": [123, 120], "group": [607, 639], "subgroup": [7557, 7948], "labels": [7, 132, 129, 744, 776, 8355, 8746]}
22 | {"id": "3931487", "title": ["electric", "momentary", "action", "push-button", "switch"], "abstract": ["electric", "switch", "comprising", "casing", "base", "central", "terminal", "lateral", "terminal", "mounted", "thereon", "pushbutton", "slidably", "mounted", "opening", "casing", "opposite", "base", "metal", "switching", "member", "sliding", "contact", "terminal", "brought", "contact", "terminal", "spring", "mounted", "switching", "member", "pushbutton", "urge", "push-button", "released", "position", "push", "switching", "member", "contact", "terminal", "terminal", "actuator", "integral", "push-button", "depressed", "tilt", "switching", "member", "terminal", "move", "switching", "member", "terminal", "contact", "terminal"], "section": [7], "subsection": [120], "group": [604], "subgroup": [7421], "labels": [7, 129, 741, 8219]}
23 | {"id": "3931544", "title": ["fast", "warm", "electronic", "ballast", "circuit", "high", "pressure", "discharge", "lamp"], "abstract": ["electronic", "ballast", "circuit", "reducing", "warm", "time", "high", "intensity", "discharge", "hid", "lamp", "lamp", "current", "flow", "abruptly", "reduced", "switching", "responsive", "load", "voltage", "variation", "attainment", "power", "amount", "sufficient", "activate", "hid", "lamp"], "section": [8, 7], "subsection": [124, 127], "group": [644, 659], "subgroup": [8212, 8032], "labels": [8, 7, 133, 136, 781, 796, 9010, 8830]}
24 | {"id": "3931566", "title": ["temperature", "compensated", "current", "sensing", "circuit", "power", "supply"], "abstract": ["converter", "power", "supply", "provided", "current", "regulating", "circuit", "magnetoresistive", "element", "disposed", "flux", "coupling", "proximity", "output", "inductor", "power", "supply", "providing", "control", "signal", "output", "current", "thereof", "element", "mounted", "air", "gap", "split-loop", "core", "inductor"], "section": [7], "subsection": [121], "group": [619], "subgroup": [7722, 7718], "labels": [7, 130, 756, 8520, 8516]}
25 | {"id": "3931604", "title": ["sampling", "automatic", "equalizer"], "abstract": ["automatic", "transversal", "equalizer", "provided", "input", "signal", "sampled", "stored", "series", "capacitor", "capacitor", "voltage", "sequentially", "recalled", "application", "analog", "multiplier", "receives", "sequence", "coefficient", "voltage", "stored", "series", "capacitor", "single", "analog", "multiplier", "performs", "function", "plural", "voltage", "controlled", "attenuator", "prior", "art", "equalizer"], "section": [7], "subsection": [123], "group": [637], "subgroup": [7901], "labels": [7, 132, 774, 8699]}
26 | {"id": "3931623", "title": ["reliable", "earth", "terminal", "satellite", "communication"], "abstract": ["reliable", "earth", "terminal", "satellite", "communication", "system", "capable", "unattended", "operation", "extended", "period", "time", "disclosed", "terminal", "includes", "antenna", "single", "fixed", "reflector", "provide", "multiple", "beam", "positioned", "small", "feed", "motion", "transmitter", "modular", "construction", "low", "power", "traveling", "wave", "tube", "power", "amplifier", "increment", "operating", "band", "transmitting", "chain", "designed", "carry", "voice", "data", "television", "signal", "satellite", "include", "modulator", "amplifier", "band-limiting", "filter", "frequency", "converter", "power", "amplifier", "amplifier", "capable", "operating", "full", "operating", "band", "operation", "amplifier", "limited", "assigned", "increment", "band-limiting", "filter", "single", "redundant", "high", "power", "amplifier", "provided", "remotely", "switched", "transmitting", "chain", "event", "failure", "amplifier", "output", "connected", "antenna", "directional", "filter", "multiplexer", "receiver", "includes", "low", "noise", "preamplifier", "featuring", "modular", "fail", "-", "soft", "", "design", "receiving", "chain", "low", "noise", "preamplifier", "channelized", "band", "increment", "transmitting", "chain", "separate", "converter", "demodulator", "module", "carrier", "subsystem", "broadband", "channel", "bandwidth", "determined", "intermediate", "frequency", "band", "pa", "filter", "prime", "power", "low", "voltage", "battery", "bank", "constantly", "recharged", "commercial", "power", "source", "terminal", "operated", "limited", "period", "time", "solely", "battery", "band", "commercial", "outage", "back-up", "motor", "generator", "recharging", "power", "extended", "period", "commercial", "outage", "terminal", "monitored", "controlled", "central", "control", "point", "terminal", "automatic", "self-protecting", "remote", "control", 
"limited", "parameter", "adjustment", "required", "normal", "operation", "antenna", "feed", "positioning", "change", "transmitter", "power", "switching", "spare", "power", "amplifier", "turning", "carrier"], "section": [8, 7], "subsection": [123, 125], "group": [633, 653], "subgroup": [8079, 7869, 7855], "labels": [8, 7, 132, 134, 770, 790, 8877, 8667, 8653]}
27 | {"id": "3931667", "title": ["interlocking", "attachment", "device"], "abstract": ["attachment", "device", "comprising", "filament", "laterally", "oriented", "bar", "end", "hollow", "body", "member", "end", "wall", "body", "member", "opening", "therethrough", "large", "receive", "filament", "bar", "parallel", "orientation", "width", "smaller", "length", "bar", "prevent", "withdrawal", "bar", "hollow", "interior", "subsequent", "insertion", "self-contained", "interlocked", "attachment", "obtained"], "section": [6, 8], "subsection": [127, 114], "group": [660, 577], "subgroup": [8345, 8348, 7219], "labels": [6, 8, 136, 123, 797, 714, 9143, 9146, 8017]}
28 | {"id": "3931730", "title": ["ramp", "current", "apparatus", "method", "sensitivity", "testing"], "abstract": ["ramp", "current", "method", "sensitivity", "testing", "dynamic", "record", "btained", "current", "voltage", "energy", "resistance", "instantaneous", "power", "fire", "electroexplosive", "device", "method", "valuable", "information", "gained", "firing", "minimum", "sampling", "utilized", "ramp", "method", "defective", "item", "detected", "recorded", "le", "sensitive", "device", "erroneous", "data", "point", "contributing", "accurate", "firing", "data"], "section": [6], "subsection": [106], "group": [528], "subgroup": [6738], "labels": [6, 115, 665, 7536]}
29 | {"id": "3931732", "title": ["sharp", "edge", "tester"], "abstract": ["hand", "held", "tool", "testing", "sharpness", "edge", "determine", "presence", "absence", "safety", "hazard", "rotatable", "mandrel", "driven", "torque", "spring", "velocity", "single", "rotation", "mandrel", "carrying", "covering", "testing", "material", "engaged", "edge", "tested", "automatically", "driving", "mandrel", "presence", "predetermined", "contact", "force", "pressure", "test", "material", "test", "edge", "included", "adjustably", "regulating", "mandrel", "speed", "rotation", "contacting", "pressure", "requisite", "effect", "testing", "operation"], "section": [6], "subsection": [106], "group": [528], "subgroup": [6711], "labels": [6, 115, 665, 7509]}
30 | {"id": "3931738", "title": ["device", "monitoring", "fluid", "pressure", "mechanism", "hydrostatic", "fluid", "bearing"], "abstract": ["pressurized", "operating", "fluid", "oil", "hydrostatic", "fluid", "bearing", "directed", "chamber", "plunger", "urging", "plunger", "position", "monitoring", "pressure", "force", "slightly", "le", "force", "operating", "fluid", "operating", "fluid", "minimum", "working", "pressure", "exerted", "plunger", "tending", "move", "plunger", "position", "pressure", "operating", "fluid", "drop", "minimum", "working", "pressure", "plunger", "moved", "position", "movement", "sensed", "sensing", "malfunction"], "section": [6, 7, 5], "subsection": [93, 106, 120], "group": [526, 604, 444], "subgroup": [6683, 5696, 5690, 7475], "labels": [6, 7, 5, 102, 115, 129, 663, 741, 581, 7481, 6494, 6488, 8273]}
31 | {"id": "3931908", "title": ["insulated", "tank"], "abstract": ["invention", "relates", "improved", "corner", "construction", "cryogenic", "tank", "generally", "rectilinear", "cross", "section", "internal", "surface", "tank", "insulated", "foamed", "material", "direct", "contact", "cryogenic", "liquid"], "section": [8, 1, 5], "subsection": [127, 46, 95], "group": [462, 237, 659], "subgroup": [6020, 8170, 6026, 2987, 6024, 3033, 6027, 6015, 6019, 6016], "labels": [8, 1, 5, 136, 55, 104, 599, 374, 796, 6818, 8968, 6824, 3785, 6822, 3831, 6825, 6813, 6817, 6814]}
32 | {"id": "3931912", "title": ["two-part", "hair", "dye", "hair", "bleach", "package"], "abstract": ["pressurized", "package", "conventional", "pressure", "propellant", "divided", "compartment", "arranged", "mixing", "content", "simultaneously", "dispensing", "compartment", "peroxide", "solution", "hair", "treating", "composition", "including", "selected", "compound", "quantity", "sufficient", "prevent", "development", "unsafe", "pressure", "peroxide", "decompose", "package"], "section": [8, 0], "subsection": [12, 127], "group": [73, 659, 68], "subgroup": [919, 838, 8253, 852], "labels": [8, 0, 21, 136, 210, 796, 205, 1717, 1636, 9051, 1650]}
33 | {"id": "3931915", "title": ["liquid-containing", "cartridge", "device", "dispensing", "measured", "amount", "liquid", "cartridge"], "abstract": ["invention", "concerned", "container", "receiving", "liquid", "material", "fitting", "end", "adapted", "provide", "discharge", "port", "liquid", "container", "end", "plunger", "piston", "adapted", "moved", "inside", "wall", "container", "discharge", "predetermined", "aliquot", "", "dos", "", "liquid", "container", "port", "invention", "comprises", "device", "adapted", "receive", "liquid-containing", "cartridge", "package", "type", "mentioned", "coacting", "package", "move", "plunger", "container", "dose-dispensing", "-", "discharge", "manner", "gradually", "accelerate", "movement", "plunger", "position", "rest", "container", "maximum", "rate", "motion", "gradually", "decelerate", "motion", "plunger", "final", "position", "end", "dose-discharge", "device", "comprises", "accurately", "predetermining", "measuring", "", "amount", "liquid", "discharged", "individual", "movement", "plunger", "device", "comprise", "repeatedly", "actuating", "piston", "accelerating-decelerating", "movement", "discharge", "plurality", "measured", "sample", "repeatedly", "non-spurt", "non-splash", "condition"], "section": [6, 0], "subsection": [106, 12], "group": [70, 521], "subgroup": [904, 6625], "labels": [6, 0, 115, 21, 207, 658, 1702, 7423]}
34 | {"id": "3931984", "title": ["anti-loading", "tray", "shopping", "cart"], "abstract": ["disclosed", "anti-loading", "tray", "assembly", "mounting", "lower", "frame", "conventional", "nestable", "shopping", "cart", "position", "basket", "prevent", "pilferage", "loading", "article", "cart", "basket", "tray", "includes", "plurality", "interconnecting", "elongated", "strut", "cross", "member", "size", "shape", "arrangement", "define", "forwardly-and-downwardly", "sloping", "plane-like", "area", "lateral", "wing", "prevent", "loading", "article", "tray", "lower", "frame", "cart", "simplified", "connection", "tray", "existing", "structure", "cart", "frame", "anti-loading", "tray", "economical", "add-on", "feature", "arrangement", "simple", "inexpensive", "tray", "easily", "manufactured", "readily", "installed", "factory", "existing", "cart", "field"], "section": [1], "subsection": [43], "group": [217], "subgroup": [2671], "labels": [1, 52, 354, 3469]}
35 | {"id": "3932026", "title": ["liquid", "crystal", "display", "assembly", "dielectric", "coated", "electrode"], "abstract": ["display", "assembly", "nematic", "liquid", "crystal", "sandwiched", "supporting", "substrate", "substrate", "ha", "layer", "conductive", "coating", "inside", "surface", "overcoated", "dielectric", "film", "layer", "separate", "conductive", "layer", "liquid", "crystal", "material", "embodiment", "include", "varied", "thickness", "dielectric", "association", "image", "lead", "portion", "conductive", "coating"], "section": [6], "subsection": [107], "group": [538], "subgroup": [6846], "labels": [6, 116, 675, 7644]}
36 | {"id": "3932045", "title": ["rolling", "contact", "joint"], "abstract": ["apparatus", "disclosed", "rolling", "contact", "joint", "prosthetic", "joint", "knee", "joint", "application", "requiring", "movable", "section", "mechanical", "joint", "joint", "variety", "form", "depending", "situation", "essence", "includes", "body", "surface", "portion", "contact", "body", "movable", "relative", "constrained", "movement", "nature", "surface", "contact", "flexible", "strap", "positioned", "contact", "body", "basic", "configuration", "including", "pair", "cylinder", "utilized", "flexible", "strap", "wrapped", "completely", "partially", "cylinder", "provide", "joint", "substantially", "restraint", "motion", "low", "friction", "due", "rolling", "contact", "contacting", "surface", "cylinder", "addition", "body", "pair", "cylindrical", "surface", "diameter", "respect", "body", "contact", "cylindrical", "surface", "flexible", "strap", "wrapped", "cylindrical", "surface", "cylindrical", "surface", "concentric", "respect", "diameter", "proper", "ratio", "substantially", "resistance", "motion", "rolling", "contact", "friction", "low", "cylindrical", "surface", "concentric", "flexible", "strap", "strained", "rotation", "spring", "action", "provided", "device", "shape", "body", "positioning", "flexible", "strap", "determines", "type", "motion", "combination", "addition", "embodiment", "rolling", "contact", "joint", "prosthetic", "knee", "joint"], "section": [8, 0, 5], "subsection": [94, 12, 127], "group": [660, 448, 64], "subgroup": [5738, 754, 8361, 8350], "labels": [8, 0, 5, 103, 21, 136, 797, 585, 201, 6536, 1552, 9159, 9148]}
37 | {"id": "3932082", "title": ["forming", "reinforced", "concrete", "module"], "abstract": ["apparatus", "constructing", "reinforced", "concrete", "modular", "construction", "unit", "comprising", "longitudinally", "extending", "multi-sided", "construction", "unit", "reinforcing", "bar", "mesh", "formed", "cage", "desired", "size", "shape", "unit", "cage", "mounted", "rotatable", "shaft", "raised", "lowered", "relative", "horizontal", "bed", "side", "modular", "construction", "unit", "turn", "positioned", "perimeter", "form", "bed", "concrete", "poured", "finished", "flat", "form", "side", "modular", "unit", "side", "ha", "cured", "sufficient", "time", "insure", "structural", "integrity", "cage", "raised", "rotated", "align", "side", "cage", "horizontal", "bed", "cage", "lowered", "position", "adjacent", "bed", "suitable", "form", "concrete", "poured", "reinforcing", "structure", "side", "cage", "form", "side", "wall", "building", "successive", "side", "similarly", "formed", "longitudinally", "extending", "hollow", "construction", "unit", "ha", "completed", "end", "wall", "formed", "tubular", "member", "enclose", "modular", "building", "unit"], "section": [1], "subsection": [31], "group": [151], "subgroup": [1908, 1905], "labels": [1, 40, 288, 2706, 2703]}
38 | {"id": "3932087", "title": ["arrangement", "moulding", "press", "parted", "press", "tool", "production", "hot-pressed", "plastic", "material", "product", "grammophone", "record"], "abstract": ["moulding", "press", "parted", "press", "tool", "producing", "hot-pressed", "product", "plastic", "material", "grammophone", "record", "arrangement", "holding", "handling", "product", "movable", "pressing", "area", "stripping", "apparatus", "situated", "area", "arrangement", "incorporating", "holder", "provided", "anchoring", "pressed", "product", "utilizing", "excess", "material", "exuding", "pressing", "tool", "half"], "section": [8, 1], "subsection": [32, 127], "group": [158, 659, 155], "subgroup": [8254, 2064, 1947], "labels": [8, 1, 41, 136, 295, 796, 292, 9052, 2862, 2745]}
39 | {"id": "3932245", "title": ["mechanical", "embossing", "foamed", "sheet", "material"], "abstract": ["decorative", "sheet", "material", "foamed", "vinyl", "floor", "covering", "comprising", "preferably", "base", "substrate", "asbestos", "felt", "layer", "foam", "cellular", "resin", "material", "base", "portion", "thickness", "providing", "relief", "pattern", "foam", "land", "large", "cell", "foam", "valley", "crushed", "smaller", "cell", "cell", "wall", "bonded", "layer", "non-cellular", "transparent", "resin", "material", "overlying", "land", "valley", "area", "relief", "pattern", "printed", "color", "pattern", "design", "provided", "foam", "resin", "layer", "transparent", "resin", "layer", "colored", "area", "pattern", "design", "accurate", "registration", "predetermined", "relation", "crushed", "valley", "area", "foam", "layer", "addition", "relief", "color", "pattern", "product", "pattern", "effect", "registration", "relief", "color", "pattern", "pattern", "light", "reflective", "characteristic", "exposed", "surface", "transparent", "layer", "apparatus", "method", "producing", "covering", "material", "invention", "disclosed"], "section": [3, 8, 1], "subsection": [32, 40, 127, 77], "group": [660, 372, 186, 155], "subgroup": [1956, 8354, 1948, 8342, 4856, 2360], "labels": [3, 8, 1, 41, 49, 136, 86, 797, 509, 323, 292, 2754, 9152, 2746, 9140, 5654, 3158]}
40 | {"id": "3932261", "title": ["electrode", "assembly", "electrolytic", "cell"], "abstract": ["electrode", "provided", "electrolytic", "cell", "employing", "metal", "electrode", "electrode", "comprises", "electrode", "surface", "positioned", "parallel", "space", "conductive", "support", "conductive", "support", "separately", "attached", "electrode", "surface", "positioned", "space", "electrode", "surface", "conductive", "support", "attached", "substantially", "perpendicular", "electrode", "plate", "electrode", "assembly", "employed", "electrolytic", "cell", "producing", "chlorine", "caustic", "soda", "oxychlorine", "compound", "electrolkysis", "alkali", "metal", "chloride", "solution"], "section": [2], "subsection": [69], "group": [338], "subgroup": [4574, 4575], "labels": [2, 78, 475, 5372, 5373]}
41 | {"id": "3932281", "title": ["leaf", "trap", "kit", "swimming", "pool"], "abstract": ["leaf", "trap", "kit", "swimming", "pool", "includes", "inverted", "perforate", "basket", "fitted", "main", "drain", "outlet", "pool", "dome-like", "housing", "open", "underside", "lateral", "opening", "admit", "leaf", "space", "housing", "inverted", "basket", "top", "opening", "leaf", "removed", "vacuum", "cleaner", "head", "fittable", "housing", "remove", "leaf", "top", "opening", "housing"], "section": [4], "subsection": [84], "group": [402], "subgroup": [5106], "labels": [4, 93, 539, 5904]}
42 | {"id": "3932340", "title": ["nylon", "coating", "composition"], "abstract": ["coating", "composition", "disclosed", "producing", "alcohol-insoluble", "film", "comprises", "mixture", "alcohol-soluble", "nylon", "copolymer", "alcohol-soluble", "alkoxymethylated", "nylon", "acid", "catalyst"], "section": [2], "subsection": [60, 59], "group": [293, 290], "subgroup": [4069, 3964], "labels": [2, 69, 68, 430, 427, 4867, 4762]}
43 | {"id": "3932378", "title": ["sulfonated", "disazo", "dyestuff", "ether", "group"], "abstract": ["disazo", "compound", "formula", "represents", "sulphobenzene", "sulphonaphthalene", "radical", "represents", "hydrogen", "atom", "low", "molecular", "alkyl", "alkoxy", "radical", "represents", "low", "molecular", "alkylene", "radical", "represents", "functional", "radical", "--", "--", "ortho", "-", "para-position", "azo", "bridge", "process", "preparation", "dyestuff", "dyestuff", "provide", "natural", "synthetic", "polyamide", "wool", "nylon", "dyeing", "excellent", "general", "fastness", "property"], "section": [2, 8], "subsection": [127, 60], "group": [291, 659], "subgroup": [4011, 8327, 4015], "labels": [2, 8, 136, 69, 428, 796, 4809, 9125, 4813]}
44 | {"id": "3932404", "title": ["process", "making", "pyrazines", "-", "butadiene"], "abstract": ["process", "preparation", "amino-pyrazine", "falling", "formula", "represent", "hydrogen", "atom", "alkyl", "aryl", "group", "carbon", "atom", "pyrazine", "ring", "form", "cyclic", "hydrocarbon", "carbon", "atom", "represents", "cyano", "carboxy", "carbonamido", "alkoxy-carbonyl", "aryloxycarbonyl", "group", "comprises", "subjecting", "-", "butadiene", "general", "formula", "zi", "represent", "alkyl", "group", "nitrogen", "atom", "form", "heterocyclic", "compound", "possibly", "hetero", "atom", "action", "ammonia", "reacting", "-", "butadiene", "formula", "obtained", "basic", "agent", "compound", "formula", "viii", "represent", "alkyl", "aryl", "group", "carbon", "atom", "pyrazine", "ring", "form", "cyclic", "hydrocarbon", "carbon", "atom", "intermediate", "compound", "formula", "meaning", "", "represent", "alkyl", "group", "nitrogen", "atom", "form", "heterocyclic", "compound", "possibly", "hetero-atom", "intermediate", "compound", "formula", "meaning"], "section": [2], "subsection": [58], "group": [277], "subgroup": [3590], "labels": [2, 67, 414, 4388]}
45 | {"id": "3932429", "title": ["azabicyclo", "octane", "derivative", "process", "preparing"], "abstract": ["compound", "formula", "hydrogen", "phenyl", "alkyl", "carbon", "atom", "cycloalkyl", "carbon", "atom", "alkyl", "carbon", "atom", "substituent", "selected", "group", "consisting", "phenyl", "benzoyl", "hydrogen", "alkyl", "carbon", "atom", "hydrogen", "alkanoyl", "carbon", "atom", "benzoyl", "nicotinoyl", "disclosed", "method", "preparing", "compound", "disclosed", "compound", "pharmaceutically", "acceptable", "acid", "addition", "salt", "thereof", "analgesic", "agent"], "section": [2], "subsection": [58], "group": [277], "subgroup": [3575], "labels": [2, 67, 414, 4373]}
46 | {"id": "3932447", "title": ["benzimidazoles"], "abstract": ["benzimidazole", "derivative", "formula", "alkyl", "carbon", "atom", "selected", "group", "consisting", "tetrahydrofurfuryl", "saturated", "unsaturated", "oxygen", "heterocycle", "carbon", "atom", "unsaturated", "hydrocarbon", "radical", "carbon", "atom", "halogen", "atom", "posse", "fungicidal", "activity"], "section": [2], "subsection": [58], "group": [276, 277], "subgroup": [3587, 3503, 3616, 3615], "labels": [2, 67, 413, 414, 4385, 4301, 4414, 4413]}
47 | {"id": "3932475", "title": ["process", "producing", "trimethyl-p-benzoquinone"], "abstract": ["process", "producing", "trimethyl-p-benzoquinone", "halogenating", "-", "-", "trimethylphenol", "oxidizing", "resulting", "-", "-", "disclosed", "resulting", "compound", "readily", "converted", "trimethylhydroquinone", "starting", "material", "producing", "vitamin"], "section": [2], "subsection": [58], "group": [276], "subgroup": [3549, 3537, 3553, 3535], "labels": [2, 67, 413, 4347, 4335, 4351, 4333]}
48 | {"id": "3932485", "title": ["improved", "preparation", "wittig", "salt", "vinyl", "beta", "-", "ionol"], "abstract": ["improved", "preparation", "wittig", "salt", "alpha", "beta", "-", "unsaturated", "alcohol", "treating", "alcohol", "phosphine", "basic", "medium", "presence", "salt", "weak", "organic", "base", "strong", "acid", "wittig", "salt", "reacted", "unsaturated", "aldehyde", "form", "polyene", "compound"], "section": [2], "subsection": [58], "group": [276, 278], "subgroup": [3542, 3676, 3497], "labels": [2, 67, 413, 415, 4340, 4474, 4295]}
49 | {"id": "3932488", "title": ["etherification", "bark", "extract", "condensed", "tannin"], "abstract": ["polyphenolic", "extract", "coniferous", "tree", "bark", "condensed", "tannin", "wood", "quebracho", "extract", "etherified", "reaction", "elevated", "temperature", "presence", "alkaline", "catalyst", "olefin", "double", "bond-activated", "carbonyl", "group", "structure", "effective", "olefin", "acrolein", "reaction", "product", "produced", "high", "yield", "water", "alkali", "soluble", "act", "good", "dispersants"], "section": [2], "subsection": [60, 59], "group": [287, 298], "subgroup": [4162, 3867], "labels": [2, 69, 68, 424, 435, 4960, 4665]}
50 | {"id": "3932491", "title": ["process", "optical", "resolution", "racemic", "lysine", "sulphanilate"], "abstract": ["optical", "resolution", "lysine", "form", "racemic", "lysine", "sulphanilate", "enhanced", "addition", "supersaturated", "solution", "racemic", "lysine", "sulphanilate", "substance", "suppress", "formation", "seed", "racemic", "lysine", "sulphanilate", "supersatured", "solution", "seed-suppressing", "substance", "added", "lysine", "lysine", "acetate", "lysine", "carbonate", "amino", "acetic", "acid", "glycerol", "yield", "optically", "active", "lysine", "sulphanilate", "improved", "disclosed", "process"], "section": [2], "subsection": [58], "group": [275], "subgroup": [3445], "labels": [2, 67, 412, 4243]}
51 | {"id": "3932575", "title": ["method", "making", "multilayered", "packaging", "tray", "deep-drawing"], "abstract": ["packaging", "tray", "provided", "bottom", "side", "wall", "outwardly", "directed", "flange", "surrounding", "periphery", "side", "wall", "tray", "consists", "outer", "layer", "moldable", "stretchable", "synthetic", "plastic", "material", "intermediate", "layer", "liquid-absorbing", "material", "insignificant", "stretchability", "molding", "tray", "side", "wall", "thereof", "formed", "stretching", "plastic", "material", "softening", "operation", "adhesive", "bond", "realized", "outer", "layer", "stretched", "tray", "side", "wall", "present", "partially", "ruptured", "intermediate", "layer"], "section": [1], "subsection": [32, 46], "group": [237, 155], "subgroup": [1952, 2966, 3033, 2967], "labels": [1, 41, 55, 374, 292, 2750, 3764, 3831, 3765]}
52 | {"id": "3932615", "title": ["process", "preparation", "granule"], "abstract": ["granule", "prepared", "subjecting", "crystalline", "sugar", "basis", "adjuvant", "binder-containing", "solution", "mixing", "apparatus", "crushing", "drying", "conventional", "technique", "resultant", "product", "characterized", "uniform", "granular", "size", "good", "disintegrating", "property", "great", "apparent", "density", "abrasion", "resistance", "basis", "comprises", "member", "selected", "group", "penicillin", "tetracycline", "movobiocin", "kanamycin", "paromomycin", "midecamycin"], "section": [2, 0, 1], "subsection": [64, 12, 15], "group": [324, 88, 68], "subgroup": [1200, 4454, 853], "labels": [2, 0, 1, 73, 21, 24, 461, 225, 205, 1998, 5252, 1651]}
53 | {"id": "3932635", "title": ["cyclic", "progestogen-interrupted", "estrogen", "oral", "contraceptive", "regimen"], "abstract": ["invention", "relates", "method", "fertility", "control", "cyclic", "progestogen-interrupted", "estrogen", "oral", "contraceptive", "regimen", "day", "menstrual", "flow", "day", "day", "medication", "administration", "cycle", "combined", "formulation", "estrogen", "progestogen", "substance", "administered", "day", "cycle", "day", "including", "day", "cycle", "formulation", "progestogen", "substance", "active", "component", "administered", "day", "cycle", "day", "combination", "formulation", "administered", "including", "day", "cycle", "regimen", "combination", "estrogen", "progestogen", "administered", "starting", "day", "cycle", "continuing", "day", "day", "cycle", "starting", "day", "cycle", "continuing", "day", "day", "cycle", "progestogen", "administered", "remaining", "day", "dosage-free", "regimen", "completed", "placebo", "nonhormonal", "supplement", "dispensing", "package", "holding", "unit", "dosage", "form", "oral", "ingestion", "unit", "dosage", "form", "daily", "sequence", "single", "cycle", "medication", "administration"], "section": [8, 0], "subsection": [12, 127], "group": [659, 68], "subgroup": [8287, 839, 837], "labels": [8, 0, 21, 136, 796, 205, 9085, 1637, 1635]}
54 | {"id": "3932790", "title": ["ground", "fault", "interrupter", "reversed", "line", "polarity", "lamp", "indicator"], "abstract": ["ground", "fault", "interrupter", "gfi", "provided", "reversed", "line", "polarity", "lamp", "indicator", "proper", "installation", "gfi", "reversed", "line", "polarity", "lamp", "indicator", "includes", "push", "button", "lamp", "connected", "series", "line", "conductor", "gfi", "ground", "wiring", "system", "ground", "conductor", "series", "connection", "line", "ground", "conductor", "case", "reversed", "line", "polarity", "lamp", "indicator", "check", "open", "circuit", "ground", "conductor"], "section": [6, 7], "subsection": [106, 121], "group": [531, 616], "subgroup": [7661, 6782], "labels": [6, 7, 115, 130, 668, 753, 8459, 7580]}
55 | {"id": "3932792", "title": ["sealed", "pump", "drive", "circuit", "therefor"], "abstract": ["completely", "sealed", "magnetically", "driven", "pump", "piston", "armature", "driven", "electrical", "winding", "unique", "electrical", "driving", "circuit", "provided", "pump", "embodying", "feedback", "winding", "magnetically", "coupled", "driving", "winding", "pump", "controlling", "reciprocation", "drive", "circuit", "facilitate", "driving", "rate", "embodying", "solid", "state", "bistable", "flip-flop", "component", "adaptable", "embodied", "computer", "low", "power", "logic", "device"], "section": [7], "subsection": [121], "group": [618], "subgroup": [7702], "labels": [7, 130, 755, 8500]}
56 | {"id": "3932802", "title": ["controlled", "power", "transferring", "device", "method", "utilizing", "reactance", "controlled", "development", "opposing", "magnetic", "flux"], "abstract": ["power", "transferring", "method", "device", "controlled", "reactance", "type", "designed", "regulate", "control", "application", "alternating", "current", "electric", "power", "load", "reactance", "controlled", "signal", "controlled", "develop", "controlled", "magnetic", "flux", "opposition", "reactive", "magnetic", "flux", "resulting", "flux", "cancellation", "effectively", "eliminating", "reactance", "device", "includes", "reactance", "core", "coil", "core", "connected", "circuit", "power", "transfer", "controlled", "opposing", "magnetic", "flux", "core", "developed", "coil", "core", "end", "connected", "end", "coil", "controllable", "scr", "connect", "end", "coil", "end", "coil", "place", "coil", "parallel", "coil", "arranged", "reactor", "core", "parallel", "current", "coil", "produce", "opposing", "magnetic", "flux", "core", "selective", "operation", "controllable", "reactance", "device", "varied", "wide", "range", "efficient", "power", "transfer"], "section": [6], "subsection": [110], "group": [553], "subgroup": [7015], "labels": [6, 119, 690, 7813]}
57 | {"id": "3932806", "title": ["surge", "comparison", "type", "coil", "tester"], "abstract": ["low", "voltage", "pulse", "applied", "pulse", "transformer", "produce", "high", "voltage", "pulse", "turn", "applied", "capacitor", "diode", "coil", "test", "standard", "reference", "coil", "waveform", "resulting", "surge", "current", "test", "reference", "coil", "superimposed", "cathode", "ray", "tube", "test", "coil", "judged", "defective", "waveform", "substantially", "resistor", "connected", "parallel", "capacitor", "discharge", "capacitor", "pulse", "diode", "prevents", "capacitor", "discharging", "coil", "high", "voltage", "pulse", "applied", "alternately", "coil", "waveform", "displayed", "single", "beam", "cathode", "ray", "tube", "simultaneously", "waveform", "displayed", "dual", "beam", "cathode", "ray", "tube"], "section": [6], "subsection": [106], "group": [531], "subgroup": [6779, 6782], "labels": [6, 115, 668, 7577, 7580]}
58 | {"id": "3932911", "title": ["structure", "mounting", "air", "moving", "vacuum", "cleaner"], "abstract": ["structure", "mounting", "air", "moving", "apparatus", "vacuum", "cleaner", "including", "support", "shoulder", "formed", "integral", "housing", "portion", "vacuum", "cleaner", "spring", "bracket", "removably", "supporting", "air", "moving", "apparatus", "shoulder", "air", "flow", "passage", "bracket", "embrace", "air", "moving", "apparatus", "defines", "opposite", "end", "portion", "resting", "support", "shoulder"], "section": [0], "subsection": [11], "group": [60], "subgroup": [696], "labels": [0, 20, 197, 1494]}
59 | {"id": "3932969", "title": ["ferrocement", "structure", "method"], "abstract": ["ferrocement", "structure", "method", "producing", "comprising", "providing", "load-bearing", "framework", "covering", "framework", "strong", "flexible", "sheet-like", "material", "flexible", "metal", "reinforcing", "material", "applying", "cement", "mortar", "thereover", "cover", "reinforcing", "material", "framework", "made", "easily", "fabricated", "wooden", "rib"], "section": [4], "subsection": [84], "group": [397], "subgroup": [5040, 5033], "labels": [4, 93, 534, 5838, 5831]}
60 | {"id": "3933034", "title": ["hydrostatic", "stress", "gauge", "system"], "abstract": ["hydrostatic", "stress", "gage", "including", "sphere", "incompressible", "fluid", "positioned", "inside", "drum", "structure", "pair", "interconnected", "flat", "spiral", "coil", "forming", "self-resonant", "tuned", "circuit", "change", "pressure", "sphere", "variation", "distance", "coil", "changing", "resonant", "frequency", "measured", "device", "stress", "measured"], "section": [6], "subsection": [106], "group": [526], "subgroup": [6681], "labels": [6, 115, 663, 7479]}
61 | {"id": "3933066", "title": ["dual", "speed", "stacker", "paddle", "assembly"], "abstract": ["dual", "speed", "stacker", "assembly", "connection", "high", "speed", "machine", "slicing", "stacking", "weighing", "food", "product", "stacker", "ha", "mating", "paddle", "move", "slow", "speed", "collection", "required", "number", "slice", "stack", "rotated", "high", "speed", "drop", "stack", "slice", "conveyor", "bring", "blade", "position", "receive", "collection", "slice", "paddle", "rotated", "low", "inertia", "motor", "connected", "timing", "belt", "bevel", "gear", "arrangement"], "section": [8, 1], "subsection": [127, 29], "group": [660, 138], "subgroup": [1844, 8363], "labels": [8, 1, 136, 38, 797, 275, 2642, 9161]}
62 | {"id": "3933190", "title": ["method", "fabricating", "shell", "mold", "production", "superalloy", "casting"], "abstract": ["method", "producing", "shell", "mold", "investment", "casting", "subsequent", "directional", "solidification", "nickel", "cobalt", "based", "superalloys", "shell", "mold", "composed", "high", "purity", "alumina", "characterized", "presence", "silica", "trace", "form", "shell", "mold", "present", "invention", "nonreactive", "molten", "nickel", "cobalt", "base", "superalloys", "exposure", "hour", "additionally", "alumina", "shell", "mold", "present", "invention", "ha", "unique", "combination", "mechanical", "strength", "stability", "elevated", "temperature"], "section": [1], "subsection": [25], "group": [115], "subgroup": [1521, 1509], "labels": [1, 34, 252, 2319, 2307]}
63 | {"id": "3933241", "title": ["package", "construction"], "abstract": ["package", "construction", "provided", "includes", "collapsible", "tee", "member", "supporting", "ball", "upright", "manner", "hit", "bat", "tee", "member", "erected", "bat", "ball", "held", "tee", "member", "form", "therewith", "self-contained", "package", "construction", "tee", "member", "held", "collapsed", "condition"], "section": [1], "subsection": [46], "group": [237], "subgroup": [3030, 3033], "labels": [1, 55, 374, 3828, 3831]}
64 | {"id": "3933294", "title": ["file", "folder", "rigid", "spine"], "abstract": ["one-piece", "file", "folder", "vertical", "lateral", "rotary", "similar", "file", "expandable", "pocket", "inside", "paper", "substantially", "rigid", "spine", "closed", "end", "folded", "edge", "folder", "indexing", "identifying", "paper", "folder", "file", "folder", "filed", "visible", "rigid", "spine", "vertically", "horizontally", "file"], "section": [1], "subsection": [38], "group": [180], "subgroup": [2319], "labels": [1, 47, 317, 3117]}
65 | {"id": "3933374", "title": ["tandem", "trailer", "system"], "abstract": ["intermediate", "semi-trailer", "unit", "towed", "highway", "tractor", "tow", "standard", "cargo", "semi-trailer", "standard", "trailer", "attached", "intermediate", "trailer", "unit", "wheel", "mounted", "portion", "chassis", "permanently", "extends", "rearwardly", "cargo", "container", "intermediate", "trailer", "unit", "wheel", "positioned", "ahead", "rearmost", "bogie", "intermediate", "trailer", "unit", "provided", "form", "temporarily", "horizontal", "platform", "wheel", "loading", "unloading", "cargo", "container"], "section": [1], "subsection": [41, 43], "group": [200, 219], "subgroup": [2698, 2504], "labels": [1, 50, 52, 337, 356, 3496, 3302]}
66 | {"id": "3933392", "title": ["wheel", "rim"], "abstract": ["safety", "wheel", "rim", "pneumatic", "tire", "ha", "removable", "band", "securable", "obstruct", "mouth", "receive", "bead", "tire", "fitting", "tire", "rim", "band", "locked", "radial", "expansion", "position", "tire", "bead", "bead", "accidentally", "enter", "event", "deflation", "tire", "travelling", "offset", "median", "plane", "rim", "spaced", "bead-retaining", "flange", "securing", "band", "shown", "inflation", "valve", "stem", "screw-threaded", "outer", "overlapping", "end", "band", "inflated", "band", "secured", "screw", "arranged", "tighten", "band", "circumferentially"], "section": [8, 1], "subsection": [127, 41], "group": [660, 189, 190], "subgroup": [2398, 2413, 8341, 2376], "labels": [8, 1, 136, 50, 797, 326, 327, 3196, 3211, 9139, 3174]}
67 | {"id": "3933404", "title": ["strain", "limiting", "mechanism"], "abstract": ["electrical", "connector", "assembly", "incorporating", "limiting", "cable", "tension", "predetermined", "precluding", "mechanical", "failure", "cable"], "section": [8, 7], "subsection": [120, 127], "group": [611, 659], "subgroup": [7610, 8264], "labels": [8, 7, 129, 136, 748, 796, 8408, 9062]}
68 | {"id": "3933440", "title": ["chemical", "reaction", "vessel"], "abstract": ["gas-tight", "chemical", "reaction", "vessel", "chemical", "analysis", "intractable", "material", "glass", "reaction", "vessel", "comprises", "body", "concentric", "chamber", "adapted", "reagent", "adapted", "receive", "sample", "cap", "turn", "adapted", "receive", "sample", "reaction", "vessel", "ha", "sealing", "member", "sealing", "chamber", "body", "cap", "securing", "sealing", "member", "body", "body", "sealing", "member", "sample", "cap", "made", "material", "desirably", "polytetrafluoroethylene", "chemically", "inert", "reagent"], "section": [6, 1], "subsection": [106, 15], "group": [528, 89], "subgroup": [1237, 6748], "labels": [6, 1, 115, 24, 665, 226, 2035, 7546]}
69 | {"id": "3933442", "title": ["laminated", "body"], "abstract": ["porous", "seal", "element", "usable", "blade", "tip", "seal", "turbomachine", "element", "labyrinth", "seal", "made", "large", "number", "strip", "disposed", "edgewise", "sealing", "face", "element", "extending", "direction", "relative", "movement", "seal", "element", "strip", "groove", "extending", "strip", "discharge", "cooling", "fluid", "air", "presence", "groove", "low", "density", "structure", "seal", "face", "seal", "element", "abraded", "rubbing", "contact", "metering", "coolant", "rear", "face", "seal", "element", "seal", "element", "fabricated", "etching", "sheet", "sheet", "defines", "number", "parallel", "strip", "joined", "weak", "tie", "groove", "extending", "strip", "stacking", "sheet", "bonding", "separating", "bonded", "structure", "weak", "tie", "stack", "strip", "defines", "seal", "element"], "section": [8, 1, 5], "subsection": [26, 125, 92, 127, 88], "group": [660, 656, 443, 120, 417, 125], "subgroup": [8354, 1611, 1689, 8097, 5681, 5285], "labels": [8, 1, 5, 35, 134, 101, 136, 97, 797, 793, 580, 257, 554, 262, 9152, 2409, 2487, 8895, 6479, 6083]}
70 | {"id": "3933462", "title": ["mixture", "substituted", "benzothiadiazinones", "benzonitriles", "herbicide"], "abstract": ["herbicide", "mixture", "compound", "formula", "denotes", "lower", "alkyl", "maximum", "carbon", "atom", "salt", "alkali", "metal", "alkaline", "earth", "metal", "ammonium", "hydroxyalkylammonium", "alkylammonium", "hydrazine", "salt", "salt", "sodium", "lithium", "potassium", "calcium", "iron", "methylammonium", "trimethylammonium", "ethylammonium", "diethanolammonium", "ethanolammonium", "dimethylamine", "dimethylethanolamine", "hydrazine", "phenylhydrazine", "compound", "formula", "denotes", "hydroxy", "radical", "denotes", "halogen", "denotes", "integer"], "section": [2, 0], "subsection": [0, 58], "group": [276, 10], "subgroup": [191, 3563, 186, 192, 190], "labels": [2, 0, 9, 67, 413, 147, 989, 4361, 984, 990, 988]}
71 | {"id": "3933569", "title": ["tool", "welding", "plastic", "film", "wrapping", "object"], "abstract": ["tool", "interconnecting", "welding", "web", "plastic", "film", "enclosing", "package", "good", "tool", "comprising", "section", "arranged", "pressed", "securely", "holding", "welding", "film", "web", "separate", "web", "welding", "tool", "comprises", "arranged", "tighten", "film", "good", "maintaining", "stretch", "film", "web", "extending", "welding", "point", "web", "supply", "roll", "tension-free", "condition", "welding", "operation", "proper", "ensure", "high-quality", "durable", "welding", "seam"], "section": [8, 1], "subsection": [32, 127, 46], "group": [660, 235, 155], "subgroup": [8342, 1960, 2946, 1961], "labels": [8, 1, 41, 136, 55, 797, 372, 292, 9140, 2758, 3744, 2759]}
72 | {"id": "3933721", "title": ["flame", "retardant", "plasticized", "composition"], "abstract": ["resinous", "polymer", "vinyl", "chloride", "plasticized", "-", "dibromoterephthalate", "ester"], "section": [2], "subsection": [59], "group": [289, 290], "subgroup": [3942, 3965], "labels": [2, 68, 426, 427, 4740, 4763]}
73 | {"id": "3933731", "title": ["plastic", "composition", "liberating", "reduced", "amount", "toxic", "gas"], "abstract": ["composite", "disclosed", "comprises", "specific", "metal-compound", "additive", "plastic", "material", "gypsum", "calcium", "sulfite", "composite", "improved", "emits", "substantially", "toxic", "gas", "sulfur", "dioxide", "hydrogen", "sulfide", "burned", "ash", "thereof", "contacted", "water"], "section": [2], "subsection": [59], "group": [289, 290], "subgroup": [3941, 3946], "labels": [2, 68, 426, 427, 4739, 4744]}
74 | {"id": "3933753", "title": ["alkenylaromatic", "polymer", "alpha", "-", "ketoalhydic", "group"], "abstract": ["alkenylaromatic", "polymer", "provided", "derived", "mol", "monomer", "general", "formula", "", "represents", "hydrogen", "atom", "methyl", "radical", "", "represents", "hydrogen", "atom", "methyl", "ethyl", "radical", "mol", "non-aromatic", "ethylenically", "unsaturated", "monomer", "optionally", "crosslinked", "mol", "relative", "monomer", "formula", "polyvinyl", "monomer", "alpha", "-", "ketoaldehyde", "group", "formula", "equ", "--", "--", "cho", "ii", "present", "aromatic", "ring", "polymer", "polymer", "find", "utility", "extracting", "sulphur", "nitrogen-containing", "compound", "solution", "urea", "solution", "resulting", "dialysis", "ultrafiltration", "human", "blood"], "section": [2, 0, 1], "subsection": [12, 15, 59], "group": [285, 88, 70], "subgroup": [3745, 3817, 873, 1201], "labels": [2, 0, 1, 21, 24, 68, 422, 225, 207, 4543, 4615, 1671, 1999]}
75 | {"id": "3933774", "title": ["modified", "polymer", "diisopropenylbenzene"], "abstract": ["modified", "copolymer", "vinyl", "aromatic", "compound", "diene", "hydrocarbon", "diisopropenylbenzene", "polymerized", "unit", "group", "general", "formula", "copolymer", "intermediate", "manufacture", "block", "copolymer", "graft", "copolymer", "suitable", "impact-resistant", "plastic", "polymeric", "antistatic", "agent"], "section": [2], "subsection": [59], "group": [285, 284], "subgroup": [3796, 3772, 3817, 3786, 3737, 3742], "labels": [2, 68, 422, 421, 4594, 4570, 4615, 4584, 4535, 4540]}
76 | {"id": "3933835", "title": ["aliphatically", "substituted", "aryl-chalcogeno-hydrocarbon", "derivative"], "abstract": ["compound", "formula", "equ", "--", "ph", "--", "--", "alk", "--", "adamantyl", "ph", "phenylene", "optionally", "substituted", "amino", "nitro", "lower", "alkyl", "lower", "alkoxy", "halogen", "trifluoromethyl", "oxy", "thio", "alk", "alkylene", "atom", "alkenylene", "atom", "free", "esterfied", "amidised", "carboxyl", "sulpho", "sulphonamido", "therapeutically", "acceptable", "salt", "thereof", "anti-allergic", "hypolipidaemic", "agent"], "section": [2], "subsection": [58], "group": [276], "subgroup": [3558, 3518, 3526, 3524, 3515], "labels": [2, 67, 413, 4356, 4316, 4324, 4322, 4313]}
77 | {"id": "3933860", "title": ["-", "n-acyl-n-arylamino", "lactones"], "abstract": ["-", "n-acyl-n-arylamino", "-", "gamma", "-", "lactones", "delta", "-", "lactones", "gamma", "-", "lactams", "delta", "-", "lactams", "fungicidal", "activity"], "section": [2], "subsection": [58], "group": [277], "subgroup": [3574, 3615], "labels": [2, 67, 414, 4372, 4413]}
78 | {"id": "3933890", "title": ["-", "trihydroxyprostanoic", "acid"], "abstract": ["cyclopentane", "derivative", "formula", "symbol", "represent", "hydrogen", "alkyl", "represents", "symbol", "combination", "represents", "methylene", "represents", "hydroxymethylene", "represents", "ethylene", "represents", "hydroxymethylene", "represents", "carbonyl", "represents", "methylene", "represents", "ethylene", "trans-vinylene", "represents", "hydroxymethylene", "carbonyl", "represents", "hydroxymethylene", "represents", "methylene", "represents", "ethylene", "trans-vinylene", "represents", "hydroxymethylene", "compound", "possessing", "pharmacological", "property", "production", "hypotension", "bronchodilatation", "inhibition", "gastric", "acid", "secretion", "stimulation", "uterine", "contraction"], "section": [2], "subsection": [58], "group": [276, 278], "subgroup": [3676, 3543], "labels": [2, 67, 413, 415, 4474, 4341]}
79 | {"id": "3933894", "title": ["n-arylsulfonyl", "carbamate"], "abstract": ["present", "invention", "relates", "amine", "alkali", "metal", "alkaline", "earth", "metal", "salt", "n-benzene", "sulfonyl", "carbamic", "acid", "ester", "lower", "alkenyl", "n-benzene", "sulfonyl", "carbamate", "compound", "herbicide"], "section": [2, 0], "subsection": [0, 58], "group": [276, 10], "subgroup": [3520, 192], "labels": [2, 0, 9, 67, 413, 147, 4318, 990]}
80 | {"id": "3933899", "title": ["pge", "-", "oxa-phenylene", "compound"], "abstract": ["invention", "group", "pge", "-", "type", "oxa-phenylene", "compound", "process", "making", "compound", "variety", "pharmacological", "purpose", "including", "anti-ulcer", "inhibition", "platelet", "aggregation", "increase", "nasal", "patency", "labor", "inducement", "term", "wound", "healing"], "section": [2], "subsection": [58], "group": [276, 277], "subgroup": [3537, 3558, 3534, 3543, 3555, 3620, 3535], "labels": [2, 67, 413, 414, 4335, 4356, 4332, 4341, 4353, 4418, 4333]}
81 | {"id": "3933925", "title": ["hydrolysis", "toluene", "diamine", "produce", "methyl", "resorcinol"], "abstract": ["methyl", "resorcinol", "produced", "hydrolysis", "toluene", "diamine", "aqueous", "excess", "ammonium", "bisulfate", "reactant", "contacted", "elevated", "temperature", "period", "time", "sufficient", "hydrolyze", "toluene", "diamine", "methyl", "resorcinol", "methyl", "resorcinol", "produced", "separated", "reaction", "mixture", "ammonium", "sulfate", "regenerated", "ammonium", "bisulfate", "removing", "water", "thermally", "decomposing", "by-product", "ammonium", "sulfate", "elevated", "temperature"], "section": [2, 0], "subsection": [58, 12, 52], "group": [276, 257, 73, 68], "subgroup": [916, 3537, 852, 3262, 917, 919, 3535], "labels": [2, 0, 67, 21, 61, 413, 394, 210, 205, 1714, 4335, 1650, 4060, 1715, 1717, 4333]}
82 | {"id": "3933929", "title": ["process", "purification", "p-nitrophenol"], "abstract": ["purification", "process", "p-nitrophenol", "obtained", "nitration", "phenol", "separation", "crude", "nitrophenols", "steam", "distillation", "remove", "o-nitrophenol", "cooling", "broth", "obtained", "sodium", "bisulphite", "ph", "deposit", "crystal", "p-nitrophenol", "improvement", "consists", "stirring", "crystal", "water", "degree", "-", "degree", "give", "mixture", "excess", "p-nitrophenol", "solubility", "temperature", "separating", "upper", "layer", "p-nitrophenol", "water", "obtained", "cooling", "degree", "-", "degree", "separating", "layer", "water", "p-nitrophenol", "cooling", "degree", "collecting", "crystal", "deposited"], "section": [2], "subsection": [58], "group": [276], "subgroup": [3458, 3456], "labels": [2, 67, 413, 4256, 4254]}
83 | {"id": "3933936", "title": ["rapid", "setting", "adhesive", "compound"], "abstract": ["mixture", "curable", "phenolic", "resin", "organic", "diaziridine", "strong", "adhesive", "curable", "short", "time", "moderate", "temperature", "make", "strong", "bond", "wood", "metal", "plastic"], "section": [2, 8], "subsection": [127, 60, 59], "group": [660, 297, 289, 290], "subgroup": [3942, 8354, 3981, 3989, 4108], "labels": [2, 8, 136, 69, 68, 797, 434, 426, 427, 4740, 9152, 4779, 4787, 4906]}
84 | {"id": "3933960", "title": ["method", "extruding", "fiber", "reinforced", "plural", "layered", "plastic", "tube"], "abstract": ["specification", "discloses", "method", "making", "reinforced", "tube", "comprising", "continuously", "extruding", "viscous", "material", "reinforcing", "fibre", "concentric", "set", "discrete", "passage", "producing", "laminar", "flow", "passage", "causing", "material", "accelerate", "entry", "passage", "preventing", "deceleration", "thereof", "passage", "fibre", "orientate", "material", "lengthwise", "passage", "bringing", "extruded", "material", "respective", "passage", "form", "layer", "material", "fibre", "lying", "helix", "opposite", "hand", "simultaneously", "hauling-off", "extruded", "layer", "regulated", "rate", "control", "angle", "helix", "fibre", "lie", "allowing", "layer", "consolidate", "single", "tube"], "section": [1], "subsection": [32], "group": [157, 155], "subgroup": [1950, 2016, 2043], "labels": [1, 41, 294, 292, 2748, 2814, 2841]}
85 | {"id": "3934010", "title": ["insecticidal", "composition", "method", "utilizing", "phosphoric", "acid", "phenylsulphonamide", "ester"], "abstract": ["insecticidal", "acaricidal", "composition", "method", "combating", "insect", "acaricide", "provided", "active", "insecticidal", "ingredient", "phosphoric", "acid", "phenylsulphonamide", "ester", "formula", "represents", "oxygen", "sulphur", "represents", "alkyl", "carbon", "atom", "represents", "alkyl", "carbon", "atom", "alkenyl", "alkinyl", "carbon", "atom", "alkoxyalkyl", "carbon", "atom", "moiety", "alkylthioalkyl", "carbon", "atom", "moiety", "haloalkyl", "carbon", "atom", "represents", "hydrogen", "alkyl", "carbon", "atom", "alkenyl", "carbon", "atom", "represents", "hydrogen", "represents", "halogen", "alkyl", "carbon", "atom", "represents", "alkyl", "carbon", "atom", "represents", "hydrogen", "halogen", "alkyl", "carbon", "atom"], "section": [2, 8], "subsection": [58, 127], "group": [278, 659], "subgroup": [3676, 8253], "labels": [2, 8, 67, 136, 415, 796, 4474, 9051]}
86 | {"id": "3934042", "title": ["method", "apparatus", "irradiative", "treatment", "beverage"], "abstract": ["method", "apparatus", "irradiative", "treatment", "beverage", "milk", "beer", "wine", "fruit", "juice", "sterilize", "pasteurize", "beverage", "pumped", "system", "contact", "air", "entering", "beverage", "heat", "exchanged", "exiting", "beverage", "subjected", "ultra-violet", "irradiation", "heat", "exchange", "exiting", "beverage", "case", "milk", "homogenization", "place", "heat", "exchange", "returning", "beverage", "heating", "beverage", "infra-red", "irradiation", "elevated", "temperature", "infra-red", "heating", "beverage", "held", "elevated", "temperature", "insulated", "conduit", "return", "heat", "exchange", "entering", "beverage", "irradiation", "beverage", "performed", "passing", "beverage", "transparent", "conduit", "fused", "quartz", "improved", "taste", "shortened", "cycle", "time", "lower", "treatment", "temperature", "prolonged", "shelf", "life", "beverage", "obtained"], "section": [0], "subsection": [3], "group": [17, 23], "subgroup": [252, 286], "labels": [0, 12, 154, 160, 1050, 1084]}
87 | {"id": "3934109", "title": ["latch", "pivot", "latch", "needle"], "abstract": ["knitting", "machine", "latch", "needle", "latch", "pivot", "formed", "displacing", "portion", "wall", "slot", "displaced", "portion", "extend", "pivot", "hole", "latch", "displaced", "portion", "wall", "fused", "high", "energy", "heat", "source", "emitting", "sufficient", "energy", "drill", "hole", "displaced", "portion", "melt", "displaced", "portion"], "section": [3, 1], "subsection": [75, 26], "group": [124, 356], "subgroup": [1678, 4702], "labels": [3, 1, 84, 35, 261, 493, 2476, 5500]}
88 | {"id": "3934190", "title": ["signal", "compressor", "expanders"], "abstract": ["compressor", "expanders", "effecting", "dynamic", "range", "modification", "constructed", "connecting", "reactive", "network", "series", "voltage", "dividing", "action", "parallel", "current", "dividing", "action", "output", "signal", "derived", "voltage", "current", "network", "includes", "series", "parallel", "variable", "resistance", "variable", "resistance", "controlled", "dependence", "voltage", "thereacross", "sense", "required", "achieve", "compression", "expansion", "case", "resistance", "change", "shift", "turnover", "frequency", "circuit", "exclude", "large", "amplitude", "component", "amplitude", "increase", "reduction", "applied", "low", "level", "component", "restricted", "frequency", "band", "create", "compressor", "expander", "action"], "section": [7], "subsection": [123, 122], "group": [627, 633], "subgroup": [7800, 7855], "labels": [7, 132, 131, 764, 770, 8598, 8653]}
89 | {"id": "3934194", "title": ["solid", "state", "flyback", "transformer", "checker"], "abstract": ["instrument", "providing", "information", "condition", "horizontal", "output", "transformer", "television", "set"], "section": [6, 7], "subsection": [106, 123], "group": [531, 639], "subgroup": [7938, 6782], "labels": [6, 7, 115, 132, 668, 776, 8736, 7580]}
90 | {"id": "3934245", "title": ["alphanumeric", "display", "computer-linked", "typewriter", "console"], "abstract": ["present", "invention", "relates", "alphanumeric", "display", "unit", "improving", "identification", "quantity", "selected", "key", "button", "computer-linked", "typewriter", "console", "usage", "light-emitting", "identification", "indicia", "attached", "adjacent", "selected", "key", "button", "light-emitting", "display", "matrix", "format", "consisting", "set", "fiber", "optic", "member", "arranged", "orthogonal", "row", "column", "encoding", "stencil", "tab", "inserted", "path", "light", "generated", "incandescent", "bulb", "operational", "alignment", "set", "fiber", "optic", "member"], "section": [6, 7], "subsection": [111, 122], "group": [630, 558, 632], "subgroup": [7058, 7830, 7849], "labels": [6, 7, 120, 131, 767, 695, 769, 7856, 8628, 8647]}
91 | {"id": "3934259", "title": ["all-sky", "camera", "apparatus", "time-resolved", "lightning", "photography"], "abstract": ["pair", "all-sky", "camera", "equipped", "degree", "fisheye-nikkor", "disposed", "lens", "pointing", "vertically", "camera", "rotated", "axis", "passing", "zenith", "maintained", "stationary", "disposition", "desired", "counter-rotated", "relative", "rotational", "movement", "film", "camera", "measure", "displacement", "image", "formed", "respective", "film", "angular", "deviation", "produced", "displacement", "measured", "determine", "time", "development", "lightning", "discharge"], "section": [6], "subsection": [108, 107], "group": [539, 536], "subgroup": [6877, 6820], "labels": [6, 117, 116, 676, 673, 7675, 7618]}
92 | {"id": "3934302", "title": ["portable", "multi-purpose", "rechargeable", "cigarette", "lighter"], "abstract": ["multi-purpose", "cigarette", "lighter", "rechargeable", "ni-cd", "battery", "comprises", "heated", "coil", "cigarette", "lighter", "general", "smoking", "purpose", "incorporates", "built-in", "vacuum", "cleaner", "electric", "lamp"], "section": [8, 0, 5], "subsection": [98, 127, 11], "group": [60, 484, 659], "subgroup": [694, 8135, 6214], "labels": [8, 0, 5, 107, 136, 20, 197, 621, 796, 1492, 8933, 7012]}
93 | {"id": "3934427", "title": ["dispensing", "machine"], "abstract": ["machine", "dispenses", "ready-made", "milk", "shake", "freezing", "chamber", "ha", "dispensing", "valve", "porting", "valve", "element", "arranged", "flow", "semi-frozen", "comestible", "flavoring", "material", "occurs", "simultaneously", "beater", "mix", "dispensing", "conduit", "flavoring", "material", "ha", "unique", "coupling", "valve", "block", "quick-release", "coupling", "intermediate", "end", "release", "manually", "operable", "sampling", "valve", "connected", "conduit", "selectively", "draw", "sample", "flavoring", "pump", "provided", "sucking", "liquid", "comestible", "gas", "preselected", "proportion", "delivering", "bottom", "freezing", "chamber", "vent", "located", "valve", "block", "vent", "air", "freezing", "chamber", "start-up", "machine", "vent", "ha", "inlet", "located", "level", "liquid", "gas", "volume", "chamber", "equal", "respective", "proportion", "pumped"], "section": [0], "subsection": [3], "group": [20], "subgroup": [265], "labels": [0, 12, 157, 1063]}
94 | {"id": "3934473", "title": ["fluid", "flow", "meter", "counter", "rotating", "turbine", "impeller"], "abstract": ["fluid", "flow", "meter", "independantly", "counter", "rotating", "turbine", "impeller", "disclosed", "fluid", "characteristic", "upstream", "flow", "disturbance", "minimal", "variation", "volume", "flow", "rate", "measurement", "meter", "result", "fluidynamic", "interaction", "impeller", "angular", "velocity", "impeller", "sensed", "conventional", "manner", "velocity", "signal", "added", "total", "volume", "thruput", "rate", "flow", "optionally", "compared", "occurance", "mechanical", "electronic", "degradation"], "section": [6], "subsection": [106], "group": [521], "subgroup": [6624], "labels": [6, 115, 658, 7422]}
95 | {"id": "3934489", "title": ["rear", "view", "mirror", "vehicle"], "abstract": ["remote", "control", "exterior", "rear", "view", "mirror", "vehicle", "mirror", "adjusted", "stationary", "housing", "rotation", "single", "control", "member", "coupled", "mirror", "head", "adjustment", "head", "plane"], "section": [8, 1], "subsection": [127, 41], "group": [202, 660], "subgroup": [2517, 8361], "labels": [8, 1, 136, 50, 339, 797, 3315, 9159]}
96 | {"id": "3934629", "title": ["screw", "driver"], "abstract": ["screw", "driver", "comprising", "torque", "responsive", "clutch", "determining", "final", "tightening", "torque", "output", "spindle", "connected", "forwardly", "extending", "screw", "bit", "spindle", "axially", "displaceable", "forward", "rest", "position", "intermediate", "tightening", "position", "rear", "position", "dog", "spindle", "engage", "dog", "driving", "part", "clutch", "inactivation", "clutch", "spindle", "spring-biased", "normal", "tightening", "rest", "position", "permitted", "reoccupy", "normal", "tightening", "position", "final", "tightening", "sequence", "stud", "element", "screw", "driver", "housing", "arranged", "abut", "screw", "landing", "surface", "automatically", "preventing", "screw", "driver", "housing", "clutch", "spindle", "screw", "bit", "final", "position", "ensuring", "reactivation", "clutch"], "section": [1], "subsection": [28], "group": [130], "subgroup": [1769], "labels": [1, 37, 267, 2567]}
97 | {"id": "3934680", "title": ["safety", "latch", "automotive", "hoist"], "abstract": ["disclosed", "safety", "latch", "operable", "automotive", "hoist", "rack", "gear", "connected", "movable", "hoist", "piston", "piston", "carry", "hoist", "superstructure", "disposed", "cylinder", "piston", "telescope", "raise", "lower", "hoist", "response", "hydraulic", "pneumatic", "pressure", "acting", "thereon", "pinion", "gear", "mounted", "latch", "engagement", "rack", "slip", "clutch", "mechanism", "drive", "operating", "lever", "connected", "latch", "dog", "securely", "engaging", "rack", "prevent", "movement", "rack", "piston", "downwardly", "respect", "cylinder", "latch", "operating", "mechanism", "operated", "manually", "completely", "disengage", "latch", "dog", "rack", "hoist", "lowered", "slip", "clutch", "mechanism", "provided", "lost", "motion", "coupling", "member", "latch", "operating", "member", "latch", "dog", "latch", "dog", "remains", "engagement", "rack", "system", "raising", "hoist", "inoperative"], "section": [1], "subsection": [47], "group": [244], "subgroup": [3186], "labels": [1, 56, 381, 3984]}
98 | {"id": "3934690", "title": ["magnetic", "spring", "clutch"], "abstract": ["helical", "clutch", "spring", "ha", "end", "thereof", "fixed", "continuously", "rotating", "input", "member", "clutch", "turn", "spring", "partially", "envelop", "floating", "magnetic", "ring", "secured", "rotatable", "output", "member", "clutch", "input", "output", "member", "made", "nonmagnetic", "material", "clutch", "coil", "energized", "magnetic", "flux", "path", "passing", "clutch", "spring", "magnetic", "ring", "clutch", "spring", "tighten", "magnetic", "ring", "provide", "driving", "connection", "input", "output", "member"], "section": [5], "subsection": [94], "group": [449], "subgroup": [5808, 5809, 5821, 5791], "labels": [5, 103, 586, 6606, 6607, 6619, 6589]}
99 | {"id": "3934898", "title": ["passenger", "restraint", "device"], "abstract": ["passenger", "restraint", "device", "automotive", "vehicle", "end", "passenger", "restraint", "arm", "pivoted", "member", "slidably", "displaceable", "door", "motion", "transmitting", "strap", "interconnects", "restraint", "arm", "member", "slidable", "door", "member", "carry", "latching", "locking", "element", "automatically", "connected", "operative", "relation", "arm", "ha", "swung", "door", "passenger", "restraint", "position", "latch", "manually", "releasable", "passenger"], "section": [1], "subsection": [41], "group": [202], "subgroup": [2530], "labels": [1, 50, 339, 3328]}
100 | {"id": "3934974", "title": ["solution", "ethylauramine", "hydrochloride", "thiodiglycol"], "abstract": ["solution", "ethylauramine", "hydrochloride", "thiodiglycol", "process", "preparation", "solution", "dry", "ethylauramine", "hydrochloride", "water", "crystallisation", "dissolved", "thiodiglycol", "process", "colouration", "colouring", "agent", "solution"], "section": [2], "subsection": [60], "group": [291], "subgroup": [4030], "labels": [2, 69, 428, 4828]}
101 | {"id": "3934991", "title": ["nitric", "oxide", "analysis", "scrubber", "therefor"], "abstract": ["method", "analyzing", "nitric", "oxide", "gas", "stream", "nitrogen", "dioxide", "scrubber", "apparatus", "selectively", "removing", "nitrogen", "dioxide", "gas", "stream", "nitric", "oxide", "scrubber", "apparatus", "comprises", "container", "inlet", "port", "gas", "stream", "outlet", "port", "scrubber", "material", "container", "includes", "silver", "carbonate", "scrubber", "ha", "efficiency", "capacity", "part", "million", "hour", "nitrogen", "dioxide", "removal", "gram", "silver", "carbonate", "method", "involves", "passing", "gas", "stream", "scrubber", "material", "silver", "carbonate", "remove", "nitrogen", "dioxide", "gas", "stream", "passing", "nitric", "oxide", "unattenuated", "conveying", "gas", "stream", "scrubber", "material", "analyzer", "nitric", "oxide", "gas", "stream", "analyzed", "analyzer", "determine", "nitric", "oxide", "concentration"], "section": [6, 8], "subsection": [106, 127], "group": [660, 528], "subgroup": [8355, 6747, 6706], "labels": [6, 8, 115, 136, 797, 665, 9153, 7545, 7504]}
102 | {"id": "3935020", "title": ["faraday", "rotation", "glass"], "abstract": ["faraday", "rotation", "glass", "exhibiting", "high", "verdet", "constant", "low", "susceptability", "devitrification", "formed", "introducing", "high", "quantity", "rare", "earth", "oxide", "borate", "glass", "base", "glass", "melted", "standard", "environmental", "condition", "made", "large", "scale"], "section": [2], "subsection": [54], "group": [264], "subgroup": [3367, 3368], "labels": [2, 63, 401, 4165, 4166]}
103 | {"id": "3935052", "title": ["acid", "engraving", "machine", "device"], "abstract": ["acid", "engraving", "device", "reservoir", "acid", "valve", "meter", "acid", "flow", "reservoir", "ball", "point", "etching", "pen", "apply", "acid", "work", "surface"], "section": [2], "subsection": [68], "group": [336], "subgroup": [4563], "labels": [2, 77, 473, 5361]}
104 | {"id": "3935076", "title": ["stage", "separation", "system"], "abstract": ["invention", "improvement", "hot", "water", "process", "recovering", "bitumen", "tar", "sand", "aqueous", "slurry", "tar", "sand", "introduced", "vessel", "termed", "sand", "separation", "cell", "body", "hot", "water", "coarse", "sand", "settle", "discharged", "tailing", "top", "product", "comprising", "bitumen", "water", "fine", "sand", "transferred", "vessel", "termed", "froth", "formation", "cell", "body", "hot", "water", "cell", "bitumen", "form", "froth", "recovered", "fine", "solid", "water", "recycled", "lower", "end", "sand", "separation", "cell", "coarse", "sand", "ha", "previously", "removed", "sand", "separation", "cell", "good", "distribution", "feed", "cross-sectional", "area", "froth", "formation", "cell", "achieved", "lead", "good", "recovery", "froth", "quality", "recycling", "fine", "froth", "formation", "cell", "vicinity", "tailing", "outlet", "sand", "separation", "cell", "fine", "eliminated", "system", "middling", "dragstream", "required", "prior", "art"], "section": [2, 1], "subsection": [61, 15], "group": [300, 302, 86], "subgroup": [1112, 4184, 4188], "labels": [2, 1, 70, 24, 437, 439, 223, 1910, 4982, 4986]}
105 | {"id": "3935170", "title": ["trivalent", "antimony", "catalyst"], "abstract": ["antimony", "catalyst", "polyester", "condensation", "reaction", "comprising", "trivalent", "antimony", "valence", "occupied", "dianion", "radical", "-", "diol", "anion", "radical", "organic", "carboxylic", "acid", "molar", "ratio", "antimony", "dianion", "radical", "-", "diol", "anion", "radical", "organic", "carboxylic", "acid", "antimony", "compound", "prepared", "reacting", "mixture", "-", "diol", "trivalent", "antimony", "reactant", "represented", "formula", "sb", "antimony", "anion", "radical", "organic", "carboxylic", "acid", "selected", "group", "consisting", "anion", "alcohol", "anion", "organic", "carboxylic", "acid", "mixture", "thereof"], "section": [2, 8, 1], "subsection": [125, 58, 15, 59], "group": [286, 278, 88, 655], "subgroup": [1213, 3676, 8088, 1218, 3850], "labels": [2, 8, 1, 134, 67, 24, 68, 423, 415, 225, 792, 2011, 4474, 8886, 2016, 4648]}
106 | {"id": "3935292", "title": ["cast", "polymerization", "process", "improved", "mold", "release"], "abstract": ["process", "producing", "cast", "polymer", "carried", "mold", "treated", "step", "coating", "glass", "surface", "mold", "polysiloxane", "heating", "coated", "glass", "surface", "temperature", "range", "degree", "temperature", "coated", "polysiloxane", "hardened", "wiping", "coated", "surface", "baked", "polysiloxane", "easy", "mold", "release", "attained", "repeated", "mold"], "section": [1], "subsection": [32], "group": [158, 155], "subgroup": [1942, 2060], "labels": [1, 41, 295, 292, 2740, 2858]}
107 | {"id": "3935306", "title": ["toothpaste", "formulation"], "abstract": ["toothpaste", "formulation", "dispersed", "plurality", "agglomerated", "particle", "dental", "polishing", "agent", "visible", "palpable", "substantially", "insoluble", "toothpaste", "disclosed", "agglomerate", "comprise", "individually", "impalpable", "particle", "water", "insoluble", "dental", "polishing", "agent", "include", "agglomerating", "agent", "reduced", "smaller", "sized", "particle", "dental", "polishing", "agent", "subjected", "mild", "mechanical", "agitation", "toothbrushing", "agglomerate", "suited", "incorporation", "transparent", "gel", "dental", "vehicle", "provide", "special", "effect", "supplemental", "cleaning", "polishing", "characteristic", "adversely", "affecting", "visual", "clarity", "finished", "toothpaste"], "section": [0], "subsection": [12], "group": [73, 68], "subgroup": [913, 852], "labels": [0, 21, 210, 205, 1711, 1650]}
108 | {"id": "3935313", "title": ["pharmaceutical", "composition", "-", "-", "process", "treatment", "hypertension", "therewith"], "abstract": ["invention", "relates", "process", "treatment", "hypertension", "disorder", "derived", "therefrom", "comprising", "administering", "patient", "suffering", "hypertension", "therapeutically", "effective", "amount", "compound", "selected", "-", "-", "acid", "addition", "salt", "therapeutically", "acceptable", "acid"], "section": [2], "subsection": [58], "group": [277], "subgroup": [3577], "labels": [2, 67, 414, 4375]}
109 | {"id": "3935316", "title": ["method", "formulation"], "abstract": ["method", "controlling", "parasitic", "worm", "animal", "method", "employ", "chemical", "compound", "chloride", "formulation", "compound", "diluent", "carrier", "adjuvant", "discussed", "exemplified", "synthesis", "compound"], "section": [2], "subsection": [58], "group": [276], "subgroup": [3486], "labels": [2, 67, 413, 4284]}
110 | {"id": "3935322", "title": ["chip", "separating", "fried", "ribbon"], "abstract": ["method", "apparatus", "preparing", "chip-type", "snack", "disclosed", "dough", "prepared", "sheeted", "elongated", "shaped", "ribbon", "connected", "dough", "piece", "cut", "dough", "sheet", "ribbon", "passed", "deep", "fat", "fryer", "severed", "individual", "chip"], "section": [8, 0], "subsection": [3, 127, 11, 1], "group": [58, 11, 23, 659], "subgroup": [205, 8255, 279, 662, 290], "labels": [8, 0, 12, 136, 20, 10, 195, 148, 160, 796, 1003, 9053, 1077, 1460, 1088]}
111 | {"id": "3935358", "title": ["process", "preparing", "hollow", "rib-reinforced", "laminated", "structure"], "abstract": ["invention", "process", "preparing", "hollow", "rib-reinforced", "laminated", "article", "placing", "sheet", "opposing", "mold", "platen", "sheet", "aligned", "sheet", "surface", "oppose", "sheet", "thermoplastic", "material", "heated", "thermoforming", "temperature", "sheet", "provided", "groove", "integral", "projection", "form", "fluid", "passageway", "mold", "platen", "provided", "mold", "caivty", "form", "shaped", "article", "rib", "closing", "mold", "platen", "contact", "sheet", "introducing", "fluid", "fluid", "passageway", "distend", "thermoplastic", "sheet", "mold", "cavity", "forming", "shaped", "article", "rib", "sheet", "maintain", "contact", "nondistended", "area"], "section": [8, 1], "subsection": [32, 127, 35], "group": [660, 155, 164], "subgroup": [8354, 2192, 2188, 2136, 1951, 1936, 2140, 2190], "labels": [8, 1, 41, 136, 44, 797, 292, 301, 9152, 2990, 2986, 2934, 2749, 2734, 2938, 2988]}
112 | {"id": "3935384", "title": ["network", "generating", "crt", "control", "signal", "enhancing", "edge", "television", "image"], "abstract": ["low", "cost", "network", "television", "receiver", "receiving", "video", "signal", "generating", "therefrom", "control", "signal", "modulating", "scan", "velocity", "crt", "electron", "beam", "delayed", "video", "signal", "intensity", "modulating", "crt", "electron", "beam", "picture", "information", "network", "delay", "line", "generating", "control", "signal", "delayed", "video", "signal", "video", "signal", "applied", "impedance", "delay", "coupled", "reflecting", "termination", "impedance", "impedance", "substantially", "equal", "characteristic", "impedance", "delay", "output", "terminal", "control", "signal", "included", "point", "impedance", "delay", "delayed", "video", "signal", "generated", "responsive", "signal", "received", "reflecting", "termination", "delay", "network", "generate", "preshoot", "overshoot", "peaking", "component", "peaking", "delayed", "video", "signal"], "section": [7], "subsection": [123], "group": [639], "subgroup": [7949], "labels": [7, 132, 776, 8747]}
113 | {"id": "3935494", "title": ["single", "substrate", "plasma", "discharge", "cell"], "abstract": ["gaseous", "display", "device", "memory", "disclosed", "requires", "single", "dielectric", "substrate", "layer", "orthogonal", "conductor", "laid", "thereon", "layer", "separated", "dielectric", "layer", "substrate", "layer", "thereon", "enclosed", "gaseous", "environment", "conductor", "brought", "envelope", "facilitate", "application", "signal", "dielectric", "barrier", "conveniently", "established", "substrate", "control", "shape", "individual", "discharge", "prevent", "crosstalk"], "section": [7], "subsection": [120], "group": [605], "subgroup": [7502], "labels": [7, 129, 742, 8300]}
114 | {"id": "3935547", "title": ["high", "pressure", "gas", "laser", "uniform", "field", "electrode", "configuration", "irradiation", "corona", "discharge"], "abstract": ["molecular", "gas", "laser", "capable", "operating", "atmospheric", "pressure", "electrical", "energy", "coupled", "active", "molecular", "gas", "medium", "comprising", "molecule", "vibrational", "rotational", "energy", "level", "electric", "field", "transverse", "lasing", "axis", "applying", "impulse", "voltage", "electrode", "configuration", "high", "current", "glow", "discharge", "created", "pulse", "discharge", "place", "electrode", "parallel", "planar", "surface", "facing", "lateral", "edge", "face", "suitably", "profiled", "avoid", "field", "concentration", "provide", "diffused", "glow", "discharge", "uniform", "electric", "field", "transverse", "lasing", "axis", "initiatory", "electron", "required", "produce", "high", "current", "diffused", "glow", "provided", "generating", "intense", "burst", "corona", "gap", "spacer", "member", "high", "dielectric", "constant", "interposed", "electrode", "specifically", "voltage", "pulse", "applied", "gap", "spacer", "element", "high", "field", "appears", "interface", "generates", "intense", "burst", "corona", "ultraviolet", "irradiation", "cathode", "resulting", "emission", "electron"], "section": [7], "subsection": [120], "group": [612], "subgroup": [7634], "labels": [7, 129, 749, 8432]}
115 | {"id": "3935551", "title": ["filter", "arrangement", "converter", "circuit"], "abstract": ["improved", "filtering", "arrangement", "converter", "circuit", "highpass", "filter", "tuned", "cutoff", "frequency", "le", "lowest", "harmonic", "filtered", "provided", "customary", "plurality", "filter", "tuned", "individual", "harmonic", "current", "pulse", "converter", "circuit", "highpass", "filter", "tuned", "filter", "harmonic", "starting", "harmonic", "ha", "resonant", "frequency", "harmonic"], "section": [7], "subsection": [121], "group": [619], "subgroup": [7716], "labels": [7, 130, 756, 8514]}
116 | {"id": "3935607", "title": ["inflatable", "boat"], "abstract": ["inflatable", "boat", "comprising", "inflatable", "tube", "outer", "tube", "tube", "outer", "tube", "fabricated", "flat", "sheet", "stock", "method", "involving", "stitching", "bottom", "seam", "outer", "tube", "outer", "tube", "place", "inflated", "tube"], "section": [1], "subsection": [44], "group": [225], "subgroup": [2802], "labels": [1, 53, 362, 3600]}
117 | {"id": "3935636", "title": ["method", "making", "pressure", "transducer"], "abstract": ["low", "cost", "pressure", "transducer", "manufactured", "assembled", "automatic", "semiautomatic", "production", "line", "technique", "transducer", "includes", "pressure", "fitting", "diaphragm", "strain", "gage", "comprising", "bridge", "circuit", "tab", "lead", "bridge", "circuit", "termination", "board", "contained", "case", "end", "case", "swaged", "flange", "fitting", "order", "sealingly", "clamp", "diaphragm", "shoulder", "case", "fitting", "flange", "bridge", "portion", "strain", "gage", "adhesively", "secured", "diaphragm", "pressure", "distribution", "member", "bridge", "pressure", "applied", "assembly", "heat-curing", "adhesive", "secures", "gage", "diaphragm", "termination", "board", "ha", "terminal", "number", "conductive", "strip", "positioned", "case", "conductive", "strip", "pressed", "electrical", "contact", "lead", "tab", "gage", "end", "case", "swaged", "termination", "board", "lock", "place"], "section": [6, 8, 7], "subsection": [106, 120, 127], "group": [660, 526, 601], "subgroup": [7381, 8347, 6694, 8342], "labels": [6, 8, 7, 115, 129, 136, 797, 663, 738, 8179, 9145, 7492, 9140]}
118 | {"id": "3935745", "title": ["pore", "water", "pressure", "measuring", "device"], "abstract": ["pore", "water", "pressure", "metering", "device", "incorporating", "pressure", "meter", "force", "meter", "influenced", "pressure", "meter", "device", "includes", "power", "member", "arranged", "control", "pressure", "exerted", "pressure", "meter", "force", "meter", "applying", "overriding", "force", "pressure", "meter", "stop", "influence", "force", "meter", "removing", "overriding", "force", "pressure", "meter", "influence", "force", "meter", "resumed"], "section": [6, 4], "subsection": [106, 87], "group": [526, 534, 411], "subgroup": [6816, 6694, 5234], "labels": [6, 4, 115, 96, 663, 671, 548, 7614, 7492, 6032]}
119 | {"id": "3935861", "title": ["protective", "breathing", "mask", "compressed", "air", "supply", "breathing"], "abstract": ["protective", "breathing", "mask", "comprises", "face", "encircling", "mask", "body", "connected", "compressed", "gas", "line", "supply", "compressed", "gas", "thereto", "mask", "includes", "encircling", "rim", "portion", "defines", "air", "seal", "cavity", "provided", "passage", "body", "separated", "body", "connected", "compressed", "gas", "line", "supplying", "gas", "seal", "cavity", "rim", "includes", "lip", "engages", "face", "wearer", "gas", "circulated", "cavity", "escape", "lip", "face", "mask", "interior", "gas", "conduit", "advantageously", "includes", "lung", "demand", "inlet", "valve", "open", "front", "side", "mask", "ha", "pa", "passage", "throttle", "seal", "cavity", "seal", "cavity", "advantageously", "defined", "outwardly", "formed", "annular", "bead", "annular", "tubular", "member", "opening", "periphery", "directing", "compressed", "gas", "cavity", "mask", "interior"], "section": [0], "subsection": [13], "group": [74], "subgroup": [926, 937], "labels": [0, 22, 211, 1724, 1735]}
120 | {"id": "3935906", "title": ["adjustable", "height", "soil", "conditioner", "frame", "extending", "rearwardly", "cultivating", "implement"], "abstract": ["soil", "conditioner", "combination", "cultivating", "implement", "drawn", "tractor", "break", "soil", "leave", "prepared", "seedbed", "single", "operation", "conditioner", "mounted", "framework", "extending", "rearwardly", "cultivator", "comprises", "set", "reel", "mounted", "framework", "free", "rotation", "transverse", "axis", "set", "axial", "blade", "member", "provided", "reel", "blade", "equiangularly", "spaced", "axis", "arranged", "blade", "set", "angularly", "spaced", "midway", "adjacent", "blade", "set", "set", "blade", "include", "ground-engaging", "edge", "spaced", "radius", "axis", "edge", "remaining", "set", "blade", "spaced", "radius", "axis", "le", "radius", "radius", "set", "blade", "enable", "conditioner", "towed", "field", "freely-rotating", "blade", "breaking", "clod", "dirt", "left", "cultivator", "blade", "clogged", "dirt", "conditioner", "includes", "adjustment", "structure", "elevationally", "adjusting", "blade", "edge", "relative", "cultivator", "adjustment", "structure", "includes", "adjustment", "bracket", "connecting", "portion", "soil", "conditioner", "frame", "bracket", "provided", "plurality", "aperture", "aperture", "selectively", "aligned", "set", "aperture", "portion", "soil", "conditioner", "frame", "placing", "bolt", "aligned", "aperture", "elevation", "blade", "edge", "selectively", "adjusted"], "section": [0], "subsection": [0], "group": [0], "subgroup": [10, 15], "labels": [0, 9, 137, 808, 813]}
121 | {"id": "3935924", "title": ["vibratory", "material", "paper", "pulp", "carbon", "fiber"], "abstract": ["vibratory", "plate", "pulp", "chopped", "carbon", "fiber", "mixed", "uniformly", "paper", "pulp", "beaten", "degree", "higher", "cc", "canadian", "standard", "freeness"], "section": [6, 3, 7], "subsection": [123, 80, 115], "group": [641, 382, 585], "subgroup": [4904, 7263, 7991], "labels": [6, 3, 7, 132, 89, 124, 778, 519, 722, 5702, 8061, 8789]}
122 | {"id": "3935957", "title": ["insulation", "double", "walled", "cryogenic", "storage", "tank"], "abstract": ["thermal", "insulation", "material", "affixed", "outer", "surface", "sidewall", "double", "walled", "storage", "tank", "spaced", "outer", "sidewall", "form", "gaseous", "space", "therebetween", "blackish", "wall", "radially", "outer", "face", "insulating", "material", "face", "tank", "outer", "sidewall"], "section": [8, 5], "subsection": [125, 127, 95], "group": [654, 462, 659], "subgroup": [8085, 6020, 8170, 6024, 6027, 6015, 6019, 6016], "labels": [8, 5, 134, 136, 104, 791, 599, 796, 8883, 6818, 8968, 6822, 6825, 6813, 6817, 6814]}
123 | {"id": "3935963", "title": ["cap", "locking", "member"], "abstract": ["cap", "locking", "member", "locking", "cap", "closed", "position", "relative", "neck", "spout", "container", "prevent", "removal", "unscrewing", "cap", "child", "providing", "safety", "factor", "adult", "unscrew", "cap", "hand", "cap", "locking", "member", "secured", "top", "container", "ha", "upwardly", "extending", "portion", "ha", "notch", "recess", "transversely", "extending", "front", "edge", "thereof", "engaging", "underside", "cap", "front", "edge", "imbedded", "underside", "cap", "manner", "prevent", "counterrotation", "unscrewing", "cap", "container", "held", "hand", "finger", "hand", "applying", "sufficient", "manual", "pressure", "depress", "manually", "engageable", "portion", "locking", "effect", "disengagement", "cap", "simultaneously", "rotating", "cap", "counterclockwise", "direction", "hand", "unscrew", "cap", "locking", "member", "formed", "essentially", "spring", "metal", "integrally", "formed", "inexpensive", "produce"], "section": [1], "subsection": [46], "group": [237], "subgroup": [3018], "labels": [1, 55, 374, 3816]}
124 | {"id": "3935985", "title": ["support", "welding", "head", "carriage"], "abstract": ["support", "carriage", "welding", "head", "intended", "automatic", "welding", "metal", "plate", "comprising", "plane", "area", "framed", "corrugation", "running", "perpendicular", "direction", "intersecting", "form", "abutment", "surface", "end", "corrugation", "support", "comprises", "base", "equipped", "fixing", "base", "plane", "area", "centering", "base", "respect", "corrugation", "surrounding", "plane", "area", "contact", "plurality", "abutment", "surface", "adjustable", "stop", "defining", "distance", "base", "plane", "area", "contact", "plurality", "corrugation", "base", "including", "mounting", "base", "guide", "bar", "designed", "receive", "carriage", "translationally"], "section": [1], "subsection": [26], "group": [124], "subgroup": [1685, 1687], "labels": [1, 35, 261, 2483, 2485]}
125 | {"id": "3935995", "title": ["swinging", "bucket", "centrifuge", "rotor"], "abstract": ["swinging", "bucket", "centrifuge", "rotor", "ha", "plurality", "peripheral", "cavity", "adapted", "seat", "swinging", "bucket", "cavity", "ha", "hanger", "slideably", "positioned", "receptacle", "rear", "cavity", "receptacle", "prevents", "rotation", "hanger", "path", "movement", "extremity", "hanger", "form", "hook", "adapted", "support", "cross-pin", "located", "swinging", "bucket", "cap", "cross-pin", "positioned", "bucket", "hang", "properly", "hook", "manner", "hook", "ha", "outwardly", "downwardly", "sloping", "entrance", "opening", "aid", "properly", "hanging", "bucket"], "section": [1], "subsection": [18], "group": [95], "subgroup": [1292], "labels": [1, 27, 232, 2090]}
126 | {"id": "3936046", "title": ["front", "side", "sheet", "registering", "apparatus"], "abstract": ["sheet", "feeding", "device", "adapted", "separate", "single", "sheet", "stack", "sheet", "forward", "separated", "sheet", "stack", "subsequent", "processing", "device", "adapted", "side-register", "sheet", "drawn", "stack", "front-register", "sheet", "sheet-forwarding", "mechanism", "sheet", "feeding", "device"], "section": [1], "subsection": [46], "group": [240], "subgroup": [3122, 3149, 3131, 3100], "labels": [1, 55, 377, 3920, 3947, 3929, 3898]}
127 | {"id": "3936055", "title": ["golf", "practice", "device"], "abstract": ["golf", "shot", "practice", "stage", "comprising", "frame", "side", "panel", "define", "green", "", "fairway", "", "playing", "surface", "panel", "positionable", "angle", "horizontal", "supported", "angle", "defining", "wedge", "enable", "practice", "ball", "lie", "stage", "foldable", "compact", "form", "storage", "wedge", "enclosed", "folded-up", "stage"], "section": [0], "subsection": [14], "group": [77], "subgroup": [1028, 992], "labels": [0, 23, 214, 1826, 1790]}
128 | {"id": "3936204", "title": ["tape", "clamp"], "abstract": ["clamp", "securing", "loop", "end", "flat", "metallic", "tape", "type", "transmitting", "linear", "motion", "remote", "control", "master-slave", "manipulator", "securing", "clamp", "manipulator", "element", "transmits", "motion", "tape", "moved", "force", "transmitted", "tape", "clamp", "characterized", "independence", "tape", "loop", "variation", "tensile", "load", "disclosed", "clamp", "includes", "elongated", "tubular", "frame", "intermediate", "open", "section", "anchoring", "pin", "roller", "mounted", "free", "end", "tape", "secured", "introduced", "end", "frame", "passed", "pin", "roller", "partial", "peripheral", "engagement", "therewith", "brought", "back", "end", "frame", "tape", "clamping", "mechanism", "adjacent", "frame", "end", "force", "abutting", "tape", "surface", "inside", "surface", "frame", "rigidly", "secure", "tape", "loop", "clamp", "join", "tape", "end", "connect", "tape", "cable", "anchor", "tape", "manipulator", "part"], "section": [8, 1, 5], "subsection": [28, 127, 94], "group": [660, 136, 451], "subgroup": [5849, 1815, 8350], "labels": [8, 1, 5, 37, 136, 103, 797, 273, 588, 6647, 2613, 9148]}
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow==1.15.0
2 | tensorflow_tensorboard==1.5.1
3 | tensorboard==1.15.0
4 | matplotlib==2.2.3
5 | tflearn==0.3.2
6 | gensim==3.8.3
7 | numpy==1.16.2
8 | Pillow==5.4.1
9 | python_gflags==3.1.2
10 | scikit_learn==0.19.1
11 | texttable==1.6.3
12 | tqdm==4.49.0
13 | google-compute-engine==2.8.13
--------------------------------------------------------------------------------
/utils/checkmate.py:
--------------------------------------------------------------------------------
1 | import os
2 | import glob
3 | import json
4 | import numpy as np
5 | import tensorflow as tf
6 |
7 |
class BestCheckpointSaver(object):
    """Maintains a directory containing only the best n checkpoints.

    Inside the directory is a `best_checkpoints` JSON file containing a
    dictionary that maps checkpoint names (`model-<global_step>`) to the values
    by which the checkpoints are compared. Only the best n checkpoints are kept
    in the directory and in the JSON file.

    This is a light-weight wrapper class only intended to work in simple,
    non-distributed settings. It is not intended to work with the tf.Estimator
    framework.
    """

    def __init__(self, save_dir, num_to_keep=1, maximize=True, saver=None):
        """Creates a `BestCheckpointSaver`.

        `BestCheckpointSaver` acts as a wrapper class around a `tf.train.Saver`.

        Args:
            save_dir: The directory in which the checkpoint files will be saved.
                It is created if it does not exist yet.
            num_to_keep: The number of best checkpoint files to retain.
            maximize: Define 'best' values to be the highest values. For example,
                set this to True if selecting for the checkpoints with the highest
                given accuracy. Or set to False to select for checkpoints with the
                lowest given error rate.
            saver: A `tf.train.Saver` to use for saving checkpoints. A default
                `tf.train.Saver` will be created if none is provided.
        """
        self._num_to_keep = num_to_keep
        self._save_dir = save_dir
        self._save_path = os.path.join(save_dir, 'model')
        self._maximize = maximize
        self._saver = saver if saver else tf.train.Saver(
            max_to_keep=None,
            save_relative_paths=True
        )

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(save_dir, exist_ok=True)
        self.best_checkpoints_file = os.path.join(save_dir, 'best_checkpoints')

    def handle(self, value, sess, global_step):
        """Updates the set of best checkpoints based on the given result.

        The checkpoint is saved when fewer than `num_to_keep` checkpoints are
        recorded, or when `value` beats at least one recorded value; in the
        latter case the worst recorded checkpoint is deleted first.

        Args:
            value: The value by which to rank the checkpoint.
            sess: A tf.Session to use to save the checkpoint.
            global_step: The global step.
        """
        current_ckpt = 'model-{}'.format(global_step)
        value = float(value)

        # First call for this directory: nothing to compare against.
        if not os.path.exists(self.best_checkpoints_file):
            self._save_best_checkpoints_file({current_ckpt: value})
            self._saver.save(sess, self._save_path, global_step)
            return

        best_checkpoints = self._load_best_checkpoints_file()

        # Still room: keep the checkpoint regardless of its value.
        if len(best_checkpoints) < self._num_to_keep:
            best_checkpoints[current_ckpt] = value
            self._save_best_checkpoints_file(best_checkpoints)
            self._saver.save(sess, self._save_path, global_step)
            return

        # Save only if the new value is strictly better than some kept value.
        if self._maximize:
            should_save = not all(current_best >= value
                                  for current_best in best_checkpoints.values())
        else:
            should_save = not all(current_best <= value
                                  for current_best in best_checkpoints.values())
        if should_save:
            best_checkpoint_list = self._sort(best_checkpoints)

            # The list is ordered best-first, so the last entry is the worst.
            worst_checkpoint = os.path.join(self._save_dir,
                                            best_checkpoint_list.pop(-1)[0])
            self._remove_outdated_checkpoint_files(worst_checkpoint)
            self._update_internal_saver_state(best_checkpoint_list)

            best_checkpoints = dict(best_checkpoint_list)
            best_checkpoints[current_ckpt] = value
            self._save_best_checkpoints_file(best_checkpoints)

            self._saver.save(sess, self._save_path, global_step)

    def _save_best_checkpoints_file(self, updated_best_checkpoints):
        """Overwrites the best_checkpoints JSON file with the given mapping."""
        with open(self.best_checkpoints_file, 'w') as f:
            json.dump(updated_best_checkpoints, f, indent=3)

    def _remove_outdated_checkpoint_files(self, worst_checkpoint):
        """Deletes the `checkpoint` state file and the files of one checkpoint.

        Args:
            worst_checkpoint: Path prefix of the checkpoint to delete; every
                file matching `<prefix>.*` is removed.
        """
        # The state file may legitimately be missing (e.g. removed by hand or
        # never written); that must not abort the checkpoint rotation.
        try:
            os.remove(os.path.join(self._save_dir, 'checkpoint'))
        except FileNotFoundError:
            pass
        for ckpt_file in glob.glob(worst_checkpoint + '.*'):
            os.remove(ckpt_file)

    def _update_internal_saver_state(self, best_checkpoint_list):
        """Tells the wrapped saver which checkpoints are still kept."""
        best_checkpoint_files = [
            (ckpt[0], np.inf)  # TODO: Try to use actual file timestamp
            for ckpt in best_checkpoint_list
        ]
        self._saver.set_last_checkpoints_with_time(best_checkpoint_files)

    def _load_best_checkpoints_file(self):
        """Returns the mapping stored in the best_checkpoints JSON file."""
        with open(self.best_checkpoints_file, 'r') as f:
            best_checkpoints = json.load(f)
        return best_checkpoints

    def _sort(self, best_checkpoints):
        """Returns `(name, value)` pairs ordered from best to worst."""
        best_checkpoints = [
            (ckpt, best_checkpoints[ckpt])
            for ckpt in sorted(best_checkpoints,
                               key=best_checkpoints.get,
                               reverse=self._maximize)
        ]
        return best_checkpoints
116 |
117 |
def get_best_checkpoint(best_checkpoint_dir, select_maximum_value=True):
    """Returns the filepath of the best checkpoint recorded in a directory.

    The `best_checkpoints` JSON file inside `best_checkpoint_dir` maps
    checkpoint names to ranking values. The name with the highest value is
    chosen when `select_maximum_value` is True, otherwise the name with the
    lowest value.

    Args:
        best_checkpoint_dir: Directory containing best_checkpoints JSON file.
        select_maximum_value: If True, select the checkpoint associated with
            the highest value. Otherwise, select the one with the lowest value.
    Returns:
        The absolute directory path joined with the selected checkpoint name.
    """
    ranking_path = os.path.join(best_checkpoint_dir, 'best_checkpoints')
    assert os.path.exists(ranking_path)
    with open(ranking_path, 'r') as handle:
        scores = json.load(handle)
    # Best entry first: descending for "maximum", ascending for "minimum".
    ranked_names = sorted(scores, key=scores.get, reverse=select_maximum_value)
    winner = ranked_names[0]
    return os.path.join(os.path.abspath(best_checkpoint_dir), winner)
143 |
--------------------------------------------------------------------------------
/utils/data_helpers.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | __author__ = 'Randolph'
3 |
4 | import os
5 | import time
6 | import heapq
7 | import gensim
8 | import logging
9 | import json
10 | import numpy as np
11 | from collections import OrderedDict
12 | from pylab import *
13 | from texttable import Texttable
14 | from gensim.models import KeyedVectors
15 | from tflearn.data_utils import pad_sequences
16 |
17 |
def _option(pattern):
    """
    Ask the user to choose an option on the console.
    pattern 0: Choose training or restore (T/R).
    pattern 1: Choose best or latest checkpoint (B/L).

    The prompt is repeated until one of the accepted letters is entered
    (case-insensitive).

    Args:
        pattern: 0 for training step. 1 for testing step.
    Returns:
        The chosen OPTION as an upper-case letter.
    Raises:
        ValueError: If pattern is neither 0 nor 1.
    """
    if pattern == 0:
        prompt = "[Input] Train or Restore? (T/R): "
        choices = ('T', 'R')
    elif pattern == 1:
        prompt = "Load Best or Latest Model? (B/L): "
        choices = ('B', 'L')
    else:
        # Previously this fell through and failed with an UnboundLocalError.
        raise ValueError("[Error] Unsupported pattern: {0!r} (expected 0 or 1).".format(pattern))

    OPTION = input(prompt)
    while OPTION.upper() not in choices:
        OPTION = input("[Warning] The format of your input is illegal, please re-input: ")
    return OPTION.upper()
38 |
39 |
def logger_fn(name, input_file, level=logging.INFO):
    """
    Build a logger that writes to both a file and the console.

    Every call attaches a new file handler and a new stream handler to the
    logger registered under `name`; the log file is opened in 'w' mode, so an
    existing file is truncated.

    Args:
        name: The name of the logger.
        input_file: The logger file path.
        level: The logger level.
    Returns:
        The logger.
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)
    log_dir = os.path.dirname(input_file)
    # A bare file name has an empty dirname; os.makedirs('') would raise.
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

    # File Handler
    fh = logging.FileHandler(input_file, mode='w')
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # Stream Handler (console output is always limited to INFO and above)
    sh = logging.StreamHandler()
    sh.setFormatter(formatter)
    sh.setLevel(logging.INFO)
    logger.addHandler(sh)
    return logger
69 |
70 |
def tab_printer(args, logger):
    """
    Function to print the logs in a nice tabular format.

    Args:
        args: Parameters used for the model (any object supported by `vars`).
        logger: The logger.
    """
    params = vars(args)
    rows = [[key.replace("_", " ").capitalize(), params[key]] for key in sorted(params)]
    t = Texttable()
    # add_rows() treats the FIRST row as the header by default, so the header
    # has to lead the list; adding it in a second call silently dropped the
    # first parameter from the table.
    t.add_rows([["Parameter", "Value"]] + rows)
    logger.info('\n' + t.draw())
85 |
86 |
def get_out_dir(option, logger):
    """
    Get the out dir for saving model checkpoints.

    For 'T' a new run name is derived from the current Unix time; for 'R' the
    user is prompted on the console until a 10-digit run name is entered. The
    directory is `runs/<run name>` under the current working directory.

    Args:
        option: Train ('T') or Restore ('R').
        logger: The logger.
    Returns:
        The absolute output dir for model checkpoints.
    Raises:
        ValueError: If option is neither 'T' nor 'R'.
    """
    if option == 'T':
        run_name = str(int(time.time()))
    elif option == 'R':
        run_name = input("[Input] Please input the checkpoints model you want to restore, "
                         "it should be like (1490175368): ")  # The model you want to restore

        while not (run_name.isdigit() and len(run_name) == 10):
            run_name = input("[Warning] The format of your input is illegal, please re-input: ")
    else:
        # Previously this fell through and failed with an UnboundLocalError.
        raise ValueError("[Error] Unsupported option: {0!r} (expected 'T' or 'R').".format(option))

    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", run_name))
    logger.info("Writing to {0}\n".format(out_dir))
    return out_dir
110 |
111 |
def get_model_name():
    """
    Prompt on the console for the name of the model to test.

    The prompt is repeated until the input consists of exactly 10 digits.

    Returns:
        The model name.
    """
    first_prompt = "[Input] Please input the model file you want to test, it should be like (1490175368): "
    retry_prompt = ("[Warning] The format of your input is illegal, "
                    "it should be like (1490175368), please re-input: ")

    MODEL = input(first_prompt)
    while not MODEL.isdigit() or len(MODEL) != 10:
        MODEL = input(retry_prompt)
    return MODEL
125 |
126 |
def create_prediction_file(output_file, data_id, true_labels, predict_labels, predict_scores):
    """
    Create the prediction file (one JSON object per line).

    Each line holds the keys 'id', 'labels', 'predict_labels' and
    'predict_scores' (scores rounded to 4 decimals) for one record. The number
    of records written is `len(predict_labels)`.

    Args:
        output_file: The path of the prediction file to write (must end with '.json').
        data_id: The data record ids.
        true_labels: The true labels of every record.
        predict_labels: The predicted labels of every record.
        predict_scores: The predicted scores of every record.
    Raises:
        IOError: If the prediction file is not a .json file.
    """
    if not output_file.endswith('.json'):
        raise IOError("[Error] The prediction file is not a json file."
                      "Please make sure the prediction data is a json file.")
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must be
    # pinned instead of depending on the platform default.
    with open(output_file, 'w', encoding='utf-8') as fout:
        for idx in range(len(predict_labels)):
            data_record = OrderedDict([
                ('id', data_id[idx]),
                ('labels', [int(label) for label in true_labels[idx]]),
                ('predict_labels', [int(label) for label in predict_labels[idx]]),
                ('predict_scores', [round(score, 4) for score in predict_scores[idx]])
            ])
            fout.write(json.dumps(data_record, ensure_ascii=False) + '\n')
153 |
154 |
def get_onehot_label_threshold(scores, threshold=0.5):
    """
    Turn predicted scores into one-hot labels using a threshold.
    Every class whose score reaches the threshold is marked with 1. If no score
    of a record reaches it, the class with the maximum score is marked instead.

    Args:
        scores: The predicted scores of all classes (numpy array, one row per record).
        threshold: The threshold (default: 0.5).
    Returns:
        predicted_onehot_labels: The predicted labels (one-hot), as a list of lists.
    """
    predicted_onehot_labels = []
    for row in np.ndarray.tolist(scores):
        flags = [1 if value >= threshold else 0 for value in row]
        if 1 not in flags:
            # Nothing passed the threshold: fall back to the best-scoring class.
            flags[row.index(max(row))] = 1
        predicted_onehot_labels.append(flags)
    return predicted_onehot_labels
180 |
181 |
def get_onehot_label_topk(scores, top_num=1):
    """
    Get the predicted one-hot labels based on the topK.

    Exactly `top_num` classes are marked per record (or all of them when the
    record has fewer classes). Among tied scores the lower class index wins.

    Args:
        scores: The predicted scores of all classes (numpy array, one row per record).
        top_num: The max topK number (default: 1).
    Returns:
        predicted_onehot_labels: The predicted labels (one-hot), as a list of lists.
    """
    predicted_onehot_labels = []
    for score in np.ndarray.tolist(scores):
        onehot_labels_list = [0] * len(score)
        # Rank the indices, not the values: looking values up again with
        # list.index() maps tied scores to the same (first) position and marks
        # fewer than top_num labels.
        for index in heapq.nlargest(top_num, range(len(score)), key=score.__getitem__):
            onehot_labels_list[index] = 1
        predicted_onehot_labels.append(onehot_labels_list)
    return predicted_onehot_labels
201 |
202 |
def get_label_threshold(scores, threshold=0.5):
    """
    Select the predicted label indices (and their scores) using a threshold.
    Every class whose score reaches the threshold is selected. If no score of a
    record reaches it, the class with the maximum score is selected instead.

    Args:
        scores: The predicted scores of all classes (numpy array, one row per record).
        threshold: The threshold (default: 0.5).
    Returns:
        predicted_labels: The predicted label indices of every record.
        predicted_scores: The scores belonging to those labels.
    """
    predicted_labels = []
    predicted_scores = []
    for row in np.ndarray.tolist(scores):
        hits = [(position, value) for position, value in enumerate(row) if value >= threshold]
        if not hits:
            # Nothing passed the threshold: fall back to the best-scoring class.
            top_value = max(row)
            hits = [(row.index(top_value), top_value)]
        predicted_labels.append([position for position, _ in hits])
        predicted_scores.append([value for _, value in hits])
    return predicted_labels, predicted_scores
234 |
235 |
def get_label_topk(scores, top_num=1):
    """
    Select the topK label indices (and their scores) of every record,
    ordered from the highest score to the lowest.

    Args:
        scores: The predicted scores of all classes (numpy array, one row per record).
        top_num: The max topK number (default: 1).
    Returns:
        predicted_labels: The predicted label indices of every record.
        predicted_scores: The scores belonging to those labels.
    """
    predicted_labels = []
    predicted_scores = []
    for row in np.ndarray.tolist(scores):
        # argsort is ascending: keep the tail and flip it to get best-first.
        ranking = np.argsort(row)[-top_num:][::-1]
        predicted_labels.append(np.ndarray.tolist(ranking))
        predicted_scores.append([row[position] for position in ranking])
    return predicted_labels, predicted_scores
259 |
260 |
def create_metadata_file(word2vec_file, output_file):
    """
    Write the vocabulary of a word2vec model to a metadata file, one word per
    line, ordered by the word's index in the model (used for the Embedding
    Visualization later).

    Args:
        word2vec_file: The word2vec file.
        output_file: The metadata file path.
    Raises:
        IOError: If word2vec model file doesn't exist.
    """
    if not os.path.isfile(word2vec_file):
        raise IOError("[Error] The word2vec file doesn't exist.")

    vectors = KeyedVectors.load(word2vec_file, mmap='r')
    index_of = {token: entry.index for token, entry in vectors.vocab.items()}
    ordered_tokens = sorted(index_of, key=index_of.get)

    with open(output_file, 'w+') as fout:
        for token in ordered_tokens:
            if token is not None:
                fout.write(token + '\n')
                continue
            print("[Warning] Empty Line, should replaced by any thing else, or will cause a bug of tensorboard")
            fout.write('' + '\n')
285 |
286 |
def load_word2vec_matrix(word2vec_file):
    """
    Build the word2idx dict and the embedding matrix from a word2vec model.

    Index 0 is reserved for the "_UNK" token (an all-zero row); every
    vocabulary word gets its model index shifted by one.

    Args:
        word2vec_file: The word2vec file.
    Returns:
        word2idx: The word2idx dict.
        embedding_matrix: The word2vec model matrix.
    Raises:
        IOError: If word2vec model file doesn't exist.
    """
    if not os.path.isfile(word2vec_file):
        raise IOError("[Error] The word2vec file doesn't exist. ")

    vectors = KeyedVectors.load(word2vec_file, mmap='r')
    embedding_size = vectors.vector_size

    word2idx = OrderedDict([("_UNK", 0)])
    word2idx.update((token, entry.index + 1) for token, entry in vectors.vocab.items())
    vocab_size = len(word2idx)

    # Rows start as zeros, so the "_UNK" row needs no explicit assignment.
    embedding_matrix = np.zeros([vocab_size, embedding_size])
    for token, row in word2idx.items():
        if token != "_UNK":
            embedding_matrix[row] = vectors[token]
    return word2idx, embedding_matrix
317 |
318 |
def load_data_and_labels(args, input_file, word2idx: dict):
    """
    Load records from a JSON-lines file, convert the tokens to indices, pad the
    sequences and generate one-hot labels for every hierarchy level.

    Each line is parsed as a JSON object from which the keys 'id', 'abstract',
    'section', 'subsection', 'group', 'subgroup' and 'labels' are read.

    Args:
        args: The arguments (`num_classes_list`, `total_classes` and
            `pad_seq_len` are used).
        input_file: The research record (a .json file).
        word2idx: The word2idx dict.
    Returns:
        The dict (includes the record token indices, padded sequences and record labels).
    Raises:
        IOError: If the input file is not a .json file.
    """
    if not input_file.endswith('.json'):
        raise IOError("[Error] The research record is not a json file. "
                      "Please preprocess the research record into the json file.")

    def _token_to_index(x: list):
        # Unknown tokens map to the '_UNK' index (looked up only when needed).
        return [word2idx[token] if token in word2idx else word2idx['_UNK'] for token in x]

    def _create_onehot_labels(labels_index, num_labels):
        onehot = [0] * num_labels
        for position in labels_index:
            onehot[int(position)] = 1
        return onehot

    # Hierarchy levels in the order of args.num_classes_list.
    levels = ('section', 'subsection', 'group', 'subgroup')

    Data = dict()
    with open(input_file) as fin:
        for key in ('id', 'content_index', 'content') + levels + ('onehot_labels', 'labels'):
            Data[key] = []

        for eachline in fin:
            record = json.loads(eachline)
            record_id = record['id']
            content = record['abstract']
            level_labels = [record[level] for level in levels]
            labels = record['labels']

            Data['id'].append(record_id)
            Data['content_index'].append(_token_to_index(content))
            Data['content'].append(content)
            for depth, level in enumerate(levels):
                Data[level].append(_create_onehot_labels(level_labels[depth], args.num_classes_list[depth]))
            Data['onehot_labels'].append(_create_onehot_labels(labels, args.total_classes))
            Data['labels'].append(labels)
        Data['pad_seqs'] = pad_sequences(Data['content_index'], maxlen=args.pad_seq_len, value=0.)
    return Data
385 |
386 |
def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """
    Generate mini-batches of the data over several epochs.

    Because the function contains `yield`, it is a generator rather than an
    ordinary function. The data is processed in `num_epochs` epochs; within each
    epoch it is reshuffled when shuffle=True and then yielded batch by batch.
    Every batch holds `batch_size` items except possibly the last one of an
    epoch, and each epoch yields int((len(data) - 1) / batch_size) + 1 batches.

    Args:
        data: The data.
        batch_size: The size of the data batch.
        num_epochs: The number of epochs.
        shuffle: Shuffle or not (default: True).
    Returns:
        A batch iterator for data set.
    """
    samples = np.array(data)
    total = len(samples)
    batches_per_epoch = int((total - 1) / batch_size) + 1
    for _ in range(num_epochs):
        # Draw a fresh order at the start of every epoch.
        if shuffle:
            order = np.random.permutation(np.arange(total))
            epoch_samples = samples[order]
        else:
            epoch_samples = samples
        for batch_idx in range(batches_per_epoch):
            lower = batch_idx * batch_size
            upper = min(lower + batch_size, total)
            yield epoch_samples[lower:upper]
415 |
--------------------------------------------------------------------------------
/utils/param_parser.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 |
def _str2bool(value):
    """
    Convert a command line token into a real boolean.

    ``argparse`` with ``type=bool`` simply calls ``bool(token)``, which is True
    for every non-empty string (including "False"). This converter interprets
    the usual textual spellings instead.

    Args:
        value: The raw command line token (or an already-parsed bool).
    Returns:
        The corresponding boolean value.
    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean spelling.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string stays falsy, as it was with the previous ``type=bool`` behaviour.
    if token in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("Boolean value expected, got {0!r}.".format(value))


def _int_list(value):
    """
    Convert a command line token such as "9,128,661,8364" into a list of ints.

    Commas and/or whitespace are accepted as separators, and one pair of
    surrounding brackets (e.g. "[9, 128]") is tolerated.

    Args:
        value: The raw command line token (or an already-parsed list/tuple).
    Returns:
        A list of integers.
    Raises:
        argparse.ArgumentTypeError: If the token is empty or contains a non-integer item.
    """
    if isinstance(value, (list, tuple)):
        return [int(item) for item in value]
    tokens = value.strip().strip("[]()").replace(",", " ").split()
    if not tokens:
        raise argparse.ArgumentTypeError("Expected a list of integers, got {0!r}.".format(value))
    try:
        return [int(token) for token in tokens]
    except ValueError:
        raise argparse.ArgumentTypeError("Expected a list of integers, got {0!r}.".format(value))


def parameter_parser(argv=None):
    """
    A method to parse up command line parameters.
    The default hyperparameters give good results without cross-validation.

    Args:
        argv: Optional list of argument strings to parse. When None (the default),
            ``argparse`` reads the arguments from ``sys.argv[1:]``.
    Returns:
        The ``argparse.Namespace`` holding all parsed parameters.
    """
    parser = argparse.ArgumentParser(description="Run HARNN.")

    # Data Parameters
    parser.add_argument("--train-file", nargs="?", default="../data/Train_sample.json", help="Training data.")
    parser.add_argument("--validation-file", nargs="?", default="../data/Validation_sample.json", help="Validation data.")
    parser.add_argument("--test-file", nargs="?", default="../data/Test_sample.json", help="Testing data.")
    parser.add_argument("--metadata-file", nargs="?", default="../data/metadata.tsv",
                        help="Metadata file for embedding visualization.")
    parser.add_argument("--word2vec-file", nargs="?", default="../data/word2vec_100.kv",
                        help="Word2vec file for embedding characters (the dim need to be the same as embedding dim).")

    # Model Hyperparameters
    parser.add_argument("--pad-seq-len", type=int, default=150, help="Padding sequence length. (depends on the data)")
    parser.add_argument("--embedding-type", type=int, default=1, help="The embedding type.")
    parser.add_argument("--embedding-dim", type=int, default=100, help="Dimensionality of character embedding.")
    parser.add_argument("--lstm-dim", type=int, default=256, help="Dimensionality of LSTM neurons.")
    parser.add_argument("--lstm-layers", type=int, default=1, help="Number of LSTM layers.")
    parser.add_argument("--attention-dim", type=int, default=200, help="Dimensionality of Attention neurons.")
    # Booleans go through _str2bool so that "--flag False" really yields False.
    parser.add_argument("--attention-penalization", type=_str2bool, default=True,
                        help="Use attention penalization or not.")
    parser.add_argument("--fc-dim", type=int, default=512, help="Dimensionality for FC neurons.")
    parser.add_argument("--dropout-rate", type=float, default=0.5, help="Dropout keep probability.")
    parser.add_argument("--alpha", type=float, default=0.5, help="Weight of global part in scores cal.")
    # type=list would split the token into single characters; parse it as integers instead.
    parser.add_argument("--num-classes-list", type=_int_list, default=[9, 128, 661, 8364],
                        help="Each number of labels in hierarchical structure. (depends on the task)")
    parser.add_argument("--total-classes", type=int, default=9162, help="Total number of labels. (depends on the task)")
    parser.add_argument("--topK", type=int, default=5, help="Number of top K prediction classes.")
    parser.add_argument("--threshold", type=float, default=0.5, help="Threshold for prediction classes.")

    # Training Parameters
    parser.add_argument("--epochs", type=int, default=20, help="Number of training epochs.")
    parser.add_argument("--batch-size", type=int, default=32, help="Batch Size.")
    parser.add_argument("--learning-rate", type=float, default=0.001, help="Learning rate.")
    parser.add_argument("--decay-rate", type=float, default=0.95, help="Rate of decay for learning rate.")
    parser.add_argument("--decay-steps", type=int, default=500, help="How many steps before decay learning rate.")
    parser.add_argument("--evaluate-steps", type=int, default=10, help="Evaluate model on val set after how many steps.")
    parser.add_argument("--norm-ratio", type=float, default=1.25,
                        help="The ratio of the sum of gradients norms of trainable variable.")
    parser.add_argument("--l2-lambda", type=float, default=0.0, help="L2 regularization lambda.")
    parser.add_argument("--checkpoint-steps", type=int, default=10, help="Save model after how many steps.")
    parser.add_argument("--num-checkpoints", type=int, default=5, help="Number of checkpoints to store.")

    # Misc Parameters
    parser.add_argument("--allow-soft-placement", type=_str2bool, default=True,
                        help="Allow device soft device placement.")
    parser.add_argument("--log-device-placement", type=_str2bool, default=False,
                        help="Log placement of ops on devices.")
    parser.add_argument("--gpu-options-allow-growth", type=_str2bool, default=True,
                        help="Allow gpu options growth.")

    # argv=None makes argparse fall back to sys.argv[1:], preserving the original behaviour.
    return parser.parse_args(argv)
--------------------------------------------------------------------------------