├── README.md
├── dist_conv_net_classes.py
├── dist_conv_net_sentence.py
├── dist_conv_net_sentence_oneFold.py
└── dist_process_data.py

/README.md:
--------------------------------------------------------------------------------
This is the source code for the paper:

Relation Extraction: Perspective from Convolutional Neural Networks

Thien Huu Nguyen and Ralph Grishman, in Proceedings of the NAACL Workshop on Vector Space Modeling for NLP, Denver, Colorado, June 2015.

----------------

Much of this code is modified from: https://github.com/yoonkim/CNN_sentence

This code was written when I started my deep learning journey, so it is not optimal :).

There are two steps to run this code:

* Preprocessing: use the file ```dist_process_data.py```

You will need the ACE 2005 data set in the format required by this file. We cannot include the data in this release due to license issues.

* Train and test the model: use the file ```dist_conv_net_sentence_oneFold.py```

This step takes the output file from step 1 as input.

THE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
--------------------------------------------------------------------------------
/dist_conv_net_classes.py:
--------------------------------------------------------------------------------
"""
Convolutional Neural Network for Relation Extraction

Much of the code is modified from
- https://github.com/yoonkim/CNN_sentence
"""

import numpy
import theano.tensor.shared_randomstreams
import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv

def ReLU(x):
    y = T.maximum(0.0, x)
    return(y)
def Sigmoid(x):
    y = T.nnet.sigmoid(x)
    return(y)
def Tanh(x):
    y = T.tanh(x)
    return(y)
def Iden(x):
    y = x
    return(y)

class HiddenLayer(object):
    """
    Class for HiddenLayer
    """
    def __init__(self, rng, input, n_in, n_out, activation, W=None, b=None,
                 use_bias=False):

        self.input = input
        self.activation = activation

        if W is None:
            if activation.func_name == "ReLU":
                W_values = numpy.asarray(0.01 * rng.standard_normal(size=(n_in, n_out)), dtype=theano.config.floatX)
            else:
                W_values = numpy.asarray(rng.uniform(low=-numpy.sqrt(6. / (n_in + n_out)), high=numpy.sqrt(6. / (n_in + n_out)),
                                                     size=(n_in, n_out)), dtype=theano.config.floatX)
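            # Added note (not in the original source): ReLU layers are initialized
            # with small Gaussian weights (0.01 * N(0, 1)), while saturating
            # activations use Glorot-style uniform weights in
            # [-sqrt(6 / (n_in + n_out)), +sqrt(6 / (n_in + n_out))].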
            W = theano.shared(value=W_values, name='W')
        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b')

        self.W = W
        self.b = b

        if use_bias:
            lin_output = T.dot(input, self.W) + self.b
        else:
            lin_output = T.dot(input, self.W)

        self.output = (lin_output if activation is None else activation(lin_output))

        # parameters of the model
        if use_bias:
            self.params = [self.W, self.b]
        else:
            self.params = [self.W]

def _dropout_from_layer(rng, layer, p):
    """p is the probability of dropping a unit
    """
    srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))
    # p=1-p because 1's indicate keep and p is prob of dropping
    mask = srng.binomial(n=1, p=1-p, size=layer.shape)
    # The cast is important because
    # int * float32 = float64 which pulls things off the gpu
    output = layer * T.cast(mask, theano.config.floatX)
    return output

class DropoutHiddenLayer(HiddenLayer):
    def __init__(self, rng, input, n_in, n_out,
                 activation, dropout_rate, use_bias, W=None, b=None):
        super(DropoutHiddenLayer, self).__init__(
                rng=rng, input=input, n_in=n_in, n_out=n_out, W=W, b=b,
                activation=activation, use_bias=use_bias)

        self.output = _dropout_from_layer(rng, self.output, p=dropout_rate)

class MLPDropout(object):
    """A multilayer perceptron with dropout"""
    def __init__(self, rng, input, layer_sizes, dropout_rates, activations, use_bias=True):

        #rectified_linear_activation = lambda x: T.maximum(0.0, x)

        # Set up all the hidden layers
        self.weight_matrix_sizes = zip(layer_sizes, layer_sizes[1:])
        self.layers = []
        self.dropout_layers = []
        self.activations = activations
        next_layer_input = input
        #first_layer = True
        # dropout the input
        next_dropout_layer_input = _dropout_from_layer(rng, input, p=dropout_rates[0])
        layer_counter = 0
        for n_in, n_out in self.weight_matrix_sizes[:-1]:
            next_dropout_layer = DropoutHiddenLayer(rng=rng,
                    input=next_dropout_layer_input,
                    activation=activations[layer_counter],
                    n_in=n_in, n_out=n_out, use_bias=use_bias,
                    dropout_rate=dropout_rates[layer_counter])
            self.dropout_layers.append(next_dropout_layer)
            next_dropout_layer_input = next_dropout_layer.output

            # Reuse the parameters from the dropout layer here, in a different
            # path through the graph.
            next_layer = HiddenLayer(rng=rng,
                    input=next_layer_input,
                    activation=activations[layer_counter],
                    # scale the weight matrix W with (1-p)
                    W=next_dropout_layer.W * (1 - dropout_rates[layer_counter]),
                    b=next_dropout_layer.b,
                    n_in=n_in, n_out=n_out,
                    use_bias=use_bias)
            self.layers.append(next_layer)
            next_layer_input = next_layer.output
            #first_layer = False
            layer_counter += 1

        # Set up the output layer
        n_in, n_out = self.weight_matrix_sizes[-1]
        dropout_output_layer = LogisticRegression(
                input=next_dropout_layer_input,
                n_in=n_in, n_out=n_out)
        self.dropout_layers.append(dropout_output_layer)

        # Again, reuse parameters in the dropout output.
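        # Added note (not in the original source): dropout masks are only applied on
        # the dropout path above; this second, non-dropout path shares the same
        # weights but scales them by (1 - p) so that the expected pre-activation at
        # prediction time matches the training-time dropout expectation,
        # since E[mask * x] = (1 - p) * x.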
        output_layer = LogisticRegression(
            input=next_layer_input,
            # scale the weight matrix W with (1-p)
            W=dropout_output_layer.W * (1 - dropout_rates[-1]),
            b=dropout_output_layer.b,
            n_in=n_in, n_out=n_out)
        self.layers.append(output_layer)

        # Use the negative log likelihood of the logistic regression layer as
        # the objective.
        self.dropout_negative_log_likelihood = self.dropout_layers[-1].negative_log_likelihood
        self.dropout_errors = self.dropout_layers[-1].errors
        self.dropout_F1 = self.dropout_layers[-1].F1

        self.negative_log_likelihood = self.layers[-1].negative_log_likelihood
        self.errors = self.layers[-1].errors
        self.F1 = self.layers[-1].F1

        # Grab all the parameters together.
        self.params = [ param for layer in self.dropout_layers for param in layer.params ]

    def predict(self, new_data):
        next_layer_input = new_data
        for i,layer in enumerate(self.layers):
            if i