├── CNN
│   ├── __init__.py
│   ├── conv_net_classes.py
│   ├── conv_net_sentence.py
│   ├── mr.p
│   ├── neg
│   ├── negSentences
│   ├── pos
│   ├── posSentences
│   └── process_data.py
├── README.md
└── RNN
    ├── __init__.py
    ├── code
    │   ├── lstm.py
    │   ├── lstm.pyc
    │   ├── lstm_mydata_model.npz
    │   ├── lstmmain.py
    │   ├── textprocessing.py
    │   └── textprocessing.pyc
    ├── data
    │   └── my_data
    │       ├── chengfengpolang_test.txt
    │       ├── chengfengpolang_testneg.txt
    │       ├── chengfengpolang_testpos.txt
    │       ├── chengfengpolang_train.txt
    │       ├── dictionary.pkl
    │       ├── greatewall_test.txt
    │       ├── greatewall_testneg.txt
    │       ├── greatewall_testpos.txt
    │       ├── greatewall_train.txt
    │       ├── mix_test.txt
    │       ├── mix_testneg.txt
    │       ├── mix_testpos.txt
    │       ├── mix_train.txt
    │       ├── my_data.pkl
    │       ├── test.txt
    │       ├── train.txt
    │       ├── xiyoufuyaopian_test.txt
    │       ├── xiyoufuyaopian_testneg.txt
    │       ├── xiyoufuyaopian_testpos.txt
    │       └── xiyoufuyaopian_train.txt
    ├── dict
    │   ├── sentiment_stopword.txt
    │   ├── stopword.txt
    │   └── userdict.txt
    └── seniment review set
        ├── CHENGFENGPOLANGNEG.xls
        ├── CHENGFENGPOLANGPOS.xls
        ├── CHENGFENGPOLANGTEST.xls
        ├── GREATEWALLNEG.xls
        ├── GREATEWALLPOS.xls
        ├── GREATEWALLTEST.xls
        ├── THREEMIXNEG.xls
        ├── THREEMIXPOS.xls
        ├── THREEMIXTEST.xls
        ├── XIYOUFUYAOPIANNEG.xls
        ├── XIYOUFUYAOPIANPOS.xls
        └── XIYOUFUYAOPIANTEST.xls

/CNN/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/life-is-good/MoiveDataAnalysisByDL/5c0e22d1dd2fe1561acd146647d6c14f169de625/CNN/__init__.py
--------------------------------------------------------------------------------
/CNN/conv_net_classes.py:
--------------------------------------------------------------------------------
import numpy
import theano.tensor.shared_randomstreams
import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv

def ReLU(x):
    y = T.maximum(0.0, x)
    return(y)

def Sigmoid(x):
    y = T.nnet.sigmoid(x)
    return(y)

def Tanh(x):
    y = T.tanh(x)
    return(y)

def Iden(x):
    y = x
    return(y)

class HiddenLayer(object):
    """
    Class for HiddenLayer
    """
    def __init__(self, rng, input, n_in, n_out, activation, W=None, b=None,
                 use_bias=False):

        self.input = input
        self.activation = activation

        if W is None:
            if activation.func_name == "ReLU":
                W_values = numpy.asarray(0.01 * rng.standard_normal(size=(n_in, n_out)),
                                         dtype=theano.config.floatX)
            else:
                W_values = numpy.asarray(rng.uniform(low=-numpy.sqrt(6. / (n_in + n_out)),
                                                     high=numpy.sqrt(6. / (n_in + n_out)),
                                                     size=(n_in, n_out)),
                                         dtype=theano.config.floatX)
            W = theano.shared(value=W_values, name='W')
        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b')

        self.W = W
        self.b = b

        if use_bias:
            lin_output = T.dot(input, self.W) + self.b
        else:
            lin_output = T.dot(input, self.W)

        self.output = (lin_output if activation is None else activation(lin_output))

        # parameters of the model
        if use_bias:
            self.params = [self.W, self.b]
        else:
            self.params = [self.W]

def _dropout_from_layer(rng, layer, p):
    """p is the probability of dropping a unit
    """
    srng = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(999999))
    # p=1-p because 1's indicate keep and p is prob of dropping
    mask = srng.binomial(n=1, p=1-p, size=layer.shape)
    # The cast is important because
    # int * float32 = float64 which pulls things off the gpu
    output = layer * T.cast(mask, theano.config.floatX)
    return output
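
# --- Illustrative note (not part of the original conv_net_classes.py) ---
# _dropout_from_layer keeps each unit with probability 1-p, so on average a
# dropped-out activation is (1-p) times the clean activation. MLPDropout below
# therefore builds a second, dropout-free path whose weight matrices are the
# shared dropout-path weights scaled by (1-p), so the clean path matches the
# training-time expectation at test time. A minimal numpy sketch of the same
# idea (the array names and the 0.5 rate are assumptions for illustration):
#
#   rng = numpy.random.RandomState(0)
#   acts = numpy.ones((4, 3), dtype='float32')             # pretend layer output
#   mask = rng.binomial(n=1, p=1 - 0.5, size=acts.shape)   # 1 = keep, 0 = drop
#   dropped = acts * mask                                  # training-time activations
#   rescaled = acts * (1 - 0.5)                            # test-time equivalent in expectation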

# A HiddenLayer whose output has dropout applied to it.
class DropoutHiddenLayer(HiddenLayer):
    def __init__(self, rng, input, n_in, n_out,
                 activation, dropout_rate, use_bias, W=None, b=None):
        super(DropoutHiddenLayer, self).__init__(
                rng=rng, input=input, n_in=n_in, n_out=n_out, W=W, b=b,
                activation=activation, use_bias=use_bias)

        self.output = _dropout_from_layer(rng, self.output, p=dropout_rate)

class MLPDropout(object):
    """A multilayer perceptron with dropout"""
    def __init__(self, rng, input, layer_sizes, dropout_rates, activations, use_bias=True):

        #rectified_linear_activation = lambda x: T.maximum(0.0, x)

        # Set up all the hidden layers
        self.weight_matrix_sizes = zip(layer_sizes, layer_sizes[1:])
        self.layers = []
        self.dropout_layers = []
        self.activations = activations
        next_layer_input = input
        #first_layer = True
        # dropout the input
        next_dropout_layer_input = _dropout_from_layer(rng, input, p=dropout_rates[0])
        layer_counter = 0
        for n_in, n_out in self.weight_matrix_sizes[:-1]:
            next_dropout_layer = DropoutHiddenLayer(rng=rng,
                    input=next_dropout_layer_input,
                    activation=activations[layer_counter],
                    n_in=n_in, n_out=n_out, use_bias=use_bias,
                    dropout_rate=dropout_rates[layer_counter])
            self.dropout_layers.append(next_dropout_layer)
            next_dropout_layer_input = next_dropout_layer.output

            # Reuse the parameters from the dropout layer here, in a different
            # path through the graph.
            next_layer = HiddenLayer(rng=rng,
                    input=next_layer_input,
                    activation=activations[layer_counter],
                    # scale the weight matrix W with (1-p)
                    W=next_dropout_layer.W * (1 - dropout_rates[layer_counter]),
                    b=next_dropout_layer.b,
                    n_in=n_in, n_out=n_out,
                    use_bias=use_bias)
            self.layers.append(next_layer)
            next_layer_input = next_layer.output
            #first_layer = False
            layer_counter += 1

        # Set up the output layer
        n_in, n_out = self.weight_matrix_sizes[-1]
        dropout_output_layer = LogisticRegression(
                input=next_dropout_layer_input,
                n_in=n_in, n_out=n_out)
        self.dropout_layers.append(dropout_output_layer)

        # Again, reuse parameters in the dropout output.
        output_layer = LogisticRegression(
            input=next_layer_input,
            # scale the weight matrix W with (1-p)
            W=dropout_output_layer.W * (1 - dropout_rates[-1]),
            b=dropout_output_layer.b,
            n_in=n_in, n_out=n_out)
        self.layers.append(output_layer)

        # Use the negative log likelihood of the logistic regression layer as
        # the objective.
        self.dropout_negative_log_likelihood = self.dropout_layers[-1].negative_log_likelihood
        self.dropout_errors = self.dropout_layers[-1].errors

        self.negative_log_likelihood = self.layers[-1].negative_log_likelihood
        self.errors = self.layers[-1].errors

        # Grab all the parameters together.
        self.params = [param for layer in self.dropout_layers for param in layer.params]
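
    # --- Illustrative usage sketch (not part of the original file) ---
    # How this class might be instantiated as a small classifier head; the
    # sizes, variable names and dropout rates below are assumptions for
    # illustration, not values taken from this repository (LogisticRegression
    # is the softmax output layer defined elsewhere in conv_net_classes.py).
    #
    #   rng = numpy.random.RandomState(3435)
    #   x, y = T.matrix('x'), T.ivector('y')
    #   clf = MLPDropout(rng, input=x,
    #                    layer_sizes=[300, 100, 2],  # 300-d input, 100 hidden units, 2 classes
    #                    dropout_rates=[0.5, 0.5],   # dropout on the input and the hidden layer
    #                    activations=[ReLU])
    #   cost = clf.dropout_negative_log_likelihood(y)  # train on the dropout path
    #   y_pred = clf.predict(x)                        # predict with the (1-p)-scaled clean path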

    def predict(self, new_data):
        next_layer_input = new_data
        for i, layer in enumerate(self.layers):
            if i