├── Deep_Learning
│   ├── 1_Intro.py
│   ├── Tensorflow
│   │   ├── 2_Tensorflow_basics.py
│   │   ├── 3_building_model.py
│   │   ├── 4.1_Using_our_own_data.py
│   │   ├── 4.2_sentiment_neural_network.py
│   │   ├── 4.3_adding_more_data(preprocessing).py
│   │   ├── 4.4_Neural_netword_model_forMoreData.py
│   │   ├── 5_Recurrent Neural Networks.py
│   │   ├── 6_Convolutional_Neural_nets.py
│   │   ├── 7_tflearn.py
│   │   ├── cs20si
│   │   │   ├── .ipynb_checkpoints
│   │   │   │   ├── Untitled-checkpoint.ipynb
│   │   │   │   └── lecture_1:graphsandsessions-checkpoint.ipynb
│   │   │   ├── Untitled.ipynb
│   │   │   ├── graphs
│   │   │   │   ├── events.out.tfevents.1508077168.raghav-PC
│   │   │   │   ├── events.out.tfevents.1508077664.raghav-PC
│   │   │   │   └── events.out.tfevents.1512490021.raghav-PC
│   │   │   └── lecture_1:graphsandsessions.ipynb
│   │   ├── lexicon-2500-2638.pickle
│   │   ├── lexicon.pickle
│   │   ├── model.ckpt
│   │   ├── neg.txt
│   │   ├── pos.txt
│   │   └── processed-test-set.csv
│   ├── chatbot
│   │   └── 1_intro.py
│   ├── fast.ai
│   │   ├── .ipynb_checkpoints
│   │   │   ├── Untitled-checkpoint.ipynb
│   │   │   ├── lesson1-checkpoint.ipynb
│   │   │   └── lesson1_code-checkpoint.ipynb
│   │   ├── Untitled.ipynb
│   │   ├── __pycache__
│   │   │   ├── utils.cpython-35.pyc
│   │   │   ├── vgg16.cpython-35.pyc
│   │   │   └── vgg16bn.cpython-35.pyc
│   │   ├── lesson1.ipynb
│   │   ├── lesson1_code.ipynb
│   │   ├── utils.py
│   │   ├── vgg16.py
│   │   └── vgg16bn.py
│   ├── keras
│   │   ├── 1_intro.py
│   │   ├── Image_Augumentation.py
│   │   ├── image_classifier
│   │   │   ├── 2_Image_classifier.py
│   │   │   ├── 2_image_classifier_only_code.py
│   │   │   ├── 3_usingVG16.py
│   │   │   └── 3_usingVGG16_codeonly.py
│   │   ├── rnn_keras
│   │   │   └── intro.py
│   │   └── sentiment_analysis_movie
│   │       ├── 4.1_code_only.py
│   │       └── 4_movie_sentiment.py
│   └── openAIGym
│       ├── 1_intro.py
│       ├── 2_train.py
│       ├── log
│       ├── openAIStuff
│       │   ├── events.out.tfevents.1497419827.raghav-PC
│       │   ├── events.out.tfevents.1497420569.raghav-PC
│       │   ├── events.out.tfevents.1497420852.raghav-PC
│       │   ├── events.out.tfevents.1497420919.raghav-PC
│       │   ├── events.out.tfevents.1497441395.raghav-PC
│       │   ├── events.out.tfevents.1497441421.raghav-PC
│       │   └── events.out.tfevents.1497441457.raghav-PC
│       ├── openai_learning
│       │   ├── events.out.tfevents.1497421583.raghav-PC
│       │   ├── events.out.tfevents.1497421622.raghav-PC
│       │   ├── events.out.tfevents.1497421683.raghav-PC
│       │   ├── events.out.tfevents.1497421744.raghav-PC
│       │   ├── events.out.tfevents.1497421940.raghav-PC
│       │   ├── events.out.tfevents.1497421973.raghav-PC
│       │   ├── events.out.tfevents.1497422209.raghav-PC
│       │   ├── events.out.tfevents.1497422303.raghav-PC
│       │   ├── events.out.tfevents.1497422371.raghav-PC
│       │   ├── events.out.tfevents.1497422400.raghav-PC
│       │   ├── events.out.tfevents.1497422427.raghav-PC
│       │   └── events.out.tfevents.1497441498.raghav-PC
│       └── saved.npy
├── README.md
└── classical_ml
    ├── clustering
    │   ├── K means
    │   │   ├── 1_Intro.py
    │   │   ├── 2_handling_non_numeric_data.py
    │   │   ├── 3_K_means_from_scratch.py
    │   │   └── titanic.xls
    │   └── Mean Shift
    │       ├── 1_intro.py
    │       ├── 2_Applying_on_titanic_dataset.py
    │       ├── 3_from_scratch.py
    │       └── titanic.xls
    ├── k nearerst neighbours
    │   ├── 1_k_nearest_neighbours_intro.py
    │   ├── 2_k_nearest_neighbors_from_scratch.py
    │   ├── 3_applying_our_algo_on_practical_eg.py
    │   ├── README.md
    │   └── breast-cancer-wisconsin.data.txt
    ├── linear regression
    │   ├── 1_linear_regression_intro.py
    │   ├── 2predicting_using_regression.py
    │   ├── 3_pickling_classifier.py
    │   ├── 4linear_regression_from_scratch.py
    │   ├── 5testing_assumptions.py
    │   ├── LinearRegression.pickle
    │   └── README.md
    └── svm
        ├── 1_intro.py
        ├── 2_svm_from_scratch.py
        ├── 3_kernels_intro.py
        ├── 4_kerenels_using_sklearn.py
        └── breast-cancer-wisconsin.data.txt
/Deep_Learning/1_Intro.py:
--------------------------------------------------------------------------------
1 | """
2 | Our brain has neurons. A neuron has dendrites, which are the branches; the long portion is the axon.
3 | The dendrites are the inputs.
4 |
5 | Our model of a neuron:
6 | We have our input eg X1,X2,X3
7 | These values are passed to a function which gives the weighted sum of the inputs plus a bias, i.e. sum(input*weight) + bias
8 | Bias is important for the case when all the inputs are zero
9 |
10 | This sum then gets passed through a threshold (sigmoid/activation) function, which decides whether an output is passed on
11 | depending on whether the value is greater than the threshold or not:
12 | 0 means the value is less than the threshold, 1 means it is greater.
13 | This might go to another input.
14 |
15 | Output Y = f(x,w) where the w are the weights
16 |
17 | Model of a neural network:
18 |
19 | Consider layers of neurons. eg the first layer may have 3 neurons, the second 2, and so on.
20 | We also have our input x1,x2,x3.
21 | Each input is fed to all the neurons of the first layer and each connection has a unique weight.
22 | The output from the neurons of the first layer becomes the input for the second layer and so on.
23 |
24 | x1,x2,x3 -> Input layer
25 | Layers inbetween ->Hidden Layers
26 | Last layer ->Output Layer.
27 |
28 | If we have one hidden layer, it is a regular neural network. If > one layer, then "deep" neural network.
29 |
30 |
31 | """
32 |
33 | # Datasets available: ImageNet, Wikipedia dumps, Tatoeba, Common Crawl
34 |
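35 | # A minimal runnable sketch of the neuron model described above (assumes numpy is available):
36 | # output = activation(sum(input * weight) + bias), using a sigmoid as the activation function.
37 | import numpy as np
38 | 
39 | 
40 | def sigmoid(z):
41 |     return 1 / (1 + np.exp(-z))
42 | 
43 | 
44 | x = np.array([0.5, -1.0, 2.0])  # example inputs x1, x2, x3
45 | w = np.array([0.4, 0.6, -0.1])  # one weight per input connection
46 | b = 0.1  # bias, matters when all the inputs are zero
47 | print(sigmoid(np.dot(x, w) + b))  # weighted sum + bias, then the activation function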
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/2_Tensorflow_basics.py:
--------------------------------------------------------------------------------
1 | """
2 | Tensor: an array-like object
3 | TensorFlow: provides a large library of deep learning functions
4 |
5 | In TensorFlow: Define the model in abstract terms. When ready, run the session.
6 |
7 | So tensorflow has a computation graph where we model everything.
8 | Then we run the session
9 | """
10 | import tensorflow as tf
11 |
12 | # construct the computation graph, first thing to do
13 | x1 = tf.constant(5)
14 | x2 = tf.constant(6)
15 |
16 | # result = x1*x2 # can do this but not efficient
17 | result = tf.multiply(x1, x2)
18 | print(result)
19 | # result is a tensor object
20 |
21 | # to actually get the answer we need to run it in a session
22 | # Method 1
23 | # sess = tf.Session()
24 | # print(sess.run(result))
25 | # sess.close()
26 |
27 | # Method 2, better
28 | with tf.Session() as sess:
29 | print(sess.run(result))
30 |
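31 | # The same define-then-run pattern works for array-valued tensors; a small sketch using the
32 | # same TF 1.x API as above:
33 | a = tf.constant([[1., 2.], [3., 4.]])
34 | b = tf.constant([[1., 0.], [0., 1.]])
35 | product = tf.matmul(a, b)  # only adds a node to the graph, nothing is computed yet
36 | 
37 | with tf.Session() as sess:
38 |     print(sess.run(product))  # [[1. 2.] [3. 4.]]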
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/3_building_model.py:
--------------------------------------------------------------------------------
1 | """
2 | Our model
3 | input data > Weight it > hidden layer 1 (activation function) > weights > Hidden Layer 2(activation function)> weights
4 | > output layer.
5 |
6 | In a neural network, this data is passed straight through. That passing of data is called feed forward
7 |
8 | Compare output to intended output.> cost function
9 |
10 | optimisation function(optimiser) which will minimise the cost eg(Adam Optimiser, AdaGrad)
11 | This optimiser goes backwards and manipulates the weights.
12 | This motion is called backward propagation.
13 | 
14 | feed forward + backpropagation = one epoch (cycle). The cost is minimised with each cycle.
15 |
16 | """
17 |
18 | # tf.Variable is used to train variables such as weights.
19 | # tf.placeholder is used to feed actual training examples
20 | import tensorflow as tf
21 | from tensorflow.examples.tutorials.mnist import input_data
22 |
23 | # using MNIST dataset, a set of handwritten digit images, 28 by 28 pixels
24 | mnist = input_data.read_data_sets('/home/raghav/Desktop/Data', one_hot=True) # 1 is on and rest is off. Could be
25 |
26 | # useful in multiclass problems
27 | # eg here 10 classes - 0 to 9
28 | # one hot means an output of 0 is represented by [1,0,0,0,0,0,0,0,0,0]
29 | # and an output of 1 by [0,1,0,0,0,0,0,0,0,0]
30 |
31 | # defining our model
32 |
33 | n_nodes_h1 = 500
34 | n_nodes_h2 = 500
35 | n_nodes_h3 = 500
36 |
37 | n_classes = 10
38 | batch_size = 100  # feeds the network 100 examples at a time, updates the weights,
39 | # then moves on to the next batch, and so on
40 |
41 | # matrix is height by width
42 | x = tf.placeholder('float', [None, 28 * 28]) # flattening out the matrix
43 | y = tf.placeholder('float')
44 |
45 |
46 | # x is the data, y is output
47 |
48 | def neural_network_model(data):
49 | # weights are tf variable where the variable is a tf random_normal and we specify the shape of the normal
50 | # for eg in the hidden_1_layer we have 28*28 inputs and n_nodes_h1 nodes. So a total of 28*28*n_nodes_h1 weights
51 |
52 | # tf.truncated_normal draws random values from a normal distribution, re-drawing any value more than 2 standard deviations from the mean
53 | hidden_1_layer = {'weights': tf.Variable(tf.truncated_normal([28 * 28, n_nodes_h1])),
54 | 'biases': tf.Variable(tf.truncated_normal(shape=[n_nodes_h1]))}
55 |
56 | hidden_2_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_h1, n_nodes_h2])),
57 | 'biases': tf.Variable(tf.truncated_normal(shape=[n_nodes_h2]))}
58 |
59 | hidden_3_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_h2, n_nodes_h3])),
60 | 'biases': tf.Variable(tf.truncated_normal(shape=[n_nodes_h3]))}
61 |
62 | output_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_h3, n_classes])),
63 | 'biases': tf.Variable(tf.truncated_normal(shape=[n_classes]))}
64 |
65 | # (input*weights + bias)
66 | l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
67 | l1 = tf.nn.relu(l1) # threshold function
68 |
69 | l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
70 | l2 = tf.nn.relu(l2)
71 |
72 | l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
73 | l3 = tf.nn.relu(l3)
74 |
75 | output = tf.add(tf.matmul(l3, output_layer['weights']), output_layer['biases'])
76 |
77 | return output
78 |
79 |
80 | def train_neural_network(x):
81 | prediction = neural_network_model(x)
82 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)) # calculates the
83 | # diff of prediction to known label
84 | # minimise the cost
85 |
86 | optimizer = tf.train.AdamOptimizer().minimize(cost)
87 |
88 | hm_epochs = 10
89 |
90 | with tf.Session() as sess:
91 | sess.run(tf.global_variables_initializer())
92 |
93 | for epoch in range(hm_epochs):
94 | epoch_loss = 0
95 | for _ in range(int(mnist.train.num_examples) // batch_size):
96 | epoch_x, epoch_y = mnist.train.next_batch(batch_size)
97 | _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
98 | epoch_loss += c
99 | print('Epoch', epoch+1, 'completed out of ', hm_epochs, ' loss ', epoch_loss)
100 |
101 | # testing
102 | correct = tf.equal(tf.argmax(prediction, 1),
103 | tf.argmax(y, 1)) # argmax returns the index of max value in the arrays
104 |
105 | accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
106 | print('accuracy ', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
107 |
108 |
109 | train_neural_network(x)
110 |
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/4.1_Using_our_own_data.py:
--------------------------------------------------------------------------------
1 | """
2 | Applying tf to positive and negative sentiment data. The data is in the form of strings, and every string has a different length.
3 | We want fixed-length inputs for tf.
4 |
5 | These are all the unique words in our input data:
6 | ['chair' , 'table' , 'spoon' , 'tv']
7 | New sentence:
8 | I pulled the chair upto the table.
9 |
10 | [0 , 0, 0, 0]
11 | chair is in the sentence, table is also there
12 | [1,0,0,1]
13 | """
14 |
15 | from nltk.tokenize import word_tokenize
16 | from nltk.stem import WordNetLemmatizer  # treats running, ran, run as the same word
17 | import numpy as np
18 | import random
19 | import pickle
20 | from collections import Counter
21 |
22 | lemmatizer = WordNetLemmatizer()
23 | hm_lines = 10000000
24 |
25 |
26 | def create_lexicon(pos, neg): # creates a list of words that are important
27 | lexicon = []
28 | for fi in [pos, neg]:
29 | with open(fi, 'r') as f:
30 | contents = f.readlines()
31 | for l in contents[:hm_lines]:
32 | all_words = word_tokenize(l.lower())
33 | lexicon += list(all_words)
34 | lexicon = [lemmatizer.lemmatize(i) for i in lexicon]
35 | word_counts = Counter(lexicon) # gives dictionary
36 | # word_counts = {'the':32322,'a':32134}
37 |
38 | l2 = []
39 | for w in word_counts:
40 | if 1000 > word_counts[w] > 50:  # drop very common words like 'the', 'an', 'a' and very rare ones
41 | l2.append(w)
42 |
43 | print(len(l2))
44 | return l2
45 |
46 |
47 | def sample_handling(sample, lexicon, classification):  # creates a list of [features, label] lists: the features count
48 | # which lexicon words appear in the sample line, and the label tells us whether the sample is pos or neg
49 | featureset = []
50 | # [
51 | # [ [0 1 0 0 1],[0,1]]
52 | # ]
53 | with open(sample, 'r') as f:
54 | contents = f.readlines()
55 | for l in contents[:hm_lines]:
56 | current_words = word_tokenize(l.lower())
57 | current_words = [lemmatizer.lemmatize(i) for i in current_words]
58 | features = np.zeros(len(lexicon))
59 | for word in current_words:
60 | if word.lower() in lexicon:
61 | index_value = lexicon.index(word.lower())
62 | features[index_value] += 1
63 | features = list(features)
64 | featureset.append([features, classification])
65 |
66 | return featureset
67 |
68 |
69 | def create_feature_sets_and_labels(pos, neg, test_size=0.1):
70 | lexicon = create_lexicon(pos, neg)
71 | features = []
72 | features += sample_handling(pos, lexicon, [1, 0])
73 | features += sample_handling(neg, lexicon, [0, 1])
74 | random.shuffle(features)
75 |
76 | features = np.array(features)
77 | testing_size = int(test_size * len(features))
78 |
79 | train_x = list(features[:, 0][:-testing_size])  # numpy's [:, 0] slicing takes the first column (the feature vectors)
80 | train_y = list(features[:, 1][:-testing_size])
81 | test_x = list(features[:, 0][-testing_size:])
82 | test_y = list(features[:, 1][-testing_size:])
83 |
84 | return train_x, train_y, test_x, test_y
85 |
86 |
87 | if __name__ == '__main__':
88 | train_x, train_y, test_x, test_y = create_feature_sets_and_labels('pos.txt', 'neg.txt')
89 |
90 | with open('sentiment_set.pickle', 'wb') as f:
91 | pickle.dump([train_x, train_y, test_x, test_y], f)
92 |
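93 | # A small sketch of how a new sentence would be featurised against the lexicon, mirroring the
94 | # chair/table example in the docstring above. `featurize_sentence` is a hypothetical helper and
95 | # is not used elsewhere in this repo.
96 | def featurize_sentence(sentence, lexicon):
97 |     words = [lemmatizer.lemmatize(w) for w in word_tokenize(sentence.lower())]
98 |     features = np.zeros(len(lexicon))
99 |     for word in words:
100 |         if word in lexicon:
101 |             features[lexicon.index(word)] += 1
102 |     return features
103 | 
104 | # featurize_sentence('I pulled the chair upto the table', ['chair', 'table', 'spoon', 'tv'])
105 | # -> array([1., 0., 0., 1.])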
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/4.2_sentiment_neural_network.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import pickle
3 | import numpy as np
4 |
5 | with open("sentiment_set.pickle", 'rb') as f:
6 |     data_pickle = pickle.load(f)
7 | train_x, train_y, test_x, test_y = data_pickle
8 |
9 | n_nodes_h1 = 500
10 | n_nodes_h2 = 500
11 | n_nodes_h3 = 500
12 |
13 | n_classes = 2
14 | batch_size = 100
15 | x = tf.placeholder('float', [None, len(train_x[0])])
16 | y = tf.placeholder('float')
17 |
18 |
19 | # x is the data, y is output
20 |
21 | def neural_network_model(data):
22 | hidden_1_layer = {'weights': tf.Variable(tf.truncated_normal([len(train_x[0]), n_nodes_h1])),
23 | 'biases': tf.constant(0.1, shape=[n_nodes_h1])}
24 |
25 | hidden_2_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_h1, n_nodes_h2])),
26 | 'biases': tf.constant(0.1, shape=[n_nodes_h2])}
27 |
28 | hidden_3_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_h2, n_nodes_h3])),
29 | 'biases': tf.constant(0.1, shape=[n_nodes_h3])}
30 |
31 | output_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_h3, n_classes])),
32 | 'biases': tf.constant(0.1, shape=[n_classes])}
33 |
34 | # (input*weights + bias)
35 | l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
36 | l1 = tf.nn.relu(l1) # threshold function
37 |
38 | l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
39 | l2 = tf.nn.relu(l2)
40 |
41 | l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
42 | l3 = tf.nn.relu(l3)
43 |
44 | output = tf.add(tf.matmul(l3, output_layer['weights']), output_layer['biases'])
45 |
46 | return output
47 |
48 |
49 | def train_neural_network(x):
50 | prediction = neural_network_model(x)
51 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)) # calculates the
52 | # diff of prediction
53 | # to known label
54 | # minimise the cost
55 |
56 | optimizer = tf.train.AdamOptimizer().minimize(cost)
57 |
58 | hm_epochs = 10
59 |
60 | with tf.Session() as sess:
61 | sess.run(tf.global_variables_initializer())
62 |
63 | for epoch in range(hm_epochs):
64 | epoch_loss = 0
65 |
66 | i = 0
67 | while i < len(train_x):
68 | start = i
69 | end = i + batch_size
70 | batch_x = np.array(train_x[start:end])
71 | batch_y = np.array(train_y[start:end])
72 | _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
73 | epoch_loss += c
74 | i += batch_size
75 | print('Epoch', epoch+1, 'completed out of ', hm_epochs, ' loss ', epoch_loss)
76 |
77 | # testing
78 | correct = tf.equal(tf.argmax(prediction, 1),
79 | tf.argmax(y, 1)) # argmax returns the index of max value in the arrays
80 |
81 | accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
82 | print('accuracy ', accuracy.eval({x: test_x, y: test_y}))
83 |
84 |
85 | train_neural_network(x)
86 |
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/4.3_adding_more_data(preprocessing).py:
--------------------------------------------------------------------------------
1 | import nltk
2 | from nltk.tokenize import word_tokenize
3 | from nltk.stem import WordNetLemmatizer
4 | import pickle
5 | import numpy as np
6 | import pandas as pd
7 |
8 | lemmatizer = WordNetLemmatizer()
9 |
10 | '''
11 | polarity 0 = negative. 2 = neutral. 4 = positive.
12 | id
13 | date
14 | query
15 | user
16 | tweet
17 | '''
18 |
19 |
20 | def init_process(fin, fout): # formatting stuff for input data
21 | outfile = open(fout, 'a')
22 | with open(fin, buffering=200000, encoding='latin-1') as f:
23 | try:
24 | for line in f:
25 | line = line.replace('"', '')
26 | initial_polarity = line.split(',')[0]
27 | if initial_polarity == '0':
28 | initial_polarity = [1, 0]
29 | elif initial_polarity == '4':
30 | initial_polarity = [0, 1]
31 |
32 | tweet = line.split(',')[-1]
33 | outline = str(initial_polarity) + ':::' + tweet
34 | outfile.write(outline)
35 | except Exception as e:
36 | print(str(e))
37 | outfile.close()
38 |
39 |
40 | # init_process('/home/raghav/Desktop/trainingandtestdata/training.1600000.processed.noemoticon.csv', 'train_set.csv')
41 | # init_process('/home/raghav/Desktop/trainingandtestdata/testdata.manual.2009.06.14.csv', 'test_set.csv')
42 |
43 |
44 | def create_lexicon(fin): # creating lexicon for our data
45 | lexicon = []
46 | with open(fin, 'r', buffering=100000, encoding='latin-1') as f:
47 | try:
48 | counter = 1
49 | content = ''
50 | for line in f:
51 | counter += 1
52 | if (counter / 2500.0).is_integer(): # every 2500 lines
53 | tweet = line.split(':::')[1]
54 | content += ' ' + tweet
55 | words = word_tokenize(content)
56 | words = [lemmatizer.lemmatize(i) for i in words]
57 | lexicon = list(set(lexicon + words))
58 | print(counter, len(lexicon))
59 |
60 | except Exception as e:
61 | print(str(e))
62 |
63 | with open('lexicon-2500-2638.pickle', 'wb') as f: # 2500 as one in every 2500 and 2638 is the number of words in
64 | # lexicon
65 | pickle.dump(lexicon, f)
66 |
67 |
68 | # create_lexicon('/home/raghav/Documents/deep_learning_datasets/train_set.csv')
69 |
70 |
71 | def convert_to_vec(fin, fout, lexicon_pickle):
72 | with open(lexicon_pickle, 'rb') as f:
73 | lexicon = pickle.load(f)
74 | outfile = open(fout, 'a')
75 | with open(fin, buffering=20000, encoding='latin-1') as f:
76 | counter = 0
77 | for line in f:
78 | counter += 1
79 | label = line.split(':::')[0]
80 | tweet = line.split(':::')[1]
81 | current_words = word_tokenize(tweet.lower())
82 | current_words = [lemmatizer.lemmatize(i) for i in current_words]
83 |
84 | features = np.zeros(len(lexicon))
85 |
86 | for word in current_words:
87 | if word.lower() in lexicon:
88 | index_value = lexicon.index(word.lower())
89 | # OR DO +=1, test both
90 | features[index_value] += 1
91 |
92 | features = list(features)
93 | outline = str(features) + '::' + str(label) + '\n'
94 | outfile.write(outline)
95 |
96 | print(counter)
97 |
98 |
99 | # convert_to_vec('/home/raghav/Documents/deep_learning_datasets/test_set.csv', 'processed-test-set.csv', 'lexicon-2500-2638.pickle')
100 |
101 |
102 | def shuffle_data(fin):
103 | df = pd.read_csv(fin, error_bad_lines=False)
104 | df = df.iloc[np.random.permutation(len(df))]
105 | print(df.head())
106 | df.to_csv('/home/raghav/Documents/train_set_shuffled.csv', index=False)
107 |
108 |
109 | shuffle_data('/home/raghav/Documents/deep_learning_datasets/train_set.csv')
110 |
111 |
112 | def create_test_data_pickle(fin):
113 | feature_sets = []
114 | labels = []
115 | counter = 0
116 | with open(fin, buffering=20000) as f:
117 | for line in f:
118 | try:
119 | features = list(eval(line.split('::')[0]))
120 | label = list(eval(line.split('::')[1]))
121 |
122 | feature_sets.append(features)
123 | labels.append(label)
124 | counter += 1
125 | except:
126 | pass
127 | print(counter)
128 | feature_sets = np.array(feature_sets)
129 | labels = np.array(labels)
130 |
131 | # this is too big. Will do inline later
132 | # create_test_data_pickle('processed-test-set.csv')
133 |
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/4.4_Neural_netword_model_forMoreData.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import pickle
3 | import numpy as np
4 | import nltk
5 | from nltk.tokenize import word_tokenize
6 | from nltk.stem import WordNetLemmatizer
7 |
8 | lemmatizer = WordNetLemmatizer()
9 |
10 | n_nodes_hl1 = 500
11 | n_nodes_hl2 = 500
12 |
13 | n_classes = 2
14 |
15 | batch_size = 32
16 | total_batches = int(1600000 / batch_size)
17 | hm_epochs = 10
18 |
19 | x = tf.placeholder('float', name='X')
20 | y = tf.placeholder('float', name='y')
21 |
22 | hidden_1_layer = {'f_fum': n_nodes_hl1,
23 | 'weight': tf.Variable(tf.random_normal([2638, n_nodes_hl1])),
24 | 'bias': tf.Variable(tf.random_normal([n_nodes_hl1]))}
25 |
26 | hidden_2_layer = {'f_fum': n_nodes_hl2,
27 | 'weight': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
28 | 'bias': tf.Variable(tf.random_normal([n_nodes_hl2]))}
29 |
30 | output_layer = {'f_fum': None,
31 | 'weight': tf.Variable(tf.random_normal([n_nodes_hl2, n_classes])),
32 | 'bias': tf.Variable(tf.random_normal([n_classes])), }
33 |
34 |
35 | def neural_network_model(data):
36 | l1 = tf.add(tf.matmul(data, hidden_1_layer['weight']), hidden_1_layer['bias'])
37 | l1 = tf.nn.relu(l1)
38 | l2 = tf.add(tf.matmul(l1, hidden_2_layer['weight']), hidden_2_layer['bias'])
39 | l2 = tf.nn.relu(l2)
40 | output = tf.matmul(l2, output_layer['weight']) + output_layer['bias']
41 | return output
42 |
43 |
44 | saver = tf.train.Saver()
45 | tf_log = 'tf.log'
46 |
47 |
48 | def train_neural_network(x):
49 | prediction = neural_network_model(x)
50 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction))
51 | optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
52 | with tf.Session() as sess:
53 | sess.run(tf.global_variables_initializer())
54 | try:
55 | epoch = int(open(tf_log, 'r').read().split('\n')[-2]) + 1
56 | print('STARTING:', epoch)
57 | except:
58 | epoch = 1
59 |
60 | while epoch <= hm_epochs:
61 | if epoch != 1:
62 | saver.restore(sess, "model.ckpt")
63 | epoch_loss = 1
64 | with open('lexicon-2500-2638.pickle', 'rb') as f:
65 | lexicon = pickle.load(f)
66 | with open('/home/raghav/Documents/deep_learning_datasets/train_set_shuffled.csv', buffering=20000, encoding='latin-1') as f:
67 | batch_x = []
68 | batch_y = []
69 | batches_run = 0
70 | for line in f:
71 | label = line.split(':::')[0]
72 | tweet = line.split(':::')[1]
73 | current_words = word_tokenize(tweet.lower())
74 | current_words = [lemmatizer.lemmatize(i) for i in current_words]
75 |
76 | features = np.zeros(len(lexicon))
77 |
78 | for word in current_words:
79 | if word.lower() in lexicon:
80 | index_value = lexicon.index(word.lower())
81 | # OR DO +=1, test both
82 | features[index_value] += 1
83 | line_x = list(features)
84 | line_y = eval(label)
85 | batch_x.append(line_x)
86 | batch_y.append(line_y)
87 | if len(batch_x) >= batch_size:
88 | _, c = sess.run([optimizer, cost], feed_dict={x: np.array(batch_x),
89 | y: np.array(batch_y)})
90 | epoch_loss += c
91 | batch_x = []
92 | batch_y = []
93 | batches_run += 1
94 | # print('Batch run:', batches_run, '/', total_batches, '| Epoch:', epoch, '| Batch Loss:', c, )
95 |
96 | saver.save(sess, "model.ckpt")
97 | print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
98 | with open(tf_log, 'a') as f:
99 | f.write(str(epoch) + '\n')
100 | epoch += 1
101 |
102 |
103 | def test_neural_network():
104 | prediction = neural_network_model(x)
105 | with tf.Session() as sess:
106 | sess.run(tf.global_variables_initializer())
107 | for epoch in range(hm_epochs):
108 | saver.restore(sess, './model.ckpt')
109 | # try:
110 | # saver.restore(sess, "model.ckpt")
111 | # except Exception as e:
112 | # # print(str(e))
113 | # print("could not restore checkpoint")
114 | # epoch_loss = 0
115 |
116 | correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
117 | accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
118 | feature_sets = []
119 | labels = []
120 | counter = 0
121 | with open('processed-test-set.csv', buffering=20000) as f:
122 | for line in f:
123 | try:
124 | features = list(eval(line.split('::')[0]))
125 | label = list(eval(line.split('::')[1]))
126 | feature_sets.append(features)
127 | labels.append(label)
128 | counter += 1
129 | except:
130 | pass
131 | print('Tested', counter, 'samples.')
132 | test_x = np.array(feature_sets)
133 | test_y = np.array(labels)
134 | print('Accuracy:', accuracy.eval({x: test_x, y: test_y}))
135 |
136 | test_neural_network()
137 |
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/5_Recurrent Neural Networks.py:
--------------------------------------------------------------------------------
1 | """
2 | 1) Recurrent Neural Networks: solve problems that involve time: eg a machine playing
3 | catch needs to know whether the ball is moving away or towards it. They are also used with language, as word order and tense play a
4 | role in language.
5 |
6 | LSTM cell (Long Short Term Memory Cell) most common cells used with RNN.
7 |
8 | In a deep neural network, we had weighted inputs which were fed to a neuron. The order in which they
9 | were fed did not matter. The inputs were fed to an activation function (the neuron) and then we got the output.
10 | In an RNN, x1 gets sent into the activation function and the output is fed back to the activation function. So at t=0,
11 | only the input is fed to the activation function. At t=1, both the new input and the previous output are fed back to the activation
12 | function.
13 |
14 | Consider "Raghav drove car" where each word is a feature. To a deep neural network, "Raghav drove car" and "car
15 | drove Raghav" are the same.
16 | """
17 |
18 |
19 | """
20 | Say you have a 5*5 image, and you have 1 such image. Then it is:
21 |
22 | x = np.ones((1,5,5))
23 |
24 | so you have:
25 |
26 | x = array([[[ 1., 1., 1., 1., 1.],
27 | [ 1., 1., 1., 1., 1.],
28 | [ 1., 1., 1., 1., 1.],
29 | [ 1., 1., 1., 1., 1.],
30 | [ 1., 1., 1., 1., 1.]]])
31 |
32 | now for the rnn you need to treat each row of pixels as a single chunk.
33 | so you would have 5 chunks of 5 values each,
34 | which means you need to convert each row to its own array
35 |
36 | x = np.transpose(x,(1,0,2))
37 |
38 | this swaps the 0th dim with the 1st dim, so you get a shape of (5,1,5) for x,
39 | which is 5 arrays of 1 chunk each of 5 elements
40 |
41 | x = array([[[ 1., 1., 1., 1., 1.]],
42 |
43 | [[ 1., 1., 1., 1., 1.]],
44 |
45 | [[ 1., 1., 1., 1., 1.]],
46 |
47 | [[ 1., 1., 1., 1., 1.]],
48 |
49 | [[ 1., 1., 1., 1., 1.]]])
50 |
51 | now you need to remove one pair of extra braces, so flatten by one dimension
52 |
53 | x = np.reshape(x,(-1,chunk_size))
54 |
55 | so you will have:
56 |
57 | x = array([[ 1., 1., 1., 1., 1.],
58 | [ 1., 1., 1., 1., 1.],
59 | [ 1., 1., 1., 1., 1.],
60 | [ 1., 1., 1., 1., 1.],
61 | [ 1., 1., 1., 1., 1.]])
62 |
63 | and finally you need to split the entire thing into 5 chunks (5 arrays)
64 | x = np.split(x,n_chunks,0)
65 |
66 | so finally you have:
67 |
68 | x = [array([[ 1., 1., 1., 1., 1.]]), array([[ 1., 1., 1., 1., 1.]]), array([[ 1., 1., 1., 1., 1.]]),
69 | array([[ 1., 1., 1., 1., 1.]]), array([[ 1., 1., 1., 1., 1.]])]
70 | """
71 |
72 | import tensorflow as tf
73 | from tensorflow.examples.tutorials.mnist import input_data
74 | from tensorflow.contrib import rnn
75 |
76 | # using MNIST dataset,
77 | mnist = input_data.read_data_sets('/home/raghav/Desktop/Data', one_hot=True)
78 |
79 | n_nodes_h1 = 500
80 | n_nodes_h2 = 500
81 | n_nodes_h3 = 500
82 |
83 | hm_epochs = 3
84 | n_classes = 10
85 | batch_size = 128
86 | chunk_size = 28
87 | n_chunks = 28
88 | rnn_size = 128
89 |
90 | # images are 28*28
91 | x = tf.placeholder('float', [None, n_chunks, chunk_size])  # each image is fed as n_chunks rows of chunk_size pixels
92 | y = tf.placeholder('float')
93 |
94 |
95 | def recurrent_neural_network_model(x):
96 | layer = {'weights': tf.Variable(tf.truncated_normal([rnn_size, n_classes])),
97 | 'biases': tf.constant(0.1, shape=[n_classes])}
98 |
99 | x = tf.transpose(x, [1, 0, 2])
100 | x = tf.reshape(x, [-1, chunk_size])
101 | x = tf.split(x, n_chunks, 0)
102 |
103 | lstm_cell = rnn.BasicLSTMCell(rnn_size)
104 | outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
105 | output = tf.add(tf.matmul(outputs[-1], layer['weights']), layer['biases'])
106 |
107 | return output
108 |
109 |
110 | def train_neural_network(x):
111 | prediction = recurrent_neural_network_model(x)
112 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))  # calculates the difference between the prediction and the known label
113 |
114 | optimizer = tf.train.AdamOptimizer().minimize(cost)
115 |
116 | with tf.Session() as sess:
117 | sess.run(tf.global_variables_initializer())
118 |
119 | for epoch in range(hm_epochs):
120 | epoch_loss = 0
121 | for _ in range(int(mnist.train.num_examples) // batch_size):
122 | epoch_x, epoch_y = mnist.train.next_batch(batch_size)
123 | epoch_x = epoch_x.reshape((batch_size, n_chunks, chunk_size))
124 |
125 | _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
126 | epoch_loss += c
127 | print('Epoch', epoch, 'completed out of ', hm_epochs, ' loss ', epoch_loss)
128 |
129 | # testing
130 | correct = tf.equal(tf.argmax(prediction, 1),
131 | tf.argmax(y, 1)) # argmax returns the index of max value in the arrays
132 |
133 | accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
134 | print('accuracy ', accuracy.eval({x: mnist.test.images.reshape((-1, n_chunks, chunk_size)),
135 | y: mnist.test.labels}))
136 |
137 |
138 | train_neural_network(x)
139 |
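140 | # A runnable numpy version of the chunking walkthrough in the docstring above
141 | # (a sketch; numpy is assumed to be available alongside tensorflow):
142 | import numpy as np
143 | 
144 | demo = np.ones((1, 5, 5))             # one 5*5 "image"
145 | demo = np.transpose(demo, (1, 0, 2))  # swap dim 0 with dim 1 -> shape (5, 1, 5)
146 | demo = np.reshape(demo, (-1, 5))      # remove the extra pair of braces -> shape (5, 5)
147 | demo = np.split(demo, 5, 0)           # 5 chunks (arrays) of 5 values each
148 | print(len(demo), demo[0].shape)       # 5 (1, 5)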
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/6_Convolutional_Neural_nets.py:
--------------------------------------------------------------------------------
1 | """
2 | Convolutional Neural Network: state of the art for images.
3 | 
4 | We have input data. We do some convolutions, which create feature maps, then some pooling. Conv + pooling form the hidden
5 | layer. Then a fully connected layer is added. Then the output.
6 |
7 | input -> (convolution -> pooling) -> fully connected layer(hidden layers of simple neural nets) -> output
8 | |
9 | \ /
10 | hidden layer
11 |
12 | Convolve: creating feature maps from the original dataset. eg let the dataset be an image of a cat; take a 3*3 window.
13 | This window is moved over the image. The creation of these new windows, each holding some value, is called
14 | convolution. Instead of the image, we now have a feature map (eg a 2*2 grid) which holds the values of the windows.
15 |
16 | Pooling: eg 3*3 pooling, i.e. a 3*3 window. Pooling simplifies the window. For eg max pooling: take the max value out of
17 | the 3*3 window.
18 |
19 | """
20 | """
21 | Our model
22 | input data > Weight it > hidden layer 1 (activation function) > weights > Hidden Layer 2(activation function)> weights
23 | > output layer.
24 |
25 | In a neural network, this data is passed straight through. That passing of data is called feed forward
26 |
27 | Compare output to intended output.> cost function
28 |
29 | optimisation function(optimiser) which will minimise the cost eg(Adam Optimiser, AdaGrad)
30 | This optimiser goes backwards and manipulates the weights.
31 | This motion is called backward propagation.
32 | 
33 | feed forward + backpropagation = one epoch (cycle). The cost is minimised with each cycle.
34 |
35 | """
36 | import tensorflow as tf
37 | from tensorflow.examples.tutorials.mnist import input_data
38 |
39 | mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
40 |
41 | n_classes = 10
42 | batch_size = 128
43 |
44 | x = tf.placeholder('float', [None, 784])
45 | y = tf.placeholder('float')
46 |
47 | keep_rate = 0.8
48 | keep_prob = tf.placeholder(tf.float32)
49 |
50 |
51 | def conv2d(x, W):
52 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
53 |
54 |
55 | def maxpool2d(x):
56 | # size of window movement of window
57 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') # window is 2*2, and will move
58 | # 2 pixels
59 |
60 |
61 | def convolutional_neural_network(x):
62 | weights = {'W_conv1': tf.Variable(tf.random_normal([5, 5, 1, 32])),  # a 5*5 window over the original 1-channel image,
63 | # producing 32 feature maps
64 | 'W_conv2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
65 | 'W_fc': tf.Variable(tf.random_normal([7 * 7 * 64, 1024])),
66 | 'out': tf.Variable(tf.random_normal([1024, n_classes]))}
67 |
68 | biases = {'b_conv1': tf.Variable(tf.random_normal([32])),
69 | 'b_conv2': tf.Variable(tf.random_normal([64])),
70 | 'b_fc': tf.Variable(tf.random_normal([1024])),
71 | 'out': tf.Variable(tf.random_normal([n_classes]))}
72 |
73 | x = tf.reshape(x, shape=[-1, 28, 28, 1])
74 |
75 | conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
76 | conv1 = maxpool2d(conv1)
77 |
78 | conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
79 | conv2 = maxpool2d(conv2)
80 |
81 | fc = tf.reshape(conv2, [-1, 7 * 7 * 64])
82 | fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
83 | fc = tf.nn.dropout(fc, keep_rate)
84 |
85 | output = tf.matmul(fc, weights['out']) + biases['out']
86 |
87 | return output
88 |
89 |
90 | def train_neural_network(x):
91 | prediction = convolutional_neural_network(x)
92 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
93 | optimizer = tf.train.AdamOptimizer().minimize(cost)
94 |
95 | hm_epochs = 10
96 | with tf.Session() as sess:
97 | sess.run(tf.global_variables_initializer())
98 |
99 | for epoch in range(hm_epochs):
100 | epoch_loss = 0
101 | for _ in range(int(mnist.train.num_examples / batch_size)):
102 | epoch_x, epoch_y = mnist.train.next_batch(batch_size)
103 | _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
104 | epoch_loss += c
105 |
106 | print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)
107 |
108 | correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
109 |
110 | accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
111 | print('Accuracy:', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
112 |
113 |
114 | train_neural_network(x)
115 |
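116 | # A minimal sketch of max pooling on a single 2*2 window, to make the pooling idea from the
117 | # docstring concrete (numpy is assumed to be available alongside tensorflow):
118 | import numpy as np
119 | 
120 | window = np.array([[1, 3],
121 |                    [2, 4]])
122 | print(window.max())  # max pooling keeps only the largest value in the window -> 4
123 | # After two rounds of 2*2 max pooling with stride 2, a 28*28 image becomes 14*14 and then 7*7,
124 | # which is why the fully connected layer above expects 7 * 7 * 64 inputs.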
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/7_tflearn.py:
--------------------------------------------------------------------------------
1 | """
2 | Abstraction layers over tensorflow: tflearn, keras, tfslim , skflow
3 |
4 | """
5 |
6 | import tflearn
7 | from tflearn.layers.conv import conv_2d, max_pool_2d
8 | from tflearn.layers.core import input_data, dropout, fully_connected
9 | from tflearn.layers.estimator import regression
10 | import tflearn.datasets.mnist as mnist
11 |
12 | X, y, test_X, test_y = mnist.load_data(one_hot=True)
13 |
14 | X = X.reshape([-1, 28, 28, 1])
15 | test_X = test_X.reshape([-1, 28, 28, 1])
16 |
17 | convnet = input_data(shape=[None, 28, 28, 1], name='input')
18 |
19 | convnet = conv_2d(convnet, 32, 2, activation='relu')
20 | convnet = max_pool_2d(convnet, 2)
21 |
22 | convnet = conv_2d(convnet, 64, 2, activation='relu')
23 | convnet = max_pool_2d(convnet, 2)
24 |
25 | convnet = fully_connected(convnet, 1024, activation='relu')
26 |
27 | convnet = dropout(convnet, 0.8)
28 |
29 | convnet = fully_connected(convnet, 10, activation='softmax') # output layer
30 | convnet = regression(convnet, optimizer='adam', learning_rate=0.01, loss='categorical_crossentropy', name='targets')
31 |
32 | model = tflearn.DNN(convnet)
33 |
34 | # model.fit({'input': X}, {'targets': y}, n_epoch=10,
35 | # validation_set=({'input': test_X}, {'targets': test_y}),
36 | # snapshot_step=500, show_metric=True, run_id='mnist')
37 | #
38 | #
39 | # model.save('tflearncnn.model') # saves only the weights, so the model definition above must still be run before loading them
40 | #
41 | # once saved, comment out the fit/save lines above and just load the weights
42 |
43 | model.load('tflearncnn.model')
44 |
45 | print(model.predict([test_X[1]]))
46 |
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/cs20si/.ipynb_checkpoints/lecture_1:graphsandsessions-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# **TENSORFLOW**\n",
8 | "\n",
9 | "* Open sourced by Google \n",
10 | "* Pytorch by Facebook\n",
11 | "* Wavenet-> Text to speech network\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "**Graphs and Sessions**\n",
19 | "\n",
20 | "Tensorflow separates the definition and execution of a model. We first define the graph. Then we use a session to execute the graph.\n",
21 | "\n",
22 | "**Tensor -> n dim matrix**\n"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 2,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "name": "stdout",
32 | "output_type": "stream",
33 | "text": [
34 | "Tensor(\"Add:0\", shape=(), dtype=int32)\n"
35 | ]
36 | }
37 | ],
38 | "source": [
39 | "import tensorflow as tf\n",
40 | "\n",
41 | "a = tf.add(2, 3)\n",
42 | "\n",
43 | "print(a)"
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {},
49 | "source": [
50 | "When we print(a) above we expect 5 as output but it is not so.\n",
51 | "This is because we are just creating the graph. \n",
52 | "\n",
53 | "Here a node called \"Add\" has been created. The inputs are scalars, i.e. 2 and 3, so zero-dimensional. Hence the shape is (). The data type is int32"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "**Tensorboard** ->Allows us to visualise the graph defined\n",
61 | "\n",
62 | "* Nodes : are the operations, variables, constants etc\n",
63 | "* Tensor values on edges\n",
64 | "\n",
65 | "So the above statement gives a node for the addition operation with 2 edges going into it, carrying the values 2 and 3"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "**To get the value of a** -> Create a session\n"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 3,
78 | "metadata": {
79 | "collapsed": true
80 | },
81 | "outputs": [],
82 | "source": [
83 | "sess = tf.Session()"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 4,
89 | "metadata": {},
90 | "outputs": [
91 | {
92 | "name": "stdout",
93 | "output_type": "stream",
94 | "text": [
95 | "5\n"
96 | ]
97 | }
98 | ],
99 | "source": [
100 | "print(sess.run(a)) # runs the session\n",
101 | "\n",
102 | "# tf.Session(fetches, feed_dict=None, options=None, run_metadata=None)\n"
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "metadata": {},
108 | "source": [
109 | "Inside sess.run(), we pass either the whole graph or a specific node as the argument.\n",
110 | "**If you want to compute the value of multiple nodes, provide them as a list**\n",
111 | "\n",
112 | "\n",
113 | "\n",
114 | "eg:\n"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": null,
120 | "metadata": {
121 | "collapsed": true
122 | },
123 | "outputs": [],
124 | "source": [
125 | "# sess.run([a,b]) # computes the value of a and b"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "\n",
133 | "When we give a as input to parameter of .run : tensorflow looks at the graph defined and computes whatever is necessary to get the value of a. So if many nodes, it will first compute the other nodes necessary to get the value of node a."
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {
140 | "collapsed": true
141 | },
142 | "outputs": [],
143 | "source": [
144 | "sess.close() # closes the session\n"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "** Instead of explicitly closing the session, we can use the with statement of python like in case of files **\n"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 5,
157 | "metadata": {},
158 | "outputs": [
159 | {
160 | "name": "stdout",
161 | "output_type": "stream",
162 | "text": [
163 | "5\n"
164 | ]
165 | }
166 | ],
167 | "source": [
168 | "with tf.Session() as sess:\n",
169 | " print(sess.run(a))"
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {},
175 | "source": [
176 | "## **Now lets consider new example**\n"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 6,
182 | "metadata": {},
183 | "outputs": [
184 | {
185 | "name": "stdout",
186 | "output_type": "stream",
187 | "text": [
188 | "15625\n"
189 | ]
190 | }
191 | ],
192 | "source": [
193 | "x = 2\n",
194 | "y = 3\n",
195 | "op1 = tf.add(x,y)\n",
196 | "op2 = tf.multiply(x,y)\n",
197 | "op3 = tf.pow(op1,op2)\n",
198 | "\n",
199 | "with tf.Session() as sess:\n",
200 | " print(sess.run(op3))"
201 | ]
202 | },
203 | {
204 | "cell_type": "markdown",
205 | "metadata": {},
206 | "source": [
207 | "The above will first compute op1 and op2 and then finally op3, even though we only want the value of op3. \n",
208 | "\n",
209 | "**The graph is created when defining not in the tf.Session()**\n",
210 | "\n",
211 | "The important thing to note is **not everything that is defined is calculated. Only those portion of the graph is calculated which is required by the sess.run(). This is the power of Graph dataflow used in tensorflow**"
212 | ]
213 | },
214 | {
215 | "cell_type": "markdown",
216 | "metadata": {},
217 | "source": [
218 | "* Since tf is based on graph, different nodes could be calculated across different CPUs/GPUs"
219 | ]
220 | },
221 | {
222 | "cell_type": "markdown",
223 | "metadata": {},
224 | "source": [
225 | "## What if we want more graphs?\n",
226 | "\n",
227 | "Ans) Not recommended. When we start to create our graph, tensorflow actually has a default graph. It just puts our nodes/values in that graph. ** You should use disconnected subgraphs **\n",
228 | "\n",
229 | "If you really want to....."
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 5,
235 | "metadata": {
236 | "collapsed": true
237 | },
238 | "outputs": [],
239 | "source": [
240 | "g = tf.Graph() # if you want something other than the default graph\n",
241 | "\n",
242 | "with g.as_default(): # making it the default graph\n",
243 | " x = tf.add(2, 3)\n",
244 | "\n",
245 | "sess = tf.Session(graph=g) # need to pass the graph..\n",
246 | "sess.run(x)\n",
247 | "sess.close()"
248 | ]
249 | }
250 | ],
251 | "metadata": {
252 | "kernelspec": {
253 | "display_name": "Python 3",
254 | "language": "python",
255 | "name": "python3"
256 | },
257 | "language_info": {
258 | "codemirror_mode": {
259 | "name": "ipython",
260 | "version": 3
261 | },
262 | "file_extension": ".py",
263 | "mimetype": "text/x-python",
264 | "name": "python",
265 | "nbconvert_exporter": "python",
266 | "pygments_lexer": "ipython3",
267 | "version": "3.5.2"
268 | }
269 | },
270 | "nbformat": 4,
271 | "nbformat_minor": 1
272 | }
273 |
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/cs20si/graphs/events.out.tfevents.1508077168.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/Tensorflow/cs20si/graphs/events.out.tfevents.1508077168.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/cs20si/graphs/events.out.tfevents.1508077664.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/Tensorflow/cs20si/graphs/events.out.tfevents.1508077664.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/cs20si/graphs/events.out.tfevents.1512490021.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/Tensorflow/cs20si/graphs/events.out.tfevents.1512490021.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/cs20si/lecture_1:graphsandsessions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# **TENSORFLOW**\n",
8 | "\n",
9 | "* Open sourced by Google \n",
10 | "* Pytorch by Facebook\n",
11 | "* Wavenet-> Text to speech network\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "**Graphs and Sessions**\n",
19 | "\n",
20 | "Tensorflow separates the definition and execution of a model. We first define the graph. Then we use a session to execute the graph.\n",
21 | "\n",
22 | "**Tensor -> n dim matrix**\n"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 1,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "name": "stdout",
32 | "output_type": "stream",
33 | "text": [
34 | "Tensor(\"Add:0\", shape=(), dtype=int32)\n"
35 | ]
36 | }
37 | ],
38 | "source": [
39 | "import tensorflow as tf\n",
40 | "\n",
41 | "a = tf.add(2, 3)\n",
42 | "\n",
43 | "print(a)"
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {},
49 | "source": [
50 | "When we print(a) above we expect 5 as output but it is not so.\n",
51 | "This is because we are just creating the graph. \n",
52 | "\n",
53 | "Here a node called \"Add\" has been created. The inputs are scalars, i.e. 2 and 3, so zero-dimensional. Hence the shape is (). The data type is int32"
54 | ]
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {},
59 | "source": [
60 | "**Tensorboard** ->Allows us to visualise the graph defined\n",
61 | "\n",
62 | "* Nodes : are the operations, variables, constants etc\n",
63 | "* Tensor values on edges\n",
64 | "\n",
65 | "So the above statement gives a node for the addition operation with 2 edges going into it, carrying the values 2 and 3"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "**To get the value of a** -> Create a session\n"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 2,
78 | "metadata": {
79 | "collapsed": true
80 | },
81 | "outputs": [],
82 | "source": [
83 | "sess = tf.Session()"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 3,
89 | "metadata": {},
90 | "outputs": [
91 | {
92 | "name": "stdout",
93 | "output_type": "stream",
94 | "text": [
95 | "5\n"
96 | ]
97 | }
98 | ],
99 | "source": [
100 | "print(sess.run(a)) # runs the session\n",
101 | "\n",
102 | "# tf.Session(fetches, feed_dict=None, options=None, run_metadata=None)\n"
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "metadata": {},
108 | "source": [
109 | "Inside sess.run(), we pass either the whole graph or a specific node as the argument.\n",
110 | "**If you want to compute the value of multiple nodes, provide them as a list**\n",
111 | "\n",
112 | "\n",
113 | "\n",
114 | "eg:\n"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": null,
120 | "metadata": {
121 | "collapsed": true
122 | },
123 | "outputs": [],
124 | "source": [
125 | "# sess.run([a,b]) # computes the value of a and b"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "\n",
133 | "When we give a as input to parameter of .run : tensorflow looks at the graph defined and computes whatever is necessary to get the value of a. So if many nodes, it will first compute the other nodes necessary to get the value of node a."
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {
140 | "collapsed": true
141 | },
142 | "outputs": [],
143 | "source": [
144 | "sess.close() # closes the session\n"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "**Instead of explicitly closing the session, we can use Python's with statement, like in the case of files**\n"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 5,
157 | "metadata": {},
158 | "outputs": [
159 | {
160 | "name": "stdout",
161 | "output_type": "stream",
162 | "text": [
163 | "5\n"
164 | ]
165 | }
166 | ],
167 | "source": [
168 | "with tf.Session() as sess:\n",
169 | " print(sess.run(a))"
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {},
175 | "source": [
176 | "## **Now lets consider new example**\n"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 6,
182 | "metadata": {},
183 | "outputs": [
184 | {
185 | "name": "stdout",
186 | "output_type": "stream",
187 | "text": [
188 | "15625\n"
189 | ]
190 | }
191 | ],
192 | "source": [
193 | "x = 2\n",
194 | "y = 3\n",
195 | "op1 = tf.add(x,y)\n",
196 | "op2 = tf.multiply(x,y)\n",
197 | "op3 = tf.pow(op1,op2)\n",
198 | "\n",
199 | "with tf.Session() as sess:\n",
200 | " print(sess.run(op3))"
201 | ]
202 | },
203 | {
204 | "cell_type": "markdown",
205 | "metadata": {},
206 | "source": [
207 | "The above will first compute op1 and op2 and then finally op3, even though we only want the value of op3. \n",
208 | "\n",
209 | "**The graph is created when defining not in the tf.Session()**\n",
210 | "\n",
211 | "The important thing to note is **not everything that is defined is calculated. Only those portion of the graph is calculated which is required by the sess.run(). This is the power of Graph dataflow used in tensorflow**"
212 | ]
213 | },
214 | {
215 | "cell_type": "markdown",
216 | "metadata": {},
217 | "source": [
218 | "* Since tf is based on graph, different nodes could be calculated across different CPUs/GPUs"
219 | ]
220 | },
221 | {
222 | "cell_type": "markdown",
223 | "metadata": {},
224 | "source": [
225 | "## What if we want more graphs?\n",
226 | "\n",
227 | "**Ans)** Not recommended. When we start to create our graph, tensorflow already has a default graph; it just puts our nodes/values in that graph. **You should use disconnected subgraphs within the default graph instead.**\n",
228 | "\n",
229 | "If you really want to....."
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 5,
235 | "metadata": {
236 | "collapsed": true
237 | },
238 | "outputs": [],
239 | "source": [
240 | "g = tf.Graph() # if you want something other than the default graph\n",
241 | "\n",
242 | "with g.as_default(): # making it the default graph\n",
243 | " x = tf.add(2, 3)\n",
244 | "\n",
245 | "sess = tf.Session(graph=g) # need to pass the graph..\n",
246 | "sess.run(x)\n",
247 | "sess.close()"
248 | ]
249 | }
250 | ],
251 | "metadata": {
252 | "kernelspec": {
253 | "display_name": "Python 3",
254 | "language": "python",
255 | "name": "python3"
256 | },
257 | "language_info": {
258 | "codemirror_mode": {
259 | "name": "ipython",
260 | "version": 3
261 | },
262 | "file_extension": ".py",
263 | "mimetype": "text/x-python",
264 | "name": "python",
265 | "nbconvert_exporter": "python",
266 | "pygments_lexer": "ipython3",
267 | "version": "3.5.2"
268 | }
269 | },
270 | "nbformat": 4,
271 | "nbformat_minor": 1
272 | }
273 |
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/lexicon-2500-2638.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/Tensorflow/lexicon-2500-2638.pickle
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/lexicon.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/Tensorflow/lexicon.pickle
--------------------------------------------------------------------------------
/Deep_Learning/Tensorflow/model.ckpt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/Tensorflow/model.ckpt
--------------------------------------------------------------------------------
/Deep_Learning/chatbot/1_intro.py:
--------------------------------------------------------------------------------
1 | import keras
2 |
--------------------------------------------------------------------------------
/Deep_Learning/fast.ai/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 2
6 | }
7 |
--------------------------------------------------------------------------------
/Deep_Learning/fast.ai/.ipynb_checkpoints/lesson1-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Introduction\n",
8 | "\n",
9 | "Neural networks are universal approximating machines. To fit the parameters for the function, we use Gradient Descent. \n",
10 | "\n",
11 | "We are able to make this function fast with the help of a GPU, since GPUs are mostly based on matrix operations (on pixels), which is also what we want for deep learning. We need an NVIDIA GPU as they support CUDA. Amazon provides GPU instances called P2 instances.\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "To run the instances we use AWS cli. For this use the alias.sh file in the fast-ai course in documents folder and go to setup. "
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "# To run the aws-alias.sh\n",
28 | "# | source aws-alias.sh\n",
29 | "# | aws-get-p2\n",
30 | "# this line will get the instance id for the p2 instance and save it in variable \n",
31 | "#`instanceId`\n",
32 | "# to start the instance\n",
33 | "# | aws-start. This will start the instance and queries for the ip and prints it out\n",
34 | "# | aws-ssh. This will then ssh into that instance\n"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "Now we are inside our aws instance. AWS has something called an AMI (Amazon Machine Image). It is basically a snapshot of the computer at a particular instant in time. We can start our instance using a copy of that snapshot. In the script given, there was an AMI which already had all the things installed."
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "When we type `jupyter notebook` in the aws ssh session, it returns the port, which we append to our ip address obtained from `aws-start`. "
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {},
54 | "source": [
55 | "2 notebooks can be run in parallel, completely separate from each other. \n",
56 | "\n",
57 | "Now, to prevent typing **source aws-alias.sh** every time, add it to **.bashrc**, which contains a set of commands that bash runs on startup\n",
58 | "\n",
59 | "# Getting started with Dogs vs Cats\n",
60 | "\n",
61 | "To get started with the dogs vs cats, first we run the p2 instance of aws and then **wget** to get the .ipynb file and the data.\n",
62 | "\n",
63 | "**The structuring of the files in data is very important**\n",
64 | "* keras expects that each class of the image be in separate folder\n",
65 | "\n"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "there are about 12000 images each of dogs and cats in the train folder\n"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "**Always test the model on a small sample first**\n"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": null,
85 | "metadata": {
86 | "collapsed": true
87 | },
88 | "outputs": [],
89 | "source": [
90 | "%matplotlib inline"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | "Tells the jupyter notebook to display all the matplotlib graphs"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": 3,
103 | "metadata": {
104 | "collapsed": true
105 | },
106 | "outputs": [],
107 | "source": [
108 | "#path = \"data/dogscats/\"\n",
109 | "path = \"data/dogscats/sample/\""
110 | ]
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {},
115 | "source": [
116 | "Tells the path to use"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
123 | "We will be using anaconda to install stuff. To install via anaconda ** conda install thing**. Sometimes conda installer not available, we will use pip.\n",
124 | "\n",
125 | "Now we would be using a pretrained neural network\n",
126 | "* V5516 -2014 winner\n",
127 | "* Inception-2015 winner\n",
128 | "* Resnet - 2016 winner\n",
129 | "We use VGG as it is the last \"simple\" model. Our script for VGG16 has already been downloaded.\n",
130 | "\n",
131 | "\n",
132 | "Now Keras runs on top of Theano/Tensorflow which convert our python code to CUDNN based code. Theano is sitting on top of CUDNN (CUDA deep neural network library).\n",
133 | "\n",
134 | "Tensorflow-> works well for multi GPUs\n",
135 | "Keras can easily be configured to use Tensorflow as backend instead of theano.\n",
136 | "\n",
137 | "To do that"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": 5,
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "#cd ~/.keras/\n",
147 | "#vim keras.json\n",
148 | "# here change the backend to tensorflow\n",
149 | "# also change the th to tf"
150 | ]
151 | },
152 | {
153 | "cell_type": "markdown",
154 | "metadata": {},
155 | "source": [
156 | "To change theano to use cpu instead of gpu"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": 6,
162 | "metadata": {
163 | "collapsed": true
164 | },
165 | "outputs": [],
166 | "source": [
167 | "#vim ~/.theanorc\n",
168 | "#Here change the cpu to gpu or vice versa"
169 | ]
170 | },
171 | {
172 | "cell_type": "markdown",
173 | "metadata": {},
174 | "source": [
175 | "In dnn, we train in batches. We can't do all at time as it may be too large for the GPU's memory. "
176 | ]
177 | }
178 | ],
179 | "metadata": {
180 | "kernelspec": {
181 | "display_name": "Python 3",
182 | "language": "python",
183 | "name": "python3"
184 | },
185 | "language_info": {
186 | "codemirror_mode": {
187 | "name": "ipython",
188 | "version": 3
189 | },
190 | "file_extension": ".py",
191 | "mimetype": "text/x-python",
192 | "name": "python",
193 | "nbconvert_exporter": "python",
194 | "pygments_lexer": "ipython3",
195 | "version": "3.5.2"
196 | }
197 | },
198 | "nbformat": 4,
199 | "nbformat_minor": 2
200 | }
201 |
--------------------------------------------------------------------------------
/Deep_Learning/fast.ai/Untitled.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stdout",
10 | "output_type": "stream",
11 | "text": [
12 | "[name: \"/cpu:0\"\n",
13 | "device_type: \"CPU\"\n",
14 | "memory_limit: 268435456\n",
15 | "locality {\n",
16 | "}\n",
17 | "incarnation: 13250090349658849473\n",
18 | ", name: \"/gpu:0\"\n",
19 | "device_type: \"GPU\"\n",
20 | "memory_limit: 150470656\n",
21 | "locality {\n",
22 | " bus_id: 1\n",
23 | "}\n",
24 | "incarnation: 15036183153429570801\n",
25 | "physical_device_desc: \"device: 0, name: GeForce GTX 850M, pci bus id: 0000:0a:00.0\"\n",
26 | "]\n"
27 | ]
28 | }
29 | ],
30 | "source": [
31 | "from tensorflow.python.client import device_lib\n",
32 | "print(device_lib.list_local_devices())"
33 | ]
34 | }
35 | ],
36 | "metadata": {
37 | "kernelspec": {
38 | "display_name": "Python 3",
39 | "language": "python",
40 | "name": "python3"
41 | },
42 | "language_info": {
43 | "codemirror_mode": {
44 | "name": "ipython",
45 | "version": 3
46 | },
47 | "file_extension": ".py",
48 | "mimetype": "text/x-python",
49 | "name": "python",
50 | "nbconvert_exporter": "python",
51 | "pygments_lexer": "ipython3",
52 | "version": "3.5.2"
53 | }
54 | },
55 | "nbformat": 4,
56 | "nbformat_minor": 2
57 | }
58 |
--------------------------------------------------------------------------------
/Deep_Learning/fast.ai/__pycache__/utils.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/fast.ai/__pycache__/utils.cpython-35.pyc
--------------------------------------------------------------------------------
/Deep_Learning/fast.ai/__pycache__/vgg16.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/fast.ai/__pycache__/vgg16.cpython-35.pyc
--------------------------------------------------------------------------------
/Deep_Learning/fast.ai/__pycache__/vgg16bn.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/fast.ai/__pycache__/vgg16bn.cpython-35.pyc
--------------------------------------------------------------------------------
/Deep_Learning/fast.ai/lesson1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Introduction\n",
8 | "\n",
9 | "Neural networks are universal approximating machines. To fit the parameters for the function, we use Gradient Descent. \n",
10 | "\n",
11 | "We are able to make this function fast with the help of GPU since GPUs are based on mostly based on matrix operations (on pixels) which is also what we want for Deep learning. We need NVIDIA GPU as they support CUDA. Amazon provides us GPU instances called P2 instances.\n"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "To run the instances we use AWS cli. For this use the alias.sh file in the fast-ai course in documents folder and go to setup. "
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {
25 | "collapsed": true
26 | },
27 | "outputs": [],
28 | "source": [
29 | "# To run the aws-alias.sh\n",
30 | "# | source aws-alias.sh\n",
31 | "# | aws-get-p2\n",
32 | "# this line will get the instance id for the p2 instance and save it in variable \n",
33 | "#`instanceId`\n",
34 | "# to start the instance\n",
35 | "# | aws-start. This will start the instance and queries for the ip and prints it out\n",
36 | "# | aws-ssh. This will then ssh into that instance\n"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "Now we are inside our aws instance. AWS has something called AMI (Amazon Machine Images). It is basically a snapshot of the computer at a particular instance of time. We can start our instance using a copy of that snapshot. In the script given, there was an AMI which already had all the things installed."
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {},
49 | "source": [
50 | "When we type `jupyter notebook` in the aws ssh, it returns us the port which we should append with our ip address obtained from 'aws-start`. "
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "2 notebooks can be run in parallel compeletely separate from each other. \n",
58 | "\n",
59 | "Now to prevent typing **source aws-alias.sh** everytime, add it to **.bashrc** which contains a set of commands that bash runs before starting\n",
60 | "\n",
61 | "# Getting started with Dogs vs Cats\n",
62 | "\n",
63 | "To get started with the dogs vs cats, first we run the p2 instance of aws and then **wget** to get the .ipynb file and the data.\n",
64 | "\n",
65 | "**The structuring of the files in data is very important**\n",
66 | "* keras expects that each class of the image be in separate folder\n",
67 | "\n"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "there are about 12000 images each of dogs and cats in the train folder\n"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "**Always test the model on a small sample first**\n"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {
88 | "collapsed": true
89 | },
90 | "outputs": [],
91 | "source": [
92 | "%matplotlib inline"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {},
98 | "source": [
99 | "Tells the jupyter notebook to display all the matplotlib graphs"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 3,
105 | "metadata": {
106 | "collapsed": true
107 | },
108 | "outputs": [],
109 | "source": [
110 | "#path = \"data/dogscats/\"\n",
111 | "path = \"/home/Documents/dogscats/sample/\""
112 | ]
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {},
117 | "source": [
118 | "Tells the path to use"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {},
124 | "source": [
125 | "We will be using anaconda to install stuff. To install via anaconda ** conda install thing**. Sometimes conda installer not available, we will use pip.\n",
126 | "\n",
127 | "Now we would be using a pretrained neural network\n",
128 | "* V5516 -2014 winner\n",
129 | "* Inception-2015 winner\n",
130 | "* Resnet - 2016 winner\n",
131 | "We use VGG as it is the last \"simple\" model. Our script for VGG16 has already been downloaded.\n",
132 | "\n",
133 | "\n",
134 | "Now Keras runs on top of Theano/Tensorflow which convert our python code to CUDNN based code. Theano is sitting on top of CUDNN (CUDA deep neural network library).\n",
135 | "\n",
136 | "Tensorflow-> works well for multi GPUs\n",
137 | "Keras can easily be configured to use Tensorflow as backend instead of theano.\n",
138 | "\n",
139 | "To do that"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 5,
145 | "metadata": {
146 | "collapsed": true
147 | },
148 | "outputs": [],
149 | "source": [
150 | "#cd ~/.keras/\n",
151 | "#vim keras.json\n",
152 | "# here change the backend to tensorflow\n",
153 | "# also change the th to tf"
154 | ]
155 | },
156 | {
157 | "cell_type": "markdown",
158 | "metadata": {},
159 | "source": [
160 | "To change theano to use cpu instead of gpu"
161 | ]
162 | },
163 | {
164 | "cell_type": "code",
165 | "execution_count": 6,
166 | "metadata": {
167 | "collapsed": true
168 | },
169 | "outputs": [],
170 | "source": [
171 | "#vim ~/.theanorc\n",
172 | "#Here change the cpu to gpu or vice versa"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "In dnn, we train in batches. We can't do all at time as it may be too large for the GPU's memory. \n",
180 | "\n",
181 | "**NOTE** the password of the notebook is dl_course"
182 | ]
183 | }
184 | ],
185 | "metadata": {
186 | "kernelspec": {
187 | "display_name": "Python 3",
188 | "language": "python",
189 | "name": "python3"
190 | },
191 | "language_info": {
192 | "codemirror_mode": {
193 | "name": "ipython",
194 | "version": 3
195 | },
196 | "file_extension": ".py",
197 | "mimetype": "text/x-python",
198 | "name": "python",
199 | "nbconvert_exporter": "python",
200 | "pygments_lexer": "ipython3",
201 | "version": "3.5.2"
202 | }
203 | },
204 | "nbformat": 4,
205 | "nbformat_minor": 2
206 | }
207 |
--------------------------------------------------------------------------------
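A minimal sketch of the keras.json backend switch described in the notebook above, done from Python rather than vim. The key names differ across Keras versions (Keras 1 uses "image_dim_ordering" with values "th"/"tf", Keras 2 uses "image_data_format"), so treat the values here as illustrative.

import json
import os

# sketch: switch the Keras 1 backend from Theano to TensorFlow by rewriting ~/.keras/keras.json
cfg_path = os.path.expanduser('~/.keras/keras.json')
with open(cfg_path) as f:
    cfg = json.load(f)

cfg['backend'] = 'tensorflow'      # was 'theano'
cfg['image_dim_ordering'] = 'tf'   # the "th" -> "tf" change mentioned in the notebook (Keras 1 key)

with open(cfg_path, 'w') as f:
    json.dump(cfg, f, indent=4)
--------------------------------------------------------------------------------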
/Deep_Learning/fast.ai/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import division,print_function
2 | import math, os, json, sys, re
3 |
4 | # import cPickle as pickle # Python 2
5 | import pickle # Python3
6 |
7 | from glob import glob
8 | import numpy as np
9 | from matplotlib import pyplot as plt
10 | from operator import itemgetter, attrgetter, methodcaller
11 | from collections import OrderedDict
12 | import itertools
13 | from itertools import chain
14 |
15 | import pandas as pd
16 | import PIL
17 | from PIL import Image
18 | from numpy.random import random, permutation, randn, normal, uniform, choice
19 | from numpy import newaxis
20 | import scipy
21 | from scipy import misc, ndimage
22 | from scipy.ndimage.interpolation import zoom
23 | from scipy.ndimage import imread
24 | from sklearn.metrics import confusion_matrix
25 | import bcolz
26 | from sklearn.preprocessing import OneHotEncoder
27 | from sklearn.manifold import TSNE
28 |
29 | from IPython.lib.display import FileLink
30 |
31 | import theano
32 | from theano import shared, tensor as T
33 | from theano.tensor.nnet import conv2d, nnet
34 | from theano.tensor.signal import pool
35 |
36 | import keras
37 | from keras import backend as K
38 | from keras.utils.data_utils import get_file
39 | from keras.utils import np_utils
40 | from keras.utils.np_utils import to_categorical
41 | from keras.models import Sequential, Model
42 | from keras.layers import Input, Embedding, Reshape, merge, LSTM, Bidirectional
43 | from keras.layers import SpatialDropout1D, Concatenate # Keras2
44 |
45 | from keras.layers import TimeDistributed, Activation, SimpleRNN, GRU
46 | from keras.layers.core import Flatten, Dense, Dropout, Lambda
47 |
48 | # from keras.regularizers import l2, activity_l2, l1, activity_l1 # Keras1
49 | from keras.regularizers import l2, l1 # Keras2
50 |
51 | from keras.layers.normalization import BatchNormalization
52 | from keras.optimizers import SGD, RMSprop, Adam
53 |
54 | # from keras.utils.layer_utils import layer_from_config # Keras1
55 | from keras.layers import deserialize # Keras 2
56 | from keras.layers.merge import dot, add, concatenate # Keras2
57 | from keras.metrics import categorical_crossentropy, categorical_accuracy
58 | from keras.layers.convolutional import *
59 | from keras.preprocessing import image, sequence
60 | from keras.preprocessing.text import Tokenizer
61 |
62 | from vgg16 import *
63 | from vgg16bn import *
64 | np.set_printoptions(precision=4, linewidth=100)
65 |
66 |
67 | to_bw = np.array([0.299, 0.587, 0.114])
68 |
69 | def gray(img):
70 | if K.image_dim_ordering() == 'tf':
71 | return np.rollaxis(img, 0, 1).dot(to_bw)
72 | else:
73 | return np.rollaxis(img, 0, 3).dot(to_bw)
74 |
75 | def to_plot(img):
76 | if K.image_dim_ordering() == 'tf':
77 | return np.rollaxis(img, 0, 1).astype(np.uint8)
78 | else:
79 | return np.rollaxis(img, 0, 3).astype(np.uint8)
80 |
81 | def plot(img):
82 | plt.imshow(to_plot(img))
83 |
84 |
85 | def floor(x):
86 | return int(math.floor(x))
87 | def ceil(x):
88 | return int(math.ceil(x))
89 |
90 | def plots(ims, figsize=(12,6), rows=1, interp=False, titles=None):
91 | if type(ims[0]) is np.ndarray:
92 | ims = np.array(ims).astype(np.uint8)
93 | if (ims.shape[-1] != 3):
94 | ims = ims.transpose((0,2,3,1))
95 | f = plt.figure(figsize=figsize)
96 | for i in range(len(ims)):
97 | sp = f.add_subplot(rows, len(ims)//rows, i+1)
98 | sp.axis('Off')
99 | if titles is not None:
100 | sp.set_title(titles[i], fontsize=16)
101 | plt.imshow(ims[i], interpolation=None if interp else 'none')
102 |
103 |
104 | def do_clip(arr, mx):
105 | clipped = np.clip(arr, (1-mx)/1, mx)
106 | return clipped/clipped.sum(axis=1)[:, np.newaxis]
107 |
108 |
109 | def get_batches(dirname, gen=image.ImageDataGenerator(), shuffle=True, batch_size=4, class_mode='categorical',
110 | target_size=(224,224)):
111 | return gen.flow_from_directory(dirname, target_size=target_size,
112 | class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)
113 |
114 |
115 | def onehot(x):
116 | return to_categorical(x)
117 |
118 |
119 | def wrap_config(layer):
120 | return {'class_name': layer.__class__.__name__, 'config': layer.get_config()}
121 |
122 |
123 | def copy_layer(layer): return deserialize(wrap_config(layer)) # Keras2
124 |
125 |
126 | def copy_layers(layers): return [copy_layer(layer) for layer in layers]
127 |
128 |
129 | def copy_weights(from_layers, to_layers):
130 | for from_layer,to_layer in zip(from_layers, to_layers):
131 | to_layer.set_weights(from_layer.get_weights())
132 |
133 |
134 | def copy_model(m):
135 | res = Sequential(copy_layers(m.layers))
136 | copy_weights(m.layers, res.layers)
137 | return res
138 |
139 |
140 | def insert_layer(model, new_layer, index):
141 | res = Sequential()
142 | for i,layer in enumerate(model.layers):
143 | if i==index: res.add(new_layer)
144 | copied = deserialize(wrap_config(layer)) # Keras2
145 | res.add(copied)
146 | copied.set_weights(layer.get_weights())
147 | return res
148 |
149 |
150 | def adjust_dropout(weights, prev_p, new_p):
151 | scal = (1-prev_p)/(1-new_p)
152 | return [o*scal for o in weights]
153 |
154 |
155 | def get_data(path, target_size=(224,224)):
156 | batches = get_batches(path, shuffle=False, batch_size=1, class_mode=None, target_size=target_size)
157 | return np.concatenate([batches.next() for i in range(batches.samples)]) # Keras2
158 |
159 |
160 | def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
161 | """
162 | This function prints and plots the confusion matrix.
163 | Normalization can be applied by setting `normalize=True`.
164 | (This function is copied from the scikit docs.)
165 | """
166 | plt.figure()
167 | plt.imshow(cm, interpolation='nearest', cmap=cmap)
168 | plt.title(title)
169 | plt.colorbar()
170 | tick_marks = np.arange(len(classes))
171 | plt.xticks(tick_marks, classes, rotation=45)
172 | plt.yticks(tick_marks, classes)
173 |
174 | if normalize:
175 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
176 | print(cm)
177 | thresh = cm.max() / 2.
178 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
179 | plt.text(j, i, cm[i, j], horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")
180 |
181 | plt.tight_layout()
182 | plt.ylabel('True label')
183 | plt.xlabel('Predicted label')
184 |
185 |
186 | def save_array(fname, arr):
187 | c=bcolz.carray(arr, rootdir=fname, mode='w')
188 | c.flush()
189 |
190 |
191 | def load_array(fname):
192 | return bcolz.open(fname)[:]
193 |
194 |
195 | def mk_size(img, r2c):
196 | r,c,_ = img.shape
197 | curr_r2c = r/c
198 | new_r, new_c = r,c
199 | if r2c>curr_r2c:
200 | new_r = floor(c*r2c)
201 | else:
202 | new_c = floor(r/r2c)
203 | arr = np.zeros((new_r, new_c, 3), dtype=np.float32)
204 | r2=(new_r-r)//2
205 | c2=(new_c-c)//2
206 | arr[floor(r2):floor(r2)+r,floor(c2):floor(c2)+c] = img
207 | return arr
208 |
209 |
210 | def mk_square(img):
211 | x,y,_ = img.shape
212 | maxs = max(img.shape[:2])
213 | y2=(maxs-y)//2
214 | x2=(maxs-x)//2
215 | arr = np.zeros((maxs,maxs,3), dtype=np.float32)
216 | arr[floor(x2):floor(x2)+x,floor(y2):floor(y2)+y] = img
217 | return arr
218 |
219 |
220 | def vgg_ft(out_dim):
221 | vgg = Vgg16()
222 | vgg.ft(out_dim)
223 | model = vgg.model
224 | return model
225 |
226 | def vgg_ft_bn(out_dim):
227 | vgg = Vgg16BN()
228 | vgg.ft(out_dim)
229 | model = vgg.model
230 | return model
231 |
232 |
233 | def get_classes(path):
234 | batches = get_batches(path+'train', shuffle=False, batch_size=1)
235 | val_batches = get_batches(path+'valid', shuffle=False, batch_size=1)
236 | test_batches = get_batches(path+'test', shuffle=False, batch_size=1)
237 | return (val_batches.classes, batches.classes, onehot(val_batches.classes), onehot(batches.classes),
238 | val_batches.filenames, batches.filenames, test_batches.filenames)
239 |
240 |
241 | def split_at(model, layer_type):
242 | layers = model.layers
243 | layer_idx = [index for index,layer in enumerate(layers)
244 | if type(layer) is layer_type][-1]
245 | return layers[:layer_idx+1], layers[layer_idx+1:]
246 |
247 |
248 | class MixIterator(object):
249 | def __init__(self, iters):
250 | self.iters = iters
251 | self.multi = type(iters) is list
252 | if self.multi:
253 | self.N = sum([it[0].N for it in self.iters])
254 | else:
255 | self.N = sum([it.N for it in self.iters])
256 |
257 | def reset(self):
258 | for it in self.iters: it.reset()
259 |
260 | def __iter__(self):
261 | return self
262 |
263 | def next(self, *args, **kwargs):
264 | if self.multi:
265 | nexts = [[next(it) for it in o] for o in self.iters]
266 | n0 = np.concatenate([n[0] for n in nexts])
267 | n1 = np.concatenate([n[1] for n in nexts])
268 | return (n0, n1)
269 | else:
270 | nexts = [next(it) for it in self.iters]
271 | n0 = np.concatenate([n[0] for n in nexts])
272 | n1 = np.concatenate([n[1] for n in nexts])
273 | return (n0, n1)
274 |
275 |
--------------------------------------------------------------------------------
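A minimal usage sketch for the helpers defined in utils.py above; the path is hypothetical and assumes the dogs-vs-cats layout with one sub-folder per class.

from utils import get_batches, plots

# fetch one small batch from a class-per-folder directory and plot it with the helpers above
batches = get_batches('data/dogscats/sample/train', batch_size=4)
imgs, labels = next(batches)
plots(imgs, titles=labels)
--------------------------------------------------------------------------------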
/Deep_Learning/fast.ai/vgg16.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, print_function
2 |
3 | import os, json
4 | from glob import glob
5 | import numpy as np
6 | from scipy import misc, ndimage
7 | from scipy.ndimage.interpolation import zoom
8 |
9 | from keras import backend as K
10 | from keras.layers.normalization import BatchNormalization
11 | from keras.utils.data_utils import get_file
12 | from keras.models import Sequential
13 | from keras.layers.core import Flatten, Dense, Dropout, Lambda
14 | from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D # Conv2D: Keras2
15 | from keras.layers.pooling import GlobalAveragePooling2D
16 | from keras.optimizers import SGD, RMSprop, Adam
17 | from keras.preprocessing import image
18 |
19 | K.set_image_dim_ordering('th')
20 | vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32).reshape((3,1,1))
21 | def vgg_preprocess(x):
22 | x = x - vgg_mean
23 | return x[:, ::-1] # reverse axis rgb->bgr
24 |
25 |
26 | class Vgg16():
27 | """The VGG 16 Imagenet model"""
28 |
29 |
30 | def __init__(self):
31 | self.FILE_PATH = 'http://files.fast.ai/models/'
32 | self.create()
33 | self.get_classes()
34 |
35 |
36 | def get_classes(self):
37 | fname = 'imagenet_class_index.json'
38 | fpath = get_file(fname, self.FILE_PATH+fname, cache_subdir='models')
39 | with open(fpath) as f:
40 | class_dict = json.load(f)
41 | self.classes = [class_dict[str(i)][1] for i in range(len(class_dict))]
42 |
43 | def predict(self, imgs, details=False):
44 | all_preds = self.model.predict(imgs)
45 | idxs = np.argmax(all_preds, axis=1)
46 | preds = [all_preds[i, idxs[i]] for i in range(len(idxs))]
47 | classes = [self.classes[idx] for idx in idxs]
48 | return np.array(preds), idxs, classes
49 |
50 |
51 | def ConvBlock(self, layers, filters):
52 | model = self.model
53 | for i in range(layers):
54 | model.add(ZeroPadding2D((1, 1)))
55 | model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu')) # Keras2
56 | model.add(MaxPooling2D((2, 2), strides=(2, 2)))
57 |
58 |
59 | def FCBlock(self):
60 | model = self.model
61 | model.add(Dense(4096, activation='relu'))
62 | model.add(Dropout(0.5))
63 |
64 |
65 | def create(self):
66 | model = self.model = Sequential()
67 | model.add(Lambda(vgg_preprocess, input_shape=(3,224,224), output_shape=(3,224,224)))
68 |
69 | self.ConvBlock(2, 64)
70 | self.ConvBlock(2, 128)
71 | self.ConvBlock(3, 256)
72 | self.ConvBlock(3, 512)
73 | self.ConvBlock(3, 512)
74 |
75 | model.add(Flatten())
76 | self.FCBlock()
77 | self.FCBlock()
78 | model.add(Dense(1000, activation='softmax'))
79 |
80 | fname = 'vgg16.h5'
81 | model.load_weights(get_file(fname, self.FILE_PATH+fname, cache_subdir='models'))
82 |
83 |
84 | def get_batches(self, path, gen=image.ImageDataGenerator(), shuffle=True, batch_size=8, class_mode='categorical'):
85 | return gen.flow_from_directory(path, target_size=(224,224),
86 | class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)
87 |
88 |
89 | def ft(self, num):
90 | model = self.model
91 | model.pop()
92 | for layer in model.layers: layer.trainable=False
93 | model.add(Dense(num, activation='softmax'))
94 | self.compile()
95 |
96 | def finetune(self, batches):
97 | self.ft(batches.num_class) # Keras2
98 | classes = list(iter(batches.class_indices))
99 | for c in batches.class_indices:
100 | classes[batches.class_indices[c]] = c
101 | self.classes = classes
102 |
103 |
104 | def compile(self, lr=0.001):
105 | self.model.compile(optimizer=Adam(lr=lr),
106 | loss='categorical_crossentropy', metrics=['accuracy'])
107 |
108 |
109 | # Keras2
110 | def fit_data(self, trn, labels, val, val_labels, nb_epoch=1, batch_size=64):
111 | self.model.fit(trn, labels, epochs=nb_epoch,
112 | validation_data=(val, val_labels), batch_size=batch_size)
113 |
114 |
115 | # Keras2
116 | def fit(self, batches, val_batches, batch_size, nb_epoch=1):
117 | self.model.fit_generator(batches, steps_per_epoch=int(np.ceil(batches.samples/batch_size)), epochs=nb_epoch,
118 | validation_data=val_batches, validation_steps=int(np.ceil(val_batches.samples/batch_size)))
119 |
120 |
121 | # Keras2
122 | def test(self, path, batch_size=8):
123 | test_batches = self.get_batches(path, shuffle=False, batch_size=batch_size, class_mode=None)
124 | return test_batches, self.model.predict_generator(test_batches, int(np.ceil(test_batches.samples/batch_size)))
125 |
--------------------------------------------------------------------------------
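A minimal fine-tuning sketch using the Vgg16 class above; the path is hypothetical and must contain train/ and valid/ folders with one sub-folder per class.

from vgg16 import Vgg16

path = 'data/dogscats/sample/'   # hypothetical sample directory
batch_size = 16

vgg = Vgg16()
batches = vgg.get_batches(path + 'train', batch_size=batch_size)
val_batches = vgg.get_batches(path + 'valid', batch_size=batch_size)
vgg.finetune(batches)   # replace the 1000-way ImageNet head with one matching our classes
vgg.fit(batches, val_batches, batch_size=batch_size, nb_epoch=1)
--------------------------------------------------------------------------------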
/Deep_Learning/fast.ai/vgg16bn.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, print_function
2 |
3 | import os, json
4 | from glob import glob
5 | import numpy as np
6 | from scipy import misc, ndimage
7 | from scipy.ndimage.interpolation import zoom
8 |
9 | from keras import backend as K
10 | from keras.layers.normalization import BatchNormalization
11 | from keras.utils.data_utils import get_file
12 | from keras.models import Sequential
13 | from keras.layers.core import Flatten, Dense, Dropout, Lambda
14 | from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D # Conv2D: Keras2
15 | from keras.layers.pooling import GlobalAveragePooling2D
16 | from keras.optimizers import SGD, RMSprop, Adam
17 | from keras.preprocessing import image
18 |
19 |
20 | vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32).reshape((3,1,1))
21 | def vgg_preprocess(x):
22 | x = x - vgg_mean
23 | return x[:, ::-1] # reverse axis rgb->bgr
24 |
25 |
26 | class Vgg16BN():
27 | """The VGG 16 Imagenet model with Batch Normalization for the Dense Layers"""
28 |
29 |
30 | def __init__(self, size=(224,224), include_top=True):
31 | self.FILE_PATH = 'http://files.fast.ai/models/'
32 | self.create(size, include_top)
33 | self.get_classes()
34 |
35 |
36 | def get_classes(self):
37 | fname = 'imagenet_class_index.json'
38 | fpath = get_file(fname, self.FILE_PATH+fname, cache_subdir='models')
39 | with open(fpath) as f:
40 | class_dict = json.load(f)
41 | self.classes = [class_dict[str(i)][1] for i in range(len(class_dict))]
42 |
43 | def predict(self, imgs, details=False):
44 | all_preds = self.model.predict(imgs)
45 | idxs = np.argmax(all_preds, axis=1)
46 | preds = [all_preds[i, idxs[i]] for i in range(len(idxs))]
47 | classes = [self.classes[idx] for idx in idxs]
48 | return np.array(preds), idxs, classes
49 |
50 |
51 | def ConvBlock(self, layers, filters):
52 | model = self.model
53 | for i in range(layers):
54 | model.add(ZeroPadding2D((1, 1)))
55 | model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu')) # Keras2
56 | model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
57 |
58 |
59 | def FCBlock(self):
60 | model = self.model
61 | model.add(Dense(4096, activation='relu'))
62 | model.add(BatchNormalization())
63 | model.add(Dropout(0.5))
64 |
65 |
66 | def create(self, size, include_top):
67 | if size != (224,224):
68 | include_top=False
69 |
70 | model = self.model = Sequential()
71 | model.add(Lambda(vgg_preprocess, input_shape=(3,)+size, output_shape=(3,)+size))
72 |
73 | self.ConvBlock(2, 64)
74 | self.ConvBlock(2, 128)
75 | self.ConvBlock(3, 256)
76 | self.ConvBlock(3, 512)
77 | self.ConvBlock(3, 512)
78 |
79 | if not include_top:
80 | fname = 'vgg16_bn_conv.h5'
81 | model.load_weights(get_file(fname, self.FILE_PATH+fname, cache_subdir='models'))
82 | return
83 |
84 | model.add(Flatten())
85 | self.FCBlock()
86 | self.FCBlock()
87 | model.add(Dense(1000, activation='softmax'))
88 |
89 | fname = 'vgg16_bn.h5'
90 | model.load_weights(get_file(fname, self.FILE_PATH+fname, cache_subdir='models'))
91 |
92 |
93 | def get_batches(self, path, gen=image.ImageDataGenerator(), shuffle=True, batch_size=8, class_mode='categorical'):
94 | return gen.flow_from_directory(path, target_size=(224,224),
95 | class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)
96 |
97 |
98 | def ft(self, num):
99 | model = self.model
100 | model.pop()
101 | for layer in model.layers: layer.trainable=False
102 | model.add(Dense(num, activation='softmax'))
103 | self.compile()
104 |
105 | def finetune(self, batches):
106 | model = self.model
107 | model.pop()
108 | for layer in model.layers: layer.trainable=False
109 | model.add(Dense(batches.num_class, activation='softmax')) # Keras2
110 | self.compile()
111 |
112 |
113 | def compile(self, lr=0.001):
114 | self.model.compile(optimizer=Adam(lr=lr),
115 | loss='categorical_crossentropy', metrics=['accuracy'])
116 |
117 |
118 | # Keras2
119 | def fit_data(self, trn, labels, val, val_labels, nb_epoch=1, batch_size=64):
120 | self.model.fit(trn, labels, epochs=nb_epoch,
121 | validation_data=(val, val_labels), batch_size=batch_size)
122 |
123 |
124 | # Keras2
125 | def fit(self, batches, val_batches, batch_size, nb_epoch=1):
126 | self.model.fit_generator(batches, steps_per_epoch=int(np.ceil(batches.samples/batch_size)), epochs=nb_epoch,
127 | validation_data=val_batches, validation_steps=int(np.ceil(val_batches.samples/batch_size)))
128 |
129 |
130 | # Keras2
131 | def test(self, path, batch_size=8):
132 | test_batches = self.get_batches(path, shuffle=False, batch_size=batch_size, class_mode=None)
133 | return test_batches, self.model.predict_generator(test_batches, int(np.ceil(test_batches.samples/batch_size)))
134 |
--------------------------------------------------------------------------------
/Deep_Learning/keras/1_intro.py:
--------------------------------------------------------------------------------
1 | """ Here we will be looking at the sequential model in keras. The Sequential model is a linear stack of layers"""
2 |
3 | from keras.models import Sequential
4 | from keras.layers import Dense, Activation
5 | import keras
6 | import numpy as np
7 |
8 | model = Sequential()
9 | # add new layers using .add
10 |
11 | # Dense implements operation : activation(dot(input,weights)+bias)
12 | model.add(Dense(32, input_dim=100, activation='relu')) # output array is of the shape(*,32)
13 | model.add(Dense(10, activation='softmax')) # output is of the shape (*,10), now we don't need to specify input anymore
14 |
15 | """The model needs to know what input shape it should expect. For this reason, the first layer in a Sequential model
16 | needs to receive information about its input shape. 1) pass input_shape to first layer: It should be a tuple: None
17 | indicates any positive integer may be expected. In input_shape, the batch dimension is not included.
18 |
19 | 2) Some 2D layers, such as Dense, support the specification of their input shape via the argument input_dim,
20 |
21 | 3) If you ever need to specify a fixed batch size for your inputs (this is useful for stateful recurrent networks),
22 | you can pass a batch_size argument to a layer. If you pass both batch_size=32 and input_shape=(6, 8) to a layer,
23 | it will then expect every batch of inputs to have the batch shape (32, 6, 8) """
24 |
25 | # Before training the model it needs to be compiled
26 |
27 | model.compile(optimizer='rmsprop',
28 | loss='categorical_crossentropy',
29 | metrics=['accuracy'])
30 | # Now we train the model
31 | # Keras models are trained on Numpy arrays of input data and labels
32 |
33 | data = np.random.random((1000, 100)) # 1000 rows and 100 cols
34 | labels = np.random.randint(10, size=(1000, 1))  # labels fall into 10 classes, so random integers between 0 and 9, and
35 | # since there are 1000 inputs there are 1000 labels
36 |
37 | # now we need to convert the labels to one hot encoding
38 | one_hot_labels = keras.utils.to_categorical(labels, num_classes=10)
39 |
40 | # Train the model, iterating through the data in batch size of 32
41 | model.fit(data, one_hot_labels, epochs=10, batch_size=32)
42 |
--------------------------------------------------------------------------------
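A small sketch of the point made in the docstring above: for a Dense first layer, input_shape=(100,) and input_dim=100 declare the same thing, and the batch dimension is never part of either.

from keras.models import Sequential
from keras.layers import Dense

# the two models below expect the same input of shape (batch_size, 100)
model_a = Sequential([Dense(32, input_shape=(100,), activation='relu')])
model_b = Sequential([Dense(32, input_dim=100, activation='relu')])
--------------------------------------------------------------------------------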
/Deep_Learning/keras/Image_Augumentation.py:
--------------------------------------------------------------------------------
1 | """ Image Augumentation is the process of taking images of the training set and creating altered versions of the same
2 | image to deal with less training data being available and prevent overfitting.
3 |
4 | we will be using cifar10 dataset available with dataset"""
5 |
6 | # the first thing is to load the cifar10 dataset and format the images to prepare them for CNN. We will also take a look
7 | # at some images to see if it worked
8 |
9 | from __future__ import print_function
10 | import keras
11 | from keras.datasets import cifar10
12 | from keras import backend as K
13 | import matplotlib
14 | from matplotlib import pyplot as plt
15 | import numpy as np
16 |
17 | # input image dimensions
18 | img_row, img_cols = 32, 32
19 |
20 | # the data shuffled and split between train and test sets
21 | (X_train, y_train), (X_test, y_test) = cifar10.load_data() # y_train, y_test are uint8 labels from 0 to 9. 3 is for
22 | # cats and 5 for dogs
23 |
24 | # only look at cats[=3] and dogs[=5]
25 | train_picks = np.ravel(np.logical_or(y_train == 3, y_train == 5)) # np.ravel flattens an array
26 | test_picks = np.ravel(np.logical_or(y_test == 3, y_test == 5))
27 |
28 | y_train = np.array(y_train[train_picks] == 5, dtype=int)
29 | y_test = np.array(y_test[test_picks] == 5, dtype=int)
--------------------------------------------------------------------------------
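The file above stops before the augmentation step it describes; a minimal, self-contained sketch of that step (the generator parameters are illustrative) could look like this.

import numpy as np
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator

(X_train, y_train), _ = cifar10.load_data()
picks = np.ravel(np.logical_or(y_train == 3, y_train == 5))   # keep only cats (3) and dogs (5)
X_train = X_train[picks].astype('float32') / 255.0
y_train = np.array(y_train[picks] == 5, dtype=int)            # dog -> 1, cat -> 0

datagen = ImageDataGenerator(rotation_range=15, width_shift_range=0.1,
                             height_shift_range=0.1, horizontal_flip=True)
x_batch, y_batch = next(datagen.flow(X_train, y_train, batch_size=32))   # one batch of altered images
--------------------------------------------------------------------------------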
/Deep_Learning/keras/image_classifier/2_Image_classifier.py:
--------------------------------------------------------------------------------
1 | """dogs vs cats
2 | """
3 |
4 | from keras.preprocessing.image import ImageDataGenerator  # our images are augmented with random transformations so
5 | # that our model never sees the same picture twice. This helps prevent overfitting
6 | from keras.preprocessing.image import array_to_img, img_to_array, load_img
7 | from keras.models import Sequential, load_model
8 | from keras.layers import Conv2D, MaxPool2D, Dense, Activation, Dropout, Flatten
9 | from keras import backend as K
10 | from PIL import Image
11 | import numpy as np
12 |
13 |
14 | def check_data_augument():
15 | datagen = ImageDataGenerator(rotation_range=40, width_shift_range=0.2, height_shift_range=0.2,
16 | shear_range=0.2, zoom_range=0.2, horizontal_flip=True, fill_mode='nearest')
17 | """rotation_range is a value in degrees (0-180), a range within which to randomly rotate pictures
18 |
19 | width_shift and height_shift are ranges (as a fraction of total width or height) within which to randomly translate
20 | pictures
21 |
22 | vertically or horizontally rescale is a value by which we will multiply the data before any other processing. Our
23 | original images consist in RGB coefficients in the 0-255, but such values would be too high for our models to process
24 | (given a typical learning rate), so we target values between 0 and 1 instead by scaling with a 1/255. factor.
25 |
26 | shear_range is for randomly applying shearing transformations
27 |
28 | zoom_range is for randomly zooming inside pictures
29 |
30 |
31 | horizontal_flip is for randomly flipping half of the images horizontally --relevant when there are no assumptions of
32 |
33 | horizontal assymetry (e.g. real-world pictures). fill_mode is the strategy used for filling in newly created pixels,
34 | which can appear after a rotation or a width/height shift. """
35 |
36 | img = load_img('/home/raghav/Documents/kaggle/dogscats/train/cat.0.jpg')
37 | x = img_to_array(img) # this is a numpy array with shape (3, 150,150)
38 |     x = x.reshape((1,) + x.shape)  # reshapes to (1, 3, 150, 150)
39 |
40 | # .flow generates batches of randomly transformed images and saves the result to preview/ directory
41 | i = 0
42 | for batch in datagen.flow(x, batch_size=1, save_to_dir='/home/raghav/Desktop', save_prefix='cat',
43 | save_format='jpeg'):
44 | i += 1
45 | if i > 20:
46 | break
47 |
48 |
49 | # for small data the number one concern is overfitting. Overfitting happens when a model exposed to too few examples
50 | # learns patterns that do not generalize to new data, i.e. when the model starts using irrelevant features for making
51 | # predictions
52 |
53 | # data augmentation helps, but the generated images are still highly correlated.
54 | # another lever is the entropic capacity of the model, i.e. how many features the model is allowed to store.
55 | # ways to modulate entropic capacity: the number of parameters, e.g. the number of layers and nodes.
56 | # another way is weight regularisation, i.e. encouraging small weights.
57 | # dropout also helps: it prevents a layer from relying on seeing the exact same pattern twice
58 |
59 | if K.image_data_format() == 'channels_first':
60 | input_shape = (3, 150, 150)
61 | else:
62 | input_shape = (150, 150, 3)
63 |
64 | model = Sequential()
65 | model.add(Conv2D(32, kernel_size=(3, 3), input_shape=input_shape)) # no of filters. Input is (batch size, channels,
66 | # rows, cols)
67 | # Output is 4d tensor (batch size, filter, new rows, new cols)
68 | model.add(Activation('relu'))
69 | model.add(MaxPool2D(pool_size=(2, 2)))  # pool size: tuple of 2 integers to downscale. (2, 2) halves the rows and
70 | # cols. Output is a 4d tensor (batch size, channels, rows, cols)
71 |
72 | model.add(Conv2D(32, kernel_size=(3, 3)))
73 | model.add(Activation('relu'))
74 | model.add(MaxPool2D(pool_size=(2, 2)))
75 |
76 | model.add(Conv2D(64, kernel_size=(3, 3)))
77 | model.add(Activation('relu'))
78 | model.add(MaxPool2D(pool_size=(2, 2)))
79 |
80 | # the model so far outputs 3D feature maps (height, width, features)
81 |
82 | model.add(Flatten()) # Earlier there were 64 filters each being a 2d matrix. flattens our 3d feature maps to 1d
83 | # feature maps. Now only 64*row*cols 1d inputs
84 | model.add(Dense(64)) # 64 outputs
85 | model.add(Activation('relu'))  # f(x) = max(0, x), so the output lies in [0, inf); commonly used in hidden layers
86 | model.add(Dropout(0.5))
87 | model.add(Dense(1))
88 | model.add(
89 | Activation('sigmoid')) # for 2 class classification, sigmoid is used. For multiclass, we use softmax. They are
90 | # applied only in the final layer as they give the probability of occurrence of the different classes
91 |
92 | # since binary classifier
93 | model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
94 |
95 | # preparing our data
96 | batch_size = 16
97 |
98 | train_datagen = ImageDataGenerator(rescale=1 / 255, zoom_range=0.2, horizontal_flip=True, shear_range=0.2)
99 | test_datagen = ImageDataGenerator(rescale=1 / 255)
100 |
101 | # this is a generator that will read images from training portion
102 | train_generator = train_datagen.flow_from_directory(directory="/home/raghav/Desktop/data_image/train",
103 | target_size=(150, 150), batch_size=batch_size, class_mode='binary')
104 | # since we are using binary_crossentropy loss, we need binary labels
105 |
106 | # validation generator
107 | validation_generator = train_datagen.flow_from_directory(directory="/home/raghav/Desktop/data_image/validate",
108 | target_size=(150, 150), batch_size=batch_size,
109 | class_mode='binary')
110 | # all images are resized to (150,150)
111 |
112 | model.fit_generator(train_generator, epochs=50, validation_data=validation_generator,
113 | steps_per_epoch=2000 // batch_size,
114 | validation_steps=800 // batch_size)
115 | #model.save_weights('first_try.h5')
116 | """
117 | model.load_weights('first_try.h5')
118 | img = load_img("/home/raghav/Desktop/data_image/test/cat/cat.3000.jpg", target_size=(150, 150))
119 | x = img_to_array(img)
120 | x = np.expand_dims(x, axis=0)
121 | preds = model.predict_classes(x)
122 | prob = model.predict_proba(x)
123 | print(preds, prob)
124 | if preds:
125 | print("Dog")
126 | else:
127 | print("cat")"""
--------------------------------------------------------------------------------
/Deep_Learning/keras/image_classifier/2_image_classifier_only_code.py:
--------------------------------------------------------------------------------
1 | """dogs vs cats
2 | """
3 |
4 | from keras.preprocessing.image import ImageDataGenerator
5 | from keras.models import Sequential
6 | from keras.layers import Conv2D, MaxPool2D, Dense, Activation, Dropout, Flatten
7 | from keras import backend as K
8 |
9 | if K.image_data_format() == 'channels_first':
10 | input_shape = (3, 150, 150)
11 | else:
12 | input_shape = (150, 150, 3) # 3 because RGB
13 |
14 | model = Sequential()
15 | model.add(Conv2D(32, kernel_size=(3, 3), input_shape=input_shape))
16 | model.add(Activation('relu'))
17 | model.add(MaxPool2D(pool_size=(2, 2)))
18 |
19 | model.add(Conv2D(32, kernel_size=(3, 3)))
20 | model.add(Activation('relu'))
21 | model.add(MaxPool2D(pool_size=(2, 2)))
22 |
23 | model.add(Conv2D(64, kernel_size=(3, 3)))
24 | model.add(Activation('relu'))
25 | model.add(MaxPool2D(pool_size=(2, 2)))
26 |
27 | model.add(Flatten())
28 | model.add(Dense(64))
29 | model.add(Activation('relu'))
30 | model.add(Dropout(0.5))
31 | model.add(Dense(1))
32 | model.add(Activation('sigmoid'))
33 |
34 | model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
35 |
36 | batch_size = 16
37 |
38 | train_datagen = ImageDataGenerator(rescale=1 / 255, zoom_range=0.2, horizontal_flip=True, shear_range=0.2)
39 | test_datagen = ImageDataGenerator(rescale=1 / 255)
40 |
41 | train_generator = train_datagen.flow_from_directory(directory="/home/raghav/Desktop/data_image/train",
42 | target_size=(150, 150), batch_size=batch_size, class_mode='binary')
43 |
44 | validation_generator = train_datagen.flow_from_directory(directory="/home/raghav/Desktop/data_image/validate",
45 | target_size=(150, 150), batch_size=batch_size,
46 | class_mode='binary')
47 |
48 | model.fit_generator(train_generator, epochs=50, validation_data=validation_generator,
49 | steps_per_epoch=2000 // batch_size,
50 | validation_steps=800 // batch_size)
51 | model.save_weights('first_try.h5')
52 |
--------------------------------------------------------------------------------
/Deep_Learning/keras/image_classifier/3_usingVG16.py:
--------------------------------------------------------------------------------
1 | """
2 | Using the concept of transfer learning to improve accuracy. VGG16 is a CNN that has been trained on ImageNet data.
3 | We first load this model up to the first fully connected layer.
4 | """
5 | import numpy as np
6 | from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
7 | from keras.models import Sequential
8 | from keras.layers import Dense, Flatten, Dropout
9 | from keras import applications
10 | import gc
11 |
12 | img_width, img_ht = 150, 150
13 | top_model_wt_path = "bottleneck_fc_model.h5"
14 | train_dir = "/home/raghav/Desktop/data_image/train"
15 | validation_dir = "/home/raghav/Desktop/data_image/validate"
16 | test_dir = "/home/raghav/Desktop/data_image/test"
17 | no_train_samples = 2000
18 | no_validation_samples = 800
19 | epochs = 50
20 | batch_size = 16
21 |
22 |
23 | def save_bottleneck_features():
24 | datagen = ImageDataGenerator(rescale=1 / 255)
25 |
26 | # build the vgg16 model
27 | model = applications.VGG16(include_top=False, weights='imagenet')
28 |
29 | generator = datagen.flow_from_directory(train_dir, target_size=(img_width, img_ht), shuffle=False, class_mode=None,
30 | batch_size=batch_size) # class_mode=None means our data will only yield
31 | # batches of data, no labels, shuffle=False means our data will be in order so first 1000 images will be cats and
32 | # next 1000 dogs
33 |
34 |     # generates predictions for a generator. Steps: total number of batches. Returns a numpy array of predictions
35 | bottleneck_features_train = model.predict_generator(generator=generator, steps=no_train_samples // batch_size)
36 | # saves an array to a binary file
37 | np.save(file="bottleneck_features_train.npy", arr=bottleneck_features_train)
38 |
39 | generator = datagen.flow_from_directory(validation_dir, target_size=(img_width, img_ht), batch_size=batch_size,
40 | class_mode=None, shuffle=False)
41 | bottleneck_features_validation = model.predict_generator(generator, no_validation_samples // batch_size)
42 | np.save(file="bottleneck_features_validate.npy", arr=bottleneck_features_validation)
43 |
44 |
45 | def train_top_model():
46 | train_data = np.load(file="bottleneck_features_train.npy")
47 | train_labels = np.array([0] * (no_train_samples // 2) + [1] * (no_train_samples // 2))
48 |
49 | validation_data = np.load(file="bottleneck_features_validate.npy")
50 | validation_labels = np.array([0] * (no_validation_samples // 2) + [1] * (no_validation_samples // 2))
51 |
52 | model = Sequential()
53 | model.add(Flatten(input_shape=train_data.shape[1:])) # don't need to tell batch size in input shape
54 | model.add(Dense(256, activation='relu'))
55 | model.add(Dropout(0.5))
56 | model.add(Dense(1, activation='sigmoid'))
57 |
58 | model.compile(optimizer='rmsprop',
59 | loss='binary_crossentropy', metrics=['accuracy'])
60 |
61 | # this gives training data accuracy("acc") and validation data accuracy ("val_acc"). If the "acc" keeps on improving
62 | # while the "val_acc" keeps on decreasing, then we are likely overfitting the model
63 | model.fit(train_data, train_labels,
64 | epochs=epochs,
65 | batch_size=batch_size,
66 | validation_data=(validation_data, validation_labels))
67 |
68 | model.save_weights(top_model_wt_path)
69 |
70 |
71 | def predict_image_class(file):
72 | model = applications.VGG16(include_top=False, weights='imagenet')
73 | x = load_img(file, target_size=(img_width, img_ht))
74 | x = img_to_array(x)
75 | x = np.expand_dims(x, axis=0)
76 | array = model.predict(x, verbose=0) # verbose = 0 means no logging
77 | model = Sequential()
78 | model.add(Flatten(input_shape=array.shape[1:]))
79 | model.add(Dense(256, activation='relu'))
80 | model.add(Dropout(0.5))
81 | model.add(Dense(1, activation='sigmoid'))
82 | model.load_weights(top_model_wt_path)
83 | class_predicted = model.predict_classes(array, verbose=0)
84 | probability = model.predict(array, verbose=0)[0][0]
85 | if class_predicted == 1 and probability > 0.5:
86 | print("dogs")
87 | elif class_predicted == 0 and probability > 0.5:
88 | print("cat")
89 | else:
90 | print("None")
91 |
92 |
93 | """
94 | save_bottleneck_features()
95 | train_top_model()
96 | """
97 |
98 | # predict_image_class(test_dir + "/cat/cat.3120.jpg")
99 | predict_image_class("/home/raghav/Pictures/1.png")
100 | gc.collect() # resolves an error of session of tensorflow
101 |
--------------------------------------------------------------------------------
/Deep_Learning/keras/image_classifier/3_usingVGG16_codeonly.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
3 | from keras.models import Sequential
4 | from keras.layers import Dense, Flatten, Dropout
5 | from keras import applications
6 | import gc
7 |
8 | img_width, img_ht = 150, 150
9 | top_model_wt_path = "bottleneck_fc_model.h5"
10 | train_dir = "/home/raghav/Desktop/data_image/train"
11 | validation_dir = "/home/raghav/Desktop/data_image/validate"
12 | test_dir = "/home/raghav/Desktop/data_image/test"
13 | no_train_samples = 2000
14 | no_validation_samples = 800
15 | epochs = 50
16 | batch_size = 16
17 |
18 |
19 | def save_bottleneck_features():
20 | datagen = ImageDataGenerator(rescale=1 / 255)
21 | model = applications.VGG16(include_top=False, weights='imagenet')
22 |
23 | generator = datagen.flow_from_directory(train_dir, target_size=(img_width, img_ht), shuffle=False, class_mode=None,
24 | batch_size=batch_size)
25 | bottleneck_features_train = model.predict_generator(generator=generator, steps=no_train_samples // batch_size)
26 | np.save(file="bottleneck_features_train.npy", arr=bottleneck_features_train)
27 |
28 | generator = datagen.flow_from_directory(validation_dir, target_size=(img_width, img_ht), batch_size=batch_size,
29 | class_mode=None, shuffle=False)
30 | bottleneck_features_validation = model.predict_generator(generator, no_validation_samples // batch_size)
31 | np.save(file="bottleneck_features_validate.npy", arr=bottleneck_features_validation)
32 |
33 |
34 | def train_top_model():
35 | train_data = np.load(file="bottleneck_features_train.npy")
36 | train_labels = np.array([0] * (no_train_samples // 2) + [1] * (no_train_samples // 2))
37 |
38 | validation_data = np.load(file="bottleneck_features_validate.npy")
39 | validation_labels = np.array([0] * (no_validation_samples // 2) + [1] * (no_validation_samples // 2))
40 |
41 | model = Sequential()
42 | model.add(Flatten(input_shape=train_data.shape[1:]))
43 | model.add(Dense(256, activation='relu'))
44 | model.add(Dropout(0.5))
45 | model.add(Dense(1, activation='sigmoid'))
46 | model.compile(optimizer='rmsprop',
47 | loss='binary_crossentropy', metrics=['accuracy'])
48 | model.fit(train_data, train_labels,
49 | epochs=epochs,
50 | batch_size=batch_size,
51 | validation_data=(validation_data, validation_labels))
52 |
53 | model.save_weights(top_model_wt_path)
54 |
55 |
56 | def predict_image_class(file):
57 | model = applications.VGG16(include_top=False, weights='imagenet')
58 | x = load_img(file, target_size=(img_width, img_ht))
59 | x = img_to_array(x)
60 | x = np.expand_dims(x, axis=0)
61 | array = model.predict(x, verbose=0)
62 | model = Sequential()
63 | model.add(Flatten(input_shape=array.shape[1:]))
64 | model.add(Dense(256, activation='relu'))
65 | model.add(Dropout(0.5))
66 | model.add(Dense(1, activation='sigmoid'))
67 | model.load_weights(top_model_wt_path)
68 | class_predicted = model.predict_classes(array, verbose=0)
69 | probability = model.predict(array, verbose=0)[0][0]
70 | if class_predicted == 1 and probability > 0.5:
71 | print("dogs")
72 | elif class_predicted == 0 and probability > 0.5:
73 | print("cat")
74 | else:
75 | print("None")
76 |
77 | save_bottleneck_features()
78 | #train_top_model()
79 | #predict_image_class("/home/raghav/Pictures/1.png")
80 | #gc.collect()
81 |
--------------------------------------------------------------------------------
/Deep_Learning/keras/rnn_keras/intro.py:
--------------------------------------------------------------------------------
1 | import keras
2 | from keras.preprocessing import sequence
3 | from keras.models import Sequential
4 | from keras.layers import Dense, Embedding
5 | from keras.layers import LSTM
6 | from keras.datasets import imdb
7 |
8 |
--------------------------------------------------------------------------------
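The imports above suggest an LSTM sentiment model on the IMDB data; a minimal sketch along the lines of the standard Keras IMDB LSTM example (hyperparameters are illustrative, argument names follow Keras 2) could be:

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from keras.datasets import imdb

max_features = 20000   # vocabulary size
maxlen = 80            # cut each review after this many words

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=1, validation_data=(x_test, y_test))
--------------------------------------------------------------------------------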
/Deep_Learning/keras/sentiment_analysis_movie/4.1_code_only.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | from keras.datasets import imdb
4 | from keras.models import Sequential
5 | from keras.layers import Dense, Flatten
6 | from keras.layers.embeddings import Embedding
7 | from keras.preprocessing import sequence
8 |
9 | # load the dataset
10 | top_words = 5000
11 | (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
12 | max_words_in_a_review = 500
13 | X_train = sequence.pad_sequences(X_train, maxlen=max_words_in_a_review)
14 | X_test = sequence.pad_sequences(X_test, maxlen=max_words_in_a_review)
15 | model = Sequential()
16 | model.add(Embedding(input_dim=top_words, output_dim=32, input_length=max_words_in_a_review))
17 | model.add(Flatten())
18 | model.add(Dense(250, activation='relu'))
19 | model.add(Dense(1, activation='sigmoid'))
20 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
21 | print(model.summary())
22 |
23 | model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=2, batch_size=128, verbose=2)
24 | scores = model.evaluate(X_test, y_test, verbose=0)
25 | print("\n accuracy %s" % scores[1]*100)
26 |
27 |
--------------------------------------------------------------------------------
/Deep_Learning/keras/sentiment_analysis_movie/4_movie_sentiment.py:
--------------------------------------------------------------------------------
1 | """ Using the IMDB dataset of movie reviews. The Large Movie Review Dataset (often referred to as the IMDB dataset)
2 | contains 25,000 highly polar movie reviews (good or bad) for training and the same amount again for testing. The
3 | problem is to determine whether a given movie review has a positive or negative sentiment.
4 | """
5 |
6 | from keras.datasets import imdb # keras provides access to the imdb dataset built-in
7 | from keras.models import Sequential
8 | from keras.layers import Dense, Flatten
9 | from keras.layers.embeddings import Embedding
10 | from keras.preprocessing import sequence
11 |
12 | # load the dataset
13 | top_words = 5000
14 | (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words) # loading only the top 5000 words
15 | # imdb.load_data(): the words have been replaced by integers
16 | # which represent the overall frequency rank of a word in the dataset, so that for instance the integer "3" encodes the
17 | # 3rd most frequent word in the data. This allows for quick filtering operations such as: "only consider the top 10,
18 | # 000 most common words, but eliminate the top 20 most common words". Each review is thus represented as
19 | # a sequence of integers
20 |
21 | max_words_in_a_review = 500
22 |
23 | # sequence.pad_sequences() creates a list where each review is of length = max_words_in_review. If length of actual
24 | # review greater than 500, it is truncated, else 0s are padded in the beginning
25 | X_train = sequence.pad_sequences(X_train, maxlen=max_words_in_a_review)
26 | X_test = sequence.pad_sequences(X_test, maxlen=max_words_in_a_review)
27 |
28 | # now we will create our model. We will first use Embedding layer setting the vocabulary to be 5000 , the output
29 | # vector size is 32 and input length is 500. The output is a 2d matrix of 500*32 size. Next we will Flatten this and
30 | # add a dense layer of 250 outputs and then another dense layer of 1 output unit
31 |
32 | # now we do word embeddings: This is a technique where words are encoded as real-valued vectors in a
33 | # high-dimensional space, where the similarity between words in terms of meaning translates to closeness in the
34 | # vector space
35 |
36 | # in keras we can turn positive integers into dense vectors of fixed size using embedding
37 | # keras.layers.embeddings.Embedding()
38 |
39 | # input_dim: int > 0. Size of the vocabulary, i.e. maximum integer index + 1.
40 | # output_dim: int >= 0. Dimension of the dense embedding. embeddings_initializer: Initializer for the embeddings
41 | # matrix (see initializers).
42 | # embeddings_regularizer: Regularizer function applied to the embeddings matrix (see
43 | # regularizer).
44 | # embeddings_constraint: Constraint function applied to the embeddings matrix (see constraints).
45 | # mask_zero: Whether or not the input value 0 is a special "padding" value that should be masked out. This is useful
46 | # when using recurrent layers which may take variable length input. If this is True then all subsequent
47 | # layers in the model need to support masking or an exception will be raised. If mask_zero is set to True,
48 | # as a consequence, index 0 cannot be used in the vocabulary (input_dim should equal size of vocabulary + 1).
49 | # input_length: Length of input sequences, when it is constant. This argument is required if you are going to connect
50 | # Flatten then Dense layers upstream (without it, the shape of the dense outputs cannot be computed).
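# With the settings used below, Embedding(5000, 32, input_length=500) maps an input batch of shape
# (batch_size, 500) to an output of shape (batch_size, 500, 32), which Flatten turns into
# (batch_size, 16000) before the dense layers.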
51 |
52 |
53 | model = Sequential()
54 | model.add(Embedding(input_dim=top_words, output_dim=32, input_length=max_words_in_a_review))
55 | model.add(Flatten())
56 | model.add(Dense(250, activation='relu'))
57 | model.add(Dense(1, activation='sigmoid'))
58 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
59 | print(model.summary())
60 |
61 | # fit the model
62 | model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=2, batch_size=128, verbose=2) # only 1 log line
63 | # per epoch
64 | scores = model.evaluate(X_test, y_test, verbose=0)
65 | print("\nAccuracy: %.2f%%" % (scores[1] * 100))
66 |
67 |
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/1_intro.py:
--------------------------------------------------------------------------------
1 | """ Developing tflearn model for cartpole Evaluations on openAI
2 | Here I am working with CartPol-v0 wherin I have to balance a pole on the cart
3 | Every frame it is balanced, 1 score is added
4 |
5 | """
6 |
7 | import gym
8 | import random
9 | import numpy as np
10 | import tflearn
11 | from tflearn.layers.core import input_data, dropout, fully_connected
12 | from tflearn.layers.estimator import regression
13 | from statistics import mean, median
14 | from collections import Counter
15 |
16 | LR = 1e-3
17 | env = gym.make('CartPole-v0') # defines the environment to be CartPole environment
18 | env.reset()
19 | goal_steps = 500
20 | score_requirement = 60
21 | initial_games = 20000
22 |
23 |
24 | def some_random_games():
25 | for episode in range(5): # creating 5 episodes to work on
26 | env.reset()
27 | for t in range(goal_steps):
28 | env.render()
29 | action = env.action_space.sample() # takes a random action in our environment
30 | observation, reward, done, info = env.step(action) # observation: an environment-specific object
31 | # representing your observation of the environment. reward : amount of reward achieved by the previous
32 | # action. done: whether it's time to reset the environment again. info: diagnostic information useful for
33 | # debugging. To see the available actions, print(env.action_space)
34 | if done:
35 | print("Episode finished after {} timesteps".format(t + 1))
36 | break
37 |
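# Note: for CartPole-v0 the action space is Discrete(2) (push the cart left or right) and each
# observation is a Box of 4 floats (cart position, cart velocity, pole angle, pole angular velocity):
# print(env.action_space)       # -> Discrete(2)
# print(env.observation_space)  # -> Box(4,)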
38 |
39 | def initial_population():
40 | training_data = [] # add those moves which gave score > score requirement
41 | scores = []
42 | accepted_scores = []
43 | for _ in range(initial_games):
44 | score = 0
45 | game_memory = [] # store the moves of every game in memory, as we don't yet know if the score will reach score_requirement
46 | prev_observation = []
47 | for _ in range(goal_steps):
48 | action = random.randrange(0, 2)
49 | observation, reward, done, info = env.step(action)
50 |
51 | if len(prev_observation) > 0:
52 | game_memory.append([prev_observation, action])
53 |
54 | prev_observation = observation
55 | score += reward # reward will be 0 or 1 for each frame
56 | if done:
57 | break
58 |
59 | if score >= score_requirement:
60 | accepted_scores.append(score)
61 | for data in game_memory:
62 | output = []
63 | if data[1] == 1:
64 | output = [0, 1]
65 | elif data[1] == 0:
66 | output = [1, 0]
67 |
68 | training_data.append([data[0], output])
69 |
70 | env.reset()
71 | scores.append(score)
72 |
73 | training_data_save = np.array(training_data)
74 | np.save('saved.npy', training_data_save)
75 |
76 | print("Average accepted score: ", mean(accepted_scores))
77 | print("Median accepted score:", median(accepted_scores))
78 | print(Counter(accepted_scores))
79 |
80 | return training_data
81 |
82 |
83 | def neural_network_model(input_size):
84 | network = input_data(shape=[None, input_size, 1], name='Input')
85 |
86 | network = fully_connected(network, 128, activation='relu') # on which input, no of nodes, activation
87 | network = dropout(network, 0.8) # on which network , keep rate
88 |
89 | network = fully_connected(network, 256, activation='relu')
90 | network = dropout(network, 0.8)
91 |
92 | network = fully_connected(network, 512, activation='relu')
93 | network = dropout(network, 0.8)
94 |
95 | network = fully_connected(network, 256, activation='relu')
96 | network = dropout(network, 0.8)
97 |
98 | network = fully_connected(network, 128, activation='relu')
99 | network = dropout(network, 0.8)
100 |
101 | network = fully_connected(network, 2, activation='softmax') # no of output, activation function
102 | network = regression(network, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='Targets')
103 |
104 | model = tflearn.DNN(network, tensorboard_dir='log')
105 |
106 | return model
107 |
108 |
109 | def train_model(training_data, model=False):
110 | x = np.array([i[0] for i in training_data]).reshape(-1, len(training_data[0][0]), 1)
111 | y = [i[1] for i in training_data]
112 |
113 | if not model:
114 | model = neural_network_model(input_size=len(x[0]))
115 |
116 | model.fit(X_inputs=x, Y_targets=y, n_epoch=3, snapshot_epoch=1, run_id='openAIStuff', show_metric=True)
117 |
118 | return model
119 |
120 |
121 | training_data = initial_population()
122 | model = train_model(training_data)
123 |
124 | scores = []
125 | choices = []
126 |
127 | for each_game in range(10):
128 | score = 0
129 | game_memory = []
130 | prev_obser = []
131 | env.reset()
132 | for _ in range(goal_steps):
133 | env.render()
134 | if len(prev_obser) == 0:
135 | action = random.randrange(0, 2)
136 | else:
137 | action = np.argmax(model.predict(prev_obser.reshape(-1, len(prev_obser), 1))[0])
138 | choices.append(action)
139 |
140 | new_observation, reward, done, info = env.step(action)
141 | prev_obser = new_observation
142 | game_memory.append([new_observation, action])
143 | score += reward
144 | if done:
145 | break
146 | scores.append(score)
147 | print("Average scores", sum(scores)/len(scores))
148 | print("Choice 1: {}, Choice 2: {}".format(choices.count(1)/len(choices), choices.count(0)/len(choices)))
149 |
150 |
151 |
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/2_train.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import random
3 | import numpy as np
4 | import tflearn
5 | from tflearn.layers.core import input_data, dropout, fully_connected
6 | from tflearn.layers.estimator import regression
7 | from statistics import median, mean
8 | from collections import Counter
9 |
10 | LR = 1e-3
11 | env = gym.make("CartPole-v0")
12 | env.reset()
13 | goal_steps = 500
14 | score_requirement = 70
15 | initial_games = 30000
16 |
17 |
18 | def some_random_games_first():
19 | # Each of these is its own game.
20 | for episode in range(100):
21 | env.reset()
22 | # this is each frame, up to 200... but we won't make it that far.
23 | for t in range(200):
24 | # This will display the environment
25 | # Only display if you really want to see it.
26 | # Takes much longer to display it.
27 | env.render()
28 |
29 | # This will just create a sample action in any environment.
30 | # In this environment, the action can be 0 or 1, which is left or right
31 | action = env.action_space.sample()
32 |
33 | # this executes the environment with an action,
34 | # and returns the observation of the environment,
35 | # the reward, if the env is over, and other info.
36 | observation, reward, done, info = env.step(action)
37 | if done:
38 | break
39 |
40 |
41 | def initial_population():
42 | # [OBS, MOVES]
43 | training_data = []
44 | # all scores:
45 | scores = []
46 | # just the scores that met our threshold:
47 | accepted_scores = []
48 | # iterate through however many games we want:
49 | for _ in range(initial_games):
50 | score = 0
51 | # moves specifically from this environment:
52 | game_memory = []
53 | # previous observation that we saw
54 | prev_observation = []
55 | # for each frame, up to goal_steps (500 here)
56 | for _ in range(goal_steps):
57 | # choose random action (0 or 1)
58 | action = random.randrange(0, 2)
59 | # do it!
60 | observation, reward, done, info = env.step(action)
61 |
62 | # notice that the observation is returned FROM the action
63 | # so we'll store the previous observation here, pairing
64 | # the prev observation to the action we'll take.
65 | if len(prev_observation) > 0:
66 | game_memory.append([prev_observation, action])
67 | prev_observation = observation
68 | score += reward
69 | if done: break
70 |
71 | # IF our score is higher than our threshold, we'd like to save
72 | # every move we made
73 | # NOTE the reinforcement methodology here.
74 | # all we're doing is reinforcing the score, we're not trying
75 | # to influence the machine in any way as to HOW that score is
76 | # reached.
77 | if score >= score_requirement:
78 | accepted_scores.append(score)
79 | for data in game_memory:
80 | # convert to one-hot (this is the output layer for our neural network)
81 | if data[1] == 1:
82 | output = [0, 1]
83 | elif data[1] == 0:
84 | output = [1, 0]
85 |
86 | # saving our training data
87 | training_data.append([data[0], output])
88 |
89 | # reset env to play again
90 | env.reset()
91 | # save overall scores
92 | scores.append(score)
93 |
94 | # just in case you wanted to reference later
95 | # training_data_save = np.array(training_data)
96 | # np.save('saved.npy', training_data_save)
97 |
98 | # some stats here, to further illustrate the neural network magic!
99 | print('Average accepted score:', mean(accepted_scores))
100 | print('Median score for accepted scores:', median(accepted_scores))
101 | print(Counter(accepted_scores))
102 |
103 | return training_data
104 |
105 |
106 | def neural_network_model(input_size):
107 | network = input_data(shape=[None, input_size, 1], name='input')
108 |
109 | network = fully_connected(network, 128, activation='relu')
110 | network = dropout(network, 0.8)
111 |
112 | network = fully_connected(network, 256, activation='relu')
113 | network = dropout(network, 0.8)
114 |
115 | network = fully_connected(network, 512, activation='relu')
116 | network = dropout(network, 0.8)
117 |
118 | network = fully_connected(network, 256, activation='relu')
119 | network = dropout(network, 0.8)
120 |
121 | network = fully_connected(network, 128, activation='relu')
122 | network = dropout(network, 0.8)
123 |
124 | network = fully_connected(network, 2, activation='softmax')
125 | network = regression(network, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')
126 | model = tflearn.DNN(network, tensorboard_dir='log')
127 |
128 | return model
129 |
130 |
131 | def train_model(training_data, model=False):
132 | X = np.array([i[0] for i in training_data]).reshape(-1, len(training_data[0][0]), 1)
133 | y = [i[1] for i in training_data]
134 |
135 | if not model:
136 | model = neural_network_model(input_size=len(X[0]))
137 |
138 | model.fit({'input': X}, {'targets': y}, n_epoch=5, snapshot_step=500, show_metric=True, run_id='openai_learning')
139 | return model
140 |
141 | training_data = initial_population()
142 | model = train_model(training_data)
143 |
144 | scores = []
145 | choices = []
146 | for each_game in range(10):
147 | score = 0
148 | game_memory = []
149 | prev_obs = []
150 | env.reset()
151 | for _ in range(goal_steps):
152 | env.render()
153 |
154 | if len(prev_obs) == 0:
155 | action = random.randrange(0, 2)
156 | else:
157 | action = np.argmax(model.predict(prev_obs.reshape(-1, len(prev_obs), 1))[0])
158 |
159 | choices.append(action)
160 |
161 | new_observation, reward, done, info = env.step(action)
162 | prev_obs = new_observation
163 | game_memory.append([new_observation, action])
164 | score += reward
165 | if done:
166 | break
167 |
168 | scores.append(score)
169 |
170 | print('Average Score:', sum(scores) / len(scores))
171 | print('choice 1:{} choice 0:{}'.format(choices.count(1) / len(choices), choices.count(0) / len(choices)))
172 | print(score_requirement)
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497419827.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497419827.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497420569.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497420569.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497420852.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497420852.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497420919.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497420919.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497441395.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497441395.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497441421.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497441421.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497441457.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497441457.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421583.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421583.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421622.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421622.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421683.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421683.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421744.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421744.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421940.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421940.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421973.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421973.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422209.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422209.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422303.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422303.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422371.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422371.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422400.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422400.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422427.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422427.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497441498.raghav-PC:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497441498.raghav-PC
--------------------------------------------------------------------------------
/Deep_Learning/openAIGym/saved.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/saved.npy
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # machine-learning
2 |
3 | A repository which contains all of my snippets and projects related to Machine Learning.
4 |
5 | * **classical_ml** : Consists of all the basic machine learning algorithms. All of them have been first coded without using **sklearn** in order to understand how the algorithm actually works. Later, they have been coded using sklearn.
6 | **Libraries used** : numpy, sklearn and pandas
7 |
8 | * **deep_learning** : Consists of snippets of various deep learning libraries like Tensorflow and Keras. It also includes my projects in deep learning.
9 | **Frameworks used** : Tensorflow, Keras, Theano
10 |
11 | ## The various projects that I have done are:
12 | * ### **Image Classifier model :**
13 | 1) First built my own image classifier model using Tensorflow and Keras on a small dataset. Achieved 90% accuracy. Needed to use image augmentation and heavy dropout to achieve this.
14 | 2) Applied transfer learning to the VGG16 model by retraining only its final fully connected layer. Accuracy > 95%
15 |
16 | * ### **Google Dinosaur using CNN and Reinforcement Learning:**
17 | 1) Model is still in the development phase (it has some bugs). The goal is a model that can play the Google Dinosaur game on its own.
18 |
19 | * ### **Sentiment Analysis of Movie Reviews:**
20 | 1) Given any movie review, the model is able to predict whether the review was "positive" or "negative".
21 | 2) Accuracy > 80%
22 |
23 | ## Sources
24 |
25 | * [Andrew Ng's Machine Learning course on Coursera](https://www.coursera.org/learn/machine-learning): The most basic course. Everybody does it. The hello world of machine learning.
26 | * [Stanford's CS231n](https://cs231n.github.io/): Introduction to Deep learning and Convolutional Neural Networks
27 | * [Sentdex's playlist for Machine Learning with Python](https://www.youtube.com/watch?v=OGxgnH8y2NM&list=PLQVvvaa0QuDfKTOs3Keq_kaG2P55YRn5v) : An awesome channel in general for any Python-related stuff. This playlist especially focuses on how to use Python for machine learning.
28 | * [Jeremy Howard's fast.ai](http://www.fast.ai/) : An awesome MOOC which teaches the different frameworks in Python available for Deep Learning.
29 | * [Andrew Ng's new course on Deep Learning (paid)](https://www.coursera.org/specializations/deep-learning) : New specialization offered by Andrew Ng on Coursera. It is paid, though financial aid is available as with any other Coursera course.
30 |
31 | ## Installation Tutorials (Just Google it):
32 | * [Tensorflow](https://www.tensorflow.org/install/)
33 | * [Keras](https://keras.io/#installation)
34 | * sklearn, numpy, pandas : Can be installed using pip
35 |
36 | **NOTE**: In order to train the various deep learning models, it is recommended that you have a GPU that supports the CUDA framework to speed things up.
37 |
--------------------------------------------------------------------------------
/classical_ml/clustering/K means/1_Intro.py:
--------------------------------------------------------------------------------
1 | """
2 | Supervised: We have told the machines what the classes were
3 |
 4 | Clustering:
 5 | 1) Flat
 6 | 2) Hierarchical
 7 |
 8 | In both, the machine is just given the featureset. Then the machine itself searches for groups or clusters.
 9 |
10 | With Flat Clustering, we tell the machine how many clusters to find (e.g. 2 or 3).
11 | With Hierarchical Clustering, the machine figures out how many groups there are.
12 |
13 | The first algorithms we use:
14 | 1) K Means : K is the number of clusters we want -> does Flat Clustering
15 | 2) Mean Shift : Hierarchical Clustering
16 |
17 | How K Means works:
18 | Choose K centroids at the beginning (typically the first k points are taken).
19 | Calculate the distance of each featureset to the centroids and assign each featureset to the closest centroid.
20 | Then take all the featuresets of one cluster and take their mean. These means are the new centroids.
21 | Repeat until the centroids stop moving.
22 |
23 | Downside of K Means: it always tries to find similarly sized groups.
24 |
25 | """
26 |
27 | import matplotlib.pyplot as plt
28 | from matplotlib import style
29 | from sklearn.cluster import KMeans
30 | import numpy as np
31 |
32 | style.use('ggplot')
33 |
34 | X = np.array([[1, 2],
35 | [1.5, 1.8],
36 | [5, 8],
37 | [8, 8],
38 | [1, 0.6],
39 | [9, 11]]
40 | )
41 |
42 |
43 | clf = KMeans(n_clusters=2)
44 | clf.fit(X)
45 |
46 | centroids = clf.cluster_centers_
47 | labels = clf.labels_
48 |
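# The fitted model can also classify new points, e.g. (illustrative; the cluster label order is arbitrary):
# print(clf.predict(np.array([[0.5, 1.0], [8.0, 9.0]])))  # -> something like [0 1] or [1 0]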
49 | colors = ["g.", "r.", "c.", "b.", "k.", "y."]
50 |
51 | for i in range(len(X)):
52 | plt.plot(X[i][0], X[i][1], colors[labels[i]], markersize=20)
53 | plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=150)
54 | plt.show()
55 |
--------------------------------------------------------------------------------
/classical_ml/clustering/K means/2_handling_non_numeric_data.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | from matplotlib import style
4 | from sklearn.cluster import KMeans
5 | from sklearn import preprocessing
6 | from sklearn.preprocessing import LabelEncoder
7 | import pandas as pd
8 |
9 | style.use('ggplot')
10 |
11 | """
12 | Pclass Passenger Class (1 = 1st; 2 = 2nd; 3 = 3rd)
13 | survival Survival (0 = No; 1 = Yes)
14 | name Name
15 | sex Sex
16 | age Age
17 | sibsp Number of Siblings/Spouses Aboard
18 | parch Number of Parents/Children Aboard
19 | ticket Ticket Number
20 | fare Passenger Fare (British pound)
21 | cabin Cabin
22 | embarked Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton)
23 | boat Lifeboat
24 | body Body Identification Number
25 | home.dest Home/Destination
26 | """
27 |
28 | df = pd.read_excel('titanic.xls')
29 |
30 | # here we find that some values are non numeric
31 | # e.g. the sex column: we take the set of values in the column and assign each a number
32 |
33 | df.drop(['body', 'name'], 1, inplace=True)
34 | df = df.apply(pd.to_numeric, errors='ignore')
35 | df.fillna(0, inplace=True)
36 |
37 |
38 | def handle_non_numerical_data(df):
39 | le = LabelEncoder()
40 | columns = list(df.columns.values) # to handle non numeric data types use LabelEncoder()
41 |
42 | for column in columns:
43 | l = []
44 | if df[column].dtype != np.int64 and df[column].dtype != np.float64:
45 | for i in df[column]:
46 | l.append(i)
47 | le.fit(np.array(l))
48 | x = le.transform(l)
49 | df[column] = x
50 | return df
51 |
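# For reference, LabelEncoder simply maps each distinct value to an integer, e.g. (illustrative):
# LabelEncoder().fit_transform(['male', 'female', 'male']) -> array([1, 0, 1])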
52 |
53 | df = handle_non_numerical_data(df)
54 |
55 | # once the clusters are obtained, we could run SVM etc. within each cluster
56 |
57 | X = np.array(df.drop(['survived'], 1)).astype(float)
58 | X = preprocessing.scale(X) # important
59 | y = np.array(df['survived'])
60 |
61 | clf = KMeans(n_clusters=2)
62 | clf.fit(X)
63 | labels = clf.labels_
64 | correct = 0
65 | for i in range(len(X)):
66 | predict_me = np.array(
67 | X[i].astype(float)) # cluster labels are assigned arbitrarily: "survived" might map to cluster 0 rather
68 | # than 1, so the printed accuracy could come out as e.g. 20% when it is really 80%
69 | predict_me = predict_me.reshape(-1, len(predict_me))
70 | prediction = clf.predict(predict_me)
71 | if prediction == y[i]:
72 | correct += 1
73 |
74 | print(correct / len(X))
75 |
--------------------------------------------------------------------------------
/classical_ml/clustering/K means/3_K_means_from_scratch.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from matplotlib import style
3 | import numpy as np
4 |
5 | style.use('ggplot')
6 |
7 | X = np.array([[1, 2],
8 | [1.5, 1.8],
9 | [5, 8],
10 | [8, 8],
11 | [1, 0.6],
12 | [9, 11]]
13 | )
14 |
15 | colors = ["g", "r", "c", "b", "k", "y"]
16 |
17 |
18 | class K_Means:
19 | def __init__(self, k=2, tol=0.0001, max_iter=300):
20 | self.classifications = {}
21 | self.centroids = {}
22 | self.k = k
23 | self.tol = tol
24 | self.max_iter = max_iter
25 |
26 | def fit(self, data):
27 |
28 | for i in range(self.k):
29 | self.centroids[i] = data[i]
30 |
31 | for i in range(self.max_iter):
32 |
33 | for j in range(self.k):
34 | self.classifications[j] = []
35 |
36 | for featureset in data:
37 | distances = [np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids]
38 | classification = distances.index(min(distances))
39 | self.classifications[classification].append(featureset)
40 |
41 | prev_centroids = dict(self.centroids)
42 |
43 | for classification in self.classifications:
44 | self.centroids[classification] = np.average(self.classifications[classification], axis=0)
45 |
46 | optimized = True
47 |
48 | for c in self.centroids:
49 | original_centroid = prev_centroids[c]
50 | current_centroid = self.centroids[c]
51 | if abs(np.sum((current_centroid - original_centroid) / original_centroid * 100.0)) > self.tol:  # abs so a net negative shift still counts as movement
52 | print(np.sum((current_centroid - original_centroid) / original_centroid * 100.0))
53 | optimized = False
54 |
55 | if optimized:
56 | break
57 |
58 | def predict(self, data):
59 | distances = [np.linalg.norm(data - self.centroids[centroid]) for centroid in self.centroids]
60 | classification = distances.index(min(distances))
61 | return classification
62 |
63 |
64 | clf = K_Means()
65 | clf.fit(X)
66 |
67 | for centroid in clf.centroids:
68 | plt.scatter(clf.centroids[centroid][0], clf.centroids[centroid][1], marker="o", color='k', s=150, linewidths=5)
69 |
70 | for classification in clf.classifications:
71 | color = colors[classification]
72 | for featureset in clf.classifications[classification]:
73 | plt.scatter(featureset[0], featureset[1], marker='x', color=color, s=150)
74 |
75 | unknowns = np.array([[1, 2],
76 | [5, 1],
77 | [8, 1],
78 | [1, 7],
79 | [0, 0]])
80 |
81 | for unknown in unknowns:
82 | classification = clf.predict(unknown)
83 | plt.scatter(unknown[0], unknown[1], color=colors[classification], s=150, marker='*')
84 | plt.show()
85 |
--------------------------------------------------------------------------------
/classical_ml/clustering/K means/titanic.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/classical_ml/clustering/K means/titanic.xls
--------------------------------------------------------------------------------
/classical_ml/clustering/Mean Shift/1_intro.py:
--------------------------------------------------------------------------------
1 | """
 2 | It is hierarchical clustering. It automatically figures out the number of clusters needed and where those clusters are.
 3 |
 4 | Here we start by treating every featureset as a cluster center.
 5 |
 6 | It has something called the Radius/Bandwidth: every data point has a circle of that radius (the bandwidth) around it.
 7 | We take the mean of the data points within the bandwidth; that mean becomes the new centroid with its own bandwidth.
 8 | Repeat until the centroid no longer moves.
 9 |
10 | Repeat for the other data points. Centroids started from different points can end up coinciding.
11 |
12 | """
13 | import numpy as np
14 | from sklearn.datasets import make_blobs
15 | import matplotlib.pyplot as plt
16 | from matplotlib import style
17 | from sklearn.cluster import MeanShift
18 | from mpl_toolkits.mplot3d import Axes3D
19 |
20 | style.use('ggplot')
21 |
22 | centres = [[1, 1, 1], [5, 5, 5], [3, 10, 10]]
23 | X, _ = make_blobs(n_samples=100, centers=centres, cluster_std=1)
24 |
25 | ms = MeanShift()
26 | ms.fit(X)
27 | labels = ms.labels_
28 | cluster_centers = ms.cluster_centers_
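# Mean Shift picks the number of clusters on its own; with the 3 blob centres above it should
# normally recover 3 (illustrative check):
# print(len(np.unique(labels)))  # -> typically 3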
29 |
30 | colors = ['r', 'g', 'b', 'c', 'k', 'y']
31 | fig = plt.figure()
32 | ax = fig.add_subplot(111, projection='3d')
33 |
34 | for i in range(len(X)):
35 | ax.scatter(X[i][0], X[i][1], X[i][2], c=colors[labels[i]], marker='o')
36 |
37 | ax.scatter(cluster_centers[:, 0], cluster_centers[:, 1], cluster_centers[:, 2], marker='x', color='k', s=150,
38 | linewidths=5)
39 | plt.show()
40 |
--------------------------------------------------------------------------------
/classical_ml/clustering/Mean Shift/2_Applying_on_titanic_dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.cluster import MeanShift, KMeans
3 | from sklearn import preprocessing
4 | from sklearn.preprocessing import LabelEncoder
5 | import pandas as pd
6 | import matplotlib.pyplot as plt
7 |
8 | '''
9 | Pclass Passenger Class (1 = 1st; 2 = 2nd; 3 = 3rd)
10 | survival Survival (0 = No; 1 = Yes)
11 | name Name
12 | sex Sex
13 | age Age
14 | sibsp Number of Siblings/Spouses Aboard
15 | parch Number of Parents/Children Aboard
16 | ticket Ticket Number
17 | fare Passenger Fare (British pound)
18 | cabin Cabin
19 | embarked Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton)
20 | boat Lifeboat
21 | body Body Identification Number
22 | home.dest Home/Destination
23 | '''
24 | pd.options.mode.chained_assignment = None # default='warn'
25 | # https://pythonprogramming.net/static/downloads/machine-learning-data/titanic.xls
26 | df = pd.read_excel('titanic.xls')
27 |
28 | original_df = pd.DataFrame.copy(df)
29 | df.drop(['body', 'name'], 1, inplace=True)
30 | df.fillna(0, inplace=True)
31 |
32 |
33 | def handle_non_numerical_data(df):
34 | le = LabelEncoder()
35 | columns = list(df.columns.values) # to handle non numeric data types use LabelEncoder()
36 |
37 | for column in columns:
38 | l = []
39 | if df[column].dtype != np.int64 and df[column].dtype != np.float64:
40 | for i in df[column]:
41 | l.append(i)
42 | le.fit(np.array(l))
43 | x = le.transform(l)
44 | df[column] = x
45 | return df
46 |
47 |
48 | df = handle_non_numerical_data(df)
49 | df.drop(['ticket', 'home.dest'], 1, inplace=True)
50 |
51 | X = np.array(df.drop(['survived'], 1).astype(float))
52 | X = preprocessing.scale(X)
53 | y = np.array(df['survived'])
54 |
55 | clf = MeanShift()
56 | clf.fit(X)
57 | labels = clf.labels_
58 | cluster_centers = clf.cluster_centers_
59 | n_clusters_ = len(np.unique(labels))
60 | original_df['cluster_group'] = np.nan
61 | for i in range(len(X)):
62 | original_df['cluster_group'].iloc[i] = labels[i]
63 |
64 | survival_rates = {}
65 |
66 | for i in range(n_clusters_):
67 | temp_df = original_df[(original_df['cluster_group'] == float(i))]
68 | survival_cluster = temp_df[(temp_df['survived'] == 1)]
69 |
70 | survival_rate = len(survival_cluster) / len(temp_df)
71 | survival_rates[i] = survival_rate
72 |
73 | print(survival_rates)
74 | print(original_df[(original_df['cluster_group'] == 2)])
75 |
--------------------------------------------------------------------------------
/classical_ml/clustering/Mean Shift/3_from_scratch.py:
--------------------------------------------------------------------------------
1 | """
 2 | Choose the radius dynamically.
3 |
4 | """
5 | import matplotlib.pyplot as plt
6 | from matplotlib import style
7 | import numpy as np
8 | from sklearn.datasets import make_blobs
9 |
10 | style.use('ggplot')
11 |
12 | X = np.array([[1, 2],
13 | [1.5, 1.8],
14 | [5, 8],
15 | [8, 8],
16 | [1, 0.6],
17 | [9, 11],
18 | [8, 2],
19 | [10, 2],
20 | [9, 3]]
21 | )
22 |
23 | colors = 10 * ["g", "r", "c", "b", "k", "y"]
24 |
25 |
26 | # plt.scatter(X[:, 0], X[:, 1])
27 | # plt.show()
28 |
29 |
30 | class MeanShift:
31 | def __init__(self, radius=None, radius_norm_step=100):
32 | self.classifciations = {}
33 | self.radius = radius
34 | self.radius_norm_step = radius_norm_step
35 | self.centroids = {}
36 |
37 | def fit(self, data):
38 | self.data = data
39 | if self.radius is None:
40 | all_data_centroid = np.average(data, axis=0)
41 | all_data_norm = np.linalg.norm(all_data_centroid)
42 | self.radius = all_data_norm / self.radius_norm_step
43 |
44 | centroids = {}
45 |
46 | for i in range(len(data)):
47 | centroids[i] = data[i]
48 |
49 | weights = [i for i in range(self.radius_norm_step)][::-1]
50 | while True:
51 | new_centroids = []
52 | for i in centroids:
53 | in_bandwidth = []
54 | centroid = centroids[i]
55 | for featureset in data:
56 | distance = np.linalg.norm(featureset - centroid)
57 | if distance == 0:
58 | distance = 0.0000001
59 | weight_index = int(distance / self.radius)
60 | if weight_index > self.radius_norm_step - 1:
61 | weight_index = self.radius_norm_step - 1
62 |
63 | to_add = (weights[weight_index] ** 2) * [featureset]
64 | in_bandwidth += to_add
65 |
66 | new_centroid = np.average(in_bandwidth, axis=0)
67 | new_centroids.append(tuple(new_centroid))
68 |
69 | uniques = sorted(list(set(new_centroids)))
70 | to_pop = []
71 | for i in uniques: # Since we have created many steps, if 2 centroids are very close to each other,
72 | # we remove one
73 | for ii in uniques:
74 | if i == ii:
75 | pass
76 | elif np.linalg.norm(np.array(i) - np.array(ii)) <= self.radius:
77 | to_pop.append(ii)
78 | break
79 | for i in to_pop:
80 | try:
81 | uniques.remove(i)
82 | except:
83 | pass
84 |
85 | prev_centroids = dict(centroids) # copying the centroids dict
86 |
87 | centroids = {}
88 | for i in range(len(uniques)):
89 | centroids[i] = np.array(uniques[i])
90 |
91 | optimised = True
92 | for i in centroids:
93 | if not np.array_equal(centroids[i], prev_centroids[i]):
94 | optimised = False
95 | if not optimised:
96 | break
97 | if optimised:
98 | break
99 |
100 | self.centroids = centroids
101 | for i in range(len(self.centroids)):
102 | self.classifciations[i] = []
103 |
104 | for featureset in self.data:
105 | distances = [np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids]
106 | classification = distances.index(min(distances))
107 | self.classifciations[classification].append(featureset)
108 |
109 | def predict(self, data):
110 | distances = [np.linalg.norm(data - self.centroids[centroid]) for centroid in self.centroids]
111 | classification = distances.index(min(distances))
112 | return classification
113 |
114 |
115 | clf = MeanShift()
116 | clf.fit(X)
117 |
118 | centroids = clf.centroids
119 |
120 | for classification in clf.classifciations:
121 | color = colors[classification]
122 | for featureset in clf.classifciations[classification]:
123 | plt.scatter(featureset[0], featureset[1], marker='x', c=color, s=150, linewidths=5)
124 |
125 | for c in centroids:
126 | plt.scatter(centroids[c][0], centroids[c][1], c='k', marker='*', s=150, )
127 |
128 | plt.show()
129 |
--------------------------------------------------------------------------------
/classical_ml/clustering/Mean Shift/titanic.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/classical_ml/clustering/Mean Shift/titanic.xls
--------------------------------------------------------------------------------
/classical_ml/k nearerst neighbours/1_k_nearest_neighbours_intro.py:
--------------------------------------------------------------------------------
1 | """
2 | classification algo. Divides the data into groups
3 | Given pluses and minuses in a graph, and an unknown point would it belong to pluses or minuses
4 |
5 | Linear regression aim was to create a model that best fits the data
6 |
 7 | We classify a point depending upon how close it lies to each group. This is essentially nearest neighbours.
 8 |
 9 | With k nearest neighbours, e.g. k = 2, we check only the 2 nearest neighbours and then decide which group the point
10 | belongs to based on that. If both neighbours are in the same group, well enough; otherwise the vote is split, so we
11 | take an odd value of k. For three groups the minimum value of k is 5.
12 |
13 | If there are 2 groups and k = 3 and we get 2 votes for one group, then the confidence = 2/3
14 |
15 | Downfalls: we need to calculate the distance to every point, so huge datasets would be a problem.
16 | """
17 |
18 | import numpy as np
19 | import pandas as pd
20 | from sklearn import preprocessing, neighbors
21 | from sklearn.model_selection import train_test_split
22 |
23 | df = pd.read_csv("breast-cancer-wisconsin.data.txt")
24 |
25 | df.replace('?', -9999, inplace=True) # replacing missing values. Most algorithms treat -9999 as an outlier
26 | df.drop(['id'], 1, inplace=True)
27 | # print(df.head())
28 |
29 | X = np.array(df.drop(['class'], 1)) # here 1 means we want to drop the columns
30 | y = np.array(df['class'])
31 |
32 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
33 |
34 | clf = neighbors.KNeighborsClassifier(n_jobs=-1) # default k value is 5. By setting n_jobs = -1, we let the
35 | # classifier use all available CPU cores, so the neighbour searches run in parallel
36 | clf.fit(X_train, y_train)
37 |
38 | accuracy = clf.score(X_test, y_test)
39 | print(accuracy)
40 |
41 | example_measures = np.array([4, 2, 1, 1, 1, 2, 3, 2, 1]) # a 1-d array of 9 feature values; sklearn expects 2-d input
42 | example_measures = example_measures.reshape(1, -1) # 1 row; -1 lets numpy infer the number of columns
43 | prediction = clf.predict(example_measures)
44 | print(prediction)
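# The classifier can also report per-class vote fractions, similar to the "confidence" idea above
# (illustrative): print(clf.predict_proba(example_measures))  # e.g. [[1. 0.]] for a unanimous vote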
45 |
46 | """
47 | Instead of using k nearest neighbors, we could get the best fit line for each of the groups using linear regression
48 | and then find the distance of our point from the lines thus obtained. The group corresponding to the line having the
49 | least distance will be the answer.
50 |
51 | However if the data is non linear, then best fit line won't work. But k nearest neighbors will work
52 | """
--------------------------------------------------------------------------------
/classical_ml/k nearerst neighbours/2_k_nearest_neighbors_from_scratch.py:
--------------------------------------------------------------------------------
1 | """
 2 | Euclidean Distance: d(P, Q) = ( sum_{i=1..n} (Q_i - P_i)^2 )^(1/2), where n is the number of dimensions
 3 | To compare 2-d points, put n = 2
4 | """
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | from matplotlib import style
8 | from collections import Counter
9 | import warnings
10 |
11 | style.use('fivethirtyeight')
12 |
13 | dataset = {'k': [[1, 2], [2, 3], [3, 1]], 'r': [[6, 5], [7, 7], [8, 6]]} # 2 classes k and r
14 | new_features = [5, 7]
15 |
16 | [[plt.scatter(j[0], j[1], s=100, color=i) for j in dataset[i]] for i in dataset]
17 |
18 | plt.show()
19 |
20 |
21 | def k_nearest_neighbors(data, predict, k=3):
22 | if len(data) >= k:
23 | warnings.warn('K is set to value less than total groups')
24 | distances = []
25 | for group in data:
26 | for features in data[group]:
27 | # euclidean_distance = np.sqrt(np.sum((np.array(features) - np.array(predict)) ** 2))
28 | eucledian_distance = np.linalg.norm(
29 | np.array(features) - np.array(predict)) # calculates the euclidean distance
30 | distances.append([eucledian_distance, group])
31 |
32 | votes = [i[1] for i in sorted(distances)[:k]]
33 | vote_result = Counter(votes).most_common(1)[0][0]
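# e.g. with k = 3 and votes = ['r', 'r', 'k'], Counter(votes).most_common(1) gives [('r', 2)],
# so vote_result is 'r'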
34 | return vote_result
35 |
36 |
37 | result = k_nearest_neighbors(dataset, new_features, k=3)
38 | print(result)
39 |
--------------------------------------------------------------------------------
/classical_ml/k nearerst neighbours/3_applying_our_algo_on_practical_eg.py:
--------------------------------------------------------------------------------
1 | # applying the k nearest algo we built on the breast cancer classification problem
2 | import numpy as np
3 | import pandas as pd
4 | import random
5 | from collections import Counter
6 | import warnings
7 |
8 |
9 | def k_nearest_neighbors(data, predict, k=3):
10 | if len(data) >= k:
11 | warnings.warn('K is set to value less than total groups')
12 | distances = []
13 | for group in data:
14 | for features in data[group]:
15 | eucledian_distance = np.linalg.norm(
16 | np.array(features) - np.array(predict))
17 | distances.append([eucledian_distance, group])
18 |
19 | votes = [i[1] for i in sorted(distances)[:k]]
20 | vote_result = Counter(votes).most_common(1)[0][0]
21 | confidence = Counter(votes).most_common(1)[0][1] / k
22 | return vote_result, confidence
23 |
24 | df = pd.read_csv("breast-cancer-wisconsin.data.txt")
25 | df.replace('?', -9999, inplace=True)
26 | df.drop(['id'], axis=1, inplace=True)
27 |
28 | full_data = df.astype(float).values.tolist() # converting the data to float and then getting a list
29 |
30 | random.shuffle(full_data) # shuffles the list
31 |
32 | test_size = 0.2
33 | train_set = {2: [], 4: []}
34 | test_set = {2: [], 4: []}
35 | train_data = full_data[:-int(test_size * len(full_data))]
36 | test_data = full_data[-int(test_size * len(full_data)):]
37 |
38 | for i in train_data:
39 | train_set[i[-1]].append(i[:-1])
40 |
41 | for i in test_data:
42 | test_set[i[-1]].append(i[:-1])
43 |
44 | correct = 0
45 | total = 0
46 | accuracies = []
47 | for group in test_set:
48 | for data in test_set[group]:
49 | vote, confidence = k_nearest_neighbors(train_set, data, k=5)
50 | if group == vote:
51 | correct += 1
52 | total += 1
53 |
54 | print("Accuracy", correct / total)
55 | accuracies.append(correct / total)
56 |
--------------------------------------------------------------------------------
/classical_ml/k nearerst neighbours/README.md:
--------------------------------------------------------------------------------
1 | # K nearest neighbors
2 |
3 | Here I am first applying the algo on the dataset available [here](https://archive.ics.uci.edu/ml/datasets.html) to classify whether
4 | the cells in a patient are benign or malignant with respect to breast cancer.
5 |
6 | Later I developed the k nearest neighbors algo from scratch and then tested it on the same dataset.
7 |
8 | The accuracy using the inbuilt libraries and my own algo was approximately the same
9 |
--------------------------------------------------------------------------------
/classical_ml/linear regression/1_linear_regression_intro.py:
--------------------------------------------------------------------------------
1 | # applying linear regression to a training data and checking its accuracy against a test data using inbuilt libraries
2 |
3 | import pandas as pd
4 | import quandl
5 | import math
6 | import numpy as np
7 | from sklearn import preprocessing, svm
8 | from sklearn.model_selection import train_test_split
9 | from sklearn.linear_model import LinearRegression
10 |
11 | # using preprocessing for scaling. We want the features between -1 to 1. Helps to increase accuracy and processing speed
12 | # train-test-split is used to get the training and testing samples.
13 | # svm is used to do regression
14 |
15 | quandl.ApiConfig.api_key = 'vhkrsKz3TmUN6Qa4QjZK'
16 | df = quandl.get('WIKI/GOOGL')
17 |
18 | # print(df.head()) # the open, high, low are features of the stock. But we need meaningful features
19 | df = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']]
20 | df['HL_percent'] = (df['Adj. High'] - df['Adj. Low']) / df['Adj. Low'] * 100.0
21 | df['percent_change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0
22 | df = df[['Adj. Close', 'HL_percent', 'percent_change', 'Adj. Volume']]
23 | # print(df.head())
24 |
25 | # features are used to predict the label.
26 |
27 | forecast_col = 'Adj. Close'
28 | df.fillna(-9999, inplace=True) # fills the nan data with -9999 (numeric, so it is treated as an outlier rather than a string)
29 |
30 | forecast_out = int(math.ceil(0.01 * len(df))) # we will be predicting the final 1% of the data
31 |
32 | # let forecast out be = 10 days
33 | df['label'] = df[forecast_col].shift(-forecast_out) # our label here is the stock price 10 days into the future. So
34 | # based on historical data we want to predict the stock close 10 days into the future. For that, we create a new
35 | # column for the label and shift the Adj. close 10 days up
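# e.g. pd.Series([10, 11, 12, 13]).shift(-2) gives [12.0, 13.0, NaN, NaN]: each label is the value
# forecast_out rows into the future, and the last forecast_out rows end up with NaN labels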
36 |
37 | df.dropna(inplace=True) # removing the rows whose label value we don't know, we will predict this by regression
38 | # print(df.tail())
39 |
40 | X = np.array(df.drop(['label'], 1)) # X is an array of features. Everything other than 'Label' is a feature
41 | X = preprocessing.scale(X)
42 |
43 | Y = np.array(df['label'])
44 |
45 | # print(len(X), len(Y))
46 |
47 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2) # shuffles X and Y and outputs X_train,
48 | # Y_train
49 |
50 | clf = LinearRegression()
51 | clf.fit(X_train, Y_train)
52 | accuracy = clf.score(X_test, Y_test) # testing the accuracy of the classifier
53 | # train and test always on different data
54 | print(accuracy) # for regression, score() returns R^2 (the coefficient of determination), not a raw squared error
55 |
56 |
57 |
--------------------------------------------------------------------------------
/classical_ml/linear regression/2predicting_using_regression.py:
--------------------------------------------------------------------------------
1 | # actually predicting data using linear regression
2 |
3 | import pandas as pd
4 | import quandl
5 | import math
6 | import numpy as np
7 | from sklearn import preprocessing, svm
8 | from sklearn.model_selection import train_test_split
9 | from sklearn.linear_model import LinearRegression
10 | import matplotlib.pyplot as plt
11 | from matplotlib import style
12 | import datetime
13 |
14 | style.use('ggplot')
15 |
16 | quandl.ApiConfig.api_key = 'vhkrsKz3TmUN6Qa4QjZK'
17 | df = quandl.get('WIKI/GOOGL')
18 |
19 | df = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']]
20 | df['HL_percent'] = (df['Adj. High'] - df['Adj. Low']) / df['Adj. Low'] * 100.0
21 | df['percent_change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0
22 | df = df[['Adj. Close', 'HL_percent', 'percent_change', 'Adj. Volume']]
23 |
24 | forecast_col = 'Adj. Close'
25 | df.fillna(-9999, inplace=True)
26 |
27 | forecast_out = int(math.ceil(0.01 * len(df)))
28 |
29 | df['label'] = df[forecast_col].shift(-forecast_out)
30 | X = np.array(df.drop(['label'], 1)) # X is an array of features. Everything other than 'Label' is a feature
31 | X = preprocessing.scale(X)
32 | X_lately = X[-forecast_out:]
33 | X = X[:-forecast_out]
34 |
35 | df.dropna(inplace=True)
36 | Y = np.array(df['label'])
37 |
38 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
39 | clf = LinearRegression(n_jobs=-1)
40 | clf.fit(X_train, Y_train)
41 | accuracy = clf.score(X_test, Y_test)
42 |
43 | forecast_set = clf.predict(X_lately) # predicts the value for an array or a single value using our classifier
44 | print(forecast_set, accuracy, forecast_out)
45 |
46 | # plotting our predictions
47 |
48 | df['Forecast'] = np.nan
49 | last_date = df.iloc[-1].name
50 | last_unix = last_date.timestamp()
51 | one_day = 86400
52 | next_unix = last_unix + one_day
53 |
54 | for i in forecast_set:
55 | next_date = datetime.datetime.fromtimestamp(next_unix)
56 | next_unix += one_day
57 | df.loc[next_date] = [np.nan for _ in range(len(df.columns) - 1)] + [i] # .loc refers to the index by name, iloc by
58 | # number
59 |
60 | df['Adj. Close'].plot()
61 | df['Forecast'].plot()
62 | plt.legend(loc=4)
63 | plt.xlabel('Date')
64 | plt.ylabel('Price')
65 | plt.show()
66 |
--------------------------------------------------------------------------------
/classical_ml/linear regression/3_pickling_classifier.py:
--------------------------------------------------------------------------------
1 | # pickling the data
2 |
3 | import datetime
4 | import math, pickle
5 |
6 | import matplotlib.pyplot as plt
7 | import numpy as np
8 | import quandl
9 | from matplotlib import style
10 | from sklearn import preprocessing
11 | from sklearn.linear_model import LinearRegression
12 | from sklearn.model_selection import train_test_split
13 |
14 | style.use('ggplot')
15 |
16 | quandl.ApiConfig.api_key = 'vhkrsKz3TmUN6Qa4QjZK'
17 | df = quandl.get('WIKI/GOOGL')
18 |
19 | df = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']]
20 | df['HL_percent'] = (df['Adj. High'] - df['Adj. Low']) / df['Adj. Low'] * 100.0
21 | df['percent_change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0
22 | df = df[['Adj. Close', 'HL_percent', 'percent_change', 'Adj. Volume']]
23 |
24 | forecast_col = 'Adj. Close'
25 | df.fillna(-9999, inplace=True)
26 |
27 | forecast_out = int(math.ceil(0.01 * len(df)))
28 |
29 | df['label'] = df[forecast_col].shift(-forecast_out)
30 | X = np.array(df.drop(['label'], 1))
31 | X = preprocessing.scale(X)
32 | X_lately = X[-forecast_out:]
33 | X = X[:-forecast_out]
34 |
35 | df.dropna(inplace=True)
36 | Y = np.array(df['label'])
37 |
38 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
39 | clf = LinearRegression(n_jobs=-1)
40 | clf.fit(X_train, Y_train)
41 |
42 | with open('LinearRegression.pickle', 'wb') as f:
43 | pickle.dump(clf, f)
44 |
45 | pickle_in = open('LinearRegression.pickle', 'rb')
46 | clf = pickle.load(pickle_in)
47 | accuracy = clf.score(X_test, Y_test)
48 |
49 | forecast_set = clf.predict(X_lately)
50 | print(forecast_set, accuracy, forecast_out)
51 |
52 | df['Forecast'] = np.nan
53 | last_date = df.iloc[-1].name
54 | last_unix = last_date.timestamp()
55 | one_day = 86400
56 | next_unix = last_unix + one_day
57 |
58 | for i in forecast_set:
59 | next_date = datetime.datetime.fromtimestamp(next_unix)
60 | next_unix += one_day
61 | df.loc[next_date] = [np.nan for _ in range(len(df.columns) - 1)] + [i]
62 |
63 | df['Adj. Close'].plot()
64 | df['Forecast'].plot()
65 | plt.legend(loc=4)
66 | plt.xlabel('Date')
67 | plt.ylabel('Price')
68 | plt.show()
69 |
--------------------------------------------------------------------------------
/classical_ml/linear regression/4linear_regression_from_scratch.py:
--------------------------------------------------------------------------------
1 | """
 2 | In linear regression, we approximate y using a straight line, y = mx + b. We minimise the squared error
 3 | and differentiate with respect to m and b to get their values:
 4 |
 5 | m = (x'y' - (xy)') / ((x')^2 - (x^2)')    where x' is the mean of x, y' is the mean of y, (xy)' is the mean of x*y
 6 | for b, substitute this m into y' = mx' + b, i.e. b = y' - mx'
7 | """
8 |
9 | from statistics import mean
10 | import numpy as np
11 | import matplotlib.pyplot as plt
12 | from matplotlib import style
13 |
14 | style.use('fivethirtyeight')
15 |
16 | xs = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) # float64 is actually the default data type
17 | ys = np.array([5, 4, 6, 5, 6, 7], dtype=np.float64)
18 |
19 |
20 | def best_fit_slope_and_intercept(xs, ys):
21 | m = ((mean(xs) * mean(ys)) - (mean(xs * ys))) / (mean(xs) ** 2 - mean(xs ** 2))
22 | b = mean(ys) - m * mean(xs)
23 | return m, b
24 |
25 |
26 | def squared_error(ys_orig, ys_line):
27 | return sum((ys_line - ys_orig) ** 2)
28 |
29 |
30 | def coeff_of_determination(ys_orig, ys_line):
31 | ys_mean_line = [mean(ys_orig) for y in ys_orig]
32 | squared_error_regression = squared_error(ys_orig, ys_line)
33 | squared_error_mean = squared_error(ys_orig, ys_mean_line)
34 | return 1 - (squared_error_regression / squared_error_mean)
35 |
36 |
37 | m, b = best_fit_slope_and_intercept(xs, ys)
38 |
39 | regression_line = [(m * x) + b for x in xs]
40 |
41 | coeff_of_deter = coeff_of_determination(ys, regression_line)
42 | print(coeff_of_deter)
43 |
44 | plt.scatter(xs, ys)
45 | plt.plot(xs, regression_line)
46 | plt.show()
47 |
48 | # getting the accuracy of our linear fit line. We do this using the squared error. We square rather than take the
49 | # absolute value because we want to penalise points that are far from the line more heavily
50 |
51 | '''
52 | R squared theory -> coefficient of determination
53 | r^2 = 1 - (squared error of the regression line) / (squared error of the mean line)
54 | The higher the value of R squared, the better the fit
55 | '''
56 |
--------------------------------------------------------------------------------
/classical_ml/linear regression/5testing_assumptions.py:
--------------------------------------------------------------------------------
1 | from statistics import mean
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 | from matplotlib import style
5 | import random
6 |
7 | style.use('fivethirtyeight')
8 |
9 |
10 | # xs = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) # float64 is actually the default data type
11 | # ys = np.array([5, 4, 6, 5, 6, 7], dtype=np.float64)
12 |
13 |
14 | def create_dataset(hm, variance, step=2, correlation=False):  # hm: how many points; variance of ys; step per point; correlation: 'pos', 'neg' or False
15 | val = 1
16 | ys = []
17 | for i in range(hm):
18 | y = val + random.randrange(-variance, variance)
19 | ys.append(y)
20 | if correlation and correlation == 'pos':
21 | val += step
22 | elif correlation and correlation == 'neg':
23 | val -= step
24 | xs = [i for i in range(len(ys))]
25 | return np.array(xs, dtype=np.float64), np.array(ys, dtype=np.float64)
26 |
27 |
28 | def best_fit_slope_and_intercept(xs, ys):
29 | m = ((mean(xs) * mean(ys)) - (mean(xs * ys))) / (mean(xs) ** 2 - mean(xs ** 2))
30 | b = mean(ys) - m * mean(xs)
31 | return m, b
32 |
33 |
34 | def squared_error(ys_orig, ys_line):
35 | return sum((ys_line - ys_orig) ** 2)
36 |
37 |
38 | def coeff_of_determination(ys_orig, ys_line):
39 | ys_mean_line = [mean(ys_orig) for y in ys_orig]
40 | squared_error_regression = squared_error(ys_orig, ys_line)
41 | squared_error_mean = squared_error(ys_orig, ys_mean_line)
42 | return 1 - (squared_error_regression / squared_error_mean)
43 |
44 |
45 | xs, ys = create_dataset(40, 40, 2, correlation='pos')
46 | # if the variance is decreased, the coefficient of determination increases (see the sketch at the end of this file)
47 |
48 | m, b = best_fit_slope_and_intercept(xs, ys)
49 |
50 | regression_line = [(m * x) + b for x in xs]
51 |
52 | coeff_of_deter = coeff_of_determination(ys, regression_line)
53 | print(coeff_of_deter)
54 |
55 | plt.scatter(xs, ys)
56 | plt.plot(xs, regression_line)
57 | plt.show()
58 |
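
To make the variance comment above concrete, here is a small editorial sketch (not in the original file) that reruns the fit at several variances; lower variance should give an R squared closer to 1.

# Editorial sketch: lower variance in the generated data -> higher coefficient of determination.
for var in (10, 20, 40, 80):
    xs_v, ys_v = create_dataset(40, var, 2, correlation='pos')
    m_v, b_v = best_fit_slope_and_intercept(xs_v, ys_v)
    line_v = [(m_v * x) + b_v for x in xs_v]
    print(var, coeff_of_determination(ys_v, line_v))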
--------------------------------------------------------------------------------
/classical_ml/linear regression/LinearRegression.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/classical_ml/linear regression/LinearRegression.pickle
--------------------------------------------------------------------------------
/classical_ml/linear regression/README.md:
--------------------------------------------------------------------------------
1 | Applying linear regression to a dataset of Google stock prices obtained from Quandl.
2 |
3 | The classifier is not that good because the features I selected were weak.
4 |
5 | However, I was not able to find a better dataset.
6 |
7 |
8 |
--------------------------------------------------------------------------------
/classical_ml/svm/1_intro.py:
--------------------------------------------------------------------------------
1 | """
2 | SVM is a binary classifier: Separates only in 2 groups at a time. That does not mean it can only "classify"
3 | in 2 groups. It just means that at a time it can only separate one group from the rest.
4 |
5 | The 2 groups are denoted as positives and negatives.
6 |
7 | We want a street that separates the 2 goups and is as wide as possible. Then we consider a vector(w) which is
8 | perpendicular to the street's median. Now to check if a point lies on one side or other, we take the dot product of the
9 | unknown point (vector u) with (vector w). Now from this length we can check if the point is on left or the right side
10 | of the street
11 | OR
12 | (vector u).(vector w) + b >=0 (1)
13 | then one side else on other
14 | Here we don't know w and b, we just know w is perpendicular
15 | ALSO
16 | (X+).(vector w) +b > =1 where X+ is the positive sample
17 | and
18 | (X-).(vector w) + b<=1 where X- is the negative sample.
19 |
20 | All these are constraints
21 |
22 | We introduce Yi such that Yi = 1 for +
23 |                           Yi = -1 for -
24 | Multiplying both constraints by Yi gives
25 | Yi*[(X).(vector w) + b] - 1 >= 0 (2)
26 | where X is any known sample.
27 | Also, for Xi in the gutter, (2) = 0. These are called Support Vectors.
28 |
29 | Q) How to find the width of the "street"?
30 | A) Take a unit normal to the "gutter"; then the dot product (X+ - X-).(w/|w|) (3)
31 | is the width, since w is normal to the street. Using (2) at the gutter, X+.w = 1 - b and X-.w = -1 - b,
32 | so (X+ - X-).w = 2 and the width comes out as: 2/|w|
33 |
34 | To maximise the width of the street, we want to minimise |w|, or equivalently
35 | min 1/2(|w|)^2 (4)
36 |
37 | To find an extremum subject to constraints, we use Lagrange multipliers. We find that
38 | w = sum(Ci*Xi*Yi) (5)
39 | sum(Ci*Yi) = 0 (6)
40 |
41 | (vector X).(vector W) + bias = 0 gives the decision boundary
42 |
43 | The decision boundary is the boundary which separates the 2 groups.
44 |
45 | """
46 | # applying the svm lib on the breast cancer eg
47 |
48 | import numpy as np
49 | import pandas as pd
50 | from sklearn import preprocessing, svm
51 | from sklearn.model_selection import train_test_split
52 |
53 | df = pd.read_csv("breast-cancer-wisconsin.data.txt")
54 |
55 | df.replace('?', -9999, inplace=True)  # replacing missing values; most algorithms treat -9999 as an outlier
56 | df.drop(['id'], 1, inplace=True)
57 | # print(df.head())
58 |
59 | X = np.array(df.drop(['class'], 1)) # here 1 means we want to drop the columns
60 | y = np.array(df['class'])
61 |
62 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
63 |
64 | clf = svm.SVC() # SVC: support vector classifier
65 | clf.fit(X_train, y_train)
66 |
67 | accuracy = clf.score(X_test, y_test)
68 | print(accuracy)
69 |
70 | example_measures = np.array([4, 2, 1, 1, 1, 2, 3, 2, 1])  # a 1-D array of 9 feature values
71 | example_measures = example_measures.reshape(1, -1)  # reshape to 1 row; -1 lets numpy infer the number of cols
72 | prediction = clf.predict(example_measures)
73 | print(prediction)
74 |
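
Since train_test_split shuffles randomly, the accuracy printed above varies from run to run. A small editorial sketch (not part of the original file) that averages over several splits gives a steadier estimate:

# Editorial sketch: average accuracy over several random train/test splits.
scores = []
for _ in range(10):
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2)
    clf_i = svm.SVC()
    clf_i.fit(X_tr, y_tr)
    scores.append(clf_i.score(X_te, y_te))
print(sum(scores) / len(scores))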
--------------------------------------------------------------------------------
/classical_ml/svm/2_svm_from_scratch.py:
--------------------------------------------------------------------------------
1 | """
2 | We want
3 | 1) min(|w|) for max width
4 | 2) max(|b|)
5 | given constraint
6 | 3) Yi*[(Xi).(vector w) +b] >=1
7 |
8 | This is optimisation problem.
9 |
10 | This is a quadratic optimisation problem. But it will have a global min as it is convex. So convex optimisation problem
11 |
12 | can use cvxopt, libsvm lib
13 |
14 | We take a value of W initially and keep reducing and so on while satifying (3).
15 | """
16 |
17 | import matplotlib.pyplot as plt
18 | from matplotlib import style
19 | import numpy as np
20 |
21 | style.use('ggplot')
22 |
23 |
24 | class Support_Vector_Machine:
25 | def __init__(self, visualization=True):
26 | self.visualization = visualization
27 | self.colors = {1: 'r', -1: 'b'}
28 | if self.visualization:
29 | self.fig = plt.figure()
30 | self.ax = self.fig.add_subplot(1, 1, 1)
31 |
32 | """
33 | we want to get the values of w and b. For the beginning, we first find the max number in the feature
34 | set that to latest_optimum. w becomes [ latest_optimum, latest_optimum]. What we need to do is that
35 | for all yi,xi : yi[w.xi + b] >=1
36 | So first we check for this w and b . If True, we reduce w by a smaller step size and again check.
37 | if not true, we keep on reducing w with the same step size.
38 | """
39 |
40 | def fit(self, data):
41 | self.data = data
42 | # { ||w||: [w,b] }
43 | opt_dict = {}
44 |
45 | transforms = [[1, 1],
46 | [-1, 1],
47 | [-1, -1],
48 | [1, -1]]
49 |
50 | all_data = []
51 | for yi in self.data:
52 | for featureset in self.data[yi]:
53 | for feature in featureset:
54 | all_data.append(feature)
55 |
56 | self.max_feature_value = max(all_data)
57 | self.min_feature_value = min(all_data)
58 | all_data = None
59 |
60 | # support vectors yi(xi.w+b) = 1
61 |
62 |
63 | step_sizes = [self.max_feature_value * 0.1,
64 | self.max_feature_value * 0.01,
65 | # point of expense:
66 | self.max_feature_value * 0.001,
67 | ]
68 |
69 | # extremely expensive
70 | b_range_multiple = 2
71 | # we dont need to take as small of steps
72 | # with b as we do w
73 | b_multiple = 5
74 | latest_optimum = self.max_feature_value * 10
75 |
76 | for step in step_sizes:
77 | w = np.array([latest_optimum, latest_optimum])
78 | # we can do this because convex
79 | optimized = False
80 | while not optimized:
81 | for b in np.arange(-1 * (self.max_feature_value * b_range_multiple),
82 | self.max_feature_value * b_range_multiple,
83 | step * b_multiple):
84 | for transformation in transforms:
85 | w_t = w * transformation
86 | found_option = True
87 | # weakest link in the SVM fundamentally
88 | # SMO attempts to fix this a bit
89 | # yi(xi.w+b) >= 1
90 | #
91 | # #### add a break here later..
92 | for i in self.data:
93 | for xi in self.data[i]:
94 | yi = i
95 | if not yi * (np.dot(w_t, xi) + b) >= 1:
96 | found_option = False
97 | break
98 | if not found_option:
99 | break
100 |
101 | # print(xi,':',yi*(np.dot(w_t,xi)+b))
102 |
103 | if found_option:
104 | opt_dict[np.linalg.norm(w_t)] = [w_t, b]
105 |
106 | if w[0] < 0:
107 | optimized = True
108 | print('Optimized a step.')
109 | else:
110 | w = w - step
111 |
112 | norms = sorted([n for n in opt_dict])
113 | # ||w|| : [w,b]
114 | opt_choice = opt_dict[norms[0]]
115 | self.w = opt_choice[0]
116 | self.b = opt_choice[1]
117 | latest_optimum = opt_choice[0][0] + step * 2
118 |
119 | def predict(self, features):
120 | # sign( x.w+b )
121 | classification = np.sign(np.dot(np.array(features), self.w) + self.b)
122 | if self.visualization and classification != 0:
123 | self.ax.scatter(features[0], features[1], s=200, marker='*', c=self.colors[classification])
124 | return classification
125 |
126 | def visualise(self):
127 | [[self.ax.scatter(x[0], x[1], s=100, color=self.colors[i]) for x in data_dict[i]] for i in data_dict]
128 |
129 | # hyperplane = wx + b
130 | # v = wx + b
131 | # for the positive support vector, v = 1
132 | # for the negative support vector, v = -1
133 | # for the decision boundary, v = 0
134 | """
135 | Here we want to draw the hyperplane for that we need 2 points to draw a line. The feature we
136 | assumed are 2. We also have the max and min value of feature.
137 | First we assume x to be min and find the corrs. y for the positve support vector line
138 | then we assume x to be max and find the corr. y for the positive support vector
139 | We repeat for negative support vector line and the decision boundary
140 | """
141 |
142 | def hyperplane(x, w, b, v):
143 | return (-w[0] * x - b + v) / w[1]
144 |
145 | data_range = (self.min_feature_value * 0.9, self.max_feature_value * 1.1)
146 | hyp_x_min = data_range[0]
147 | hyp_x_max = data_range[1]
148 |
149 | # (w.x + b) =1
150 | # positive support vector hyperplane
151 | psv1 = hyperplane(hyp_x_min, self.w, self.b, 1)
152 | psv2 = hyperplane(hyp_x_max, self.w, self.b, 1)
153 | self.ax.plot([hyp_x_min, hyp_x_max], [psv1, psv2], 'black')
154 |
155 | # (w.x + b) =-1
156 | # negative support vector hyperplane
157 | nsv1 = hyperplane(hyp_x_min, self.w, self.b, -1)
158 | nsv2 = hyperplane(hyp_x_max, self.w, self.b, -1)
159 | self.ax.plot([hyp_x_min, hyp_x_max], [nsv1, nsv2], 'black')
160 |
161 | # (w.x+ b) = 0
162 | # decision boundary
163 | db1 = hyperplane(hyp_x_min, self.w, self.b, 0)
164 | db2 = hyperplane(hyp_x_max, self.w, self.b, 0)
165 | self.ax.plot([hyp_x_min, hyp_x_max], [db1, db2], 'y--')
166 |
167 | plt.show()
168 |
169 |
170 | data_dict = {-1: np.array([[1, 7],
171 | [2, 8],
172 | [3, 8]]),
173 | 1: np.array([[5, 1],
174 | [6, -1],
175 | [7, 3]])}
176 |
177 | clf = Support_Vector_Machine()
178 | clf.fit(data=data_dict)
179 |
180 | predict_us = [[0, 10],
181 | [1, 3],
182 | [3, 4],
183 | [3, 5],
184 | [5, 5],
185 | [5, 6],
186 | [6, -5],
187 | [5, 8]]
188 |
189 | for p in predict_us:
190 | clf.predict(p)
191 | clf.visualise()
192 |
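
As a rough check of the fitted margin (an editorial sketch, not in the original file), each training point can be pushed through yi*(w.xi + b); values close to 1 correspond to the (approximate) support vectors mentioned in the comments above.

# Editorial sketch: functional margin of each training point under the fitted w and b.
for yi in data_dict:
    for xi in data_dict[yi]:
        print(xi, yi * (np.dot(clf.w, xi) + clf.b))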
--------------------------------------------------------------------------------
/classical_ml/svm/3_kernels_intro.py:
--------------------------------------------------------------------------------
1 | """
2 | If the data is not linearly separable
3 |
4 | We can take another perspective and add a new dimension
5 | eg if earlier features were x1, x2
6 |
7 | How to add the dimension?
8 | x3 = x1*x2
9 | But this increases the amount of data we have to work with.
10 |
11 | The main downfall of SVM is training on large data because of the optimisation problem, so this is not a good solution.
12 |
13 |
14 | So comes the use of kernels:
15 | A kernel is a similarity function which takes two inputs and outputs their similarity.
16 | We can use kernels to transform the non-linear data into a different (higher-dimensional) space, creating a linearly
17 | separable situation.
18 |
19 | Kernels are computed using an inner product, which is the same as a dot product.
20 |
21 | If we earlier had x1, x2, we now have z1, z2, z3 and so on; we do not need to know how many dimensions there are.
22 |
23 | Constraints earlier:
24 | Yi[xi.w+b] >=1
25 | Also, w = sum(AiXiYi) using lagrange
26 |
27 | We can replace X with Z, since everywhere only a dot product is being used, which returns a scalar.
28 |
29 | K(X,X') = Z.Z'  -> the dot product gives a scalar
30 | Z = f(X)
31 | Z' = f(X')
32 |
33 | We just need the dot product, not the actual values of Z and Z'.
34 |
35 | Say we originally have [X1,X2] and convert to a second-order polynomial. So
36 | X=[x1,x2]  Z=[1,x1,x2,x1^2,x2^2,x1.x2] -> 6 dimensions (the cross terms carry constant factors; see the sketch below)
37 |
38 | So K(X,X') = Z.Z' = 1 + x1.x1' + x2.x2' + ...
39 |
40 | The kernel used here is the polynomial kernel.
41 | Above we wrote Z out explicitly; with the kernel we never have to.
42 | K(x,x') = (1 + x.x')^p, where x has n terms and p is the degree of the polynomial.
43 |
44 | Another kernel is the RBF (radial basis function) kernel:
45 | K(x,x') = e^(-gamma|x-x'|^2). This is the default kernel in sklearn's SVC. It corresponds to an infinite-dimensional feature space.
46 |
47 | But it might be the case that the data is simply not separable, i.e. there is no pattern to classify.
48 | The RBF kernel will still "separate" such data. To detect this:
49 | check the number of support vectors. If almost all of the samples end up as support vectors, it is a major red flag:
50 | it means the model is overfitting.
51 |
52 | So if you run a classifier and find the accuracy to be, say, 53%, is the theory wrong or should a different kernel be used?
53 | Check the percentage of support vectors: if it is > 20%, that suggests overfitting, so try a different kernel.
54 | If the percentage is low and the accuracy is also low, the data probably will not work out, though we can still try a
55 | different kernel.
56 |
57 | Yi*[xi.w + b] >= 1 - slack, otherwise overfitting may take place.
58 | We want to minimise the slack as well.
59 | Objective:
60 | 1) min|w|, or min(1/2 |w|^2 + C*sum(slacks))
61 | A lower C means the slacks matter less in the objective.
62 |
63 | The classifier obtained this way is called a soft margin classifier.
64 | """
65 |
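
To make the "we only need the dot product" point concrete, here is a small editorial sketch (not part of the original notes): for a degree-2 polynomial kernel on 2-D inputs, (1 + x.x')^2 equals the dot product of explicit feature maps, provided the cross terms carry sqrt(2) factors.

# Editorial sketch: the degree-2 polynomial kernel equals an explicit 6-D feature map's dot product.
import numpy as np

def phi(x):
    x1, x2 = x
    return np.array([1, np.sqrt(2) * x1, np.sqrt(2) * x2,
                     x1 ** 2, x2 ** 2, np.sqrt(2) * x1 * x2])

a = np.array([1.0, 2.0])
b = np.array([3.0, 0.5])
print((1 + np.dot(a, b)) ** 2)  # 25.0
print(np.dot(phi(a), phi(b)))   # 25.0 as well, without ever needing phi in practice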
66 | #just to visualise. Not coded by me
67 |
68 | import numpy as np
69 | from numpy import linalg
70 | import cvxopt
71 | import cvxopt.solvers
72 |
73 |
74 | def linear_kernel(x1, x2):
75 | return np.dot(x1, x2)
76 |
77 |
78 | def polynomial_kernel(x, y, p=3):
79 | return (1 + np.dot(x, y)) ** p
80 |
81 |
82 | def gaussian_kernel(x, y, sigma=5.0):
83 | return np.exp(-linalg.norm(x - y) ** 2 / (2 * (sigma ** 2)))
84 |
85 |
86 | class SVM(object):
87 | def __init__(self, kernel=linear_kernel, C=None):
88 | self.kernel = kernel
89 | self.C = C
90 | if self.C is not None: self.C = float(self.C) # C for soft margin
91 |
92 | def fit(self, X, y):
93 | n_samples, n_features = X.shape
94 |
95 | # Gram matrix
96 | K = np.zeros((n_samples, n_samples))
97 | for i in range(n_samples):
98 | for j in range(n_samples):
99 | K[i, j] = self.kernel(X[i], X[j])
100 |
101 | P = cvxopt.matrix(np.outer(y, y) * K)
102 | q = cvxopt.matrix(np.ones(n_samples) * -1)
103 | A = cvxopt.matrix(y, (1, n_samples))
104 | b = cvxopt.matrix(0.0)
105 |
106 | if self.C is None:
107 | G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))
108 | h = cvxopt.matrix(np.zeros(n_samples))
109 | else:
110 | tmp1 = np.diag(np.ones(n_samples) * -1)
111 | tmp2 = np.identity(n_samples)
112 | G = cvxopt.matrix(np.vstack((tmp1, tmp2)))
113 | tmp1 = np.zeros(n_samples)
114 | tmp2 = np.ones(n_samples) * self.C
115 | h = cvxopt.matrix(np.hstack((tmp1, tmp2)))
116 |
117 | # solve QP problem
118 | solution = cvxopt.solvers.qp(P, q, G, h, A, b)
119 |
120 | # Lagrange multipliers
121 | a = np.ravel(solution['x'])
122 |
123 | # Support vectors have non zero lagrange multipliers
124 | sv = a > 1e-5
125 | ind = np.arange(len(a))[sv]
126 | self.a = a[sv]
127 | self.sv = X[sv]
128 | self.sv_y = y[sv]
129 | print("%d support vectors out of %d points" % (len(self.a), n_samples))
130 |
131 | # Intercept
132 | self.b = 0
133 | for n in range(len(self.a)):
134 | self.b += self.sv_y[n]
135 | self.b -= np.sum(self.a * self.sv_y * K[ind[n], sv])
136 | self.b /= len(self.a)
137 |
138 | # Weight vector
139 | if self.kernel == linear_kernel:
140 | self.w = np.zeros(n_features)
141 | for n in range(len(self.a)):
142 | self.w += self.a[n] * self.sv_y[n] * self.sv[n]
143 | else:
144 | self.w = None
145 |
146 | def project(self, X):
147 | if self.w is not None:
148 | return np.dot(X, self.w) + self.b
149 | else:
150 | y_predict = np.zeros(len(X))
151 | for i in range(len(X)):
152 | s = 0
153 | for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):
154 | s += a * sv_y * self.kernel(X[i], sv)
155 | y_predict[i] = s
156 | return y_predict + self.b
157 |
158 | def predict(self, X):
159 | return np.sign(self.project(X))
160 |
161 |
162 | if __name__ == "__main__":
163 | import pylab as pl
164 |
165 |
166 | def gen_lin_separable_data():
167 | # generate training data in the 2-d case
168 | mean1 = np.array([0, 2])
169 | mean2 = np.array([2, 0])
170 | cov = np.array([[0.8, 0.6], [0.6, 0.8]])
171 | X1 = np.random.multivariate_normal(mean1, cov, 100)
172 | y1 = np.ones(len(X1))
173 | X2 = np.random.multivariate_normal(mean2, cov, 100)
174 | y2 = np.ones(len(X2)) * -1
175 | return X1, y1, X2, y2
176 |
177 |
178 | def gen_non_lin_separable_data():
179 | mean1 = [-1, 2]
180 | mean2 = [1, -1]
181 | mean3 = [4, -4]
182 | mean4 = [-4, 4]
183 | cov = [[1.0, 0.8], [0.8, 1.0]]
184 | X1 = np.random.multivariate_normal(mean1, cov, 50)
185 | X1 = np.vstack((X1, np.random.multivariate_normal(mean3, cov, 50)))
186 | y1 = np.ones(len(X1))
187 | X2 = np.random.multivariate_normal(mean2, cov, 50)
188 | X2 = np.vstack((X2, np.random.multivariate_normal(mean4, cov, 50)))
189 | y2 = np.ones(len(X2)) * -1
190 | return X1, y1, X2, y2
191 |
192 |
193 | def gen_lin_separable_overlap_data():
194 | # generate training data in the 2-d case
195 | mean1 = np.array([0, 2])
196 | mean2 = np.array([2, 0])
197 | cov = np.array([[1.5, 1.0], [1.0, 1.5]])
198 | X1 = np.random.multivariate_normal(mean1, cov, 100)
199 | y1 = np.ones(len(X1))
200 | X2 = np.random.multivariate_normal(mean2, cov, 100)
201 | y2 = np.ones(len(X2)) * -1
202 | return X1, y1, X2, y2
203 |
204 |
205 | def split_train(X1, y1, X2, y2):
206 | X1_train = X1[:90]
207 | y1_train = y1[:90]
208 | X2_train = X2[:90]
209 | y2_train = y2[:90]
210 | X_train = np.vstack((X1_train, X2_train))
211 | y_train = np.hstack((y1_train, y2_train))
212 | return X_train, y_train
213 |
214 |
215 | def split_test(X1, y1, X2, y2):
216 | X1_test = X1[90:]
217 | y1_test = y1[90:]
218 | X2_test = X2[90:]
219 | y2_test = y2[90:]
220 | X_test = np.vstack((X1_test, X2_test))
221 | y_test = np.hstack((y1_test, y2_test))
222 | return X_test, y_test
223 |
224 |
225 | def plot_margin(X1_train, X2_train, clf):
226 | def f(x, w, b, c=0):
227 | # given x, return y such that [x,y] in on the line
228 | # w.x + b = c
229 | return (-w[0] * x - b + c) / w[1]
230 |
231 | pl.plot(X1_train[:, 0], X1_train[:, 1], "ro")
232 | pl.plot(X2_train[:, 0], X2_train[:, 1], "bo")
233 | pl.scatter(clf.sv[:, 0], clf.sv[:, 1], s=100, c="g")
234 |
235 | # w.x + b = 0
236 | a0 = -4;
237 | a1 = f(a0, clf.w, clf.b)
238 | b0 = 4;
239 | b1 = f(b0, clf.w, clf.b)
240 | pl.plot([a0, b0], [a1, b1], "k")
241 |
242 | # w.x + b = 1
243 | a0 = -4;
244 | a1 = f(a0, clf.w, clf.b, 1)
245 | b0 = 4;
246 | b1 = f(b0, clf.w, clf.b, 1)
247 | pl.plot([a0, b0], [a1, b1], "k--")
248 |
249 | # w.x + b = -1
250 | a0 = -4;
251 | a1 = f(a0, clf.w, clf.b, -1)
252 | b0 = 4;
253 | b1 = f(b0, clf.w, clf.b, -1)
254 | pl.plot([a0, b0], [a1, b1], "k--")
255 |
256 | pl.axis("tight")
257 | pl.show()
258 |
259 |
260 | def plot_contour(X1_train, X2_train, clf):
261 | pl.plot(X1_train[:, 0], X1_train[:, 1], "ro")
262 | pl.plot(X2_train[:, 0], X2_train[:, 1], "bo")
263 | pl.scatter(clf.sv[:, 0], clf.sv[:, 1], s=100, c="g")
264 |
265 | X1, X2 = np.meshgrid(np.linspace(-6, 6, 50), np.linspace(-6, 6, 50))
266 | X = np.array([[x1, x2] for x1, x2 in zip(np.ravel(X1), np.ravel(X2))])
267 | Z = clf.project(X).reshape(X1.shape)
268 | pl.contour(X1, X2, Z, [0.0], colors='k', linewidths=1, origin='lower')
269 | pl.contour(X1, X2, Z + 1, [0.0], colors='grey', linewidths=1, origin='lower')
270 | pl.contour(X1, X2, Z - 1, [0.0], colors='grey', linewidths=1, origin='lower')
271 |
272 | pl.axis("tight")
273 | pl.show()
274 |
275 |
276 | def test_linear():
277 | X1, y1, X2, y2 = gen_lin_separable_data()
278 | X_train, y_train = split_train(X1, y1, X2, y2)
279 | X_test, y_test = split_test(X1, y1, X2, y2)
280 |
281 | clf = SVM()
282 | clf.fit(X_train, y_train)
283 |
284 | y_predict = clf.predict(X_test)
285 | correct = np.sum(y_predict == y_test)
286 | print("%d out of %d predictions correct" % (correct, len(y_predict)))
287 |
288 | plot_margin(X_train[y_train == 1], X_train[y_train == -1], clf)
289 |
290 |
291 | def test_non_linear():
292 | X1, y1, X2, y2 = gen_non_lin_separable_data()
293 | X_train, y_train = split_train(X1, y1, X2, y2)
294 | X_test, y_test = split_test(X1, y1, X2, y2)
295 |
296 | clf = SVM(polynomial_kernel)
297 | clf.fit(X_train, y_train)
298 |
299 | y_predict = clf.predict(X_test)
300 | correct = np.sum(y_predict == y_test)
301 | print("%d out of %d predictions correct" % (correct, len(y_predict)))
302 |
303 | plot_contour(X_train[y_train == 1], X_train[y_train == -1], clf)
304 |
305 |
306 | def test_soft():
307 | X1, y1, X2, y2 = gen_lin_separable_overlap_data()
308 | X_train, y_train = split_train(X1, y1, X2, y2)
309 | X_test, y_test = split_test(X1, y1, X2, y2)
310 |
311 | clf = SVM(C=1000.1)
312 | clf.fit(X_train, y_train)
313 |
314 | y_predict = clf.predict(X_test)
315 | correct = np.sum(y_predict == y_test)
316 | print("%d out of %d predictions correct" % (correct, len(y_predict)))
317 |
318 | plot_contour(X_train[y_train == 1], X_train[y_train == -1], clf)
319 |
320 |
321 | # test_linear() # hard margin
322 | # test_non_linear()
323 | test_soft()
324 |
--------------------------------------------------------------------------------
/classical_ml/svm/4_kerenels_using_sklearn.py:
--------------------------------------------------------------------------------
1 | """
2 | Classifying with svm when more than 2 groups:
3 | 1) OVR : one vs rest:
4 | separate one group from rest of the data
5 | 2) OVO : One vs One:
6 | assume 3 groups : 1,2,3
7 | first make hyperplane for 1 vs 2 and 1 vs 3
8 | then 2 vs 3
9 | """
10 |
11 | # check the documentation for svm.SVC
12 | import numpy as np
13 | import pandas as pd
14 | from sklearn import preprocessing, svm
15 | from sklearn.model_selection import train_test_split
16 |
17 | df = pd.read_csv("breast-cancer-wisconsin.data.txt")
18 |
19 | df.replace('?', -9999, inplace=True)
20 | df.drop(['id'], 1, inplace=True)
21 |
22 | X = np.array(df.drop(['class'], 1)) # here 1 means we want to drop the columns
23 | y = np.array(df['class'])
24 |
25 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
26 |
27 | clf = svm.SVC() # SVC: support vector classifier
28 | clf.fit(X_train, y_train)
29 |
30 | accuracy = clf.score(X_test, y_test)
31 | print(accuracy)
32 |
33 | example_measures = np.array([4, 2, 1, 1, 1, 2, 3, 2, 1])
34 | example_measures = example_measures.reshape(1, -1)
35 | prediction = clf.predict(example_measures)
36 | print(prediction)
37 |
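
The file name mentions kernels, but the script above uses SVC's default (RBF) kernel. A short editorial sketch (not part of the original file) comparing a few of sklearn's built-in kernel choices on the same split:

# Editorial sketch: compare sklearn's built-in kernels on the same train/test split.
for kernel in ('linear', 'poly', 'rbf'):
    clf_k = svm.SVC(kernel=kernel)
    clf_k.fit(X_train, y_train)
    print(kernel, clf_k.score(X_test, y_test))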
--------------------------------------------------------------------------------
/classical_ml/svm/breast-cancer-wisconsin.data.txt:
--------------------------------------------------------------------------------
1 | id,clump_thickness,unif_cell_size,uni_cell_shape,marg_adhesion,single_epi_cell_size,bare_nuclei,bland_chrom,norm_nucleoli,mitosis,class
2 | 1000025,5,1,1,1,2,1,3,1,1,2
3 | 1002945,5,4,4,5,7,10,3,2,1,2
4 | 1015425,3,1,1,1,2,2,3,1,1,2
5 | 1016277,6,8,8,1,3,4,3,7,1,2
6 | 1017023,4,1,1,3,2,1,3,1,1,2
7 | 1017122,8,10,10,8,7,10,9,7,1,4
8 | 1018099,1,1,1,1,2,10,3,1,1,2
9 | 1018561,2,1,2,1,2,1,3,1,1,2
10 | 1033078,2,1,1,1,2,1,1,1,5,2
11 | 1033078,4,2,1,1,2,1,2,1,1,2
12 | 1035283,1,1,1,1,1,1,3,1,1,2
13 | 1036172,2,1,1,1,2,1,2,1,1,2
14 | 1041801,5,3,3,3,2,3,4,4,1,4
15 | 1043999,1,1,1,1,2,3,3,1,1,2
16 | 1044572,8,7,5,10,7,9,5,5,4,4
17 | 1047630,7,4,6,4,6,1,4,3,1,4
18 | 1048672,4,1,1,1,2,1,2,1,1,2
19 | 1049815,4,1,1,1,2,1,3,1,1,2
20 | 1050670,10,7,7,6,4,10,4,1,2,4
21 | 1050718,6,1,1,1,2,1,3,1,1,2
22 | 1054590,7,3,2,10,5,10,5,4,4,4
23 | 1054593,10,5,5,3,6,7,7,10,1,4
24 | 1056784,3,1,1,1,2,1,2,1,1,2
25 | 1057013,8,4,5,1,2,?,7,3,1,4
26 | 1059552,1,1,1,1,2,1,3,1,1,2
27 | 1065726,5,2,3,4,2,7,3,6,1,4
28 | 1066373,3,2,1,1,1,1,2,1,1,2
29 | 1066979,5,1,1,1,2,1,2,1,1,2
30 | 1067444,2,1,1,1,2,1,2,1,1,2
31 | 1070935,1,1,3,1,2,1,1,1,1,2
32 | 1070935,3,1,1,1,1,1,2,1,1,2
33 | 1071760,2,1,1,1,2,1,3,1,1,2
34 | 1072179,10,7,7,3,8,5,7,4,3,4
35 | 1074610,2,1,1,2,2,1,3,1,1,2
36 | 1075123,3,1,2,1,2,1,2,1,1,2
37 | 1079304,2,1,1,1,2,1,2,1,1,2
38 | 1080185,10,10,10,8,6,1,8,9,1,4
39 | 1081791,6,2,1,1,1,1,7,1,1,2
40 | 1084584,5,4,4,9,2,10,5,6,1,4
41 | 1091262,2,5,3,3,6,7,7,5,1,4
42 | 1096800,6,6,6,9,6,?,7,8,1,2
43 | 1099510,10,4,3,1,3,3,6,5,2,4
44 | 1100524,6,10,10,2,8,10,7,3,3,4
45 | 1102573,5,6,5,6,10,1,3,1,1,4
46 | 1103608,10,10,10,4,8,1,8,10,1,4
47 | 1103722,1,1,1,1,2,1,2,1,2,2
48 | 1105257,3,7,7,4,4,9,4,8,1,4
49 | 1105524,1,1,1,1,2,1,2,1,1,2
50 | 1106095,4,1,1,3,2,1,3,1,1,2
51 | 1106829,7,8,7,2,4,8,3,8,2,4
52 | 1108370,9,5,8,1,2,3,2,1,5,4
53 | 1108449,5,3,3,4,2,4,3,4,1,4
54 | 1110102,10,3,6,2,3,5,4,10,2,4
55 | 1110503,5,5,5,8,10,8,7,3,7,4
56 | 1110524,10,5,5,6,8,8,7,1,1,4
57 | 1111249,10,6,6,3,4,5,3,6,1,4
58 | 1112209,8,10,10,1,3,6,3,9,1,4
59 | 1113038,8,2,4,1,5,1,5,4,4,4
60 | 1113483,5,2,3,1,6,10,5,1,1,4
61 | 1113906,9,5,5,2,2,2,5,1,1,4
62 | 1115282,5,3,5,5,3,3,4,10,1,4
63 | 1115293,1,1,1,1,2,2,2,1,1,2
64 | 1116116,9,10,10,1,10,8,3,3,1,4
65 | 1116132,6,3,4,1,5,2,3,9,1,4
66 | 1116192,1,1,1,1,2,1,2,1,1,2
67 | 1116998,10,4,2,1,3,2,4,3,10,4
68 | 1117152,4,1,1,1,2,1,3,1,1,2
69 | 1118039,5,3,4,1,8,10,4,9,1,4
70 | 1120559,8,3,8,3,4,9,8,9,8,4
71 | 1121732,1,1,1,1,2,1,3,2,1,2
72 | 1121919,5,1,3,1,2,1,2,1,1,2
73 | 1123061,6,10,2,8,10,2,7,8,10,4
74 | 1124651,1,3,3,2,2,1,7,2,1,2
75 | 1125035,9,4,5,10,6,10,4,8,1,4
76 | 1126417,10,6,4,1,3,4,3,2,3,4
77 | 1131294,1,1,2,1,2,2,4,2,1,2
78 | 1132347,1,1,4,1,2,1,2,1,1,2
79 | 1133041,5,3,1,2,2,1,2,1,1,2
80 | 1133136,3,1,1,1,2,3,3,1,1,2
81 | 1136142,2,1,1,1,3,1,2,1,1,2
82 | 1137156,2,2,2,1,1,1,7,1,1,2
83 | 1143978,4,1,1,2,2,1,2,1,1,2
84 | 1143978,5,2,1,1,2,1,3,1,1,2
85 | 1147044,3,1,1,1,2,2,7,1,1,2
86 | 1147699,3,5,7,8,8,9,7,10,7,4
87 | 1147748,5,10,6,1,10,4,4,10,10,4
88 | 1148278,3,3,6,4,5,8,4,4,1,4
89 | 1148873,3,6,6,6,5,10,6,8,3,4
90 | 1152331,4,1,1,1,2,1,3,1,1,2
91 | 1155546,2,1,1,2,3,1,2,1,1,2
92 | 1156272,1,1,1,1,2,1,3,1,1,2
93 | 1156948,3,1,1,2,2,1,1,1,1,2
94 | 1157734,4,1,1,1,2,1,3,1,1,2
95 | 1158247,1,1,1,1,2,1,2,1,1,2
96 | 1160476,2,1,1,1,2,1,3,1,1,2
97 | 1164066,1,1,1,1,2,1,3,1,1,2
98 | 1165297,2,1,1,2,2,1,1,1,1,2
99 | 1165790,5,1,1,1,2,1,3,1,1,2
100 | 1165926,9,6,9,2,10,6,2,9,10,4
101 | 1166630,7,5,6,10,5,10,7,9,4,4
102 | 1166654,10,3,5,1,10,5,3,10,2,4
103 | 1167439,2,3,4,4,2,5,2,5,1,4
104 | 1167471,4,1,2,1,2,1,3,1,1,2
105 | 1168359,8,2,3,1,6,3,7,1,1,4
106 | 1168736,10,10,10,10,10,1,8,8,8,4
107 | 1169049,7,3,4,4,3,3,3,2,7,4
108 | 1170419,10,10,10,8,2,10,4,1,1,4
109 | 1170420,1,6,8,10,8,10,5,7,1,4
110 | 1171710,1,1,1,1,2,1,2,3,1,2
111 | 1171710,6,5,4,4,3,9,7,8,3,4
112 | 1171795,1,3,1,2,2,2,5,3,2,2
113 | 1171845,8,6,4,3,5,9,3,1,1,4
114 | 1172152,10,3,3,10,2,10,7,3,3,4
115 | 1173216,10,10,10,3,10,8,8,1,1,4
116 | 1173235,3,3,2,1,2,3,3,1,1,2
117 | 1173347,1,1,1,1,2,5,1,1,1,2
118 | 1173347,8,3,3,1,2,2,3,2,1,2
119 | 1173509,4,5,5,10,4,10,7,5,8,4
120 | 1173514,1,1,1,1,4,3,1,1,1,2
121 | 1173681,3,2,1,1,2,2,3,1,1,2
122 | 1174057,1,1,2,2,2,1,3,1,1,2
123 | 1174057,4,2,1,1,2,2,3,1,1,2
124 | 1174131,10,10,10,2,10,10,5,3,3,4
125 | 1174428,5,3,5,1,8,10,5,3,1,4
126 | 1175937,5,4,6,7,9,7,8,10,1,4
127 | 1176406,1,1,1,1,2,1,2,1,1,2
128 | 1176881,7,5,3,7,4,10,7,5,5,4
129 | 1177027,3,1,1,1,2,1,3,1,1,2
130 | 1177399,8,3,5,4,5,10,1,6,2,4
131 | 1177512,1,1,1,1,10,1,1,1,1,2
132 | 1178580,5,1,3,1,2,1,2,1,1,2
133 | 1179818,2,1,1,1,2,1,3,1,1,2
134 | 1180194,5,10,8,10,8,10,3,6,3,4
135 | 1180523,3,1,1,1,2,1,2,2,1,2
136 | 1180831,3,1,1,1,3,1,2,1,1,2
137 | 1181356,5,1,1,1,2,2,3,3,1,2
138 | 1182404,4,1,1,1,2,1,2,1,1,2
139 | 1182410,3,1,1,1,2,1,1,1,1,2
140 | 1183240,4,1,2,1,2,1,2,1,1,2
141 | 1183246,1,1,1,1,1,?,2,1,1,2
142 | 1183516,3,1,1,1,2,1,1,1,1,2
143 | 1183911,2,1,1,1,2,1,1,1,1,2
144 | 1183983,9,5,5,4,4,5,4,3,3,4
145 | 1184184,1,1,1,1,2,5,1,1,1,2
146 | 1184241,2,1,1,1,2,1,2,1,1,2
147 | 1184840,1,1,3,1,2,?,2,1,1,2
148 | 1185609,3,4,5,2,6,8,4,1,1,4
149 | 1185610,1,1,1,1,3,2,2,1,1,2
150 | 1187457,3,1,1,3,8,1,5,8,1,2
151 | 1187805,8,8,7,4,10,10,7,8,7,4
152 | 1188472,1,1,1,1,1,1,3,1,1,2
153 | 1189266,7,2,4,1,6,10,5,4,3,4
154 | 1189286,10,10,8,6,4,5,8,10,1,4
155 | 1190394,4,1,1,1,2,3,1,1,1,2
156 | 1190485,1,1,1,1,2,1,1,1,1,2
157 | 1192325,5,5,5,6,3,10,3,1,1,4
158 | 1193091,1,2,2,1,2,1,2,1,1,2
159 | 1193210,2,1,1,1,2,1,3,1,1,2
160 | 1193683,1,1,2,1,3,?,1,1,1,2
161 | 1196295,9,9,10,3,6,10,7,10,6,4
162 | 1196915,10,7,7,4,5,10,5,7,2,4
163 | 1197080,4,1,1,1,2,1,3,2,1,2
164 | 1197270,3,1,1,1,2,1,3,1,1,2
165 | 1197440,1,1,1,2,1,3,1,1,7,2
166 | 1197510,5,1,1,1,2,?,3,1,1,2
167 | 1197979,4,1,1,1,2,2,3,2,1,2
168 | 1197993,5,6,7,8,8,10,3,10,3,4
169 | 1198128,10,8,10,10,6,1,3,1,10,4
170 | 1198641,3,1,1,1,2,1,3,1,1,2
171 | 1199219,1,1,1,2,1,1,1,1,1,2
172 | 1199731,3,1,1,1,2,1,1,1,1,2
173 | 1199983,1,1,1,1,2,1,3,1,1,2
174 | 1200772,1,1,1,1,2,1,2,1,1,2
175 | 1200847,6,10,10,10,8,10,10,10,7,4
176 | 1200892,8,6,5,4,3,10,6,1,1,4
177 | 1200952,5,8,7,7,10,10,5,7,1,4
178 | 1201834,2,1,1,1,2,1,3,1,1,2
179 | 1201936,5,10,10,3,8,1,5,10,3,4
180 | 1202125,4,1,1,1,2,1,3,1,1,2
181 | 1202812,5,3,3,3,6,10,3,1,1,4
182 | 1203096,1,1,1,1,1,1,3,1,1,2
183 | 1204242,1,1,1,1,2,1,1,1,1,2
184 | 1204898,6,1,1,1,2,1,3,1,1,2
185 | 1205138,5,8,8,8,5,10,7,8,1,4
186 | 1205579,8,7,6,4,4,10,5,1,1,4
187 | 1206089,2,1,1,1,1,1,3,1,1,2
188 | 1206695,1,5,8,6,5,8,7,10,1,4
189 | 1206841,10,5,6,10,6,10,7,7,10,4
190 | 1207986,5,8,4,10,5,8,9,10,1,4
191 | 1208301,1,2,3,1,2,1,3,1,1,2
192 | 1210963,10,10,10,8,6,8,7,10,1,4
193 | 1211202,7,5,10,10,10,10,4,10,3,4
194 | 1212232,5,1,1,1,2,1,2,1,1,2
195 | 1212251,1,1,1,1,2,1,3,1,1,2
196 | 1212422,3,1,1,1,2,1,3,1,1,2
197 | 1212422,4,1,1,1,2,1,3,1,1,2
198 | 1213375,8,4,4,5,4,7,7,8,2,2
199 | 1213383,5,1,1,4,2,1,3,1,1,2
200 | 1214092,1,1,1,1,2,1,1,1,1,2
201 | 1214556,3,1,1,1,2,1,2,1,1,2
202 | 1214966,9,7,7,5,5,10,7,8,3,4
203 | 1216694,10,8,8,4,10,10,8,1,1,4
204 | 1216947,1,1,1,1,2,1,3,1,1,2
205 | 1217051,5,1,1,1,2,1,3,1,1,2
206 | 1217264,1,1,1,1,2,1,3,1,1,2
207 | 1218105,5,10,10,9,6,10,7,10,5,4
208 | 1218741,10,10,9,3,7,5,3,5,1,4
209 | 1218860,1,1,1,1,1,1,3,1,1,2
210 | 1218860,1,1,1,1,1,1,3,1,1,2
211 | 1219406,5,1,1,1,1,1,3,1,1,2
212 | 1219525,8,10,10,10,5,10,8,10,6,4
213 | 1219859,8,10,8,8,4,8,7,7,1,4
214 | 1220330,1,1,1,1,2,1,3,1,1,2
215 | 1221863,10,10,10,10,7,10,7,10,4,4
216 | 1222047,10,10,10,10,3,10,10,6,1,4
217 | 1222936,8,7,8,7,5,5,5,10,2,4
218 | 1223282,1,1,1,1,2,1,2,1,1,2
219 | 1223426,1,1,1,1,2,1,3,1,1,2
220 | 1223793,6,10,7,7,6,4,8,10,2,4
221 | 1223967,6,1,3,1,2,1,3,1,1,2
222 | 1224329,1,1,1,2,2,1,3,1,1,2
223 | 1225799,10,6,4,3,10,10,9,10,1,4
224 | 1226012,4,1,1,3,1,5,2,1,1,4
225 | 1226612,7,5,6,3,3,8,7,4,1,4
226 | 1227210,10,5,5,6,3,10,7,9,2,4
227 | 1227244,1,1,1,1,2,1,2,1,1,2
228 | 1227481,10,5,7,4,4,10,8,9,1,4
229 | 1228152,8,9,9,5,3,5,7,7,1,4
230 | 1228311,1,1,1,1,1,1,3,1,1,2
231 | 1230175,10,10,10,3,10,10,9,10,1,4
232 | 1230688,7,4,7,4,3,7,7,6,1,4
233 | 1231387,6,8,7,5,6,8,8,9,2,4
234 | 1231706,8,4,6,3,3,1,4,3,1,2
235 | 1232225,10,4,5,5,5,10,4,1,1,4
236 | 1236043,3,3,2,1,3,1,3,6,1,2
237 | 1241232,3,1,4,1,2,?,3,1,1,2
238 | 1241559,10,8,8,2,8,10,4,8,10,4
239 | 1241679,9,8,8,5,6,2,4,10,4,4
240 | 1242364,8,10,10,8,6,9,3,10,10,4
241 | 1243256,10,4,3,2,3,10,5,3,2,4
242 | 1270479,5,1,3,3,2,2,2,3,1,2
243 | 1276091,3,1,1,3,1,1,3,1,1,2
244 | 1277018,2,1,1,1,2,1,3,1,1,2
245 | 128059,1,1,1,1,2,5,5,1,1,2
246 | 1285531,1,1,1,1,2,1,3,1,1,2
247 | 1287775,5,1,1,2,2,2,3,1,1,2
248 | 144888,8,10,10,8,5,10,7,8,1,4
249 | 145447,8,4,4,1,2,9,3,3,1,4
250 | 167528,4,1,1,1,2,1,3,6,1,2
251 | 169356,3,1,1,1,2,?,3,1,1,2
252 | 183913,1,2,2,1,2,1,1,1,1,2
253 | 191250,10,4,4,10,2,10,5,3,3,4
254 | 1017023,6,3,3,5,3,10,3,5,3,2
255 | 1100524,6,10,10,2,8,10,7,3,3,4
256 | 1116116,9,10,10,1,10,8,3,3,1,4
257 | 1168736,5,6,6,2,4,10,3,6,1,4
258 | 1182404,3,1,1,1,2,1,1,1,1,2
259 | 1182404,3,1,1,1,2,1,2,1,1,2
260 | 1198641,3,1,1,1,2,1,3,1,1,2
261 | 242970,5,7,7,1,5,8,3,4,1,2
262 | 255644,10,5,8,10,3,10,5,1,3,4
263 | 263538,5,10,10,6,10,10,10,6,5,4
264 | 274137,8,8,9,4,5,10,7,8,1,4
265 | 303213,10,4,4,10,6,10,5,5,1,4
266 | 314428,7,9,4,10,10,3,5,3,3,4
267 | 1182404,5,1,4,1,2,1,3,2,1,2
268 | 1198641,10,10,6,3,3,10,4,3,2,4
269 | 320675,3,3,5,2,3,10,7,1,1,4
270 | 324427,10,8,8,2,3,4,8,7,8,4
271 | 385103,1,1,1,1,2,1,3,1,1,2
272 | 390840,8,4,7,1,3,10,3,9,2,4
273 | 411453,5,1,1,1,2,1,3,1,1,2
274 | 320675,3,3,5,2,3,10,7,1,1,4
275 | 428903,7,2,4,1,3,4,3,3,1,4
276 | 431495,3,1,1,1,2,1,3,2,1,2
277 | 432809,3,1,3,1,2,?,2,1,1,2
278 | 434518,3,1,1,1,2,1,2,1,1,2
279 | 452264,1,1,1,1,2,1,2,1,1,2
280 | 456282,1,1,1,1,2,1,3,1,1,2
281 | 476903,10,5,7,3,3,7,3,3,8,4
282 | 486283,3,1,1,1,2,1,3,1,1,2
283 | 486662,2,1,1,2,2,1,3,1,1,2
284 | 488173,1,4,3,10,4,10,5,6,1,4
285 | 492268,10,4,6,1,2,10,5,3,1,4
286 | 508234,7,4,5,10,2,10,3,8,2,4
287 | 527363,8,10,10,10,8,10,10,7,3,4
288 | 529329,10,10,10,10,10,10,4,10,10,4
289 | 535331,3,1,1,1,3,1,2,1,1,2
290 | 543558,6,1,3,1,4,5,5,10,1,4
291 | 555977,5,6,6,8,6,10,4,10,4,4
292 | 560680,1,1,1,1,2,1,1,1,1,2
293 | 561477,1,1,1,1,2,1,3,1,1,2
294 | 563649,8,8,8,1,2,?,6,10,1,4
295 | 601265,10,4,4,6,2,10,2,3,1,4
296 | 606140,1,1,1,1,2,?,2,1,1,2
297 | 606722,5,5,7,8,6,10,7,4,1,4
298 | 616240,5,3,4,3,4,5,4,7,1,2
299 | 61634,5,4,3,1,2,?,2,3,1,2
300 | 625201,8,2,1,1,5,1,1,1,1,2
301 | 63375,9,1,2,6,4,10,7,7,2,4
302 | 635844,8,4,10,5,4,4,7,10,1,4
303 | 636130,1,1,1,1,2,1,3,1,1,2
304 | 640744,10,10,10,7,9,10,7,10,10,4
305 | 646904,1,1,1,1,2,1,3,1,1,2
306 | 653777,8,3,4,9,3,10,3,3,1,4
307 | 659642,10,8,4,4,4,10,3,10,4,4
308 | 666090,1,1,1,1,2,1,3,1,1,2
309 | 666942,1,1,1,1,2,1,3,1,1,2
310 | 667204,7,8,7,6,4,3,8,8,4,4
311 | 673637,3,1,1,1,2,5,5,1,1,2
312 | 684955,2,1,1,1,3,1,2,1,1,2
313 | 688033,1,1,1,1,2,1,1,1,1,2
314 | 691628,8,6,4,10,10,1,3,5,1,4
315 | 693702,1,1,1,1,2,1,1,1,1,2
316 | 704097,1,1,1,1,1,1,2,1,1,2
317 | 704168,4,6,5,6,7,?,4,9,1,2
318 | 706426,5,5,5,2,5,10,4,3,1,4
319 | 709287,6,8,7,8,6,8,8,9,1,4
320 | 718641,1,1,1,1,5,1,3,1,1,2
321 | 721482,4,4,4,4,6,5,7,3,1,2
322 | 730881,7,6,3,2,5,10,7,4,6,4
323 | 733639,3,1,1,1,2,?,3,1,1,2
324 | 733639,3,1,1,1,2,1,3,1,1,2
325 | 733823,5,4,6,10,2,10,4,1,1,4
326 | 740492,1,1,1,1,2,1,3,1,1,2
327 | 743348,3,2,2,1,2,1,2,3,1,2
328 | 752904,10,1,1,1,2,10,5,4,1,4
329 | 756136,1,1,1,1,2,1,2,1,1,2
330 | 760001,8,10,3,2,6,4,3,10,1,4
331 | 760239,10,4,6,4,5,10,7,1,1,4
332 | 76389,10,4,7,2,2,8,6,1,1,4
333 | 764974,5,1,1,1,2,1,3,1,2,2
334 | 770066,5,2,2,2,2,1,2,2,1,2
335 | 785208,5,4,6,6,4,10,4,3,1,4
336 | 785615,8,6,7,3,3,10,3,4,2,4
337 | 792744,1,1,1,1,2,1,1,1,1,2
338 | 797327,6,5,5,8,4,10,3,4,1,4
339 | 798429,1,1,1,1,2,1,3,1,1,2
340 | 704097,1,1,1,1,1,1,2,1,1,2
341 | 806423,8,5,5,5,2,10,4,3,1,4
342 | 809912,10,3,3,1,2,10,7,6,1,4
343 | 810104,1,1,1,1,2,1,3,1,1,2
344 | 814265,2,1,1,1,2,1,1,1,1,2
345 | 814911,1,1,1,1,2,1,1,1,1,2
346 | 822829,7,6,4,8,10,10,9,5,3,4
347 | 826923,1,1,1,1,2,1,1,1,1,2
348 | 830690,5,2,2,2,3,1,1,3,1,2
349 | 831268,1,1,1,1,1,1,1,3,1,2
350 | 832226,3,4,4,10,5,1,3,3,1,4
351 | 832567,4,2,3,5,3,8,7,6,1,4
352 | 836433,5,1,1,3,2,1,1,1,1,2
353 | 837082,2,1,1,1,2,1,3,1,1,2
354 | 846832,3,4,5,3,7,3,4,6,1,2
355 | 850831,2,7,10,10,7,10,4,9,4,4
356 | 855524,1,1,1,1,2,1,2,1,1,2
357 | 857774,4,1,1,1,3,1,2,2,1,2
358 | 859164,5,3,3,1,3,3,3,3,3,4
359 | 859350,8,10,10,7,10,10,7,3,8,4
360 | 866325,8,10,5,3,8,4,4,10,3,4
361 | 873549,10,3,5,4,3,7,3,5,3,4
362 | 877291,6,10,10,10,10,10,8,10,10,4
363 | 877943,3,10,3,10,6,10,5,1,4,4
364 | 888169,3,2,2,1,4,3,2,1,1,2
365 | 888523,4,4,4,2,2,3,2,1,1,2
366 | 896404,2,1,1,1,2,1,3,1,1,2
367 | 897172,2,1,1,1,2,1,2,1,1,2
368 | 95719,6,10,10,10,8,10,7,10,7,4
369 | 160296,5,8,8,10,5,10,8,10,3,4
370 | 342245,1,1,3,1,2,1,1,1,1,2
371 | 428598,1,1,3,1,1,1,2,1,1,2
372 | 492561,4,3,2,1,3,1,2,1,1,2
373 | 493452,1,1,3,1,2,1,1,1,1,2
374 | 493452,4,1,2,1,2,1,2,1,1,2
375 | 521441,5,1,1,2,2,1,2,1,1,2
376 | 560680,3,1,2,1,2,1,2,1,1,2
377 | 636437,1,1,1,1,2,1,1,1,1,2
378 | 640712,1,1,1,1,2,1,2,1,1,2
379 | 654244,1,1,1,1,1,1,2,1,1,2
380 | 657753,3,1,1,4,3,1,2,2,1,2
381 | 685977,5,3,4,1,4,1,3,1,1,2
382 | 805448,1,1,1,1,2,1,1,1,1,2
383 | 846423,10,6,3,6,4,10,7,8,4,4
384 | 1002504,3,2,2,2,2,1,3,2,1,2
385 | 1022257,2,1,1,1,2,1,1,1,1,2
386 | 1026122,2,1,1,1,2,1,1,1,1,2
387 | 1071084,3,3,2,2,3,1,1,2,3,2
388 | 1080233,7,6,6,3,2,10,7,1,1,4
389 | 1114570,5,3,3,2,3,1,3,1,1,2
390 | 1114570,2,1,1,1,2,1,2,2,1,2
391 | 1116715,5,1,1,1,3,2,2,2,1,2
392 | 1131411,1,1,1,2,2,1,2,1,1,2
393 | 1151734,10,8,7,4,3,10,7,9,1,4
394 | 1156017,3,1,1,1,2,1,2,1,1,2
395 | 1158247,1,1,1,1,1,1,1,1,1,2
396 | 1158405,1,2,3,1,2,1,2,1,1,2
397 | 1168278,3,1,1,1,2,1,2,1,1,2
398 | 1176187,3,1,1,1,2,1,3,1,1,2
399 | 1196263,4,1,1,1,2,1,1,1,1,2
400 | 1196475,3,2,1,1,2,1,2,2,1,2
401 | 1206314,1,2,3,1,2,1,1,1,1,2
402 | 1211265,3,10,8,7,6,9,9,3,8,4
403 | 1213784,3,1,1,1,2,1,1,1,1,2
404 | 1223003,5,3,3,1,2,1,2,1,1,2
405 | 1223306,3,1,1,1,2,4,1,1,1,2
406 | 1223543,1,2,1,3,2,1,1,2,1,2
407 | 1229929,1,1,1,1,2,1,2,1,1,2
408 | 1231853,4,2,2,1,2,1,2,1,1,2
409 | 1234554,1,1,1,1,2,1,2,1,1,2
410 | 1236837,2,3,2,2,2,2,3,1,1,2
411 | 1237674,3,1,2,1,2,1,2,1,1,2
412 | 1238021,1,1,1,1,2,1,2,1,1,2
413 | 1238464,1,1,1,1,1,?,2,1,1,2
414 | 1238633,10,10,10,6,8,4,8,5,1,4
415 | 1238915,5,1,2,1,2,1,3,1,1,2
416 | 1238948,8,5,6,2,3,10,6,6,1,4
417 | 1239232,3,3,2,6,3,3,3,5,1,2
418 | 1239347,8,7,8,5,10,10,7,2,1,4
419 | 1239967,1,1,1,1,2,1,2,1,1,2
420 | 1240337,5,2,2,2,2,2,3,2,2,2
421 | 1253505,2,3,1,1,5,1,1,1,1,2
422 | 1255384,3,2,2,3,2,3,3,1,1,2
423 | 1257200,10,10,10,7,10,10,8,2,1,4
424 | 1257648,4,3,3,1,2,1,3,3,1,2
425 | 1257815,5,1,3,1,2,1,2,1,1,2
426 | 1257938,3,1,1,1,2,1,1,1,1,2
427 | 1258549,9,10,10,10,10,10,10,10,1,4
428 | 1258556,5,3,6,1,2,1,1,1,1,2
429 | 1266154,8,7,8,2,4,2,5,10,1,4
430 | 1272039,1,1,1,1,2,1,2,1,1,2
431 | 1276091,2,1,1,1,2,1,2,1,1,2
432 | 1276091,1,3,1,1,2,1,2,2,1,2
433 | 1276091,5,1,1,3,4,1,3,2,1,2
434 | 1277629,5,1,1,1,2,1,2,2,1,2
435 | 1293439,3,2,2,3,2,1,1,1,1,2
436 | 1293439,6,9,7,5,5,8,4,2,1,2
437 | 1294562,10,8,10,1,3,10,5,1,1,4
438 | 1295186,10,10,10,1,6,1,2,8,1,4
439 | 527337,4,1,1,1,2,1,1,1,1,2
440 | 558538,4,1,3,3,2,1,1,1,1,2
441 | 566509,5,1,1,1,2,1,1,1,1,2
442 | 608157,10,4,3,10,4,10,10,1,1,4
443 | 677910,5,2,2,4,2,4,1,1,1,2
444 | 734111,1,1,1,3,2,3,1,1,1,2
445 | 734111,1,1,1,1,2,2,1,1,1,2
446 | 780555,5,1,1,6,3,1,2,1,1,2
447 | 827627,2,1,1,1,2,1,1,1,1,2
448 | 1049837,1,1,1,1,2,1,1,1,1,2
449 | 1058849,5,1,1,1,2,1,1,1,1,2
450 | 1182404,1,1,1,1,1,1,1,1,1,2
451 | 1193544,5,7,9,8,6,10,8,10,1,4
452 | 1201870,4,1,1,3,1,1,2,1,1,2
453 | 1202253,5,1,1,1,2,1,1,1,1,2
454 | 1227081,3,1,1,3,2,1,1,1,1,2
455 | 1230994,4,5,5,8,6,10,10,7,1,4
456 | 1238410,2,3,1,1,3,1,1,1,1,2
457 | 1246562,10,2,2,1,2,6,1,1,2,4
458 | 1257470,10,6,5,8,5,10,8,6,1,4
459 | 1259008,8,8,9,6,6,3,10,10,1,4
460 | 1266124,5,1,2,1,2,1,1,1,1,2
461 | 1267898,5,1,3,1,2,1,1,1,1,2
462 | 1268313,5,1,1,3,2,1,1,1,1,2
463 | 1268804,3,1,1,1,2,5,1,1,1,2
464 | 1276091,6,1,1,3,2,1,1,1,1,2
465 | 1280258,4,1,1,1,2,1,1,2,1,2
466 | 1293966,4,1,1,1,2,1,1,1,1,2
467 | 1296572,10,9,8,7,6,4,7,10,3,4
468 | 1298416,10,6,6,2,4,10,9,7,1,4
469 | 1299596,6,6,6,5,4,10,7,6,2,4
470 | 1105524,4,1,1,1,2,1,1,1,1,2
471 | 1181685,1,1,2,1,2,1,2,1,1,2
472 | 1211594,3,1,1,1,1,1,2,1,1,2
473 | 1238777,6,1,1,3,2,1,1,1,1,2
474 | 1257608,6,1,1,1,1,1,1,1,1,2
475 | 1269574,4,1,1,1,2,1,1,1,1,2
476 | 1277145,5,1,1,1,2,1,1,1,1,2
477 | 1287282,3,1,1,1,2,1,1,1,1,2
478 | 1296025,4,1,2,1,2,1,1,1,1,2
479 | 1296263,4,1,1,1,2,1,1,1,1,2
480 | 1296593,5,2,1,1,2,1,1,1,1,2
481 | 1299161,4,8,7,10,4,10,7,5,1,4
482 | 1301945,5,1,1,1,1,1,1,1,1,2
483 | 1302428,5,3,2,4,2,1,1,1,1,2
484 | 1318169,9,10,10,10,10,5,10,10,10,4
485 | 474162,8,7,8,5,5,10,9,10,1,4
486 | 787451,5,1,2,1,2,1,1,1,1,2
487 | 1002025,1,1,1,3,1,3,1,1,1,2
488 | 1070522,3,1,1,1,1,1,2,1,1,2
489 | 1073960,10,10,10,10,6,10,8,1,5,4
490 | 1076352,3,6,4,10,3,3,3,4,1,4
491 | 1084139,6,3,2,1,3,4,4,1,1,4
492 | 1115293,1,1,1,1,2,1,1,1,1,2
493 | 1119189,5,8,9,4,3,10,7,1,1,4
494 | 1133991,4,1,1,1,1,1,2,1,1,2
495 | 1142706,5,10,10,10,6,10,6,5,2,4
496 | 1155967,5,1,2,10,4,5,2,1,1,2
497 | 1170945,3,1,1,1,1,1,2,1,1,2
498 | 1181567,1,1,1,1,1,1,1,1,1,2
499 | 1182404,4,2,1,1,2,1,1,1,1,2
500 | 1204558,4,1,1,1,2,1,2,1,1,2
501 | 1217952,4,1,1,1,2,1,2,1,1,2
502 | 1224565,6,1,1,1,2,1,3,1,1,2
503 | 1238186,4,1,1,1,2,1,2,1,1,2
504 | 1253917,4,1,1,2,2,1,2,1,1,2
505 | 1265899,4,1,1,1,2,1,3,1,1,2
506 | 1268766,1,1,1,1,2,1,1,1,1,2
507 | 1277268,3,3,1,1,2,1,1,1,1,2
508 | 1286943,8,10,10,10,7,5,4,8,7,4
509 | 1295508,1,1,1,1,2,4,1,1,1,2
510 | 1297327,5,1,1,1,2,1,1,1,1,2
511 | 1297522,2,1,1,1,2,1,1,1,1,2
512 | 1298360,1,1,1,1,2,1,1,1,1,2
513 | 1299924,5,1,1,1,2,1,2,1,1,2
514 | 1299994,5,1,1,1,2,1,1,1,1,2
515 | 1304595,3,1,1,1,1,1,2,1,1,2
516 | 1306282,6,6,7,10,3,10,8,10,2,4
517 | 1313325,4,10,4,7,3,10,9,10,1,4
518 | 1320077,1,1,1,1,1,1,1,1,1,2
519 | 1320077,1,1,1,1,1,1,2,1,1,2
520 | 1320304,3,1,2,2,2,1,1,1,1,2
521 | 1330439,4,7,8,3,4,10,9,1,1,4
522 | 333093,1,1,1,1,3,1,1,1,1,2
523 | 369565,4,1,1,1,3,1,1,1,1,2
524 | 412300,10,4,5,4,3,5,7,3,1,4
525 | 672113,7,5,6,10,4,10,5,3,1,4
526 | 749653,3,1,1,1,2,1,2,1,1,2
527 | 769612,3,1,1,2,2,1,1,1,1,2
528 | 769612,4,1,1,1,2,1,1,1,1,2
529 | 798429,4,1,1,1,2,1,3,1,1,2
530 | 807657,6,1,3,2,2,1,1,1,1,2
531 | 8233704,4,1,1,1,1,1,2,1,1,2
532 | 837480,7,4,4,3,4,10,6,9,1,4
533 | 867392,4,2,2,1,2,1,2,1,1,2
534 | 869828,1,1,1,1,1,1,3,1,1,2
535 | 1043068,3,1,1,1,2,1,2,1,1,2
536 | 1056171,2,1,1,1,2,1,2,1,1,2
537 | 1061990,1,1,3,2,2,1,3,1,1,2
538 | 1113061,5,1,1,1,2,1,3,1,1,2
539 | 1116192,5,1,2,1,2,1,3,1,1,2
540 | 1135090,4,1,1,1,2,1,2,1,1,2
541 | 1145420,6,1,1,1,2,1,2,1,1,2
542 | 1158157,5,1,1,1,2,2,2,1,1,2
543 | 1171578,3,1,1,1,2,1,1,1,1,2
544 | 1174841,5,3,1,1,2,1,1,1,1,2
545 | 1184586,4,1,1,1,2,1,2,1,1,2
546 | 1186936,2,1,3,2,2,1,2,1,1,2
547 | 1197527,5,1,1,1,2,1,2,1,1,2
548 | 1222464,6,10,10,10,4,10,7,10,1,4
549 | 1240603,2,1,1,1,1,1,1,1,1,2
550 | 1240603,3,1,1,1,1,1,1,1,1,2
551 | 1241035,7,8,3,7,4,5,7,8,2,4
552 | 1287971,3,1,1,1,2,1,2,1,1,2
553 | 1289391,1,1,1,1,2,1,3,1,1,2
554 | 1299924,3,2,2,2,2,1,4,2,1,2
555 | 1306339,4,4,2,1,2,5,2,1,2,2
556 | 1313658,3,1,1,1,2,1,1,1,1,2
557 | 1313982,4,3,1,1,2,1,4,8,1,2
558 | 1321264,5,2,2,2,1,1,2,1,1,2
559 | 1321321,5,1,1,3,2,1,1,1,1,2
560 | 1321348,2,1,1,1,2,1,2,1,1,2
561 | 1321931,5,1,1,1,2,1,2,1,1,2
562 | 1321942,5,1,1,1,2,1,3,1,1,2
563 | 1321942,5,1,1,1,2,1,3,1,1,2
564 | 1328331,1,1,1,1,2,1,3,1,1,2
565 | 1328755,3,1,1,1,2,1,2,1,1,2
566 | 1331405,4,1,1,1,2,1,3,2,1,2
567 | 1331412,5,7,10,10,5,10,10,10,1,4
568 | 1333104,3,1,2,1,2,1,3,1,1,2
569 | 1334071,4,1,1,1,2,3,2,1,1,2
570 | 1343068,8,4,4,1,6,10,2,5,2,4
571 | 1343374,10,10,8,10,6,5,10,3,1,4
572 | 1344121,8,10,4,4,8,10,8,2,1,4
573 | 142932,7,6,10,5,3,10,9,10,2,4
574 | 183936,3,1,1,1,2,1,2,1,1,2
575 | 324382,1,1,1,1,2,1,2,1,1,2
576 | 378275,10,9,7,3,4,2,7,7,1,4
577 | 385103,5,1,2,1,2,1,3,1,1,2
578 | 690557,5,1,1,1,2,1,2,1,1,2
579 | 695091,1,1,1,1,2,1,2,1,1,2
580 | 695219,1,1,1,1,2,1,2,1,1,2
581 | 824249,1,1,1,1,2,1,3,1,1,2
582 | 871549,5,1,2,1,2,1,2,1,1,2
583 | 878358,5,7,10,6,5,10,7,5,1,4
584 | 1107684,6,10,5,5,4,10,6,10,1,4
585 | 1115762,3,1,1,1,2,1,1,1,1,2
586 | 1217717,5,1,1,6,3,1,1,1,1,2
587 | 1239420,1,1,1,1,2,1,1,1,1,2
588 | 1254538,8,10,10,10,6,10,10,10,1,4
589 | 1261751,5,1,1,1,2,1,2,2,1,2
590 | 1268275,9,8,8,9,6,3,4,1,1,4
591 | 1272166,5,1,1,1,2,1,1,1,1,2
592 | 1294261,4,10,8,5,4,1,10,1,1,4
593 | 1295529,2,5,7,6,4,10,7,6,1,4
594 | 1298484,10,3,4,5,3,10,4,1,1,4
595 | 1311875,5,1,2,1,2,1,1,1,1,2
596 | 1315506,4,8,6,3,4,10,7,1,1,4
597 | 1320141,5,1,1,1,2,1,2,1,1,2
598 | 1325309,4,1,2,1,2,1,2,1,1,2
599 | 1333063,5,1,3,1,2,1,3,1,1,2
600 | 1333495,3,1,1,1,2,1,2,1,1,2
601 | 1334659,5,2,4,1,1,1,1,1,1,2
602 | 1336798,3,1,1,1,2,1,2,1,1,2
603 | 1344449,1,1,1,1,1,1,2,1,1,2
604 | 1350568,4,1,1,1,2,1,2,1,1,2
605 | 1352663,5,4,6,8,4,1,8,10,1,4
606 | 188336,5,3,2,8,5,10,8,1,2,4
607 | 352431,10,5,10,3,5,8,7,8,3,4
608 | 353098,4,1,1,2,2,1,1,1,1,2
609 | 411453,1,1,1,1,2,1,1,1,1,2
610 | 557583,5,10,10,10,10,10,10,1,1,4
611 | 636375,5,1,1,1,2,1,1,1,1,2
612 | 736150,10,4,3,10,3,10,7,1,2,4
613 | 803531,5,10,10,10,5,2,8,5,1,4
614 | 822829,8,10,10,10,6,10,10,10,10,4
615 | 1016634,2,3,1,1,2,1,2,1,1,2
616 | 1031608,2,1,1,1,1,1,2,1,1,2
617 | 1041043,4,1,3,1,2,1,2,1,1,2
618 | 1042252,3,1,1,1,2,1,2,1,1,2
619 | 1057067,1,1,1,1,1,?,1,1,1,2
620 | 1061990,4,1,1,1,2,1,2,1,1,2
621 | 1073836,5,1,1,1,2,1,2,1,1,2
622 | 1083817,3,1,1,1,2,1,2,1,1,2
623 | 1096352,6,3,3,3,3,2,6,1,1,2
624 | 1140597,7,1,2,3,2,1,2,1,1,2
625 | 1149548,1,1,1,1,2,1,1,1,1,2
626 | 1174009,5,1,1,2,1,1,2,1,1,2
627 | 1183596,3,1,3,1,3,4,1,1,1,2
628 | 1190386,4,6,6,5,7,6,7,7,3,4
629 | 1190546,2,1,1,1,2,5,1,1,1,2
630 | 1213273,2,1,1,1,2,1,1,1,1,2
631 | 1218982,4,1,1,1,2,1,1,1,1,2
632 | 1225382,6,2,3,1,2,1,1,1,1,2
633 | 1235807,5,1,1,1,2,1,2,1,1,2
634 | 1238777,1,1,1,1,2,1,1,1,1,2
635 | 1253955,8,7,4,4,5,3,5,10,1,4
636 | 1257366,3,1,1,1,2,1,1,1,1,2
637 | 1260659,3,1,4,1,2,1,1,1,1,2
638 | 1268952,10,10,7,8,7,1,10,10,3,4
639 | 1275807,4,2,4,3,2,2,2,1,1,2
640 | 1277792,4,1,1,1,2,1,1,1,1,2
641 | 1277792,5,1,1,3,2,1,1,1,1,2
642 | 1285722,4,1,1,3,2,1,1,1,1,2
643 | 1288608,3,1,1,1,2,1,2,1,1,2
644 | 1290203,3,1,1,1,2,1,2,1,1,2
645 | 1294413,1,1,1,1,2,1,1,1,1,2
646 | 1299596,2,1,1,1,2,1,1,1,1,2
647 | 1303489,3,1,1,1,2,1,2,1,1,2
648 | 1311033,1,2,2,1,2,1,1,1,1,2
649 | 1311108,1,1,1,3,2,1,1,1,1,2
650 | 1315807,5,10,10,10,10,2,10,10,10,4
651 | 1318671,3,1,1,1,2,1,2,1,1,2
652 | 1319609,3,1,1,2,3,4,1,1,1,2
653 | 1323477,1,2,1,3,2,1,2,1,1,2
654 | 1324572,5,1,1,1,2,1,2,2,1,2
655 | 1324681,4,1,1,1,2,1,2,1,1,2
656 | 1325159,3,1,1,1,2,1,3,1,1,2
657 | 1326892,3,1,1,1,2,1,2,1,1,2
658 | 1330361,5,1,1,1,2,1,2,1,1,2
659 | 1333877,5,4,5,1,8,1,3,6,1,2
660 | 1334015,7,8,8,7,3,10,7,2,3,4
661 | 1334667,1,1,1,1,2,1,1,1,1,2
662 | 1339781,1,1,1,1,2,1,2,1,1,2
663 | 1339781,4,1,1,1,2,1,3,1,1,2
664 | 13454352,1,1,3,1,2,1,2,1,1,2
665 | 1345452,1,1,3,1,2,1,2,1,1,2
666 | 1345593,3,1,1,3,2,1,2,1,1,2
667 | 1347749,1,1,1,1,2,1,1,1,1,2
668 | 1347943,5,2,2,2,2,1,1,1,2,2
669 | 1348851,3,1,1,1,2,1,3,1,1,2
670 | 1350319,5,7,4,1,6,1,7,10,3,4
671 | 1350423,5,10,10,8,5,5,7,10,1,4
672 | 1352848,3,10,7,8,5,8,7,4,1,4
673 | 1353092,3,2,1,2,2,1,3,1,1,2
674 | 1354840,2,1,1,1,2,1,3,1,1,2
675 | 1354840,5,3,2,1,3,1,1,1,1,2
676 | 1355260,1,1,1,1,2,1,2,1,1,2
677 | 1365075,4,1,4,1,2,1,1,1,1,2
678 | 1365328,1,1,2,1,2,1,2,1,1,2
679 | 1368267,5,1,1,1,2,1,1,1,1,2
680 | 1368273,1,1,1,1,2,1,1,1,1,2
681 | 1368882,2,1,1,1,2,1,1,1,1,2
682 | 1369821,10,10,10,10,5,10,10,10,7,4
683 | 1371026,5,10,10,10,4,10,5,6,3,4
684 | 1371920,5,1,1,1,2,1,3,2,1,2
685 | 466906,1,1,1,1,2,1,1,1,1,2
686 | 466906,1,1,1,1,2,1,1,1,1,2
687 | 534555,1,1,1,1,2,1,1,1,1,2
688 | 536708,1,1,1,1,2,1,1,1,1,2
689 | 566346,3,1,1,1,2,1,2,3,1,2
690 | 603148,4,1,1,1,2,1,1,1,1,2
691 | 654546,1,1,1,1,2,1,1,1,8,2
692 | 654546,1,1,1,3,2,1,1,1,1,2
693 | 695091,5,10,10,5,4,5,4,4,1,4
694 | 714039,3,1,1,1,2,1,1,1,1,2
695 | 763235,3,1,1,1,2,1,2,1,2,2
696 | 776715,3,1,1,1,3,2,1,1,1,2
697 | 841769,2,1,1,1,2,1,1,1,1,2
698 | 888820,5,10,10,3,7,3,8,10,2,4
699 | 897471,4,8,6,4,3,4,10,6,1,4
700 | 897471,4,8,8,5,4,5,10,4,1,4
701 |
--------------------------------------------------------------------------------