├── Deep_Learning ├── 1_Intro.py ├── Tensorflow │ ├── 2_Tensorflow_basics.py │ ├── 3_building_model.py │ ├── 4.1_Using_our_own_data.py │ ├── 4.2_sentiment_neural_network.py │ ├── 4.3_adding_more_data(preprocessing).py │ ├── 4.4_Neural_netword_model_forMoreData.py │ ├── 5_Recurrent Neural Networks.py │ ├── 6_Convolutional_Neural_nets.py │ ├── 7_tflearn.py │ ├── cs20si │ │ ├── .ipynb_checkpoints │ │ │ ├── Untitled-checkpoint.ipynb │ │ │ └── lecture_1:graphsandsessions-checkpoint.ipynb │ │ ├── Untitled.ipynb │ │ ├── graphs │ │ │ ├── events.out.tfevents.1508077168.raghav-PC │ │ │ ├── events.out.tfevents.1508077664.raghav-PC │ │ │ └── events.out.tfevents.1512490021.raghav-PC │ │ └── lecture_1:graphsandsessions.ipynb │ ├── lexicon-2500-2638.pickle │ ├── lexicon.pickle │ ├── model.ckpt │ ├── neg.txt │ ├── pos.txt │ └── processed-test-set.csv ├── chatbot │ └── 1_intro.py ├── fast.ai │ ├── .ipynb_checkpoints │ │ ├── Untitled-checkpoint.ipynb │ │ ├── lesson1-checkpoint.ipynb │ │ └── lesson1_code-checkpoint.ipynb │ ├── Untitled.ipynb │ ├── __pycache__ │ │ ├── utils.cpython-35.pyc │ │ ├── vgg16.cpython-35.pyc │ │ └── vgg16bn.cpython-35.pyc │ ├── lesson1.ipynb │ ├── lesson1_code.ipynb │ ├── utils.py │ ├── vgg16.py │ └── vgg16bn.py ├── keras │ ├── 1_intro.py │ ├── Image_Augumentation.py │ ├── image_classifier │ │ ├── 2_Image_classifier.py │ │ ├── 2_image_classifier_only_code.py │ │ ├── 3_usingVG16.py │ │ └── 3_usingVGG16_codeonly.py │ ├── rnn_keras │ │ └── intro.py │ └── sentiment_analysis_movie │ │ ├── 4.1_code_only.py │ │ └── 4_movie_sentiment.py └── openAIGym │ ├── 1_intro.py │ ├── 2_train.py │ ├── log │ ├── openAIStuff │ │ ├── events.out.tfevents.1497419827.raghav-PC │ │ ├── events.out.tfevents.1497420569.raghav-PC │ │ ├── events.out.tfevents.1497420852.raghav-PC │ │ ├── events.out.tfevents.1497420919.raghav-PC │ │ ├── events.out.tfevents.1497441395.raghav-PC │ │ ├── events.out.tfevents.1497441421.raghav-PC │ │ └── events.out.tfevents.1497441457.raghav-PC │ └── openai_learning │ │ ├── events.out.tfevents.1497421583.raghav-PC │ │ ├── events.out.tfevents.1497421622.raghav-PC │ │ ├── events.out.tfevents.1497421683.raghav-PC │ │ ├── events.out.tfevents.1497421744.raghav-PC │ │ ├── events.out.tfevents.1497421940.raghav-PC │ │ ├── events.out.tfevents.1497421973.raghav-PC │ │ ├── events.out.tfevents.1497422209.raghav-PC │ │ ├── events.out.tfevents.1497422303.raghav-PC │ │ ├── events.out.tfevents.1497422371.raghav-PC │ │ ├── events.out.tfevents.1497422400.raghav-PC │ │ ├── events.out.tfevents.1497422427.raghav-PC │ │ └── events.out.tfevents.1497441498.raghav-PC │ └── saved.npy ├── README.md └── classical_ml ├── clustering ├── K means │ ├── 1_Intro.py │ ├── 2_handling_non_numeric_data.py │ ├── 3_K_means_from_scratch.py │ └── titanic.xls └── Mean Shift │ ├── 1_intro.py │ ├── 2_Applying_on_titanic_dataset.py │ ├── 3_from_scratch.py │ └── titanic.xls ├── k nearerst neighbours ├── 1_k_nearest_neighbours_intro.py ├── 2_k_nearest_neighbors_from_scratch.py ├── 3_applying_our_algo_on_practical_eg.py ├── README.md └── breast-cancer-wisconsin.data.txt ├── linear regression ├── 1_linear_regression_intro.py ├── 2predicting_using_regression.py ├── 3_pickling_classifier.py ├── 4linear_regression_from_scratch.py ├── 5testing_assumptions.py ├── LinearRegression.pickle └── README.md └── svm ├── 1_intro.py ├── 2_svm_from_scratch.py ├── 3_kernels_intro.py ├── 4_kerenels_using_sklearn.py └── breast-cancer-wisconsin.data.txt /Deep_Learning/1_Intro.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Our brain has neurons. It has dendrites which are the branches. The long portion is the axon. 3 | The dendrites are inputs. 4 | 5 | Our model of a neuron: 6 | We have our input eg X1,X2,X3 7 | These values are passed to a function which gives the weighted sum of the inputs + biases i.e sum(input*weight + bias) 8 | Bias is important for the case when all the inputs are zero 9 | 10 | This then gets passed through a threshold(Sigmoid/Activation) function and checks if output needs to be passed or not 11 | depending upon if value is greater than threshold or not. 12 | 0 means value less than threshold. 1 means greater than threshold. 13 | This might go to another input. 14 | 15 | Output Y = f(x,w) where the w are the weights 16 | 17 | Model of a neural network: 18 | 19 | Consider layers of neurons. eg the first layer may have 3 neurons, the second 2, and so on. 20 | We also have our input x1,x2,x3. 21 | Each input is fed to all the neurons of the first layer and each connection has a unique weight. 22 | The output from the neurons of the first layer becomes the input for the second layer and so on. 23 | 24 | x1,x2,x3 -> Input layer 25 | Layers inbetween ->Hidden Layers 26 | Last layer ->Output Layer. 27 | 28 | If we have one hidden layer, it is a regular neural network. If > one layer, then "deep" neural network. 29 | 30 | 31 | """ 32 | 33 | # Datasets available at : ImageNet, Wiki dumps, Tatoba, CominCrawl 34 | -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/2_Tensorflow_basics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tensor: Array like object 3 | TensorFlow: Has tons of deeplearning functions 4 | 5 | In TensorFlow: Define the model in abstract terms. When ready, run the session. 6 | 7 | So tensorflow has a computation graph where we model everything. 8 | Then we run the session 9 | """ 10 | import tensorflow as tf 11 | 12 | # construct the computation graph, first thing to do 13 | x1 = tf.constant(5) 14 | x2 = tf.constant(6) 15 | 16 | # result = x1*x2 # can do this but not efficient 17 | result = tf.multiply(x1, x2) 18 | print(result) 19 | # result is a tensor object 20 | 21 | # to actually get the answer we need to run it in a session 22 | # Method 1 23 | # sess = tf.Session() 24 | # print(sess.run(result)) 25 | # sess.close() 26 | 27 | # Method 2, better 28 | with tf.Session() as sess: 29 | print(sess.run(result)) 30 | -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/3_building_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Our model 3 | input data > Weight it > hidden layer 1 (activation function) > weights > Hidden Layer 2(activation function)> weights 4 | > output layer. 5 | 6 | In a neural network, this data is passed straight through. That passing of data is called feed forward 7 | 8 | Compare output to intended output.> cost function 9 | 10 | optimisation function(optimiser) which will minimise the cost eg(Adam Optimiser, AdaGrad) 11 | This optimiser goes backwards and manipulates the weights. 12 | This motion is called Backward Propogation 13 | 14 | feed forward + backpropogation = epoch-> Cycle. Cost minimised at each cycle 15 | 16 | """ 17 | 18 | # tf.Variable is used to train variables such as weights. 
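# e.g. a minimal sketch of the distinction (assuming the TF 1.x API; the shapes here are just for illustration):
#     W = tf.Variable(tf.random_normal([784, 500]))   # trainable weights that the optimiser updates
#     b = tf.Variable(tf.zeros([500]))                 # trainable biases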
19 | # tf.placeholder is used to feed actual training examples 20 | import tensorflow as tf 21 | from tensorflow.examples.tutorials.mnist import input_data 22 | 23 | # using MNIST dataset, set of written examples of handwritten digits, 28 by 28 pixels 24 | mnist = input_data.read_data_sets('/home/raghav/Desktop/Data', one_hot=True) # 1 is on and rest is off. Could be 25 | 26 | # usefull in multiclass 27 | # eg here 10 classes - 0 to 9 28 | # one hot means output of 0 is by [1,0,0,0,0,0,0,0,0] 29 | # output of 1 is output of 0 is by [0,1,0,0,0,0,0,0,0] 30 | 31 | # defining our model 32 | 33 | n_nodes_h1 = 500 34 | n_nodes_h2 = 500 35 | n_nodes_h3 = 500 36 | 37 | n_classes = 10 38 | batch_size = 100 # goes through batches of 100 of features and feed them to network at a time and manipulate the 39 | # weights and then another batch and so on 40 | 41 | # matrix is height by width 42 | x = tf.placeholder('float', [None, 28 * 28]) # flattening out the matrix 43 | y = tf.placeholder('float') 44 | 45 | 46 | # x is the data, y is output 47 | 48 | def neural_network_model(data): 49 | # weights are tf variable where the variable is a tf random_normal and we specify the shape of the normal 50 | # for eg in the hidden_1_layer we have 28*28 inputs and n_nodes_h1 nodes. So a total of 28*28*n_nodes_h1 weights 51 | 52 | # tf.truncated_normal selects random numbers whose mean is close to zero and values are close to 0 53 | hidden_1_layer = {'weights': tf.Variable(tf.truncated_normal([28 * 28, n_nodes_h1])), 54 | 'biases': tf.Variable(tf.truncated_normal(shape=[n_nodes_h1]))} 55 | 56 | hidden_2_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_h1, n_nodes_h2])), 57 | 'biases': tf.Variable(tf.truncated_normal(shape=[n_nodes_h2]))} 58 | 59 | hidden_3_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_h2, n_nodes_h3])), 60 | 'biases': tf.Variable(tf.truncated_normal(shape=[n_nodes_h2]))} 61 | 62 | output_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_h3, n_classes])), 63 | 'biases': tf.Variable(tf.truncated_normal(shape=[n_classes]))} 64 | 65 | # (input*weights + bias) 66 | l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases']) 67 | l1 = tf.nn.relu(l1) # threshold function 68 | 69 | l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases']) 70 | l2 = tf.nn.relu(l2) 71 | 72 | l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases']) 73 | l3 = tf.nn.relu(l3) 74 | 75 | output = tf.add(tf.matmul(l3, output_layer['weights']), output_layer['biases']) 76 | 77 | return output 78 | 79 | 80 | def train_neural_network(x): 81 | prediction = neural_network_model(x) 82 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)) # calculates the 83 | # diff of prediction to known label 84 | # minimise the cost 85 | 86 | optimizer = tf.train.AdamOptimizer().minimize(cost) 87 | 88 | hm_epochs = 10 89 | 90 | with tf.Session() as sess: 91 | sess.run(tf.global_variables_initializer()) 92 | 93 | for epoch in range(hm_epochs): 94 | epoch_loss = 0 95 | for _ in range(int(mnist.train.num_examples) // batch_size): 96 | epoch_x, epoch_y = mnist.train.next_batch(batch_size) 97 | _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y}) 98 | epoch_loss += c 99 | print('Epoch', epoch+1, 'completed out of ', hm_epochs, ' loss ', epoch_loss) 100 | 101 | # testing 102 | correct = tf.equal(tf.argmax(prediction, 1), 103 | tf.argmax(y, 1)) # argmax returns the index of max value in the arrays 104 
| 105 | accuracy = tf.reduce_mean(tf.cast(correct, 'float')) 106 | print('accuracy ', accuracy.eval({x: mnist.test.images, y: mnist.test.labels})) 107 | 108 | 109 | train_neural_network(x) 110 | -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/4.1_Using_our_own_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Applying tf on pos and neg sentiments data. The data is in the form of string. Also every string has diff length. 3 | We want the same length input for tf 4 | 5 | These are all the unique words in our input data: 6 | ['chair' , 'table' , 'spoon' , 'tv'] 7 | New sentence: 8 | I pulled the chair upto the table. 9 | 10 | [0 , 0, 0, 0] 11 | chair is in the sentence, table is also there 12 | [1,0,0,1] 13 | """ 14 | 15 | from nltk.tokenize import word_tokenize 16 | from nltk.stem import WordNetLemmatizer # running, ran, run are same thing 17 | import numpy as np 18 | import random 19 | import pickle 20 | from collections import Counter 21 | 22 | lemmatizer = WordNetLemmatizer() 23 | hm_lines = 10000000 24 | 25 | 26 | def create_lexicon(pos, neg): # creates a list of words that are important 27 | lexicon = [] 28 | for fi in [pos, neg]: 29 | with open(fi, 'r') as f: 30 | contents = f.readlines() 31 | for l in contents[:hm_lines]: 32 | all_words = word_tokenize(l.lower()) 33 | lexicon += list(all_words) 34 | lexicon = [lemmatizer.lemmatize(i) for i in lexicon] 35 | word_counts = Counter(lexicon) # gives dictionary 36 | # word_counts = {'the':32322,'a':32134} 37 | 38 | l2 = [] 39 | for w in word_counts: 40 | if 1000 > word_counts[w] > 50: # the, an , a not necessory 41 | l2.append(w) 42 | 43 | print(len(l2)) 44 | return l2 45 | 46 | 47 | def sample_handling(sample, lexicon, classification): # creates a list of lists where the first element of list 48 | # denotes if word of lexicon present in our sample and the second tells us if it is pos or neg sampple 49 | featureset = [] 50 | # [ 51 | # [ [0 1 0 0 1],[0,1]] 52 | # ] 53 | with open(sample, 'r') as f: 54 | contents = f.readlines() 55 | for l in contents[:hm_lines]: 56 | current_words = word_tokenize(l.lower()) 57 | current_words = [lemmatizer.lemmatize(i) for i in current_words] 58 | features = np.zeros(len(lexicon)) 59 | for word in current_words: 60 | if word.lower() in lexicon: 61 | index_value = lexicon.index(word.lower()) 62 | features[index_value] += 1 63 | features = list(features) 64 | featureset.append([features, classification]) 65 | 66 | return featureset 67 | 68 | 69 | def create_feature_sets_and_labels(pos, neg, test_size=0.1): 70 | lexicon = create_lexicon(pos, neg) 71 | features = [] 72 | features += sample_handling('pos.txt', lexicon, [1, 0]) 73 | features += sample_handling('neg.txt', lexicon, [0, 1]) 74 | random.shuffle(features) 75 | 76 | features = np.array(features) 77 | testing_size = int(test_size * len(features)) 78 | 79 | train_x = list(features[:, 0][:-testing_size]) # [:,0] feature of numpy gets the first element 80 | train_y = list(features[:, 1][:-testing_size]) 81 | test_x = list(features[:, 0][-testing_size:]) 82 | test_y = list(features[:, 1][-testing_size:]) 83 | 84 | return train_x, train_y, test_x, test_y 85 | 86 | 87 | if __name__ == '__main__': 88 | train_x, train_y, test_x, test_y = create_feature_sets_and_labels('pos.txt', 'neg.txt') 89 | 90 | with open('sentiment_set.pickle', 'wb') as f: 91 | pickle.dump([train_x, train_y, test_x, test_y], f) 92 | 
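# A minimal sketch of the bag-of-words idea described in the docstring above
# (illustrative only; the toy lexicon and sentence are assumptions, and the real
# pipeline tokenises and lemmatises with NLTK as in sample_handling()):
#
#   def sentence_to_features(sentence, lexicon):
#       features = [0] * len(lexicon)
#       for word in sentence.lower().split():
#           if word in lexicon:
#               features[lexicon.index(word)] += 1
#       return features
#
#   sentence_to_features('I pulled the chair upto the table', ['chair', 'table', 'spoon', 'tv'])
#   # -> [1, 1, 0, 0]  (with this lexicon ordering, 'chair' and 'table' are each counted once)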
-------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/4.2_sentiment_neural_network.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import pickle 3 | import numpy as np 4 | 5 | f = open("sentiment_set.pickle", 'rb') 6 | data_pickle = pickle.load(f) 7 | train_x, train_y, test_x, test_y = data_pickle 8 | 9 | n_nodes_h1 = 500 10 | n_nodes_h2 = 500 11 | n_nodes_h3 = 500 12 | 13 | n_classes = 2 14 | batch_size = 100 15 | x = tf.placeholder('float', [None, len(train_x[0])]) 16 | y = tf.placeholder('float') 17 | 18 | 19 | # x is the data, y is output 20 | 21 | def neural_network_model(data): 22 | hidden_1_layer = {'weights': tf.Variable(tf.truncated_normal([len(train_x[0]), n_nodes_h1])), 23 | 'biases': tf.constant(0.1, shape=[n_nodes_h1])} 24 | 25 | hidden_2_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_h1, n_nodes_h2])), 26 | 'biases': tf.constant(0.1, shape=[n_nodes_h2])} 27 | 28 | hidden_3_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_h2, n_nodes_h3])), 29 | 'biases': tf.constant(0.1, shape=[n_nodes_h2])} 30 | 31 | output_layer = {'weights': tf.Variable(tf.truncated_normal([n_nodes_h3, n_classes])), 32 | 'biases': tf.constant(0.1, shape=[n_classes])} 33 | 34 | # (input*weights + bias) 35 | l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases']) 36 | l1 = tf.nn.relu(l1) # threshold function 37 | 38 | l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases']) 39 | l2 = tf.nn.relu(l2) 40 | 41 | l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases']) 42 | l3 = tf.nn.relu(l3) 43 | 44 | output = tf.add(tf.matmul(l3, output_layer['weights']), output_layer['biases']) 45 | 46 | return output 47 | 48 | 49 | def train_neural_network(x): 50 | prediction = neural_network_model(x) 51 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)) # calculates the 52 | # diff of prediction 53 | # to known label 54 | # minimise the cost 55 | 56 | optimizer = tf.train.AdamOptimizer().minimize(cost) 57 | 58 | hm_epochs = 10 59 | 60 | with tf.Session() as sess: 61 | sess.run(tf.global_variables_initializer()) 62 | 63 | for epoch in range(hm_epochs): 64 | epoch_loss = 0 65 | 66 | i = 0 67 | while i < len(train_x): 68 | start = i 69 | end = i + batch_size 70 | batch_x = np.array(train_x[start:end]) 71 | batch_y = np.array(train_y[start:end]) 72 | _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y}) 73 | epoch_loss += c 74 | i += batch_size 75 | print('Epoch', epoch+1, 'completed out of ', hm_epochs, ' loss ', epoch_loss) 76 | 77 | # testing 78 | correct = tf.equal(tf.argmax(prediction, 1), 79 | tf.argmax(y, 1)) # argmax returns the index of max value in the arrays 80 | 81 | accuracy = tf.reduce_mean(tf.cast(correct, 'float')) 82 | print('accuracy ', accuracy.eval({x: test_x, y: test_y})) 83 | 84 | 85 | train_neural_network(x) 86 | -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/4.3_adding_more_data(preprocessing).py: -------------------------------------------------------------------------------- 1 | import nltk 2 | from nltk.tokenize import word_tokenize 3 | from nltk.stem import WordNetLemmatizer 4 | import pickle 5 | import numpy as np 6 | import pandas as pd 7 | 8 | lemmatizer = WordNetLemmatizer() 9 | 10 | ''' 11 | polarity 0 = negative. 2 = neutral. 4 = positive. 
12 | id 13 | date 14 | query 15 | user 16 | tweet 17 | ''' 18 | 19 | 20 | def init_process(fin, fout): # formatting stuff for input data 21 | outfile = open(fout, 'a') 22 | with open(fin, buffering=200000, encoding='latin-1') as f: 23 | try: 24 | for line in f: 25 | line = line.replace('"', '') 26 | initial_polarity = line.split(',')[0] 27 | if initial_polarity == '0': 28 | initial_polarity = [1, 0] 29 | elif initial_polarity == '4': 30 | initial_polarity = [0, 1] 31 | 32 | tweet = line.split(',')[-1] 33 | outline = str(initial_polarity) + ':::' + tweet 34 | outfile.write(outline) 35 | except Exception as e: 36 | print(str(e)) 37 | outfile.close() 38 | 39 | 40 | # init_process('/home/raghav/Desktop/trainingandtestdata/training.1600000.processed.noemoticon.csv', 'train_set.csv') 41 | # init_process('/home/raghav/Desktop/trainingandtestdata/testdata.manual.2009.06.14.csv', 'test_set.csv') 42 | 43 | 44 | def create_lexicon(fin): # creating lexicon for our data 45 | lexicon = [] 46 | with open(fin, 'r', buffering=100000, encoding='latin-1') as f: 47 | try: 48 | counter = 1 49 | content = '' 50 | for line in f: 51 | counter += 1 52 | if (counter / 2500.0).is_integer(): # every 2500 lines 53 | tweet = line.split(':::')[1] 54 | content += ' ' + tweet 55 | words = word_tokenize(content) 56 | words = [lemmatizer.lemmatize(i) for i in words] 57 | lexicon = list(set(lexicon + words)) 58 | print(counter, len(lexicon)) 59 | 60 | except Exception as e: 61 | print(str(e)) 62 | 63 | with open('lexicon-2500-2638.pickle', 'wb') as f: # 2500 as one in every 2500 and 2638 is the number of words in 64 | # lexicon 65 | pickle.dump(lexicon, f) 66 | 67 | 68 | # create_lexicon('/home/raghav/Documents/deep_learning_datasets/train_set.csv') 69 | 70 | 71 | def convert_to_vec(fin, fout, lexicon_pickle): 72 | with open(lexicon_pickle, 'rb') as f: 73 | lexicon = pickle.load(f) 74 | outfile = open(fout, 'a') 75 | with open(fin, buffering=20000, encoding='latin-1') as f: 76 | counter = 0 77 | for line in f: 78 | counter += 1 79 | label = line.split(':::')[0] 80 | tweet = line.split(':::')[1] 81 | current_words = word_tokenize(tweet.lower()) 82 | current_words = [lemmatizer.lemmatize(i) for i in current_words] 83 | 84 | features = np.zeros(len(lexicon)) 85 | 86 | for word in current_words: 87 | if word.lower() in lexicon: 88 | index_value = lexicon.index(word.lower()) 89 | # OR DO +=1, test both 90 | features[index_value] += 1 91 | 92 | features = list(features) 93 | outline = str(features) + '::' + str(label) + '\n' 94 | outfile.write(outline) 95 | 96 | print(counter) 97 | 98 | 99 | # convert_to_vec('/home/raghav/Documents/deep_learning_datasets/test_set.csv', 'processed-test-set.csv', 'lexicon-2500-2638.pickle') 100 | 101 | 102 | def shuffle_data(fin): 103 | df = pd.read_csv(fin, error_bad_lines=False) 104 | df = df.iloc[np.random.permutation(len(df))] 105 | print(df.head()) 106 | df.to_csv('/home/raghav/Documents/train_set_shuffled.csv', index=False) 107 | 108 | 109 | shuffle_data('/home/raghav/Documents/deep_learning_datasets/train_set.csv') 110 | 111 | 112 | def create_test_data_pickle(fin): 113 | feature_sets = [] 114 | labels = [] 115 | counter = 0 116 | with open(fin, buffering=20000) as f: 117 | for line in f: 118 | try: 119 | features = list(eval(line.split('::')[0])) 120 | label = list(eval(line.split('::')[1])) 121 | 122 | feature_sets.append(features) 123 | labels.append(label) 124 | counter += 1 125 | except: 126 | pass 127 | print(counter) 128 | feature_sets = np.array(feature_sets) 129 | labels = 
np.array(labels) 130 | 131 | # this is too big. Will do inline later 132 | # create_test_data_pickle('processed-test-set.csv') 133 | -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/4.4_Neural_netword_model_forMoreData.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import pickle 3 | import numpy as np 4 | import nltk 5 | from nltk.tokenize import word_tokenize 6 | from nltk.stem import WordNetLemmatizer 7 | 8 | lemmatizer = WordNetLemmatizer() 9 | 10 | n_nodes_hl1 = 500 11 | n_nodes_hl2 = 500 12 | 13 | n_classes = 2 14 | 15 | batch_size = 32 16 | total_batches = int(1600000 / batch_size) 17 | hm_epochs = 10 18 | 19 | x = tf.placeholder('float', name='X') 20 | y = tf.placeholder('float', name='y') 21 | 22 | hidden_1_layer = {'f_fum': n_nodes_hl1, 23 | 'weight': tf.Variable(tf.random_normal([2638, n_nodes_hl1])), 24 | 'bias': tf.Variable(tf.random_normal([n_nodes_hl1]))} 25 | 26 | hidden_2_layer = {'f_fum': n_nodes_hl2, 27 | 'weight': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])), 28 | 'bias': tf.Variable(tf.random_normal([n_nodes_hl2]))} 29 | 30 | output_layer = {'f_fum': None, 31 | 'weight': tf.Variable(tf.random_normal([n_nodes_hl2, n_classes])), 32 | 'bias': tf.Variable(tf.random_normal([n_classes])), } 33 | 34 | 35 | def neural_network_model(data): 36 | l1 = tf.add(tf.matmul(data, hidden_1_layer['weight']), hidden_1_layer['bias']) 37 | l1 = tf.nn.relu(l1) 38 | l2 = tf.add(tf.matmul(l1, hidden_2_layer['weight']), hidden_2_layer['bias']) 39 | l2 = tf.nn.relu(l2) 40 | output = tf.matmul(l2, output_layer['weight']) + output_layer['bias'] 41 | return output 42 | 43 | 44 | saver = tf.train.Saver() 45 | tf_log = 'tf.log' 46 | 47 | 48 | def train_neural_network(x): 49 | prediction = neural_network_model(x) 50 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction)) 51 | optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost) 52 | with tf.Session() as sess: 53 | sess.run(tf.global_variables_initializer()) 54 | try: 55 | epoch = int(open(tf_log, 'r').read().split('\n')[-2]) + 1 56 | print('STARTING:', epoch) 57 | except: 58 | epoch = 1 59 | 60 | while epoch <= hm_epochs: 61 | if epoch != 1: 62 | saver.restore(sess, "model.ckpt") 63 | epoch_loss = 1 64 | with open('lexicon-200-2638.pickle', 'rb') as f: 65 | lexicon = pickle.load(f) 66 | with open('/home/raghav/Documents/deep_learning_datasets/train_set_shuffled.csv', buffering=20000, encoding='latin-1') as f: 67 | batch_x = [] 68 | batch_y = [] 69 | batches_run = 0 70 | for line in f: 71 | label = line.split(':::')[0] 72 | tweet = line.split(':::')[1] 73 | current_words = word_tokenize(tweet.lower()) 74 | current_words = [lemmatizer.lemmatize(i) for i in current_words] 75 | 76 | features = np.zeros(len(lexicon)) 77 | 78 | for word in current_words: 79 | if word.lower() in lexicon: 80 | index_value = lexicon.index(word.lower()) 81 | # OR DO +=1, test both 82 | features[index_value] += 1 83 | line_x = list(features) 84 | line_y = eval(label) 85 | batch_x.append(line_x) 86 | batch_y.append(line_y) 87 | if len(batch_x) >= batch_size: 88 | _, c = sess.run([optimizer, cost], feed_dict={x: np.array(batch_x), 89 | y: np.array(batch_y)}) 90 | epoch_loss += c 91 | batch_x = [] 92 | batch_y = [] 93 | batches_run += 1 94 | # print('Batch run:', batches_run, '/', total_batches, '| Epoch:', epoch, '| Batch Loss:', c, ) 95 | 96 | saver.save(sess, "model.ckpt") 97 | 
print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss) 98 | with open(tf_log, 'a') as f: 99 | f.write(str(epoch) + '\n') 100 | epoch += 1 101 | 102 | 103 | def test_neural_network(): 104 | prediction = neural_network_model(x) 105 | with tf.Session() as sess: 106 | sess.run(tf.global_variables_initializer()) 107 | for epoch in range(hm_epochs): 108 | saver.restore(sess, './model.ckpt') 109 | # try: 110 | # saver.restore(sess, "model.ckpt") 111 | # except Exception as e: 112 | # # print(str(e)) 113 | # print("fuck") 114 | # epoch_loss = 0 115 | 116 | correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1)) 117 | accuracy = tf.reduce_mean(tf.cast(correct, 'float')) 118 | feature_sets = [] 119 | labels = [] 120 | counter = 0 121 | with open('processed-test-set.csv', buffering=20000) as f: 122 | for line in f: 123 | try: 124 | features = list(eval(line.split('::')[0])) 125 | label = list(eval(line.split('::')[1])) 126 | feature_sets.append(features) 127 | labels.append(label) 128 | counter += 1 129 | except: 130 | pass 131 | print('Tested', counter, 'samples.') 132 | test_x = np.array(feature_sets) 133 | test_y = np.array(labels) 134 | print('Accuracy:', accuracy.eval({x: test_x, y: test_y})) 135 | 136 | test_neural_network() 137 | -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/5_Recurrent Neural Networks.py: -------------------------------------------------------------------------------- 1 | """ 2 | 1) Recurrent Neural Networks: Solves a problem that involves time: eg a machine playing 3 | catch, it needs to know if the ball is moving away or coming towards. They are used with languages as tense plays a 4 | role in language. 5 | 6 | LSTM cell (Long Short Term Memory Cell) most common cells used with RNN. 7 | 8 | In deep neural network, we had inputs with weights, which was then fed to a neuron. However order in which they 9 | were fed did not matter. The inputs were fed to an activation function(neurons) and then we got the output 10 | In RNN, X1 gets send into the activation function and the output is fed back to the activation function. So at t=0, 11 | only input fed to activation function. At t=1, both input and output of activation function fed back to activation 12 | function 13 | 14 | Consider "Raghav drove car" where each word is a feature. In the deep neural network, "Raghav drove car" and "car 15 | drove Raghav" is same. 16 | """ 17 | 18 | 19 | """ 20 | Say , u have a 5*5 image and u have 1 such image then it is : 21 | 22 | x = np.ones((1,5,5)) 23 | 24 | so u have , 25 | 26 | x = array([[[ 1., 1., 1., 1., 1.], 27 | [ 1., 1., 1., 1., 1.], 28 | [ 1., 1., 1., 1., 1.], 29 | [ 1., 1., 1., 1., 1.], 30 | [ 1., 1., 1., 1., 1.]]]) 31 | 32 | now for the rnn u need to convert each row of pixel into a single chunk. 33 | so , u would have 5 chunks of 5 values each 34 | so, u need to convert each row to an array 35 | 36 | x = np.transpose(x,(1,0,2)) 37 | 38 | this swaps the 0th dim with the 1st dim . so, u get shape of x as (5,1,5) 39 | which is 5 arrays of 1 chunk each of 5 elements 40 | 41 | x = array([[[ 1., 1., 1., 1., 1.]], 42 | 43 | [[ 1., 1., 1., 1., 1.]], 44 | 45 | [[ 1., 1., 1., 1., 1.]], 46 | 47 | [[ 1., 1., 1., 1., 1.]], 48 | 49 | [[ 1., 1., 1., 1., 1.]]]) 50 | 51 | now , u need to remove 1 pair of extra braces . 
so flatten by one dimension 52 | 53 | x = np.reshape(x,(-1,chunk_size)) 54 | 55 | so, u will have : 56 | 57 | x = array([[ 1., 1., 1., 1., 1.], 58 | [ 1., 1., 1., 1., 1.], 59 | [ 1., 1., 1., 1., 1.], 60 | [ 1., 1., 1., 1., 1.], 61 | [ 1., 1., 1., 1., 1.]]) 62 | 63 | and finally u will need to split the entire thing into 5 chunks(5 arrays) 64 | x = np.split(x,n_chunks,0) 65 | 66 | so, finally u have : 67 | 68 | x = [array([[ 1., 1., 1., 1., 1.]]), array([[ 1., 1., 1., 1., 1.]]), array([[ 1., 1., 1., 1., 1.]]), 69 | array([[ 1., 1., 1., 1., 1.]]), array([[ 1., 1., 1., 1., 1.]])] 70 | """ 71 | 72 | import tensorflow as tf 73 | from tensorflow.examples.tutorials.mnist import input_data 74 | from tensorflow.contrib import rnn 75 | 76 | # using MNIST dataset, 77 | mnist = input_data.read_data_sets('/home/raghav/Desktop/Data', one_hot=True) 78 | 79 | n_nodes_h1 = 500 80 | n_nodes_h2 = 500 81 | n_nodes_h3 = 500 82 | 83 | hm_epochs = 3 84 | n_classes = 10 85 | batch_size = 128 86 | chuck_size = 28 87 | n_chunks = 28 88 | rnn_size = 128 89 | 90 | # images are 28*28 91 | x = tf.placeholder('float', [None, n_chunks, chuck_size]) # flattening out the matrix 92 | y = tf.placeholder('float') 93 | 94 | 95 | def recurrent_neural_network_model(x): 96 | layer = {'weights': tf.Variable(tf.truncated_normal([rnn_size, n_classes])), 97 | 'biases': tf.constant(0.1, shape=[n_classes])} 98 | 99 | x = tf.transpose(x, [1, 0, 2]) 100 | x = tf.reshape(x, [-1, chuck_size]) 101 | x = tf.split(x, n_chunks, 0) 102 | 103 | lstm_cell = rnn.BasicLSTMCell(rnn_size) 104 | outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32) 105 | output = tf.add(tf.matmul(outputs[-1], layer['weights']), layer['biases']) 106 | 107 | return output 108 | 109 | 110 | def train_neural_network(x): 111 | prediction = recurrent_neural_network_model(x) 112 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)) # calculates the 113 | 114 | optimizer = tf.train.AdamOptimizer().minimize(cost) 115 | 116 | with tf.Session() as sess: 117 | sess.run(tf.global_variables_initializer()) 118 | 119 | for epoch in range(hm_epochs): 120 | epoch_loss = 0 121 | for _ in range(int(mnist.train.num_examples) // batch_size): 122 | epoch_x, epoch_y = mnist.train.next_batch(batch_size) 123 | epoch_x = epoch_x.reshape((batch_size, n_chunks, chuck_size)) 124 | 125 | _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y}) 126 | epoch_loss += c 127 | print('Epoch', epoch, 'completed out of ', hm_epochs, ' loss ', epoch_loss) 128 | 129 | # testing 130 | correct = tf.equal(tf.argmax(prediction, 1), 131 | tf.argmax(y, 1)) # argmax returns the index of max value in the arrays 132 | 133 | accuracy = tf.reduce_mean(tf.cast(correct, 'float')) 134 | print('accuracy ', accuracy.eval({x: mnist.test.images.reshape((-1, n_chunks, chuck_size)), 135 | y: mnist.test.labels})) 136 | 137 | 138 | train_neural_network(x) 139 | -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/6_Convolutional_Neural_nets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convolution Neural Network: State of Art,: Used in images 3 | 4 | We have input data. Do some convolutions which creates feature maps. Then do some pooling. Conv+Pooling is the hidden 5 | layer. Then a fully connected layer added. Then output. 
6 | 7 | input -> (convolution -> pooling) -> fully connected layer(hidden layers of simple neural nets) -> output 8 | | 9 | \ / 10 | hidden layer 11 | 12 | Convolve : Creating feature maps from original dataset. eg let the dataset be image of a cat. take a 3*3 window. \ 13 | This window can be moved over the image. The creation of these new windows which have some value in them is called 14 | convolution. We can now instead of image, create a feature map(2*2)grid which has the values of the windows. 15 | 16 | Pooling: eg 3*3 pooling i.e 3*3 window. Pooling is simplifying the window. For eg max pool. Take the max value out of 17 | the 3*3 pool 18 | 19 | """ 20 | """ 21 | Our model 22 | input data > Weight it > hidden layer 1 (activation function) > weights > Hidden Layer 2(activation function)> weights 23 | > output layer. 24 | 25 | In a neural network, this data is passed straight through. That passing of data is called feed forward 26 | 27 | Compare output to intended output.> cost function 28 | 29 | optimisation function(optimiser) which will minimise the cost eg(Adam Optimiser, AdaGrad) 30 | This optimiser goes backwards and manipulates the weights. 31 | This motion is called Backward Propogation 32 | 33 | feed forward + backpropogation = epoch-> Cycle. Cost minimised at each cycle 34 | 35 | """ 36 | import tensorflow as tf 37 | from tensorflow.examples.tutorials.mnist import input_data 38 | 39 | mnist = input_data.read_data_sets("/tmp/data/", one_hot=True) 40 | 41 | n_classes = 10 42 | batch_size = 128 43 | 44 | x = tf.placeholder('float', [None, 784]) 45 | y = tf.placeholder('float') 46 | 47 | keep_rate = 0.8 48 | keep_prob = tf.placeholder(tf.float32) 49 | 50 | 51 | def conv2d(x, W): 52 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 53 | 54 | 55 | def maxpool2d(x): 56 | # size of window movement of window 57 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') # window is 2*2, and will move 58 | # 2 pixels 59 | 60 | 61 | def convolutional_neural_network(x): 62 | weights = {'W_conv1': tf.Variable(tf.random_normal([5, 5, 1, 32])), # we are taking 5*5 window on original image 63 | # and taking 32 outputs 64 | 'W_conv2': tf.Variable(tf.random_normal([5, 5, 32, 64])), 65 | 'W_fc': tf.Variable(tf.random_normal([7 * 7 * 64, 1024])), 66 | 'out': tf.Variable(tf.random_normal([1024, n_classes]))} 67 | 68 | biases = {'b_conv1': tf.Variable(tf.random_normal([32])), 69 | 'b_conv2': tf.Variable(tf.random_normal([64])), 70 | 'b_fc': tf.Variable(tf.random_normal([1024])), 71 | 'out': tf.Variable(tf.random_normal([n_classes]))} 72 | 73 | x = tf.reshape(x, shape=[-1, 28, 28, 1]) 74 | 75 | conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1']) 76 | conv1 = maxpool2d(conv1) 77 | 78 | conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2']) 79 | conv2 = maxpool2d(conv2) 80 | 81 | fc = tf.reshape(conv2, [-1, 7 * 7 * 64]) 82 | fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc']) 83 | fc = tf.nn.dropout(fc, keep_rate) 84 | 85 | output = tf.matmul(fc, weights['out']) + biases['out'] 86 | 87 | return output 88 | 89 | 90 | def train_neural_network(x): 91 | prediction = convolutional_neural_network(x) 92 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)) 93 | optimizer = tf.train.AdamOptimizer().minimize(cost) 94 | 95 | hm_epochs = 10 96 | with tf.Session() as sess: 97 | sess.run(tf.global_variables_initializer()) 98 | 99 | for epoch in range(hm_epochs): 100 | epoch_loss = 0 
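            # go through the training set in mini-batches: mnist.train.num_examples / batch_size batches per epoch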
101 | for _ in range(int(mnist.train.num_examples / batch_size)): 102 | epoch_x, epoch_y = mnist.train.next_batch(batch_size) 103 | _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y}) 104 | epoch_loss += c 105 | 106 | print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss) 107 | 108 | correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1)) 109 | 110 | accuracy = tf.reduce_mean(tf.cast(correct, 'float')) 111 | print('Accuracy:', accuracy.eval({x: mnist.test.images, y: mnist.test.labels})) 112 | 113 | 114 | train_neural_network(x) 115 | -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/7_tflearn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Abstraction layers over tensorflow: tflearn, keras, tfslim , skflow 3 | 4 | """ 5 | 6 | import tflearn 7 | from tflearn.layers.conv import conv_2d, max_pool_2d 8 | from tflearn.layers.core import input_data, dropout, fully_connected 9 | from tflearn.layers.estimator import regression 10 | import tflearn.datasets.mnist as mnist 11 | 12 | X, y, test_X, test_y = mnist.load_data(one_hot=True) 13 | 14 | X = X.reshape([-1, 28, 28, 1]) 15 | test_X = test_X.reshape([-1, 28, 28, 1]) 16 | 17 | convnet = input_data(shape=[None, 28, 28, 1], name='input') 18 | 19 | convnet = conv_2d(convnet, 32, 2, activation='relu') 20 | convnet = max_pool_2d(convnet, 2) 21 | 22 | convnet = conv_2d(convnet, 64, 2, activation='relu') 23 | convnet = max_pool_2d(convnet, 2) 24 | 25 | convnet = fully_connected(convnet, 1024, activation='relu') 26 | 27 | convnet = dropout(convnet, 0.8) 28 | 29 | convnet = fully_connected(convnet, 10, activation='softmax') # output layer 30 | convnet = regression(convnet, optimizer='adam', learning_rate=0.01, loss='categorical_crossentropy', name='targets') 31 | 32 | model = tflearn.DNN(convnet) 33 | 34 | # model.fit({'input': X}, {'targets': y}, n_epoch=10, 35 | # validation_set=({'input': test_X}, {'targets': test_y}), 36 | # snapshot_step=500, show_metric=True, run_id='mnist') 37 | # 38 | # 39 | # model.save('tflearncnn.model') # saves the weights. So we need to do everything before model.fit() and then load this 40 | # 41 | # once saved comment it and load 42 | 43 | model.load('tflearncnn.model') 44 | 45 | print(model.predict([test_X[1]])) 46 | -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/cs20si/.ipynb_checkpoints/lecture_1:graphsandsessions-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# **TENSORFLOW**\n", 8 | "\n", 9 | "* Open sourced by Google \n", 10 | "* Pytorch by Facebook\n", 11 | "* Wavenet-> Text to speech network\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "**Graphs and Sessions**\n", 19 | "\n", 20 | "Tensorflow separates the definition and execution of a model. We first define the graph. 
Then we use session to layout the graph.\n", 21 | "\n", 22 | "**Tensor -> n dim matrix**\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "Tensor(\"Add:0\", shape=(), dtype=int32)\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "import tensorflow as tf\n", 40 | "\n", 41 | "a = tf.add(2, 3)\n", 42 | "\n", 43 | "print(a)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "When we print(a) above we expect 5 as output but it is not so.\n", 51 | "This is because we are just creating the graph. \n", 52 | "\n", 53 | "Here a node called \"add\" has been created. The input is a scalar ie 3 and 5, so a zero dimension. Hence shape is 0. Data type is int32" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "**Tensorboard** ->Allows us to visualise the graph defined\n", 61 | "\n", 62 | "* Nodes : are the operations, variables, constants etc\n", 63 | "* Tensor values on edges\n", 64 | "\n", 65 | "So the above statement gives a node having addition operation with 2 edges going to it having values 3 and 5" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "**To get the value of a** -> Create a session\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 3, 78 | "metadata": { 79 | "collapsed": true 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "sess = tf.Session()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 4, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "5\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "print(sess.run(a)) # runs the session\n", 101 | "\n", 102 | "# tf.Session(fetches, feed_dict=None, options=None, run_metadata=None)\n" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "Inside sess.run(), we give either the whole graph or the node as input to the paranthesis.\n", 110 | "**If you want to compute the value of multiple nodes, provide them as a list**\n", 111 | "\n", 112 | "\n", 113 | "\n", 114 | "eg:\n" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "# sess.run([a,b]) # computes the value of a and b" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "\n", 133 | "When we give a as input to parameter of .run : tensorflow looks at the graph defined and computes whatever is necessary to get the value of a. So if many nodes, it will first compute the other nodes necessary to get the value of node a." 
134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": true 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "sess.close() # closes the session\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "** Instead of explicitly closing the session, we can use the with statement of python like in case of files **\n" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 5, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "5\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "with tf.Session() as sess:\n", 169 | " print(sess.run(a))" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## **Now lets consider new example**\n" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 6, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "name": "stdout", 186 | "output_type": "stream", 187 | "text": [ 188 | "15625\n" 189 | ] 190 | } 191 | ], 192 | "source": [ 193 | "x = 2\n", 194 | "y = 3\n", 195 | "op1 = tf.add(x,y)\n", 196 | "op2 = tf.multiply(x,y)\n", 197 | "op3 = tf.pow(op1,op2)\n", 198 | "\n", 199 | "with tf.Session() as sess:\n", 200 | " print(sess.run(op3))" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "The above will first compute op1 and op2 and then finally op3, even though we only want the value of op3. \n", 208 | "\n", 209 | "**The graph is created when defining not in the tf.Session()**\n", 210 | "\n", 211 | "The important thing to note is **not everything that is defined is calculated. Only those portion of the graph is calculated which is required by the sess.run(). This is the power of Graph dataflow used in tensorflow**" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "* Since tf is based on graph, different nodes could be calculated across different CPUs/GPUs" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "## What if we want more graphs?\n", 226 | "\n", 227 | "Ans) Not recommended. When we start to create our graph, tensorflow actually has a default graph. It just puts our nodes/values in that graph. ** You should use disconnected subgraphs **\n", 228 | "\n", 229 | "If you really want to....." 
230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 5, 235 | "metadata": { 236 | "collapsed": true 237 | }, 238 | "outputs": [], 239 | "source": [ 240 | "g = tf.Graph() # if you want something other than the default graph\n", 241 | "\n", 242 | "with g.as_default(): # making it the default graph\n", 243 | " x = tf.add(2, 3)\n", 244 | "\n", 245 | "sess = tf.Session(graph=g) # need to pass the graph..\n", 246 | "sess.run(x)\n", 247 | "sess.close()" 248 | ] 249 | } 250 | ], 251 | "metadata": { 252 | "kernelspec": { 253 | "display_name": "Python 3", 254 | "language": "python", 255 | "name": "python3" 256 | }, 257 | "language_info": { 258 | "codemirror_mode": { 259 | "name": "ipython", 260 | "version": 3 261 | }, 262 | "file_extension": ".py", 263 | "mimetype": "text/x-python", 264 | "name": "python", 265 | "nbconvert_exporter": "python", 266 | "pygments_lexer": "ipython3", 267 | "version": "3.5.2" 268 | } 269 | }, 270 | "nbformat": 4, 271 | "nbformat_minor": 1 272 | } 273 | -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/cs20si/graphs/events.out.tfevents.1508077168.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/Tensorflow/cs20si/graphs/events.out.tfevents.1508077168.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/cs20si/graphs/events.out.tfevents.1508077664.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/Tensorflow/cs20si/graphs/events.out.tfevents.1508077664.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/cs20si/graphs/events.out.tfevents.1512490021.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/Tensorflow/cs20si/graphs/events.out.tfevents.1512490021.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/cs20si/lecture_1:graphsandsessions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# **TENSORFLOW**\n", 8 | "\n", 9 | "* Open sourced by Google \n", 10 | "* Pytorch by Facebook\n", 11 | "* Wavenet-> Text to speech network\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "**Graphs and Sessions**\n", 19 | "\n", 20 | "Tensorflow separates the definition and execution of a model. We first define the graph. 
Then we use session to layout the graph.\n", 21 | "\n", 22 | "**Tensor -> n dim matrix**\n" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "Tensor(\"Add:0\", shape=(), dtype=int32)\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "import tensorflow as tf\n", 40 | "\n", 41 | "a = tf.add(2, 3)\n", 42 | "\n", 43 | "print(a)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "When we print(a) above we expect 5 as output but it is not so.\n", 51 | "This is because we are just creating the graph. \n", 52 | "\n", 53 | "Here a node called \"add\" has been created. The input is a scalar ie 3 and 5, so a zero dimension. Hence shape is 0. Data type is int32" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "**Tensorboard** ->Allows us to visualise the graph defined\n", 61 | "\n", 62 | "* Nodes : are the operations, variables, constants etc\n", 63 | "* Tensor values on edges\n", 64 | "\n", 65 | "So the above statement gives a node having addition operation with 2 edges going to it having values 3 and 5" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "**To get the value of a** -> Create a session\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 2, 78 | "metadata": { 79 | "collapsed": true 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "sess = tf.Session()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 3, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "5\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "print(sess.run(a)) # runs the session\n", 101 | "\n", 102 | "# tf.Session(fetches, feed_dict=None, options=None, run_metadata=None)\n" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "Inside sess.run(), we give either the whole graph or the node as input to the paranthesis.\n", 110 | "**If you want to compute the value of multiple nodes, provide them as a list**\n", 111 | "\n", 112 | "\n", 113 | "\n", 114 | "eg:\n" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "# sess.run([a,b]) # computes the value of a and b" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "\n", 133 | "When we give a as input to parameter of .run : tensorflow looks at the graph defined and computes whatever is necessary to get the value of a. So if many nodes, it will first compute the other nodes necessary to get the value of node a." 
134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": true 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "sess.close() # closes the session\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "** Instead of explicitly closing the session, we can use the with statement of python like in case of files **\n" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 5, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "5\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "with tf.Session() as sess:\n", 169 | " print(sess.run(a))" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## **Now lets consider new example**\n" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 6, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "name": "stdout", 186 | "output_type": "stream", 187 | "text": [ 188 | "15625\n" 189 | ] 190 | } 191 | ], 192 | "source": [ 193 | "x = 2\n", 194 | "y = 3\n", 195 | "op1 = tf.add(x,y)\n", 196 | "op2 = tf.multiply(x,y)\n", 197 | "op3 = tf.pow(op1,op2)\n", 198 | "\n", 199 | "with tf.Session() as sess:\n", 200 | " print(sess.run(op3))" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "The above will first compute op1 and op2 and then finally op3, even though we only want the value of op3. \n", 208 | "\n", 209 | "**The graph is created when defining not in the tf.Session()**\n", 210 | "\n", 211 | "The important thing to note is **not everything that is defined is calculated. Only those portion of the graph is calculated which is required by the sess.run(). This is the power of Graph dataflow used in tensorflow**" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "* Since tf is based on graph, different nodes could be calculated across different CPUs/GPUs" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "## What if we want more graphs?\n", 226 | "\n", 227 | "**Ans)** Not recommended. When we start to create our graph, tensorflow actually has a default graph. It just puts our nodes/values in that graph. ** You should use disconnected subgraphs **\n", 228 | "\n", 229 | "If you really want to....." 
230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 5, 235 | "metadata": { 236 | "collapsed": true 237 | }, 238 | "outputs": [], 239 | "source": [ 240 | "g = tf.Graph() # if you want something other than the default graph\n", 241 | "\n", 242 | "with g.as_default(): # making it the default graph\n", 243 | " x = tf.add(2, 3)\n", 244 | "\n", 245 | "sess = tf.Session(graph=g) # need to pass the graph..\n", 246 | "sess.run(x)\n", 247 | "sess.close()" 248 | ] 249 | } 250 | ], 251 | "metadata": { 252 | "kernelspec": { 253 | "display_name": "Python 3", 254 | "language": "python", 255 | "name": "python3" 256 | }, 257 | "language_info": { 258 | "codemirror_mode": { 259 | "name": "ipython", 260 | "version": 3 261 | }, 262 | "file_extension": ".py", 263 | "mimetype": "text/x-python", 264 | "name": "python", 265 | "nbconvert_exporter": "python", 266 | "pygments_lexer": "ipython3", 267 | "version": "3.5.2" 268 | } 269 | }, 270 | "nbformat": 4, 271 | "nbformat_minor": 1 272 | } 273 | -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/lexicon-2500-2638.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/Tensorflow/lexicon-2500-2638.pickle -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/lexicon.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/Tensorflow/lexicon.pickle -------------------------------------------------------------------------------- /Deep_Learning/Tensorflow/model.ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/Tensorflow/model.ckpt -------------------------------------------------------------------------------- /Deep_Learning/chatbot/1_intro.py: -------------------------------------------------------------------------------- 1 | import keras 2 | -------------------------------------------------------------------------------- /Deep_Learning/fast.ai/.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /Deep_Learning/fast.ai/.ipynb_checkpoints/lesson1-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Introduction\n", 8 | "\n", 9 | "Neural networks are universal approximating machines. To fit the parameters for the function, we use Gradient Descent. \n", 10 | "\n", 11 | "We are able to make this function fast with the help of GPU since GPUs are based on mostly based on matrix operations (on pixels) which is also what we want for Deep learning. We need NVIDIA GPU as they support CUDA. Amazon provides us GPU instances called P2 instances.\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "To run the instances we use AWS cli. 
For this use the alias.sh file in the fast-ai course in documents folder and go to setup. " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "# To run the aws-alias.sh\n", 28 | "# | source aws-alias.sh\n", 29 | "# | aws-get-p2\n", 30 | "# this line will get the instance id for the p2 instance and save it in variable \n", 31 | "#`instanceId`\n", 32 | "# to start the instance\n", 33 | "# | aws-start. This will start the instance and queries for the ip and prints it out\n", 34 | "# | aws-ssh. This will then ssh into that instance\n" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "Now we are inside our aws instance. AWS has something called AMI (Amazon Machine Images). It is basically a snapshot of the computer at a particular instance of time. We can start our instance using a copy of that snapshot. In the script given, there was an AMI which already had all the things installed." 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "When we type `jupyter notebook` in the aws ssh, it returns us the port which we should append with our ip address obtained from 'aws-start`. " 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "2 notebooks can be run in parallel compeletely separate from each other. \n", 56 | "\n", 57 | "Now to prevent typing **source aws-alias.sh** everytime, add it to **.bashrc** which contains a set of commands that bash runs before starting\n", 58 | "\n", 59 | "# Getting started with Dogs vs Cats\n", 60 | "\n", 61 | "To get started with the dogs vs cats, first we run the p2 instance of aws and then **wget** to get the .ipynb file and the data.\n", 62 | "\n", 63 | "**The structuring of the files in data is very important**\n", 64 | "* keras expects that each class of the image be in separate folder\n", 65 | "\n" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "there are about 12000 images each of dogs and cats in the train folder\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "**Always test the model on a small sample first**\n" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "collapsed": true 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "%matplotlib inline" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "Tells the jupyter notebook to display all the matplotlib graphs" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 3, 103 | "metadata": { 104 | "collapsed": true 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "#path = \"data/dogscats/\"\n", 109 | "path = \"data/dogscats/sample/\"" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "Tells the path to use" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "We will be using anaconda to install stuff. To install via anaconda ** conda install thing**. Sometimes conda installer not available, we will use pip.\n", 124 | "\n", 125 | "Now we would be using a pretrained neural network\n", 126 | "* V5516 -2014 winner\n", 127 | "* Inception-2015 winner\n", 128 | "* Resnet - 2016 winner\n", 129 | "We use VGG as it is the last \"simple\" model. 
Our script for VGG16 has already been downloaded.\n", 130 | "\n", 131 | "\n", 132 | "Now Keras runs on top of Theano/Tensorflow which convert our python code to CUDNN based code. Theano is sitting on top of CUDNN (CUDA deep neural network library).\n", 133 | "\n", 134 | "Tensorflow-> works well for multi GPUs\n", 135 | "Keras can easily be configured to use Tensorflow as backend instead of theano.\n", 136 | "\n", 137 | "To do that" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 5, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "#cd ~/.keras/\n", 147 | "#vim keras.json\n", 148 | "# here change the backend to tensorflow\n", 149 | "# also change the th to tf" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "To change theano to use cpu instead of gpu" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 6, 162 | "metadata": { 163 | "collapsed": true 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "#vim ~/.theanorc\n", 168 | "#Here change the cpu to gpu or vice versa" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "In dnn, we train in batches. We can't do all at time as it may be too large for the GPU's memory. " 176 | ] 177 | } 178 | ], 179 | "metadata": { 180 | "kernelspec": { 181 | "display_name": "Python 3", 182 | "language": "python", 183 | "name": "python3" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 3 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython3", 195 | "version": "3.5.2" 196 | } 197 | }, 198 | "nbformat": 4, 199 | "nbformat_minor": 2 200 | } 201 | -------------------------------------------------------------------------------- /Deep_Learning/fast.ai/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "[name: \"/cpu:0\"\n", 13 | "device_type: \"CPU\"\n", 14 | "memory_limit: 268435456\n", 15 | "locality {\n", 16 | "}\n", 17 | "incarnation: 13250090349658849473\n", 18 | ", name: \"/gpu:0\"\n", 19 | "device_type: \"GPU\"\n", 20 | "memory_limit: 150470656\n", 21 | "locality {\n", 22 | " bus_id: 1\n", 23 | "}\n", 24 | "incarnation: 15036183153429570801\n", 25 | "physical_device_desc: \"device: 0, name: GeForce GTX 850M, pci bus id: 0000:0a:00.0\"\n", 26 | "]\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "from tensorflow.python.client import device_lib\n", 32 | "print(device_lib.list_local_devices())" 33 | ] 34 | } 35 | ], 36 | "metadata": { 37 | "kernelspec": { 38 | "display_name": "Python 3", 39 | "language": "python", 40 | "name": "python3" 41 | }, 42 | "language_info": { 43 | "codemirror_mode": { 44 | "name": "ipython", 45 | "version": 3 46 | }, 47 | "file_extension": ".py", 48 | "mimetype": "text/x-python", 49 | "name": "python", 50 | "nbconvert_exporter": "python", 51 | "pygments_lexer": "ipython3", 52 | "version": "3.5.2" 53 | } 54 | }, 55 | "nbformat": 4, 56 | "nbformat_minor": 2 57 | } 58 | -------------------------------------------------------------------------------- /Deep_Learning/fast.ai/__pycache__/utils.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/fast.ai/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /Deep_Learning/fast.ai/__pycache__/vgg16.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/fast.ai/__pycache__/vgg16.cpython-35.pyc -------------------------------------------------------------------------------- /Deep_Learning/fast.ai/__pycache__/vgg16bn.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/fast.ai/__pycache__/vgg16bn.cpython-35.pyc -------------------------------------------------------------------------------- /Deep_Learning/fast.ai/lesson1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Introduction\n", 8 | "\n", 9 | "Neural networks are universal approximating machines. To fit the parameters for the function, we use Gradient Descent. \n", 10 | "\n", 11 | "We are able to make this function fast with the help of GPU since GPUs are based on mostly based on matrix operations (on pixels) which is also what we want for Deep learning. We need NVIDIA GPU as they support CUDA. Amazon provides us GPU instances called P2 instances.\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "To run the instances we use AWS cli. For this use the alias.sh file in the fast-ai course in documents folder and go to setup. " 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "# To run the aws-alias.sh\n", 30 | "# | source aws-alias.sh\n", 31 | "# | aws-get-p2\n", 32 | "# this line will get the instance id for the p2 instance and save it in variable \n", 33 | "#`instanceId`\n", 34 | "# to start the instance\n", 35 | "# | aws-start. This will start the instance and queries for the ip and prints it out\n", 36 | "# | aws-ssh. This will then ssh into that instance\n" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "Now we are inside our aws instance. AWS has something called AMI (Amazon Machine Images). It is basically a snapshot of the computer at a particular instance of time. We can start our instance using a copy of that snapshot. In the script given, there was an AMI which already had all the things installed." 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "When we type `jupyter notebook` in the aws ssh, it returns us the port which we should append with our ip address obtained from 'aws-start`. " 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "2 notebooks can be run in parallel compeletely separate from each other. 
\n", 58 | "\n", 59 | "Now to prevent typing **source aws-alias.sh** everytime, add it to **.bashrc** which contains a set of commands that bash runs before starting\n", 60 | "\n", 61 | "# Getting started with Dogs vs Cats\n", 62 | "\n", 63 | "To get started with the dogs vs cats, first we run the p2 instance of aws and then **wget** to get the .ipynb file and the data.\n", 64 | "\n", 65 | "**The structuring of the files in data is very important**\n", 66 | "* keras expects that each class of the image be in separate folder\n", 67 | "\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "there are about 12000 images each of dogs and cats in the train folder\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "**Always test the model on a small sample first**\n" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": { 88 | "collapsed": true 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "%matplotlib inline" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "Tells the jupyter notebook to display all the matplotlib graphs" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 3, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "#path = \"data/dogscats/\"\n", 111 | "path = \"/home/Documents/dogscats/sample/\"" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "Tells the path to use" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "We will be using anaconda to install stuff. To install via anaconda ** conda install thing**. Sometimes conda installer not available, we will use pip.\n", 126 | "\n", 127 | "Now we would be using a pretrained neural network\n", 128 | "* V5516 -2014 winner\n", 129 | "* Inception-2015 winner\n", 130 | "* Resnet - 2016 winner\n", 131 | "We use VGG as it is the last \"simple\" model. Our script for VGG16 has already been downloaded.\n", 132 | "\n", 133 | "\n", 134 | "Now Keras runs on top of Theano/Tensorflow which convert our python code to CUDNN based code. Theano is sitting on top of CUDNN (CUDA deep neural network library).\n", 135 | "\n", 136 | "Tensorflow-> works well for multi GPUs\n", 137 | "Keras can easily be configured to use Tensorflow as backend instead of theano.\n", 138 | "\n", 139 | "To do that" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 5, 145 | "metadata": { 146 | "collapsed": true 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "#cd ~/.keras/\n", 151 | "#vim keras.json\n", 152 | "# here change the backend to tensorflow\n", 153 | "# also change the th to tf" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "To change theano to use cpu instead of gpu" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 6, 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "#vim ~/.theanorc\n", 172 | "#Here change the cpu to gpu or vice versa" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "In dnn, we train in batches. We can't do all at time as it may be too large for the GPU's memory. 
\n", 180 | "\n", 181 | "**NOTE** the password of the notebook is dl_course" 182 | ] 183 | } 184 | ], 185 | "metadata": { 186 | "kernelspec": { 187 | "display_name": "Python 3", 188 | "language": "python", 189 | "name": "python3" 190 | }, 191 | "language_info": { 192 | "codemirror_mode": { 193 | "name": "ipython", 194 | "version": 3 195 | }, 196 | "file_extension": ".py", 197 | "mimetype": "text/x-python", 198 | "name": "python", 199 | "nbconvert_exporter": "python", 200 | "pygments_lexer": "ipython3", 201 | "version": "3.5.2" 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 2 206 | } 207 | -------------------------------------------------------------------------------- /Deep_Learning/fast.ai/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division,print_function 2 | import math, os, json, sys, re 3 | 4 | # import cPickle as pickle # Python 2 5 | import pickle # Python3 6 | 7 | from glob import glob 8 | import numpy as np 9 | from matplotlib import pyplot as plt 10 | from operator import itemgetter, attrgetter, methodcaller 11 | from collections import OrderedDict 12 | import itertools 13 | from itertools import chain 14 | 15 | import pandas as pd 16 | import PIL 17 | from PIL import Image 18 | from numpy.random import random, permutation, randn, normal, uniform, choice 19 | from numpy import newaxis 20 | import scipy 21 | from scipy import misc, ndimage 22 | from scipy.ndimage.interpolation import zoom 23 | from scipy.ndimage import imread 24 | from sklearn.metrics import confusion_matrix 25 | import bcolz 26 | from sklearn.preprocessing import OneHotEncoder 27 | from sklearn.manifold import TSNE 28 | 29 | from IPython.lib.display import FileLink 30 | 31 | import theano 32 | from theano import shared, tensor as T 33 | from theano.tensor.nnet import conv2d, nnet 34 | from theano.tensor.signal import pool 35 | 36 | import keras 37 | from keras import backend as K 38 | from keras.utils.data_utils import get_file 39 | from keras.utils import np_utils 40 | from keras.utils.np_utils import to_categorical 41 | from keras.models import Sequential, Model 42 | from keras.layers import Input, Embedding, Reshape, merge, LSTM, Bidirectional 43 | from keras.layers import SpatialDropout1D, Concatenate # Keras2 44 | 45 | from keras.layers import TimeDistributed, Activation, SimpleRNN, GRU 46 | from keras.layers.core import Flatten, Dense, Dropout, Lambda 47 | 48 | # from keras.regularizers import l2, activity_l2, l1, activity_l1 # Keras1 49 | from keras.regularizers import l2, l1 # Keras2 50 | 51 | from keras.layers.normalization import BatchNormalization 52 | from keras.optimizers import SGD, RMSprop, Adam 53 | 54 | # from keras.utils.layer_utils import layer_from_config # Keras1 55 | from keras.layers import deserialize # Keras 2 56 | from keras.layers.merge import dot, add, concatenate # Keras2 57 | from keras.metrics import categorical_crossentropy, categorical_accuracy 58 | from keras.layers.convolutional import * 59 | from keras.preprocessing import image, sequence 60 | from keras.preprocessing.text import Tokenizer 61 | 62 | from vgg16 import * 63 | from vgg16bn import * 64 | np.set_printoptions(precision=4, linewidth=100) 65 | 66 | 67 | to_bw = np.array([0.299, 0.587, 0.114]) 68 | 69 | def gray(img): 70 | if K.image_dim_ordering() == 'tf': 71 | return np.rollaxis(img, 0, 1).dot(to_bw) 72 | else: 73 | return np.rollaxis(img, 0, 3).dot(to_bw) 74 | 75 | def to_plot(img): 76 | if K.image_dim_ordering() == 'tf': 77 | 
return np.rollaxis(img, 0, 1).astype(np.uint8) 78 | else: 79 | return np.rollaxis(img, 0, 3).astype(np.uint8) 80 | 81 | def plot(img): 82 | plt.imshow(to_plot(img)) 83 | 84 | 85 | def floor(x): 86 | return int(math.floor(x)) 87 | def ceil(x): 88 | return int(math.ceil(x)) 89 | 90 | def plots(ims, figsize=(12,6), rows=1, interp=False, titles=None): 91 | if type(ims[0]) is np.ndarray: 92 | ims = np.array(ims).astype(np.uint8) 93 | if (ims.shape[-1] != 3): 94 | ims = ims.transpose((0,2,3,1)) 95 | f = plt.figure(figsize=figsize) 96 | for i in range(len(ims)): 97 | sp = f.add_subplot(rows, len(ims)//rows, i+1) 98 | sp.axis('Off') 99 | if titles is not None: 100 | sp.set_title(titles[i], fontsize=16) 101 | plt.imshow(ims[i], interpolation=None if interp else 'none') 102 | 103 | 104 | def do_clip(arr, mx): 105 | clipped = np.clip(arr, (1-mx)/1, mx) 106 | return clipped/clipped.sum(axis=1)[:, np.newaxis] 107 | 108 | 109 | def get_batches(dirname, gen=image.ImageDataGenerator(), shuffle=True, batch_size=4, class_mode='categorical', 110 | target_size=(224,224)): 111 | return gen.flow_from_directory(dirname, target_size=target_size, 112 | class_mode=class_mode, shuffle=shuffle, batch_size=batch_size) 113 | 114 | 115 | def onehot(x): 116 | return to_categorical(x) 117 | 118 | 119 | def wrap_config(layer): 120 | return {'class_name': layer.__class__.__name__, 'config': layer.get_config()} 121 | 122 | 123 | def copy_layer(layer): return deserialize(wrap_config(layer)) # Keras2 124 | 125 | 126 | def copy_layers(layers): return [copy_layer(layer) for layer in layers] 127 | 128 | 129 | def copy_weights(from_layers, to_layers): 130 | for from_layer,to_layer in zip(from_layers, to_layers): 131 | to_layer.set_weights(from_layer.get_weights()) 132 | 133 | 134 | def copy_model(m): 135 | res = Sequential(copy_layers(m.layers)) 136 | copy_weights(m.layers, res.layers) 137 | return res 138 | 139 | 140 | def insert_layer(model, new_layer, index): 141 | res = Sequential() 142 | for i,layer in enumerate(model.layers): 143 | if i==index: res.add(new_layer) 144 | copied = deserialize(wrap_config(layer)) # Keras2 145 | res.add(copied) 146 | copied.set_weights(layer.get_weights()) 147 | return res 148 | 149 | 150 | def adjust_dropout(weights, prev_p, new_p): 151 | scal = (1-prev_p)/(1-new_p) 152 | return [o*scal for o in weights] 153 | 154 | 155 | def get_data(path, target_size=(224,224)): 156 | batches = get_batches(path, shuffle=False, batch_size=1, class_mode=None, target_size=target_size) 157 | return np.concatenate([batches.next() for i in range(batches.samples)]) # Keras2 158 | 159 | 160 | def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues): 161 | """ 162 | This function prints and plots the confusion matrix. 163 | Normalization can be applied by setting `normalize=True`. 164 | (This function is copied from the scikit docs.) 165 | """ 166 | plt.figure() 167 | plt.imshow(cm, interpolation='nearest', cmap=cmap) 168 | plt.title(title) 169 | plt.colorbar() 170 | tick_marks = np.arange(len(classes)) 171 | plt.xticks(tick_marks, classes, rotation=45) 172 | plt.yticks(tick_marks, classes) 173 | 174 | if normalize: 175 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 176 | print(cm) 177 | thresh = cm.max() / 2. 
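    # (added note) thresh is used by the loop below to pick a readable annotation colour:
    # cells holding more than half the maximum count are labelled in white, the rest in black.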
178 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 179 | plt.text(j, i, cm[i, j], horizontalalignment="center", color="white" if cm[i, j] > thresh else "black") 180 | 181 | plt.tight_layout() 182 | plt.ylabel('True label') 183 | plt.xlabel('Predicted label') 184 | 185 | 186 | def save_array(fname, arr): 187 | c=bcolz.carray(arr, rootdir=fname, mode='w') 188 | c.flush() 189 | 190 | 191 | def load_array(fname): 192 | return bcolz.open(fname)[:] 193 | 194 | 195 | def mk_size(img, r2c): 196 | r,c,_ = img.shape 197 | curr_r2c = r/c 198 | new_r, new_c = r,c 199 | if r2c>curr_r2c: 200 | new_r = floor(c*r2c) 201 | else: 202 | new_c = floor(r/r2c) 203 | arr = np.zeros((new_r, new_c, 3), dtype=np.float32) 204 | r2=(new_r-r)//2 205 | c2=(new_c-c)//2 206 | arr[floor(r2):floor(r2)+r,floor(c2):floor(c2)+c] = img 207 | return arr 208 | 209 | 210 | def mk_square(img): 211 | x,y,_ = img.shape 212 | maxs = max(img.shape[:2]) 213 | y2=(maxs-y)//2 214 | x2=(maxs-x)//2 215 | arr = np.zeros((maxs,maxs,3), dtype=np.float32) 216 | arr[floor(x2):floor(x2)+x,floor(y2):floor(y2)+y] = img 217 | return arr 218 | 219 | 220 | def vgg_ft(out_dim): 221 | vgg = Vgg16() 222 | vgg.ft(out_dim) 223 | model = vgg.model 224 | return model 225 | 226 | def vgg_ft_bn(out_dim): 227 | vgg = Vgg16BN() 228 | vgg.ft(out_dim) 229 | model = vgg.model 230 | return model 231 | 232 | 233 | def get_classes(path): 234 | batches = get_batches(path+'train', shuffle=False, batch_size=1) 235 | val_batches = get_batches(path+'valid', shuffle=False, batch_size=1) 236 | test_batches = get_batches(path+'test', shuffle=False, batch_size=1) 237 | return (val_batches.classes, batches.classes, onehot(val_batches.classes), onehot(batches.classes), 238 | val_batches.filenames, batches.filenames, test_batches.filenames) 239 | 240 | 241 | def split_at(model, layer_type): 242 | layers = model.layers 243 | layer_idx = [index for index,layer in enumerate(layers) 244 | if type(layer) is layer_type][-1] 245 | return layers[:layer_idx+1], layers[layer_idx+1:] 246 | 247 | 248 | class MixIterator(object): 249 | def __init__(self, iters): 250 | self.iters = iters 251 | self.multi = type(iters) is list 252 | if self.multi: 253 | self.N = sum([it[0].N for it in self.iters]) 254 | else: 255 | self.N = sum([it.N for it in self.iters]) 256 | 257 | def reset(self): 258 | for it in self.iters: it.reset() 259 | 260 | def __iter__(self): 261 | return self 262 | 263 | def next(self, *args, **kwargs): 264 | if self.multi: 265 | nexts = [[next(it) for it in o] for o in self.iters] 266 | n0 = np.concatenate([n[0] for n in nexts]) 267 | n1 = np.concatenate([n[1] for n in nexts]) 268 | return (n0, n1) 269 | else: 270 | nexts = [next(it) for it in self.iters] 271 | n0 = np.concatenate([n[0] for n in nexts]) 272 | n1 = np.concatenate([n[1] for n in nexts]) 273 | return (n0, n1) 274 | 275 | -------------------------------------------------------------------------------- /Deep_Learning/fast.ai/vgg16.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | 3 | import os, json 4 | from glob import glob 5 | import numpy as np 6 | from scipy import misc, ndimage 7 | from scipy.ndimage.interpolation import zoom 8 | 9 | from keras import backend as K 10 | from keras.layers.normalization import BatchNormalization 11 | from keras.utils.data_utils import get_file 12 | from keras.models import Sequential 13 | from keras.layers.core import Flatten, Dense, Dropout, Lambda 14 | from 
keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D # Conv2D: Keras2 15 | from keras.layers.pooling import GlobalAveragePooling2D 16 | from keras.optimizers import SGD, RMSprop, Adam 17 | from keras.preprocessing import image 18 | 19 | K.set_image_dim_ordering('th') 20 | vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32).reshape((3,1,1)) 21 | def vgg_preprocess(x): 22 | x = x - vgg_mean 23 | return x[:, ::-1] # reverse axis rgb->bgr 24 | 25 | 26 | class Vgg16(): 27 | """The VGG 16 Imagenet model""" 28 | 29 | 30 | def __init__(self): 31 | self.FILE_PATH = 'http://files.fast.ai/models/' 32 | self.create() 33 | self.get_classes() 34 | 35 | 36 | def get_classes(self): 37 | fname = 'imagenet_class_index.json' 38 | fpath = get_file(fname, self.FILE_PATH+fname, cache_subdir='models') 39 | with open(fpath) as f: 40 | class_dict = json.load(f) 41 | self.classes = [class_dict[str(i)][1] for i in range(len(class_dict))] 42 | 43 | def predict(self, imgs, details=False): 44 | all_preds = self.model.predict(imgs) 45 | idxs = np.argmax(all_preds, axis=1) 46 | preds = [all_preds[i, idxs[i]] for i in range(len(idxs))] 47 | classes = [self.classes[idx] for idx in idxs] 48 | return np.array(preds), idxs, classes 49 | 50 | 51 | def ConvBlock(self, layers, filters): 52 | model = self.model 53 | for i in range(layers): 54 | model.add(ZeroPadding2D((1, 1))) 55 | model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu')) # Keras2 56 | model.add(MaxPooling2D((2, 2), strides=(2, 2))) 57 | 58 | 59 | def FCBlock(self): 60 | model = self.model 61 | model.add(Dense(4096, activation='relu')) 62 | model.add(Dropout(0.5)) 63 | 64 | 65 | def create(self): 66 | model = self.model = Sequential() 67 | model.add(Lambda(vgg_preprocess, input_shape=(3,224,224), output_shape=(3,224,224))) 68 | 69 | self.ConvBlock(2, 64) 70 | self.ConvBlock(2, 128) 71 | self.ConvBlock(3, 256) 72 | self.ConvBlock(3, 512) 73 | self.ConvBlock(3, 512) 74 | 75 | model.add(Flatten()) 76 | self.FCBlock() 77 | self.FCBlock() 78 | model.add(Dense(1000, activation='softmax')) 79 | 80 | fname = 'vgg16.h5' 81 | model.load_weights(get_file(fname, self.FILE_PATH+fname, cache_subdir='models')) 82 | 83 | 84 | def get_batches(self, path, gen=image.ImageDataGenerator(), shuffle=True, batch_size=8, class_mode='categorical'): 85 | return gen.flow_from_directory(path, target_size=(224,224), 86 | class_mode=class_mode, shuffle=shuffle, batch_size=batch_size) 87 | 88 | 89 | def ft(self, num): 90 | model = self.model 91 | model.pop() 92 | for layer in model.layers: layer.trainable=False 93 | model.add(Dense(num, activation='softmax')) 94 | self.compile() 95 | 96 | def finetune(self, batches): 97 | self.ft(batches.num_class) # Keras2 98 | classes = list(iter(batches.class_indices)) 99 | for c in batches.class_indices: 100 | classes[batches.class_indices[c]] = c 101 | self.classes = classes 102 | 103 | 104 | def compile(self, lr=0.001): 105 | self.model.compile(optimizer=Adam(lr=lr), 106 | loss='categorical_crossentropy', metrics=['accuracy']) 107 | 108 | 109 | # Keras2 110 | def fit_data(self, trn, labels, val, val_labels, nb_epoch=1, batch_size=64): 111 | self.model.fit(trn, labels, epochs=nb_epoch, 112 | validation_data=(val, val_labels), batch_size=batch_size) 113 | 114 | 115 | # Keras2 116 | def fit(self, batches, val_batches, batch_size, nb_epoch=1): 117 | self.model.fit_generator(batches, steps_per_epoch=int(np.ceil(batches.samples/batch_size)), epochs=nb_epoch, 118 | validation_data=val_batches, 
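                                 # (added note) steps_per_epoch and validation_steps are ceil(samples / batch_size):
                                 # Keras 2 counts generator batches per epoch, so this covers each set exactly once.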
validation_steps=int(np.ceil(val_batches.samples/batch_size))) 119 | 120 | 121 | # Keras2 122 | def test(self, path, batch_size=8): 123 | test_batches = self.get_batches(path, shuffle=False, batch_size=batch_size, class_mode=None) 124 | return test_batches, self.model.predict_generator(test_batches, int(np.ceil(test_batches.samples/batch_size))) 125 | -------------------------------------------------------------------------------- /Deep_Learning/fast.ai/vgg16bn.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | 3 | import os, json 4 | from glob import glob 5 | import numpy as np 6 | from scipy import misc, ndimage 7 | from scipy.ndimage.interpolation import zoom 8 | 9 | from keras import backend as K 10 | from keras.layers.normalization import BatchNormalization 11 | from keras.utils.data_utils import get_file 12 | from keras.models import Sequential 13 | from keras.layers.core import Flatten, Dense, Dropout, Lambda 14 | from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D # Conv2D: Keras2 15 | from keras.layers.pooling import GlobalAveragePooling2D 16 | from keras.optimizers import SGD, RMSprop, Adam 17 | from keras.preprocessing import image 18 | 19 | 20 | vgg_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32).reshape((3,1,1)) 21 | def vgg_preprocess(x): 22 | x = x - vgg_mean 23 | return x[:, ::-1] # reverse axis rgb->bgr 24 | 25 | 26 | class Vgg16BN(): 27 | """The VGG 16 Imagenet model with Batch Normalization for the Dense Layers""" 28 | 29 | 30 | def __init__(self, size=(224,224), include_top=True): 31 | self.FILE_PATH = 'http://files.fast.ai/models/' 32 | self.create(size, include_top) 33 | self.get_classes() 34 | 35 | 36 | def get_classes(self): 37 | fname = 'imagenet_class_index.json' 38 | fpath = get_file(fname, self.FILE_PATH+fname, cache_subdir='models') 39 | with open(fpath) as f: 40 | class_dict = json.load(f) 41 | self.classes = [class_dict[str(i)][1] for i in range(len(class_dict))] 42 | 43 | def predict(self, imgs, details=False): 44 | all_preds = self.model.predict(imgs) 45 | idxs = np.argmax(all_preds, axis=1) 46 | preds = [all_preds[i, idxs[i]] for i in range(len(idxs))] 47 | classes = [self.classes[idx] for idx in idxs] 48 | return np.array(preds), idxs, classes 49 | 50 | 51 | def ConvBlock(self, layers, filters): 52 | model = self.model 53 | for i in range(layers): 54 | model.add(ZeroPadding2D((1, 1))) 55 | model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu')) # Keras2 56 | model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) 57 | 58 | 59 | def FCBlock(self): 60 | model = self.model 61 | model.add(Dense(4096, activation='relu')) 62 | model.add(BatchNormalization()) 63 | model.add(Dropout(0.5)) 64 | 65 | 66 | def create(self, size, include_top): 67 | if size != (224,224): 68 | include_top=False 69 | 70 | model = self.model = Sequential() 71 | model.add(Lambda(vgg_preprocess, input_shape=(3,)+size, output_shape=(3,)+size)) 72 | 73 | self.ConvBlock(2, 64) 74 | self.ConvBlock(2, 128) 75 | self.ConvBlock(3, 256) 76 | self.ConvBlock(3, 512) 77 | self.ConvBlock(3, 512) 78 | 79 | if not include_top: 80 | fname = 'vgg16_bn_conv.h5' 81 | model.load_weights(get_file(fname, self.FILE_PATH+fname, cache_subdir='models')) 82 | return 83 | 84 | model.add(Flatten()) 85 | self.FCBlock() 86 | self.FCBlock() 87 | model.add(Dense(1000, activation='softmax')) 88 | 89 | fname = 'vgg16_bn.h5' 90 | model.load_weights(get_file(fname, self.FILE_PATH+fname, 
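                                       # (added note) get_file fetches vgg16_bn.h5 from files.fast.ai once and caches
                                       # it (typically under ~/.keras/models), so later runs reuse the local copy.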
cache_subdir='models')) 91 | 92 | 93 | def get_batches(self, path, gen=image.ImageDataGenerator(), shuffle=True, batch_size=8, class_mode='categorical'): 94 | return gen.flow_from_directory(path, target_size=(224,224), 95 | class_mode=class_mode, shuffle=shuffle, batch_size=batch_size) 96 | 97 | 98 | def ft(self, num): 99 | model = self.model 100 | model.pop() 101 | for layer in model.layers: layer.trainable=False 102 | model.add(Dense(num, activation='softmax')) 103 | self.compile() 104 | 105 | def finetune(self, batches): 106 | model = self.model 107 | model.pop() 108 | for layer in model.layers: layer.trainable=False 109 | model.add(Dense(batches.num_class, activation='softmax')) # Keras2 110 | self.compile() 111 | 112 | 113 | def compile(self, lr=0.001): 114 | self.model.compile(optimizer=Adam(lr=lr), 115 | loss='categorical_crossentropy', metrics=['accuracy']) 116 | 117 | 118 | # Keras2 119 | def fit_data(self, trn, labels, val, val_labels, nb_epoch=1, batch_size=64): 120 | self.model.fit(trn, labels, epochs=nb_epoch, 121 | validation_data=(val, val_labels), batch_size=batch_size) 122 | 123 | 124 | # Keras2 125 | def fit(self, batches, val_batches, batch_size, nb_epoch=1): 126 | self.model.fit_generator(batches, steps_per_epoch=int(np.ceil(batches.samples/batch_size)), epochs=nb_epoch, 127 | validation_data=val_batches, validation_steps=int(np.ceil(val_batches.samples/batch_size))) 128 | 129 | 130 | # Keras2 131 | def test(self, path, batch_size=8): 132 | test_batches = self.get_batches(path, shuffle=False, batch_size=batch_size, class_mode=None) 133 | return test_batches, self.model.predict_generator(test_batches, int(np.ceil(test_batches.samples/batch_size))) 134 | -------------------------------------------------------------------------------- /Deep_Learning/keras/1_intro.py: -------------------------------------------------------------------------------- 1 | """ Here we will be looking at the sequential model in keras. The Sequential model is a linear stack of layers""" 2 | 3 | from keras.models import Sequential 4 | from keras.layers import Dense, Activation 5 | import keras 6 | import numpy as np 7 | 8 | model = Sequential() 9 | # add new layers using .add 10 | 11 | # Dense implements operation : activation(dot(input,weights)+bias) 12 | model.add(Dense(32, input_dim=100, activation='relu')) # output array is of the shape(*,32) 13 | model.add(Dense(10, activation='softmax')) # output is of the shape (*,10), now we don't need to specify input anymore 14 | 15 | """The model needs to know what input shape it should expect. For this reason, the first layer in a Sequential model 16 | needs to receive information about its input shape. 1) pass input_shape to first layer: It should be a tuple: None 17 | indicates any positive integer may be expected. In input_shape, the batch dimension is not included. 18 | 19 | 2) Some 2D layers, such as Dense, support the specification of their input shape via the argument input_dim, 20 | 21 | 3) If you ever need to specify a fixed batch size for your inputs (this is useful for stateful recurrent networks), 22 | you can pass a batch_size argument to a layer. 
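(Illustrative aside, added: e.g. Dense(32, batch_size=32, input_shape=(6, 8)) pins the batch dimension.)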
If you pass both batch_size=32 and input_shape=(6, 8) to a layer, 23 | it will then expect every batch of inputs to have the batch shape (32, 6, 8) """ 24 | 25 | # Before training the model it needs to be compiled 26 | 27 | model.compile(optimizer='rmsprop', 28 | loss='categorical_crossentropy', 29 | metrics=['accuracy']) 30 | # Now we train the model 31 | # Keras models are trained on Numpy arrays of input data and labels 32 | 33 | data = np.random.random((1000, 100)) # 1000 rows and 100 cols 34 | labels = np.random.randint(10, size=(1000, 1)) # output can be of 10 classes so random number between 0 to 10 and 35 | # since 1000 inputs so 1000 outputs 36 | 37 | # now we need to convert the labels to one hot encoding 38 | one_hot_labels = keras.utils.to_categorical(labels, num_classes=10) 39 | 40 | # Train the model, iterating through the data in batch size of 32 41 | model.fit(data, one_hot_labels, epochs=10, batch_size=32) 42 | -------------------------------------------------------------------------------- /Deep_Learning/keras/Image_Augumentation.py: -------------------------------------------------------------------------------- 1 | """ Image Augumentation is the process of taking images of the training set and creating altered versions of the same 2 | image to deal with less training data being available and prevent overfitting. 3 | 4 | we will be using cifar10 dataset available with dataset""" 5 | 6 | # the first thing is to load the cifar10 dataset and format the images to prepare them for CNN. We will also take a look 7 | # at some images to see if it worked 8 | 9 | from __future__ import print_function 10 | import keras 11 | from keras.datasets import cifar10 12 | from keras import backend as K 13 | import matplotlib 14 | from matplotlib import pyplot as plt 15 | import numpy as np 16 | 17 | # input image dimensions 18 | img_row, img_cols = 32, 32 19 | 20 | # the data shuffled and split between train and test sets 21 | (X_train, y_train), (X_test, y_test) = cifar10.load_data() # y_train, y_test are uint8 labels from 0 to 9. 3 is for 22 | # cats and 5 for dogs 23 | 24 | # only look at cats[=3] and dogs[=5] 25 | train_picks = np.ravel(np.logical_or(y_train == 3, y_train == 5)) # np.ravel flattens an array 26 | test_picks = np.ravel(np.logical_or(y_train == 3, y_train == 5)) 27 | 28 | y_train = np.array(y_train[train_picks] ==5 , dtype= int) 29 | y_test = np.array(y_test[test_picks]== 5, dtype=int) -------------------------------------------------------------------------------- /Deep_Learning/keras/image_classifier/2_Image_classifier.py: -------------------------------------------------------------------------------- 1 | """dogs vs cats 2 | """ 3 | 4 | from keras.preprocessing.image import ImageDataGenerator # our images are augumented over random transformations so 5 | # that our model never sees the same pic twice. 
This prevents overfitting 6 | from keras.preprocessing.image import array_to_img, img_to_array, load_img 7 | from keras.models import Sequential, load_model 8 | from keras.layers import Conv2D, MaxPool2D, Dense, Activation, Dropout, Flatten 9 | from keras import backend as K 10 | from PIL import Image 11 | import numpy as np 12 | 13 | 14 | def check_data_augument(): 15 | datagen = ImageDataGenerator(rotation_range=40, width_shift_range=0.2, height_shift_range=0.2, 16 | shear_range=0.2, zoom_range=0.2, horizontal_flip=True, fill_mode='nearest') 17 | """rotation_range is a value in degrees (0-180), a range within which to randomly rotate pictures 18 | 19 | width_shift and height_shift are ranges (as a fraction of total width or height) within which to randomly translate 20 | pictures 21 | 22 | vertically or horizontally rescale is a value by which we will multiply the data before any other processing. Our 23 | original images consist in RGB coefficients in the 0-255, but such values would be too high for our models to process 24 | (given a typical learning rate), so we target values between 0 and 1 instead by scaling with a 1/255. factor. 25 | 26 | shear_range is for randomly applying shearing transformations 27 | 28 | zoom_range is for randomly zooming inside pictures 29 | 30 | 31 | horizontal_flip is for randomly flipping half of the images horizontally --relevant when there are no assumptions of 32 | 33 | horizontal assymetry (e.g. real-world pictures). fill_mode is the strategy used for filling in newly created pixels, 34 | which can appear after a rotation or a width/height shift. """ 35 | 36 | img = load_img('/home/raghav/Documents/kaggle/dogscats/train/cat.0.jpg') 37 | x = img_to_array(img) # this is a numpy array with shape (3, 150,150) 38 | x = x.reshape((1,) + x.shape) # reshapes to (1,2,150,150) 39 | 40 | # .flow generates batches of randomly transformed images and saves the result to preview/ directory 41 | i = 0 42 | for batch in datagen.flow(x, batch_size=1, save_to_dir='/home/raghav/Desktop', save_prefix='cat', 43 | save_format='jpeg'): 44 | i += 1 45 | if i > 20: 46 | break 47 | 48 | 49 | # for small data number one concern is overfitting. Overfitting happens when a model exposed to too few examples 50 | # learns patterns that do not generalize to new data, i.e. when the model starts using irrelevant features for making 51 | # prediction 52 | 53 | # data augumentation helps but the images generated are highly corelated. 54 | # another way is entropic capacity of the model ie how many features is model allowed to store. 55 | # methods to modulate entropic capacity: number of parameters eg no of layers, nodes 56 | # Another way is weight regularisation ie ensuring small weights. 57 | # dropout: prevents a layer from seeing the same pattern twice 58 | 59 | if K.image_data_format() == 'channels_first': 60 | input_shape = (3, 150, 150) 61 | else: 62 | input_shape = (150, 150, 3) 63 | 64 | model = Sequential() 65 | model.add(Conv2D(32, kernel_size=(3, 3), input_shape=input_shape)) # no of filters. Input is (batch size, channels, 66 | # rows, cols) 67 | # Output is 4d tensor (batch size, filter, new rows, new cols) 68 | model.add(Activation('relu')) 69 | model.add(MaxPool2D(pool_size=(2, 2))) # pool size: tuple of 2 integers to downscale. (2,2) halfs the row, 70 | # col. 
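# (added worked example, assuming the 150x150 inputs configured above) each valid 3x3
# Conv2D shrinks both spatial dims by 2 and each 2x2 MaxPool2D halves them, so the
# feature maps go 150 -> 148 -> 74 here, then 74 -> 72 -> 36 and 36 -> 34 -> 17 in the
# next two blocks below; the Flatten layer therefore sees 64 * 17 * 17 values.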
Output is 4d tensor (batch size, channels, rows, cols) 71 | 72 | model.add(Conv2D(32, kernel_size=(3, 3))) 73 | model.add(Activation('relu')) 74 | model.add(MaxPool2D(pool_size=(2, 2))) 75 | 76 | model.add(Conv2D(64, kernel_size=(3, 3))) 77 | model.add(Activation('relu')) 78 | model.add(MaxPool2D(pool_size=(2, 2))) 79 | 80 | # the model so far outputs 3D feature maps (height, width, features) 81 | 82 | model.add(Flatten()) # Earlier there were 64 filters each being a 2d matrix. flattens our 3d feature maps to 1d 83 | # feature maps. Now only 64*row*cols 1d inputs 84 | model.add(Dense(64)) # 64 outputs 85 | model.add(Activation('relu')) # f(x) = max(0,x), it can range from [0,inf] So used in hidden layers. 86 | model.add(Dropout(0.5)) 87 | model.add(Dense(1)) 88 | model.add( 89 | Activation('sigmoid')) # for 2 class classification, sigmoid is used. For multiclass, we use softmax. They are 90 | # applied only in the final layer as they give the probability of occurence of different classes 91 | 92 | # since binary classifier 93 | model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy']) 94 | 95 | # preparing our data 96 | batch_size = 16 97 | 98 | train_datagen = ImageDataGenerator(rescale=1 / 255, zoom_range=0.2, horizontal_flip=True, shear_range=0.2) 99 | test_datagen = ImageDataGenerator(rescale=1 / 255) 100 | 101 | # this is a generator that will read images from training portion 102 | train_generator = train_datagen.flow_from_directory(directory="/home/raghav/Desktop/data_image/train", 103 | target_size=(150, 150), batch_size=batch_size, class_mode='binary') 104 | # since we are using binary_crossentropy loss, we need binary labels 105 | 106 | # validation generator 107 | validation_generator = train_datagen.flow_from_directory(directory="/home/raghav/Desktop/data_image/validate", 108 | target_size=(150, 150), batch_size=batch_size, 109 | class_mode='binary') 110 | # all images are resized to (150,150) 111 | 112 | model.fit_generator(train_generator, epochs=50, validation_data=validation_generator, 113 | steps_per_epoch=2000 // batch_size, 114 | validation_steps=800 // batch_size) 115 | #model.save_weights('first_try.h5') 116 | """ 117 | model.load_weights('first_try.h5') 118 | img = load_img("/home/raghav/Desktop/data_image/test/cat/cat.3000.jpg", target_size=(150, 150)) 119 | x = img_to_array(img) 120 | x = np.expand_dims(x, axis=0) 121 | preds = model.predict_classes(x) 122 | prob = model.predict_proba(x) 123 | print(preds, prob) 124 | if preds: 125 | print("Dog") 126 | else: 127 | print("cat")""" -------------------------------------------------------------------------------- /Deep_Learning/keras/image_classifier/2_image_classifier_only_code.py: -------------------------------------------------------------------------------- 1 | """dogs vs cats 2 | """ 3 | 4 | from keras.preprocessing.image import ImageDataGenerator 5 | from keras.models import Sequential 6 | from keras.layers import Conv2D, MaxPool2D, Dense, Activation, Dropout, Flatten 7 | from keras import backend as K 8 | 9 | if K.image_data_format() == 'channels_first': 10 | input_shape = (3, 150, 150) 11 | else: 12 | input_shape = (150, 150, 3) # 3 because RGB 13 | 14 | model = Sequential() 15 | model.add(Conv2D(32, kernel_size=(3, 3), input_shape=input_shape)) 16 | model.add(Activation('relu')) 17 | model.add(MaxPool2D(pool_size=(2, 2))) 18 | 19 | model.add(Conv2D(32, kernel_size=(3, 3))) 20 | model.add(Activation('relu')) 21 | model.add(MaxPool2D(pool_size=(2, 2))) 22 | 23 | model.add(Conv2D(64, 
kernel_size=(3, 3))) 24 | model.add(Activation('relu')) 25 | model.add(MaxPool2D(pool_size=(2, 2))) 26 | 27 | model.add(Flatten()) 28 | model.add(Dense(64)) 29 | model.add(Activation('relu')) 30 | model.add(Dropout(0.5)) 31 | model.add(Dense(1)) 32 | model.add(Activation('sigmoid')) 33 | 34 | model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy']) 35 | 36 | batch_size = 16 37 | 38 | train_datagen = ImageDataGenerator(rescale=1 / 255, zoom_range=0.2, horizontal_flip=True, shear_range=0.2) 39 | test_datagen = ImageDataGenerator(rescale=1 / 255) 40 | 41 | train_generator = train_datagen.flow_from_directory(directory="/home/raghav/Desktop/data_image/train", 42 | target_size=(150, 150), batch_size=batch_size, class_mode='binary') 43 | 44 | validation_generator = train_datagen.flow_from_directory(directory="/home/raghav/Desktop/data_image/validate", 45 | target_size=(150, 150), batch_size=batch_size, 46 | class_mode='binary') 47 | 48 | model.fit_generator(train_generator, epochs=50, validation_data=validation_generator, 49 | steps_per_epoch=2000 // batch_size, 50 | validation_steps=800 // batch_size) 51 | model.save_weights('first_try.h5') 52 | -------------------------------------------------------------------------------- /Deep_Learning/keras/image_classifier/3_usingVG16.py: -------------------------------------------------------------------------------- 1 | """ 2 | Using the concept of transfer learning to improve accuracy. VGG16 is a CNN that has been trained on ImageNet data. 3 | We first load this model upto the first fully connected layer. 4 | """ 5 | import numpy as np 6 | from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img 7 | from keras.models import Sequential 8 | from keras.layers import Dense, Flatten, Dropout 9 | from keras import applications 10 | import gc 11 | 12 | img_width, img_ht = 150, 150 13 | top_model_wt_path = "bottleneck_fc_model.h5" 14 | train_dir = "/home/raghav/Desktop/data_image/train" 15 | validation_dir = "/home/raghav/Desktop/data_image/validate" 16 | test_dir = "/home/raghav/Desktop/data_image/test" 17 | no_train_samples = 2000 18 | no_validation_samples = 800 19 | epochs = 50 20 | batch_size = 16 21 | 22 | 23 | def save_bottleneck_features(): 24 | datagen = ImageDataGenerator(rescale=1 / 255) 25 | 26 | # build the vgg16 model 27 | model = applications.VGG16(include_top=False, weights='imagenet') 28 | 29 | generator = datagen.flow_from_directory(train_dir, target_size=(img_width, img_ht), shuffle=False, class_mode=None, 30 | batch_size=batch_size) # class_mode=None means our data will only yield 31 | # batches of data, no labels, shuffle=False means our data will be in order so first 1000 images will be cats and 32 | # next 1000 dogs 33 | 34 | # generates predication for a generator. Steps: total no of batches. 
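# (added note) here the "predictions" are the VGG16 conv-base outputs themselves: with
# 150x150 inputs each image is reduced to roughly a 4x4x512 feature block, and these
# cached arrays are what the small fully-connected model in train_top_model() fits on.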
Returns a numpy array of predictions 35 | bottleneck_features_train = model.predict_generator(generator=generator, steps=no_train_samples // batch_size) 36 | # saves an array to a binary file 37 | np.save(file="bottleneck_features_train.npy", arr=bottleneck_features_train) 38 | 39 | generator = datagen.flow_from_directory(validation_dir, target_size=(img_width, img_ht), batch_size=batch_size, 40 | class_mode=None, shuffle=False) 41 | bottleneck_features_validation = model.predict_generator(generator, no_validation_samples // batch_size) 42 | np.save(file="bottleneck_features_validate.npy", arr=bottleneck_features_validation) 43 | 44 | 45 | def train_top_model(): 46 | train_data = np.load(file="bottleneck_features_train.npy") 47 | train_labels = np.array([0] * (no_train_samples // 2) + [1] * (no_train_samples // 2)) 48 | 49 | validation_data = np.load(file="bottleneck_features_validate.npy") 50 | validation_labels = np.array([0] * (no_validation_samples // 2) + [1] * (no_validation_samples // 2)) 51 | 52 | model = Sequential() 53 | model.add(Flatten(input_shape=train_data.shape[1:])) # don't need to tell batch size in input shape 54 | model.add(Dense(256, activation='relu')) 55 | model.add(Dropout(0.5)) 56 | model.add(Dense(1, activation='sigmoid')) 57 | 58 | model.compile(optimizer='rmsprop', 59 | loss='binary_crossentropy', metrics=['accuracy']) 60 | 61 | # this gives training data accuracy("acc") and validation data accuracy ("val_acc"). If the "acc" keeps on improving 62 | # while the "val_acc" keeps on decreasing, then we are likely overfitting the model 63 | model.fit(train_data, train_labels, 64 | epochs=epochs, 65 | batch_size=batch_size, 66 | validation_data=(validation_data, validation_labels)) 67 | 68 | model.save_weights(top_model_wt_path) 69 | 70 | 71 | def predict_image_class(file): 72 | model = applications.VGG16(include_top=False, weights='imagenet') 73 | x = load_img(file, target_size=(img_width, img_ht)) 74 | x = img_to_array(x) 75 | x = np.expand_dims(x, axis=0) 76 | array = model.predict(x, verbose=0) # verbose = 0 means no logging 77 | model = Sequential() 78 | model.add(Flatten(input_shape=array.shape[1:])) 79 | model.add(Dense(256, activation='relu')) 80 | model.add(Dropout(0.5)) 81 | model.add(Dense(1, activation='sigmoid')) 82 | model.load_weights(top_model_wt_path) 83 | class_predicted = model.predict_classes(array, verbose=0) 84 | probability = model.predict(array, verbose=0)[0][0] 85 | if class_predicted == 1 and probability > 0.5: 86 | print("dogs") 87 | elif class_predicted == 0 and probability > 0.5: 88 | print("cat") 89 | else: 90 | print("None") 91 | 92 | 93 | """ 94 | save_bottleneck_features() 95 | train_top_model() 96 | """ 97 | 98 | # predict_image_class(test_dir + "/cat/cat.3120.jpg") 99 | predict_image_class("/home/raghav/Pictures/1.png") 100 | gc.collect() # resolves an error of session of tensorflow 101 | -------------------------------------------------------------------------------- /Deep_Learning/keras/image_classifier/3_usingVGG16_codeonly.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img 3 | from keras.models import Sequential 4 | from keras.layers import Dense, Flatten, Dropout 5 | from keras import applications 6 | import gc 7 | 8 | img_width, img_ht = 150, 150 9 | top_model_wt_path = "bottleneck_fc_model.h5" 10 | train_dir = "/home/raghav/Desktop/data_image/train" 11 | validation_dir = 
"/home/raghav/Desktop/data_image/validate" 12 | test_dir = "/home/raghav/Desktop/data_image/test" 13 | no_train_samples = 2000 14 | no_validation_samples = 800 15 | epochs = 50 16 | batch_size = 16 17 | 18 | 19 | def save_bottleneck_features(): 20 | datagen = ImageDataGenerator(rescale=1 / 255) 21 | model = applications.VGG16(include_top=False, weights='imagenet') 22 | 23 | generator = datagen.flow_from_directory(train_dir, target_size=(img_width, img_ht), shuffle=False, class_mode=None, 24 | batch_size=batch_size) 25 | bottleneck_features_train = model.predict_generator(generator=generator, steps=no_train_samples // batch_size) 26 | np.save(file="bottleneck_features_train.npy", arr=bottleneck_features_train) 27 | 28 | generator = datagen.flow_from_directory(validation_dir, target_size=(img_width, img_ht), batch_size=batch_size, 29 | class_mode=None, shuffle=False) 30 | bottleneck_features_validation = model.predict_generator(generator, no_validation_samples // batch_size) 31 | np.save(file="bottleneck_features_validate.npy", arr=bottleneck_features_validation) 32 | 33 | 34 | def train_top_model(): 35 | train_data = np.load(file="bottleneck_features_train.npy") 36 | train_labels = np.array([0] * (no_train_samples // 2) + [1] * (no_train_samples // 2)) 37 | 38 | validation_data = np.load(file="bottleneck_features_validate.npy") 39 | validation_labels = np.array([0] * (no_validation_samples // 2) + [1] * (no_validation_samples // 2)) 40 | 41 | model = Sequential() 42 | model.add(Flatten(input_shape=train_data.shape[1:])) 43 | model.add(Dense(256, activation='relu')) 44 | model.add(Dropout(0.5)) 45 | model.add(Dense(1, activation='sigmoid')) 46 | model.compile(optimizer='rmsprop', 47 | loss='binary_crossentropy', metrics=['accuracy']) 48 | model.fit(train_data, train_labels, 49 | epochs=epochs, 50 | batch_size=batch_size, 51 | validation_data=(validation_data, validation_labels)) 52 | 53 | model.save_weights(top_model_wt_path) 54 | 55 | 56 | def predict_image_class(file): 57 | model = applications.VGG16(include_top=False, weights='imagenet') 58 | x = load_img(file, target_size=(img_width, img_ht)) 59 | x = img_to_array(x) 60 | x = np.expand_dims(x, axis=0) 61 | array = model.predict(x, verbose=0) 62 | model = Sequential() 63 | model.add(Flatten(input_shape=array.shape[1:])) 64 | model.add(Dense(256, activation='relu')) 65 | model.add(Dropout(0.5)) 66 | model.add(Dense(1, activation='sigmoid')) 67 | model.load_weights(top_model_wt_path) 68 | class_predicted = model.predict_classes(array, verbose=0) 69 | probability = model.predict(array, verbose=0)[0][0] 70 | if class_predicted == 1 and probability > 0.5: 71 | print("dogs") 72 | elif class_predicted == 0 and probability > 0.5: 73 | print("cat") 74 | else: 75 | print("None") 76 | 77 | save_bottleneck_features() 78 | #train_top_model() 79 | #predict_image_class("/home/raghav/Pictures/1.png") 80 | #gc.collect() 81 | -------------------------------------------------------------------------------- /Deep_Learning/keras/rnn_keras/intro.py: -------------------------------------------------------------------------------- 1 | import keras 2 | from keras.preprocessing import sequence 3 | from keras.models import Sequential 4 | from keras.layers import Dense, Embedding 5 | from keras.layers import LSTM 6 | from keras.datasets import imdb 7 | 8 | -------------------------------------------------------------------------------- /Deep_Learning/keras/sentiment_analysis_movie/4.1_code_only.py: 
-------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | from keras.datasets import imdb 4 | from keras.models import Sequential 5 | from keras.layers import Dense, Flatten 6 | from keras.layers.embeddings import Embedding 7 | from keras.preprocessing import sequence 8 | 9 | # load the dataset 10 | top_words = 5000 11 | (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words) 12 | max_words_in_a_review = 500 13 | X_train = sequence.pad_sequences(X_train, maxlen=max_words_in_a_review) 14 | X_test = sequence.pad_sequences(X_test, maxlen=max_words_in_a_review) 15 | model = Sequential() 16 | model.add(Embedding(input_dim=top_words, output_dim=32, input_length=max_words_in_a_review)) 17 | model.add(Flatten()) 18 | model.add(Dense(250, activation='relu')) 19 | model.add(Dense(1, activation='sigmoid')) 20 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 21 | print(model.summary()) 22 | 23 | model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=2, batch_size=128, verbose=2) 24 | scores = model.evaluate(X_test, y_test, verbose=0) 25 | print("\n accuracy %s" % scores[1]*100) 26 | 27 | -------------------------------------------------------------------------------- /Deep_Learning/keras/sentiment_analysis_movie/4_movie_sentiment.py: -------------------------------------------------------------------------------- 1 | """ Using the IMDB dataset of movie reviews. The Large Movie Review Dataset (often referred to as the IMDB dataset) 2 | contains 25,000 highly polar moving reviews (good or bad) for training and the same amount again for testing. The 3 | problem is to determine whether a given moving review has a positive or negative sentiment. 4 | """ 5 | 6 | from keras.datasets import imdb # keras provides access to the imdb dataset built-in 7 | from keras.models import Sequential 8 | from keras.layers import Dense, Flatten 9 | from keras.layers.embeddings import Embedding 10 | from keras.preprocessing import sequence 11 | 12 | # load the dataset 13 | top_words = 5000 14 | (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words) # loading only the top 5000 words 15 | # imdb.load_data(): the words have been replaced by integers 16 | # which represent the absolute popularity of a word in the dataset. so that for instance the integer "3" encodes the 17 | # 3rd most frequent word in the data. This allows for quick filtering operations such as: "only consider the top 10, 18 | # 000 most common words, but eliminate the top 20 most common words". The sentences in each review thus comprises of 19 | # a sequence of integers 20 | 21 | max_words_in_a_review = 500 22 | 23 | # sequence.pad_sequences() creates a list where each review is of length = max_words_in_review. If length of actual 24 | # review greater than 500, it is truncated, else 0s are padded in the beginning 25 | X_train = sequence.pad_sequences(X_train, maxlen=max_words_in_a_review) 26 | X_test = sequence.pad_sequences(X_test, maxlen=max_words_in_a_review) 27 | 28 | # now we will create our model. We will first use Embedding layer setting the vocabulary to be 5000 , the output 29 | # vector size is 32 and input length is 500. The output is a 2d matrix of 500*32 size. 
Next we will Flatten this and 30 | # add a dense layer of 250 outputs and then another dense layer of 1 output unit 31 | 32 | # now we do word embeddings: This is a technique where words are encoded as real-valued vectors in a 33 | # high-dimensional space, where the similarity between words in terms of meaning translates to closeness in the 34 | # vector space 35 | 36 | # in keras we can turn positive integers into dense vectors of fixed size using embedding 37 | # keras.layers.embeddings.Embedding() 38 | 39 | # input_dim: int > 0. Size of the vocabulary, i.e. maximum integer index + 1. 40 | # output_dim: int >= 0. Dimension of the dense embedding. embeddings_initializer: Initializer for the embeddings 41 | # matrix (see initializers). 42 | # embeddings_regularizer: Regularizer function applied to the embeddings matrix (see 43 | # regularizer). 44 | # embeddings_constraint: Constraint function applied to the embeddings matrix (see constraints). 45 | # mask_zero: Whether or not the input value 0 is a special "padding" value that should be masked out. This is useful 46 | # when using recurrent layers which may take variable length input. If this is True then all subsequent 47 | # layers in the model need to support masking or an exception will be raised. If mask_zero is set to True, 48 | # as a consequence, index 0 cannot be used in the vocabulary (input_dim should equal size of vocabulary + 1). 49 | # input_length: Length of input sequences, when it is constant. This argument is required if you are going to connect 50 | # Flatten then Dense layers upstream (without it, the shape of the dense outputs cannot be computed). 51 | 52 | 53 | model = Sequential() 54 | model.add(Embedding(input_dim=top_words, output_dim=32, input_length=max_words_in_a_review)) 55 | model.add(Flatten()) 56 | model.add(Dense(250, activation='relu')) 57 | model.add(Dense(1, activation='sigmoid')) 58 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 59 | print(model.summary()) 60 | 61 | # fit the model 62 | model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=2, batch_size=128, verbose=2) # only 1 log line 63 | # per epoch 64 | scores = model.evaluate(X_test, y_test, verbose=0) 65 | print("\n accuracy %s" % scores[1]*100) 66 | 67 | -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/1_intro.py: -------------------------------------------------------------------------------- 1 | """ Developing tflearn model for cartpole Evaluations on openAI 2 | Here I am working with CartPol-v0 wherin I have to balance a pole on the cart 3 | Every frame it is balanced, 1 score is added 4 | 5 | """ 6 | 7 | import gym 8 | import random 9 | import numpy as np 10 | import tflearn 11 | from tflearn.layers.core import input_data, dropout, fully_connected 12 | from tflearn.layers.estimator import regression 13 | from statistics import mean, median 14 | from collections import Counter 15 | 16 | LR = 1e-3 17 | env = gym.make('CartPole-v0') # defines the environment to be CartPole environment 18 | env.reset() 19 | goal_steps = 500 20 | score_requirement = 60 21 | initial_games = 20000 22 | 23 | 24 | def some_random_games(): 25 | for episode in range(5): # creating 5 episodes to work on 26 | env.reset() 27 | for t in range(goal_steps): 28 | env.render() 29 | action = env.action_space.sample() # takes a random action in our environment 30 | observation, reward, done, info = env.step(action) # observation: an environment-specific object 31 | # 
representing your observation of the environment. reward : amount of reward achieved by the previous 32 | # action. done: whether it's time to reset the environment again. info:diagnostic information useful for 33 | # debugging. To get the actual actions, print(ev.action_space) 34 | if done: 35 | print("Episode finished after {} timesteps".format(t + 1)) 36 | break 37 | 38 | 39 | def initial_population(): 40 | training_data = [] # add those moves which gave score > score requirement 41 | scores = [] 42 | accepted_scores = [] 43 | for _ in range(initial_games): 44 | score = 0 45 | game_memory = [] # store the moves of every game in memory as we don't know if the score > 50 46 | prev_observation = [] 47 | for _ in range(goal_steps): 48 | action = random.randrange(0, 2) 49 | observation, reward, done, info = env.step(action) 50 | 51 | if len(prev_observation) > 0: 52 | game_memory.append([prev_observation, action]) 53 | 54 | prev_observation = observation 55 | score += reward # reward will be 0 or 1 for each frame 56 | if done: 57 | break 58 | 59 | if score >= score_requirement: 60 | accepted_scores.append(score) 61 | for data in game_memory: 62 | output = [] 63 | if data[1] == 1: 64 | output = [0, 1] 65 | elif data[1] == 0: 66 | output = [1, 0] 67 | 68 | training_data.append([data[0], output]) 69 | 70 | env.reset() 71 | scores.append(score) 72 | 73 | training_data_save = np.array(training_data) 74 | np.save('saved.npy', training_data_save) 75 | 76 | print("Average accepted score: ", mean(accepted_scores)) 77 | print("Median accepted score:", median(accepted_scores)) 78 | print(Counter(accepted_scores)) 79 | 80 | return training_data 81 | 82 | 83 | def neural_network_model(input_size): 84 | network = input_data(shape=[None, input_size, 1], name='Input') 85 | 86 | network = fully_connected(network, 128, activation='relu') # on which input, no of nodes, activation 87 | network = dropout(network, 0.8) # on which network , keep rate 88 | 89 | network = fully_connected(network, 256, activation='relu') 90 | network = dropout(network, 0.8) 91 | 92 | network = fully_connected(network, 512, activation='relu') 93 | network = dropout(network, 0.8) 94 | 95 | network = fully_connected(network, 256, activation='relu') 96 | network = dropout(network, 0.8) 97 | 98 | network = fully_connected(network, 128, activation='relu') 99 | network = dropout(network, 0.8) 100 | 101 | network = fully_connected(network, 2, activation='softmax') # no of output, activation function 102 | network = regression(network, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='Targets') 103 | 104 | model = tflearn.DNN(network, tensorboard_dir='log') 105 | 106 | return model 107 | 108 | 109 | def train_model(training_data, model=False): 110 | x = np.array([i[0] for i in training_data]).reshape(-1, len(training_data[0][0]), 1) 111 | y = [i[1] for i in training_data] 112 | 113 | if not model: 114 | model = neural_network_model(input_size=len(x[0])) 115 | 116 | model.fit(X_inputs=x, Y_targets=y, n_epoch=3, snapshot_epoch=1, run_id='openAIStuff', show_metric=True) 117 | 118 | return model 119 | 120 | 121 | training_data = initial_population() 122 | model = train_model(training_data) 123 | 124 | scores = [] 125 | choices = [] 126 | 127 | for each_game in range(10): 128 | score = 0 129 | game_memory = [] 130 | prev_obser = [] 131 | env.reset() 132 | for _ in range(goal_steps): 133 | env.render() 134 | if len(prev_obser) == 0: 135 | action = random.randrange(0, 2) 136 | else: 137 | action = 
np.argmax(model.predict(prev_obser.reshape(-1, len(prev_obser), 1))[0]) 138 | choices.append(action) 139 | 140 | new_observation, reward, done, info = env.step(action) 141 | prev_obser = new_observation 142 | game_memory.append([new_observation, action]) 143 | score += reward 144 | if done: 145 | break 146 | scores.append(score) 147 | print("Average scores", sum(scores)/len(scores)) 148 | print("Choice 1: {}, Choice 2: {}".format(choices.count(1)/len(choices), choices.count(0)/len(choices))) 149 | 150 | 151 | -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/2_train.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import random 3 | import numpy as np 4 | import tflearn 5 | from tflearn.layers.core import input_data, dropout, fully_connected 6 | from tflearn.layers.estimator import regression 7 | from statistics import median, mean 8 | from collections import Counter 9 | 10 | LR = 1e-3 11 | env = gym.make("CartPole-v0") 12 | env.reset() 13 | goal_steps = 500 14 | score_requirement = 70 15 | initial_games = 30000 16 | 17 | 18 | def some_random_games_first(): 19 | # Each of these is its own game. 20 | for episode in range(100): 21 | env.reset() 22 | # this is each frame, up to 200...but we wont make it that far. 23 | for t in range(200): 24 | # This will display the environment 25 | # Only display if you really want to see it. 26 | # Takes much longer to display it. 27 | env.render() 28 | 29 | # This will just create a sample action in any environment. 30 | # In this environment, the action can be 0 or 1, which is left or right 31 | action = env.action_space.sample() 32 | 33 | # this executes the environment with an action, 34 | # and returns the observation of the environment, 35 | # the reward, if the env is over, and other info. 36 | observation, reward, done, info = env.step(action) 37 | if done: 38 | break 39 | 40 | 41 | def initial_population(): 42 | # [OBS, MOVES] 43 | training_data = [] 44 | # all scores: 45 | scores = [] 46 | # just the scores that met our threshold: 47 | accepted_scores = [] 48 | # iterate through however many games we want: 49 | for _ in range(initial_games): 50 | score = 0 51 | # moves specifically from this environment: 52 | game_memory = [] 53 | # previous observation that we saw 54 | prev_observation = [] 55 | # for each frame in 200 56 | for _ in range(goal_steps): 57 | # choose random action (0 or 1) 58 | action = random.randrange(0, 2) 59 | # do it! 60 | observation, reward, done, info = env.step(action) 61 | 62 | # notice that the observation is returned FROM the action 63 | # so we'll store the previous observation here, pairing 64 | # the prev observation to the action we'll take. 65 | if len(prev_observation) > 0: 66 | game_memory.append([prev_observation, action]) 67 | prev_observation = observation 68 | score += reward 69 | if done: break 70 | 71 | # IF our score is higher than our threshold, we'd like to save 72 | # every move we made 73 | # NOTE the reinforcement methodology here. 74 | # all we're doing is reinforcing the score, we're not trying 75 | # to influence the machine in any way as to HOW that score is 76 | # reached. 
77 | if score >= score_requirement: 78 | accepted_scores.append(score) 79 | for data in game_memory: 80 | # convert to one-hot (this is the output layer for our neural network) 81 | if data[1] == 1: 82 | output = [0, 1] 83 | elif data[1] == 0: 84 | output = [1, 0] 85 | 86 | # saving our training data 87 | training_data.append([data[0], output]) 88 | 89 | # reset env to play again 90 | env.reset() 91 | # save overall scores 92 | scores.append(score) 93 | 94 | # just in case you wanted to reference later 95 | # training_data_save = np.array(training_data) 96 | # np.save('saved.npy', training_data_save) 97 | 98 | # some stats here, to further illustrate the neural network magic! 99 | print('Average accepted score:', mean(accepted_scores)) 100 | print('Median score for accepted scores:', median(accepted_scores)) 101 | print(Counter(accepted_scores)) 102 | 103 | return training_data 104 | 105 | 106 | def neural_network_model(input_size): 107 | network = input_data(shape=[None, input_size, 1], name='input') 108 | 109 | network = fully_connected(network, 128, activation='relu') 110 | network = dropout(network, 0.8) 111 | 112 | network = fully_connected(network, 256, activation='relu') 113 | network = dropout(network, 0.8) 114 | 115 | network = fully_connected(network, 512, activation='relu') 116 | network = dropout(network, 0.8) 117 | 118 | network = fully_connected(network, 256, activation='relu') 119 | network = dropout(network, 0.8) 120 | 121 | network = fully_connected(network, 128, activation='relu') 122 | network = dropout(network, 0.8) 123 | 124 | network = fully_connected(network, 2, activation='softmax') 125 | network = regression(network, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets') 126 | model = tflearn.DNN(network, tensorboard_dir='log') 127 | 128 | return model 129 | 130 | 131 | def train_model(training_data, model=False): 132 | X = np.array([i[0] for i in training_data]).reshape(-1, len(training_data[0][0]), 1) 133 | y = [i[1] for i in training_data] 134 | 135 | if not model: 136 | model = neural_network_model(input_size=len(X[0])) 137 | 138 | model.fit({'input': X}, {'targets': y}, n_epoch=5, snapshot_step=500, show_metric=True, run_id='openai_learning') 139 | return model 140 | 141 | training_data = initial_population() 142 | model = train_model(training_data) 143 | 144 | scores = [] 145 | choices = [] 146 | for each_game in range(10): 147 | score = 0 148 | game_memory = [] 149 | prev_obs = [] 150 | env.reset() 151 | for _ in range(goal_steps): 152 | env.render() 153 | 154 | if len(prev_obs) == 0: 155 | action = random.randrange(0, 2) 156 | else: 157 | action = np.argmax(model.predict(prev_obs.reshape(-1, len(prev_obs), 1))[0]) 158 | 159 | choices.append(action) 160 | 161 | new_observation, reward, done, info = env.step(action) 162 | prev_obs = new_observation 163 | game_memory.append([new_observation, action]) 164 | score += reward 165 | if done: 166 | break 167 | 168 | scores.append(score) 169 | 170 | print('Average Score:', sum(scores) / len(scores)) 171 | print('choice 1:{} choice 0:{}'.format(choices.count(1) / len(choices), choices.count(0) / len(choices))) 172 | print(score_requirement) -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497419827.raghav-PC: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497419827.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497420569.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497420569.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497420852.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497420852.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497420919.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497420919.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497441395.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497441395.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497441421.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497441421.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497441457.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openAIStuff/events.out.tfevents.1497441457.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421583.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421583.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421622.raghav-PC: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421622.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421683.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421683.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421744.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421744.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421940.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421940.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421973.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497421973.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422209.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422209.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422303.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422303.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422371.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422371.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422400.raghav-PC: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422400.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422427.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497422427.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497441498.raghav-PC: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/log/openai_learning/events.out.tfevents.1497441498.raghav-PC -------------------------------------------------------------------------------- /Deep_Learning/openAIGym/saved.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/Deep_Learning/openAIGym/saved.npy -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # machine-learning 2 | 3 | A repository which contains all of my snippets and projects related to Machine Learning. 4 | 5 | * **classical_ml** : Consists of all the basic machine learning algorithms. All of them have been first coded without using **sklearn** in order to understand how the algorithm actually works. Later, they have been coded using sklearn.
6 | **Libraries used** : numpy, sklearn and pandas 7 | 8 | * **deep_learning** : Consists of snippets of various deep learning libraries like Tensorflow and Keras. It also includes my projects in deep learning.
9 | **Frameworks used** : Tensorflow, Keras, Theano 10 | 11 | ## The various projects that I have done are:
12 | * ### **Image Classifier model :**
13 | 1) First built my own image classifier model using Tensorflow and Keras on a small dataset. Achieved 90% accuracy. Needed to use Image Augmentation and a heavy Dropout in order to achieve this.
14 | 2) Applied Transfer Learning on the VGG 16 model by training my model just on the final fully connected layer of VGG16 model. Accuracy > 95% 15 | 16 | * ### **Google Dinosaur using CNN and Reinforcement Learning:**
17 | 1) The model is still in the development phase (it has some bugs). The goal is a model that can play the Google Dinosaur game on its own. 18 | 19 | * ### **Sentiment Analysis of Movie Reviews:**
20 | 1) Given any movie review, the model is able to predict whether the review was "positive" or "negative".
21 | 2) Accuracy > 80% 22 | 23 | ## Sources 24 | 25 | * [Andrew Ng's Machine Learning course on Coursera](https://www.coursera.org/learn/machine-learning): The most basic course and the one almost everybody starts with; the "hello world" of machine learning. 26 | * [Stanford's CS231n](https://cs231n.github.io/): Introduction to Deep Learning and Convolutional Neural Networks
27 | * [Sentdex's playlist for Machine Learning with Python](https://www.youtube.com/watch?v=OGxgnH8y2NM&list=PLQVvvaa0QuDfKTOs3Keq_kaG2P55YRn5v) : An awesome channel in general for any Python-related stuff. This playlist especially focuses on how to use Python for Machine Learning. 28 | * [Jeremy Howard's fast.ai](http://www.fast.ai/) : An awesome MOOC which teaches the different frameworks in Python available for Deep Learning. 29 | * [Andrew Ng's new course on Deep Learning (paid)](https://www.coursera.org/specializations/deep-learning) : A newer course on deep learning offered by Andrew Ng on Coursera. It is paid, though financial aid is available as for any other Coursera course. 30 | 31 | ## Installation Tutorials (Just Google it): 32 | * [Tensorflow](https://www.tensorflow.org/install/) 33 | * [Keras](https://keras.io/#installation) 34 | * sklearn, numpy, pandas : Can be installed using pip 35 | 36 | **NOTE**: In order to train the various deep learning models, it is recommended that you have a GPU which supports the CUDA framework to speed things up. 37 | -------------------------------------------------------------------------------- /classical_ml/clustering/K means/1_Intro.py: -------------------------------------------------------------------------------- 1 | """ 2 | Supervised: We have told the machine what the classes were 3 | 4 | Clustering: 5 | 1) Flat 6 | 2) Hierarchical 7 | 8 | In both, the machine is just given the featureset. Then the machine itself searches for groups or clusters. 9 | 10 | With Flat Clustering, we tell the machine to find 2 clusters or 3 clusters. 11 | With Hierarchical Clustering, the machine figures out how many groups there are 12 | 13 | First algorithms we use: 14 | 1) K Means : K is the number of clusters we want -> does Flat Clustering 15 | 2) Mean Shift : Hierarchical Clustering 16 | 17 | K Means working: 18 | Choose K centroids randomly in the beginning; usually the first K points are taken. 19 | Calculate the distance of each featureset to the centroids and classify each accordingly. 20 | Then take all the featuresets of one cluster and take the mean of those. These are the new centroids. 21 | Repeat until the centroids are no longer moving.
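For intuition, one assignment/update iteration can be sketched in a few lines of numpy (an illustrative sketch only; X is an (n_samples, n_features) array, centroids holds the K current centres, and the sklearn KMeans used below does all of this internally):

    distances = np.linalg.norm(X[:, None, :] - centroids[None, :, :], axis=2)  # distance of every point to every centroid
    labels = distances.argmin(axis=1)                                          # assign each point to its nearest centroid
    centroids = np.array([X[labels == j].mean(axis=0) for j in range(K)])      # new centroid = mean of its cluster
    # repeat until the centroids stop moving (assumes no cluster ever becomes empty)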
22 | 23 | Downside of K means: It always tries to find same sized groups 24 | 25 | """ 26 | 27 | import matplotlib.pyplot as plt 28 | from matplotlib import style 29 | from sklearn.cluster import KMeans 30 | import numpy as np 31 | 32 | style.use('ggplot') 33 | 34 | X = np.array([[1, 2], 35 | [1.5, 1.8], 36 | [5, 8], 37 | [8, 8], 38 | [1, 0.6], 39 | [9, 11]] 40 | ) 41 | 42 | 43 | clf = KMeans(n_clusters=2) 44 | clf.fit(X) 45 | 46 | centroids = clf.cluster_centers_ 47 | labels = clf.labels_ 48 | 49 | colors = ["g.", "r.", "c.", "b.", "k.", "y."] 50 | 51 | for i in range(len(X)): 52 | plt.plot(X[i][0], X[i][1], colors[labels[i]], markersize=20) 53 | plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=150) 54 | plt.show() 55 | -------------------------------------------------------------------------------- /classical_ml/clustering/K means/2_handling_non_numeric_data.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from matplotlib import style 4 | from sklearn.cluster import KMeans 5 | from sklearn import preprocessing 6 | from sklearn.preprocessing import LabelEncoder 7 | import pandas as pd 8 | 9 | style.use('ggplot') 10 | 11 | """ 12 | Pclass Passenger Class (1 = 1st; 2 = 2nd; 3 = 3rd) 13 | survival Survival (0 = No; 1 = Yes) 14 | name Name 15 | sex Sex 16 | age Age 17 | sibsp Number of Siblings/Spouses Aboard 18 | parch Number of Parents/Children Aboard 19 | ticket Ticket Number 20 | fare Passenger Fare (British pound) 21 | cabin Cabin 22 | embarked Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton) 23 | boat Lifeboat 24 | body Body Identification Number 25 | home.dest Home/Destination 26 | """ 27 | 28 | df = pd.read_excel('titanic.xls') 29 | 30 | # here we find that some values are non numeric 31 | # eg sex column, we take the set of sex column and then assign then numbers 32 | 33 | df.drop(['body', 'name'], 1) 34 | df.apply(pd.to_numeric, errors='ignore') 35 | df.fillna(0, inplace=True) 36 | 37 | 38 | def handle_non_numerical_data(df): 39 | le = LabelEncoder() 40 | columns = list(df.columns.values) # to handle non numeric data types use LabelEncoder() 41 | 42 | for column in columns: 43 | l = [] 44 | if df[column].dtype != np.int64 and df[column].dtype != np.float64: 45 | for i in df[column]: 46 | l.append(i) 47 | le.fit(np.array(l)) 48 | x = le.transform(l) 49 | df[column] = x 50 | return df 51 | 52 | 53 | df = handle_non_numerical_data(df) 54 | 55 | # once the clusters are obtained,we could svm etc 56 | 57 | X = np.array(df.drop(['survived'], 1)).astype(float) 58 | X = preprocessing.scale(X) # important 59 | y = np.array(df['survived']) 60 | 61 | clf = KMeans(n_clusters=2) 62 | clf.fit(X) 63 | labels = clf.labels_ 64 | correct = 0 65 | for i in range(len(X)): 66 | predict_me = np.array( 67 | X[i].astype(float)) # the first centroid is 0. It might be that the survived is 1 and we get 0 because 68 | # that is the first centroid. 
So our accuracy would be 20% instead of 80% 69 | predict_me = predict_me.reshape(-1, len(predict_me)) 70 | prediction = clf.predict(predict_me) 71 | if prediction == y[i]: 72 | correct += 1 73 | 74 | print(correct / len(X)) 75 | -------------------------------------------------------------------------------- /classical_ml/clustering/K means/3_K_means_from_scratch.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from matplotlib import style 3 | import numpy as np 4 | 5 | style.use('ggplot') 6 | 7 | X = np.array([[1, 2], 8 | [1.5, 1.8], 9 | [5, 8], 10 | [8, 8], 11 | [1, 0.6], 12 | [9, 11]] 13 | ) 14 | 15 | colors = ["g", "r", "c", "b", "k", "y"] 16 | 17 | 18 | class K_Means: 19 | def __init__(self, k=2, tol=0.0001, max_iter=300): 20 | self.classifications = {} 21 | self.centroids = {} 22 | self.k = k 23 | self.tol = tol 24 | self.max_iter = max_iter 25 | 26 | def fit(self, data): 27 | 28 | for i in range(self.k): 29 | self.centroids[i] = data[i] 30 | 31 | for i in range(self.max_iter): 32 | 33 | for j in range(self.k): 34 | self.classifications[j] = [] 35 | 36 | for featureset in data: 37 | distances = [np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids] 38 | classification = distances.index(min(distances)) 39 | self.classifications[classification].append(featureset) 40 | 41 | prev_centroids = dict(self.centroids) 42 | 43 | for classification in self.classifications: 44 | self.centroids[classification] = np.average(self.classifications[classification], axis=0) 45 | 46 | optimized = True 47 | 48 | for c in self.centroids: 49 | original_centroid = prev_centroids[c] 50 | current_centroid = self.centroids[c] 51 | if float(np.sum((current_centroid - original_centroid) / original_centroid * 100.0)) > self.tol: 52 | print(np.sum((current_centroid - original_centroid) / original_centroid * 100.0)) 53 | optimized = False 54 | 55 | if optimized: 56 | break 57 | 58 | def predict(self, data): 59 | distances = [np.linalg.norm(data - self.centroids[centroid]) for centroid in self.centroids] 60 | classification = distances.index(min(distances)) 61 | return classification 62 | 63 | 64 | clf = K_Means() 65 | clf.fit(X) 66 | 67 | for centroid in clf.centroids: 68 | plt.scatter(clf.centroids[centroid][0], clf.centroids[centroid][1], marker="o", color='k', s=150, linewidths=5) 69 | 70 | for classification in clf.classifications: 71 | color = colors[classification] 72 | for featureset in clf.classifications[classification]: 73 | plt.scatter(featureset[0], featureset[1], marker='x', color=color, s=150) 74 | 75 | unknowns = np.array([[1, 2], 76 | [5, 1], 77 | [8, 1], 78 | [1, 7], 79 | [0, 0]]) 80 | 81 | for unknown in unknowns: 82 | classification = clf.predict(unknown) 83 | plt.scatter(unknown[0], unknown[1], color=colors[classification], s=150, marker='*') 84 | plt.show() 85 | -------------------------------------------------------------------------------- /classical_ml/clustering/K means/titanic.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/classical_ml/clustering/K means/titanic.xls -------------------------------------------------------------------------------- /classical_ml/clustering/Mean Shift/1_intro.py: -------------------------------------------------------------------------------- 1 | """ 2 | It is hirirechal clustering. 
Automatically figures out the number of clusters needed and where those clusters are. 3 | 4 | Here we say every featureset is a cluster center. 5 | 6 | It has something called Radius/Bandwidth. Every data point has a circle of radius or a bandwidth around it. 7 | Then we take the mean of data points in a bandwidth.Then this would have a new bandwidth. Repeat till when centroid does 8 | not move. 9 | 10 | Repeat for other data points. 2 Centroids from different bandwidths could coincide. 11 | 12 | """ 13 | import numpy as np 14 | from sklearn.datasets import make_blobs 15 | import matplotlib.pyplot as plt 16 | from matplotlib import style 17 | from sklearn.cluster import MeanShift 18 | from mpl_toolkits.mplot3d import Axes3D 19 | 20 | style.use('ggplot') 21 | 22 | centres = [[1, 1, 1], [5, 5, 5], [3, 10, 10]] 23 | X, _ = make_blobs(n_samples=100, centers=centres, cluster_std=1) 24 | 25 | ms = MeanShift() 26 | ms.fit(X) 27 | labels = ms.labels_ 28 | cluster_centers = ms.cluster_centers_ 29 | 30 | colors = ['r', 'g', 'b', 'c', 'k', 'y'] 31 | fig = plt.figure() 32 | ax = fig.add_subplot(111, projection='3d') 33 | 34 | for i in range(len(X)): 35 | ax.scatter(X[i][0], X[i][1], X[i][2], c=colors[labels[i]], marker='o') 36 | 37 | ax.scatter(cluster_centers[:, 0], cluster_centers[:, 1], cluster_centers[:, 2], marker='x', color='k', s=150, 38 | linewidths=5) 39 | plt.show() 40 | -------------------------------------------------------------------------------- /classical_ml/clustering/Mean Shift/2_Applying_on_titanic_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.cluster import MeanShift, KMeans 3 | from sklearn import preprocessing 4 | from sklearn.preprocessing import LabelEncoder 5 | import pandas as pd 6 | import matplotlib.pyplot as plt 7 | 8 | ''' 9 | Pclass Passenger Class (1 = 1st; 2 = 2nd; 3 = 3rd) 10 | survival Survival (0 = No; 1 = Yes) 11 | name Name 12 | sex Sex 13 | age Age 14 | sibsp Number of Siblings/Spouses Aboard 15 | parch Number of Parents/Children Aboard 16 | ticket Ticket Number 17 | fare Passenger Fare (British pound) 18 | cabin Cabin 19 | embarked Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton) 20 | boat Lifeboat 21 | body Body Identification Number 22 | home.dest Home/Destination 23 | ''' 24 | pd.options.mode.chained_assignment = None # default='warn' 25 | # https://pythonprogramming.net/static/downloads/machine-learning-data/titanic.xls 26 | df = pd.read_excel('titanic.xls') 27 | 28 | original_df = pd.DataFrame.copy(df) 29 | df.drop(['body', 'name'], 1, inplace=True) 30 | df.fillna(0, inplace=True) 31 | 32 | 33 | def handle_non_numerical_data(df): 34 | le = LabelEncoder() 35 | columns = list(df.columns.values) # to handle non numeric data types use LabelEncoder() 36 | 37 | for column in columns: 38 | l = [] 39 | if df[column].dtype != np.int64 and df[column].dtype != np.float64: 40 | for i in df[column]: 41 | l.append(i) 42 | le.fit(np.array(l)) 43 | x = le.transform(l) 44 | df[column] = x 45 | return df 46 | 47 | 48 | df = handle_non_numerical_data(df) 49 | df.drop(['ticket', 'home.dest'], 1, inplace=True) 50 | 51 | X = np.array(df.drop(['survived'], 1).astype(float)) 52 | X = preprocessing.scale(X) 53 | y = np.array(df['survived']) 54 | 55 | clf = MeanShift() 56 | clf.fit(X) 57 | labels = clf.labels_ 58 | cluster_centers = clf.cluster_centers_ 59 | n_clusters_ = len(np.unique(labels)) 60 | original_df['cluster_group'] = np.nan 61 | for i in range(len(X)): 62 | 
original_df['cluster_group'].iloc[i] = labels[i] 63 | 64 | survival_rates = {} 65 | 66 | for i in range(n_clusters_): 67 | temp_df = original_df[(original_df['cluster_group'] == float(i))] 68 | survival_cluster = temp_df[(temp_df['survived'] == 1)] 69 | 70 | survival_rate = len(survival_cluster) / len(temp_df) 71 | survival_rates[i] = survival_rate 72 | 73 | print(survival_rates) 74 | print(original_df[(original_df['cluster_group'] == 2)]) 75 | -------------------------------------------------------------------------------- /classical_ml/clustering/Mean Shift/3_from_scratch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Chose the radius dynamically. 3 | 4 | """ 5 | import matplotlib.pyplot as plt 6 | from matplotlib import style 7 | import numpy as np 8 | from sklearn.datasets import make_blobs 9 | 10 | style.use('ggplot') 11 | 12 | X = np.array([[1, 2], 13 | [1.5, 1.8], 14 | [5, 8], 15 | [8, 8], 16 | [1, 0.6], 17 | [9, 11], 18 | [8, 2], 19 | [10, 2], 20 | [9, 3]] 21 | ) 22 | 23 | colors = 10 * ["g", "r", "c", "b", "k", "y"] 24 | 25 | 26 | # plt.scatter(X[:, 0], X[:, 1]) 27 | # plt.show() 28 | 29 | 30 | class MeanShift: 31 | def __init__(self, radius=None, radius_norm_step=100): 32 | self.classifciations = {} 33 | self.radius = radius 34 | self.radius_norm_step = radius_norm_step 35 | self.centroids = {} 36 | 37 | def fit(self, data): 38 | self.data = data 39 | if self.radius is None: 40 | all_data_centroid = np.average(data, axis=0) 41 | all_data_norm = np.linalg.norm(all_data_centroid) 42 | self.radius = all_data_norm / self.radius_norm_step 43 | 44 | centroids = {} 45 | 46 | for i in range(len(data)): 47 | centroids[i] = data[i] 48 | 49 | weights = [i for i in range(self.radius_norm_step)][::-1] 50 | while True: 51 | new_centroids = [] 52 | for i in centroids: 53 | in_bandwidth = [] 54 | centroid = centroids[i] 55 | for featureset in data: 56 | distance = np.linalg.norm(featureset - centroid) 57 | if distance == 0: 58 | distance = 0.0000001 59 | weight_index = int(distance / self.radius) 60 | if weight_index > self.radius_norm_step - 1: 61 | weight_index = self.radius_norm_step - 1 62 | 63 | to_add = (weights[weight_index] ** 2) * [featureset] 64 | in_bandwidth += to_add 65 | 66 | new_centroid = np.average(in_bandwidth, axis=0) 67 | new_centroids.append(tuple(new_centroid)) 68 | 69 | uniques = sorted(list(set(new_centroids))) 70 | to_pop = [] 71 | for i in uniques: # Since we have created many steps, if 2 centroids are very close to each other, 72 | # we remove one 73 | for ii in uniques: 74 | if i == ii: 75 | pass 76 | elif np.linalg.norm(np.array(i) - np.array(ii)) <= self.radius: 77 | to_pop.append(ii) 78 | break 79 | for i in to_pop: 80 | try: 81 | uniques.remove(i) 82 | except: 83 | pass 84 | 85 | prev_centroids = dict(centroids) # copying the centroids dict 86 | 87 | centroids = {} 88 | for i in range(len(uniques)): 89 | centroids[i] = np.array(uniques[i]) 90 | 91 | optimised = True 92 | for i in centroids: 93 | if not np.array_equal(centroids[i], prev_centroids[i]): 94 | optimised = False 95 | if not optimised: 96 | break 97 | if optimised: 98 | break 99 | 100 | self.centroids = centroids 101 | for i in range(len(self.centroids)): 102 | self.classifciations[i] = [] 103 | 104 | for featureset in self.data: 105 | distances = [np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids] 106 | classification = distances.index(min(distances)) 107 | self.classifciations[classification].append(featureset) 108 | 
109 | def predict(self): 110 | distances = [np.linalg.norm(featureset - self.centroids[centroid]) for centroid in self.centroids] 111 | classification = distances.index(min(distances)) 112 | return classification 113 | 114 | 115 | clf = MeanShift() 116 | clf.fit(X) 117 | 118 | centroids = clf.centroids 119 | 120 | for classification in clf.classifciations: 121 | color = colors[classification] 122 | for featureset in clf.classifciations[classification]: 123 | plt.scatter(featureset[0], featureset[1], marker='x', c=color, s=150, linewidths=5) 124 | 125 | for c in centroids: 126 | plt.scatter(centroids[c][0], centroids[c][1], c='k', marker='*', s=150, ) 127 | 128 | plt.show() 129 | -------------------------------------------------------------------------------- /classical_ml/clustering/Mean Shift/titanic.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/classical_ml/clustering/Mean Shift/titanic.xls -------------------------------------------------------------------------------- /classical_ml/k nearerst neighbours/1_k_nearest_neighbours_intro.py: -------------------------------------------------------------------------------- 1 | """ 2 | classification algo. Divides the data into groups 3 | Given pluses and minuses in a graph, and an unknown point would it belong to pluses or minuses 4 | 5 | Linear regression aim was to create a model that best fits the data 6 | 7 | We would classify depending upon how close it lies to one group. This is essentialy the nearest neighbours. 8 | 9 | With k nearest neighbours , eg k =2. We check only the 2 nearest neigbours and then decide which group it lies based 10 | on that. If both the neighbours in same group well enough. Else if split vote, So we take odd number of ks. For 11 | three groups min value of k is 5. 12 | 13 | If 2 groups and k =3 and we get 2 votes for 1 group, then confidence =2/3 14 | 15 | Downfalls: we need to calculate distance from all. So on huge datasets it would be a problem. 16 | """ 17 | 18 | import numpy as np 19 | import pandas as pd 20 | from sklearn import preprocessing, neighbors 21 | from sklearn.model_selection import train_test_split 22 | 23 | df = pd.read_csv("breast-cancer-wisconsin.data.txt") 24 | 25 | df.replace('?', -9999, inplace=True) # replacing missing values. Most algo recognise -9999 as outlier 26 | df.drop(['id'], 1, inplace=True) 27 | # print(df.head()) 28 | 29 | X = np.array(df.drop(['class'], 1)) # here 1 means we want to drop the columns 30 | y = np.array(df['class']) 31 | 32 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 33 | 34 | clf = neighbors.KNeighborsClassifier(n_jobs=-1) # default k value is 5. By setting n_jobs = -1, we are threading 35 | # the classifier. This allows us to run the classifier on multiple testst simultaneously 36 | clf.fit(X_train, y_train) 37 | 38 | accuracy = clf.score(X_test, y_test) 39 | print(accuracy) 40 | 41 | example_measures = np.array([4, 2, 1, 1, 1, 2, 3, 2, 1]) # this returns 9 rows 42 | example_measures = example_measures.reshape(1, -1) # -1 means unspecified number of cols 43 | prediction = clf.predict(example_measures) 44 | print(prediction) 45 | 46 | """ 47 | Instead of using k nearest neighbors, we could get the best fit line for each of the groups using linear regression 48 | and then find the distance of our point from the lines thus obtained. 
The group corresponding to the line having the 49 | least distance will be the answer. 50 | 51 | However if the data is non linear, then best fit line won't work. But k nearest neighbors will work 52 | """ -------------------------------------------------------------------------------- /classical_ml/k nearerst neighbours/2_k_nearest_neighbors_from_scratch.py: -------------------------------------------------------------------------------- 1 | """ 2 | Eucledian Distance: (sum i=1 to n where n is number of dimensions ( Qi -Pi )^2 )^(1/2) 3 | Compare for 2-d points, put n=2 4 | """ 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from matplotlib import style 8 | from collections import Counter 9 | import warnings 10 | 11 | style.use('fivethirtyeight') 12 | 13 | dataset = {'k': [[1, 2], [2, 3], [3, 1]], 'r': [[6, 5], [7, 7], [8, 6]]} # 2 classes k and r 14 | new_features = [5, 7] 15 | 16 | [[plt.scatter(j[0], j[1], s=100, color=i) for j in dataset[i]] for i in dataset] 17 | 18 | plt.show() 19 | 20 | 21 | def k_nearest_neighbors(data, predict, k=3): 22 | if len(data) >= k: 23 | warnings.warn('K is set to value less than total groups') 24 | distances = [] 25 | for group in data: 26 | for features in data[group]: 27 | # eucledian_distance = np.sqrt(np.sum(((np.array(features)-np.array(predict))**2)) 28 | eucledian_distance = np.linalg.norm( 29 | np.array(features) - np.array(predict)) # calculates the eucledian distance 30 | distances.append([eucledian_distance, group]) 31 | 32 | votes = [i[1] for i in sorted(distances)[:k]] 33 | vote_result = Counter(votes).most_common(1)[0][0] 34 | return vote_result 35 | 36 | 37 | result = k_nearest_neighbors(dataset, new_features, k=3) 38 | print(result) 39 | -------------------------------------------------------------------------------- /classical_ml/k nearerst neighbours/3_applying_our_algo_on_practical_eg.py: -------------------------------------------------------------------------------- 1 | # applying the k nearest algo we built on the breast cancer classification problem 2 | import numpy as np 3 | import pandas as pd 4 | import random 5 | from collections import Counter 6 | import warnings 7 | 8 | 9 | def k_nearest_neighbors(data, predict, k=3): 10 | if len(data) >= k: 11 | warnings.warn('K is set to value less than total groups') 12 | distances = [] 13 | for group in data: 14 | for features in data[group]: 15 | eucledian_distance = np.linalg.norm( 16 | np.array(features) - np.array(predict)) 17 | distances.append([eucledian_distance, group]) 18 | 19 | votes = [i[1] for i in sorted(distances)[:k]] 20 | vote_result = Counter(votes).most_common(1)[0][0] 21 | confidence = Counter(votes).most_common(1)[0][1] / k 22 | return vote_result, confidence 23 | 24 | df = pd.read_csv("breast-cancer-wisconsin.data.txt") 25 | df.replace('?', -9999, inplace=True) 26 | df.drop(['id'], axis=1, inplace=True) 27 | 28 | full_data = df.astype(float).values.tolist() # converting the data to float and then getting a list 29 | 30 | random.shuffle(full_data) # shuffles the list 31 | 32 | test_size = 0.2 33 | train_set = {2: [], 4: []} 34 | test_set = {2: [], 4: []} 35 | train_data = full_data[:-int(test_size * len(full_data))] 36 | test_data = full_data[-int(test_size * len(full_data)):] 37 | 38 | for i in train_data: 39 | train_set[i[-1]].append(i[:-1]) 40 | 41 | for i in test_data: 42 | test_set[i[-1]].append(i[:-1]) 43 | 44 | correct = 0 45 | total = 0 46 | 47 | for group in test_set: 48 | for data in test_set[group]: 49 | vote, confidence = 
k_nearest_neighbors(train_set, data, k=5) 50 | if group == vote: 51 | correct += 1 52 | total += 1 53 | 54 | print("Accuracy", correct / total) 55 | 56 | -------------------------------------------------------------------------------- /classical_ml/k nearerst neighbours/README.md: -------------------------------------------------------------------------------- 1 | # K nearest neighbors 2 | 3 | Here I am first applying the algorithm on the dataset available [here](https://archive.ics.uci.edu/ml/datasets.html) to classify whether 4 | the cells in a patient are benign or malignant with respect to breast cancer 5 | 6 | Later, I developed the k nearest neighbors algorithm from scratch and then tested it on the same dataset. 7 | 8 | The accuracy using the inbuilt libraries and my own algorithm was approximately the same 9 | -------------------------------------------------------------------------------- /classical_ml/linear regression/1_linear_regression_intro.py: -------------------------------------------------------------------------------- 1 | # applying linear regression to training data and checking its accuracy against test data using inbuilt libraries 2 | 3 | import pandas as pd 4 | import quandl 5 | import math 6 | import numpy as np 7 | from sklearn import preprocessing, svm 8 | from sklearn.model_selection import train_test_split 9 | from sklearn.linear_model import LinearRegression 10 | 11 | # using preprocessing for scaling. We want the features between -1 and 1. Helps to increase accuracy and processing speed 12 | # train-test-split is used to get the training and testing samples. 13 | # svm is used to do regression 14 | 15 | quandl.ApiConfig.api_key = 'vhkrsKz3TmUN6Qa4QjZK' 16 | df = quandl.get('WIKI/GOOGL') 17 | 18 | # print(df.head()) # the open, high, low are features of the stock. But we need meaningful features 19 | df = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']] 20 | df['HL_percent'] = (df['Adj. High'] - df['Adj. Low']) / df['Adj. Low'] * 100.0 21 | df['percent_change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0 22 | df = df[['Adj. Close', 'HL_percent', 'percent_change', 'Adj. Volume']] 23 | # print(df.head()) 24 | 25 | # features are used to predict the label. 26 | 27 | forecast_col = 'Adj. Close' 28 | df.fillna('-9999', inplace=True) # fills the nan data with -9999 29 | 30 | forecast_out = int(math.ceil(0.01 * len(df))) # we will be predicting the final 1% of the data 31 | 32 | # let forecast_out be = 10 days 33 | df['label'] = df[forecast_col].shift(-forecast_out) # our label here is the stock price 10 days into the future. So 34 | # based on historical data we want to predict the stock close 10 days into the future. For that, we create a new 35 | # column for the label and shift the Adj. Close 10 days up 36 | 37 | df.dropna(inplace=True) # removing the rows whose label value we don't know, we will predict this by regression 38 | # print(df.tail()) 39 | 40 | X = np.array(df.drop(['label'], 1)) # X is an array of features.
Everything other than 'Label' is a feature 41 | X = preprocessing.scale(X) 42 | 43 | Y = np.array(df['label']) 44 | 45 | # print(len(X), len(Y)) 46 | 47 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2) # shuffles X and Y and outputs X_train, 48 | # Y_train 49 | 50 | clf = LinearRegression() 51 | clf.fit(X_train, Y_train) 52 | accuracy = clf.score(X_test, Y_test) # testing the accuracy of the classifier 53 | # train and test always on different data 54 | print(accuracy) # the accuracy will be the squared error done in numerical analysis 55 | 56 | 57 | -------------------------------------------------------------------------------- /classical_ml/linear regression/2predicting_using_regression.py: -------------------------------------------------------------------------------- 1 | # actually predicting data using linear regression 2 | 3 | import pandas as pd 4 | import quandl 5 | import math 6 | import numpy as np 7 | from sklearn import preprocessing, svm 8 | from sklearn.model_selection import train_test_split 9 | from sklearn.linear_model import LinearRegression 10 | import matplotlib.pyplot as plt 11 | from matplotlib import style 12 | import datetime 13 | 14 | style.use('ggplot') 15 | 16 | quandl.ApiConfig.api_key = 'vhkrsKz3TmUN6Qa4QjZK' 17 | df = quandl.get('WIKI/GOOGL') 18 | 19 | df = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']] 20 | df['HL_percent'] = (df['Adj. High'] - df['Adj. Low']) / df['Adj. Low'] * 100.0 21 | df['percent_change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0 22 | df = df[['Adj. Close', 'HL_percent', 'percent_change', 'Adj. Volume']] 23 | 24 | forecast_col = 'Adj. Close' 25 | df.fillna('-9999', inplace=True) 26 | 27 | forecast_out = int(math.ceil(0.01 * len(df))) 28 | 29 | df['label'] = df[forecast_col].shift(-forecast_out) 30 | X = np.array(df.drop(['label'], 1)) # X is an array of features. Everything other than 'Label' is a feature 31 | X = preprocessing.scale(X) 32 | X_lately = X[-forecast_out:] 33 | X = X[:-forecast_out] 34 | 35 | df.dropna(inplace=True) 36 | Y = np.array(df['label']) 37 | 38 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2) 39 | clf = LinearRegression(n_jobs=-1) 40 | clf.fit(X_train, Y_train) 41 | accuracy = clf.score(X_test, Y_test) 42 | 43 | forecast_set = clf.predict(X_lately) # predicts the value for an array or a single value using our classifier 44 | print(forecast_set, accuracy, forecast_out) 45 | 46 | # plotting our predictions 47 | 48 | df['Forecast'] = np.nan 49 | last_date = df.iloc[-1].name 50 | last_unix = last_date.timestamp() 51 | one_day = 86400 52 | next_unix = last_unix + one_day 53 | 54 | for i in forecast_set: 55 | next_date = datetime.datetime.fromtimestamp(next_unix) 56 | next_unix += one_day 57 | df.loc[next_date] = [np.nan for _ in range(len(df.columns) - 1)] + [i] # .loc referes the index by name, iloc by 58 | # number 59 | 60 | df['Adj. 
Close'].plot() 61 | df['Forecast'].plot() 62 | plt.legend(loc=4) 63 | plt.xlabel('Date') 64 | plt.ylabel('Price') 65 | plt.show() 66 | -------------------------------------------------------------------------------- /classical_ml/linear regression/3_pickling_classifier.py: -------------------------------------------------------------------------------- 1 | # pickling the data 2 | 3 | import datetime 4 | import math, pickle 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import quandl 9 | from matplotlib import style 10 | from sklearn import preprocessing 11 | from sklearn.linear_model import LinearRegression 12 | from sklearn.model_selection import train_test_split 13 | 14 | style.use('ggplot') 15 | 16 | quandl.ApiConfig.api_key = 'vhkrsKz3TmUN6Qa4QjZK' 17 | df = quandl.get('WIKI/GOOGL') 18 | 19 | df = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']] 20 | df['HL_percent'] = (df['Adj. High'] - df['Adj. Low']) / df['Adj. Low'] * 100.0 21 | df['percent_change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0 22 | df = df[['Adj. Close', 'HL_percent', 'percent_change', 'Adj. Volume']] 23 | 24 | forecast_col = 'Adj. Close' 25 | df.fillna('-9999', inplace=True) 26 | 27 | forecast_out = int(math.ceil(0.01 * len(df))) 28 | 29 | df['label'] = df[forecast_col].shift(-forecast_out) 30 | X = np.array(df.drop(['label'], 1)) 31 | X = preprocessing.scale(X) 32 | X_lately = X[-forecast_out:] 33 | X = X[:-forecast_out] 34 | 35 | df.dropna(inplace=True) 36 | Y = np.array(df['label']) 37 | 38 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2) 39 | clf = LinearRegression(n_jobs=-1) 40 | clf.fit(X_train, Y_train) 41 | 42 | with open('LinearRegression.pickle', 'wb') as f: 43 | pickle.dump(clf, f) 44 | 45 | pickle_in = open('LinearRegression.pickle', 'rb') 46 | clf = pickle.load(pickle_in) 47 | accuracy = clf.score(X_test, Y_test) 48 | 49 | forecast_set = clf.predict(X_lately) 50 | print(forecast_set, accuracy, forecast_out) 51 | 52 | df['Forecast'] = np.nan 53 | last_date = df.iloc[-1].name 54 | last_unix = last_date.timestamp() 55 | one_day = 86400 56 | next_unix = last_unix + one_day 57 | 58 | for i in forecast_set: 59 | next_date = datetime.datetime.fromtimestamp(next_unix) 60 | next_unix += one_day 61 | df.loc[next_date] = [np.nan for _ in range(len(df.columns) - 1)] + [i] 62 | 63 | df['Adj. Close'].plot() 64 | df['Forecast'].plot() 65 | plt.legend(loc=4) 66 | plt.xlabel('Date') 67 | plt.ylabel('Price') 68 | plt.show() 69 | -------------------------------------------------------------------------------- /classical_ml/linear regression/4linear_regression_from_scratch.py: -------------------------------------------------------------------------------- 1 | """ 2 | In linear regression, we approximate y using a straight line. y=mx+b. 
Then we minimise the squared error 3 | and differentiate with respect to m and ab to get the values of m and b 4 | 5 | m = x'y' - (xy)' / (x')^2 - (x^2)' Here x' is the mean of x and y' is mean of y 6 | for b, put this m in y'= mx' + b 7 | """ 8 | 9 | from statistics import mean 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | from matplotlib import style 13 | 14 | style.use('fivethirtyeight') 15 | 16 | xs = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) # it is acutally the default data type 17 | ys = np.array([5, 4, 6, 5, 6, 7], dtype=np.float64) 18 | 19 | 20 | def best_fit_slope_and_intercept(xs, ys): 21 | m = ((mean(xs) * mean(ys)) - (mean(xs * ys))) / (mean(xs) ** 2 - mean(xs ** 2)) 22 | b = mean(ys) - m * mean(xs) 23 | return m, b 24 | 25 | 26 | def squared_error(ys_orig, ys_line): 27 | return sum((ys_line - ys_orig) ** 2) 28 | 29 | 30 | def coeff_of_determination(ys_orig, ys_line): 31 | ys_mean_line = [mean(ys_orig) for y in ys_orig] 32 | squared_error_regression = squared_error(ys_orig, ys_line) 33 | squared_error_mean = squared_error(ys_orig, ys_mean_line) 34 | return 1 - (squared_error_regression / squared_error_mean) 35 | 36 | 37 | m, b = best_fit_slope_and_intercept(xs, ys) 38 | 39 | regression_line = [(m * x) + b for x in xs] 40 | 41 | coeff_of_deter = coeff_of_determination(ys, regression_line) 42 | print(coeff_of_deter) 43 | 44 | plt.scatter(xs, ys) 45 | plt.plot(xs, regression_line) 46 | plt.show() 47 | 48 | # getting the accuracy of our linear fit line. We use doing squared error. We use square not mod becuase we want to 49 | # penalise the far of points from the line heavily 50 | 51 | ''' 52 | R squared theory -> coefficient of determination 53 | r^2 = 1 - (squared error of ys) / (squared error of mean of ys) 54 | More the value of R squared better the fit 55 | ''' 56 | -------------------------------------------------------------------------------- /classical_ml/linear regression/5testing_assumptions.py: -------------------------------------------------------------------------------- 1 | from statistics import mean 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from matplotlib import style 5 | import random 6 | 7 | style.use('fivethirtyeight') 8 | 9 | 10 | # xs = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) # it is acutally the default data type 11 | # ys = np.array([5, 4, 6, 5, 6, 7], dtype=np.float64) 12 | 13 | 14 | def create_dataset(hm, variance, step=2, correlation=False): # how many data points, variance of ys, 15 | val = 1 16 | ys = [] 17 | for i in range(hm): 18 | y = val + random.randrange(-variance, variance) 19 | ys.append(y) 20 | if correlation and correlation == 'pos': 21 | val += step 22 | elif correlation and correlation == 'neg': 23 | val -= step 24 | xs = [i for i in range(len(ys))] 25 | return np.array(xs, dtype=np.float64), np.array(ys, dtype=np.float64) 26 | 27 | 28 | def best_fit_slope_and_intercept(xs, ys): 29 | m = ((mean(xs) * mean(ys)) - (mean(xs * ys))) / (mean(xs) ** 2 - mean(xs ** 2)) 30 | b = mean(ys) - m * mean(xs) 31 | return m, b 32 | 33 | 34 | def squared_error(ys_orig, ys_line): 35 | return sum((ys_line - ys_orig) ** 2) 36 | 37 | 38 | def coeff_of_determination(ys_orig, ys_line): 39 | ys_mean_line = [mean(ys_orig) for y in ys_orig] 40 | squared_error_regression = squared_error(ys_orig, ys_line) 41 | squared_error_mean = squared_error(ys_orig, ys_mean_line) 42 | return 1 - (squared_error_regression / squared_error_mean) 43 | 44 | 45 | xs, ys = create_dataset(40, 40, 2, correlation='pos') 46 | # if 
variance is decreased, the coefficient of determination increases 47 | 48 | m, b = best_fit_slope_and_intercept(xs, ys) 49 | 50 | regression_line = [(m * x) + b for x in xs] 51 | 52 | coeff_of_deter = coeff_of_determination(ys, regression_line) 53 | print(coeff_of_deter) 54 | 55 | plt.scatter(xs, ys) 56 | plt.plot(xs, regression_line) 57 | plt.show() 58 | -------------------------------------------------------------------------------- /classical_ml/linear regression/LinearRegression.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ragvri/machine-learning/3c4c720944a17f3237cb9145ba653240d22517ae/classical_ml/linear regression/LinearRegression.pickle -------------------------------------------------------------------------------- /classical_ml/linear regression/README.md: -------------------------------------------------------------------------------- 1 | Applying linear regression on the dataset of Google stock prices obtained from quandl. 2 | 3 | The classifier is not that good because the features that I selected were not good. 4 | 5 | However, I was not able to find a better dataset 6 | 7 | 8 | -------------------------------------------------------------------------------- /classical_ml/svm/1_intro.py: -------------------------------------------------------------------------------- 1 | """ 2 | SVM is a binary classifier: it separates only 2 groups at a time. That does not mean it can only "classify" 3 | into 2 groups. It just means that at a time it can only separate one group from the rest. 4 | 5 | The 2 groups are denoted as positives and negatives. 6 | 7 | We want a street that separates the 2 groups and is as wide as possible. Then we consider a vector (w) which is 8 | perpendicular to the street's median. Now to check which side a point lies on, we take the dot product of the 9 | unknown point (vector u) with (vector w). From this length we can check if the point is on the left or the right side 10 | of the street 11 | OR 12 | (vector u).(vector w) + b >= 0 (1) 13 | then it is on one side, else on the other 14 | Here we don't know w and b, we just know w is perpendicular 15 | ALSO 16 | (X+).(vector w) + b >= 1 where X+ is a positive sample 17 | and 18 | (X-).(vector w) + b <= -1 where X- is a negative sample. 19 | 20 | All these are constraints 21 | 22 | We introduce Yi such that Yi = 1 for + 23 | Yi = -1 for - 24 | Multiplying both by Yi gives 25 | Yi*[(X).(vector w) + b] - 1 >= 0 (2) 26 | where X is any known sample 27 | Also, for Xi in the gutter, (2) = 0. These are called Support Vectors 28 | 29 | Q) How to find the width of the "street"? 30 | A) If we had a unit normal to the "gutter", then the dot product (X+ - X-).(w/|w|) (3) 31 | is the width, as w is normal to the street. This can be simplified using (2) 32 | The width comes out as : 2/|w| 33 | 34 | To maximise the width of the street, we want to minimise |w|, or 35 | min 1/2*(|w|)^2 (4) 36 | 37 | To find the extrema subject to these constraints, we use Lagrange multipliers. We find that 38 | w = sum(Ci*Xi*Yi) (5) 39 | sum(Ci*Yi) = 0 (6) 40 | 41 | (vector X).(vector W) + bias = 0 gives the decision boundary 42 | 43 | SVM finds the decision boundary, i.e. the boundary that separates the 2 groups.
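To see where the width in (3) comes from: for the support vectors, (2) holds with equality, so (X+).(vector w) + b = 1 and (X-).(vector w) + b = -1. Subtracting the two gives (X+ - X-).(vector w) = 2, and projecting onto the unit normal w/|w| as in (3) gives width = 2/|w|.

As a quick sanity check with sklearn (a rough sketch only; coef_ is available only for a linear kernel, and X, y stand for any 2-class dataset such as the one loaded below):

    clf = svm.SVC(kernel='linear')
    clf.fit(X, y)
    w = clf.coef_[0]               # the learned weight vector
    width = 2 / np.linalg.norm(w)  # width of the "street" derived above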
44 | 45 | """ 46 | # applying the svm library on the breast cancer example 47 | 48 | import numpy as np 49 | import pandas as pd 50 | from sklearn import preprocessing, svm 51 | from sklearn.model_selection import train_test_split 52 | 53 | df = pd.read_csv("breast-cancer-wisconsin.data.txt") 54 | 55 | df.replace('?', -9999, inplace=True) # replacing missing values. Most algorithms treat -9999 as an outlier 56 | df.drop(['id'], 1, inplace=True) 57 | # print(df.head()) 58 | 59 | X = np.array(df.drop(['class'], 1)) # here 1 means we want to drop the columns 60 | y = np.array(df['class']) 61 | 62 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 63 | 64 | clf = svm.SVC() # SVC: support vector classifier 65 | clf.fit(X_train, y_train) 66 | 67 | accuracy = clf.score(X_test, y_test) 68 | print(accuracy) 69 | 70 | example_measures = np.array([4, 2, 1, 1, 1, 2, 3, 2, 1]) # a single sample with 9 feature values 71 | example_measures = example_measures.reshape(1, -1) # -1 means unspecified number of cols 72 | prediction = clf.predict(example_measures) 73 | print(prediction) 74 | -------------------------------------------------------------------------------- /classical_ml/svm/2_svm_from_scratch.py: -------------------------------------------------------------------------------- 1 | """ 2 | We want 3 | 1) min(|w|) for max width 4 | 2) max(|b|) 5 | given the constraint 6 | 3) Yi*[(Xi).(vector w) + b] >= 1 7 | 8 | This is an optimisation problem. 9 | 10 | This is a quadratic optimisation problem. But it will have a global minimum as it is convex. So it is a convex optimisation problem 11 | 12 | We could use the cvxopt or libsvm libraries 13 | 14 | We take a value of W initially and keep reducing it, and so on, while satisfying (3). 15 | """ 16 | 17 | import matplotlib.pyplot as plt 18 | from matplotlib import style 19 | import numpy as np 20 | 21 | style.use('ggplot') 22 | 23 | 24 | class Support_Vector_Machine: 25 | def __init__(self, visualization=True): 26 | self.visualization = visualization 27 | self.colors = {1: 'r', -1: 'b'} 28 | if self.visualization: 29 | self.fig = plt.figure() 30 | self.ax = self.fig.add_subplot(1, 1, 1) 31 | 32 | """ 33 | We want to get the values of w and b. To begin, we find the max value in the feature 34 | set and use it to set latest_optimum. w becomes [latest_optimum, latest_optimum]. What we need is that 35 | for all yi, xi : yi*[w.xi + b] >= 1 36 | So first we check this w and b. If the constraint holds, we reduce w by a smaller step size and check again; 37 | if not, we keep reducing w with the same step size.
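A minimal sketch of the constraint check described above, assuming a tiny hand-made dataset in the same {class: samples} format that fit() uses below (fit() performs this same check inside its nested loops):

import numpy as np

def satisfies_constraints(data, w, b):
    # True only if yi*(w.xi + b) >= 1 holds for every known sample
    for yi, samples in data.items():
        for xi in samples:
            if yi * (np.dot(w, xi) + b) < 1:
                return False
    return True

toy_data = {1: np.array([[5.0, 1.0]]), -1: np.array([[1.0, 7.0]])}  # hypothetical samples
print(satisfies_constraints(toy_data, w=np.array([0.5, -0.5]), b=0.0))  # prints True for this w and b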
38 | """ 39 | 40 | def fit(self, data): 41 | self.data = data 42 | # { ||w||: [w,b] } 43 | opt_dict = {} 44 | 45 | transforms = [[1, 1], 46 | [-1, 1], 47 | [-1, -1], 48 | [1, -1]] 49 | 50 | all_data = [] 51 | for yi in self.data: 52 | for featureset in self.data[yi]: 53 | for feature in featureset: 54 | all_data.append(feature) 55 | 56 | self.max_feature_value = max(all_data) 57 | self.min_feature_value = min(all_data) 58 | all_data = None 59 | 60 | # support vectors yi(xi.w+b) = 1 61 | 62 | 63 | step_sizes = [self.max_feature_value * 0.1, 64 | self.max_feature_value * 0.01, 65 | # point of expense: 66 | self.max_feature_value * 0.001, 67 | ] 68 | 69 | # extremely expensive 70 | b_range_multiple = 2 71 | # we dont need to take as small of steps 72 | # with b as we do w 73 | b_multiple = 5 74 | latest_optimum = self.max_feature_value * 10 75 | 76 | for step in step_sizes: 77 | w = np.array([latest_optimum, latest_optimum]) 78 | # we can do this because convex 79 | optimized = False 80 | while not optimized: 81 | for b in np.arange(-1 * (self.max_feature_value * b_range_multiple), 82 | self.max_feature_value * b_range_multiple, 83 | step * b_multiple): 84 | for transformation in transforms: 85 | w_t = w * transformation 86 | found_option = True 87 | # weakest link in the SVM fundamentally 88 | # SMO attempts to fix this a bit 89 | # yi(xi.w+b) >= 1 90 | # 91 | # #### add a break here later.. 92 | for i in self.data: 93 | for xi in self.data[i]: 94 | yi = i 95 | if not yi * (np.dot(w_t, xi) + b) >= 1: 96 | found_option = False 97 | break 98 | if not found_option: 99 | break 100 | 101 | # print(xi,':',yi*(np.dot(w_t,xi)+b)) 102 | 103 | if found_option: 104 | opt_dict[np.linalg.norm(w_t)] = [w_t, b] 105 | 106 | if w[0] < 0: 107 | optimized = True 108 | print('Optimized a step.') 109 | else: 110 | w = w - step 111 | 112 | norms = sorted([n for n in opt_dict]) 113 | # ||w|| : [w,b] 114 | opt_choice = opt_dict[norms[0]] 115 | self.w = opt_choice[0] 116 | self.b = opt_choice[1] 117 | latest_optimum = opt_choice[0][0] + step * 2 118 | 119 | def predict(self, features): 120 | # sign( x.w+b ) 121 | classification = np.sign(np.dot(np.array(features), self.w) + self.b) 122 | if self.visualization and classification != 0: 123 | self.ax.scatter(features[0], features[1], s=200, marker='*', c=self.colors[classification]) 124 | return classification 125 | 126 | def visualise(self): 127 | [[self.ax.scatter(x[0], x[1], s=100, color=self.colors[i]) for x in data_dict[i]] for i in data_dict] 128 | 129 | # hyperplane = wx + b 130 | # v = wx + b 131 | # for positive support vectr v=1 132 | # negative support vect = -1 133 | # for decision boundary = 0. 134 | """ 135 | Here we want to draw the hyperplane for that we need 2 points to draw a line. The feature we 136 | assumed are 2. We also have the max and min value of feature. 137 | First we assume x to be min and find the corrs. y for the positve support vector line 138 | then we assume x to be max and find the corr. 
y for the positive support vector 139 | We repeat for the negative support vector line and the decision boundary 140 | """ 141 | 142 | def hyperplane(x, w, b, v): 143 | return (-w[0] * x - b + v) / w[1] 144 | 145 | data_range = (self.min_feature_value * 0.9, self.max_feature_value * 1.1) 146 | hyp_x_min = data_range[0] 147 | hyp_x_max = data_range[1] 148 | 149 | # (w.x + b) = 1 150 | # positive support vector hyperplane 151 | psv1 = hyperplane(hyp_x_min, self.w, self.b, 1) 152 | psv2 = hyperplane(hyp_x_max, self.w, self.b, 1) 153 | self.ax.plot([hyp_x_min, hyp_x_max], [psv1, psv2], 'black') 154 | 155 | # (w.x + b) = -1 156 | # negative support vector hyperplane 157 | nsv1 = hyperplane(hyp_x_min, self.w, self.b, -1) 158 | nsv2 = hyperplane(hyp_x_max, self.w, self.b, -1) 159 | self.ax.plot([hyp_x_min, hyp_x_max], [nsv1, nsv2], 'black') 160 | 161 | # (w.x + b) = 0 162 | # decision boundary 163 | db1 = hyperplane(hyp_x_min, self.w, self.b, 0) 164 | db2 = hyperplane(hyp_x_max, self.w, self.b, 0) 165 | self.ax.plot([hyp_x_min, hyp_x_max], [db1, db2], 'y--') 166 | 167 | plt.show() 168 | 169 | 170 | data_dict = {-1: np.array([[1, 7], 171 | [2, 8], 172 | [3, 8]]), 173 | 1: np.array([[5, 1], 174 | [6, -1], 175 | [7, 3]])} 176 | 177 | clf = Support_Vector_Machine() 178 | clf.fit(data=data_dict) 179 | 180 | predict_us = [[0, 10], 181 | [1, 3], 182 | [3, 4], 183 | [3, 5], 184 | [5, 5], 185 | [5, 6], 186 | [6, -5], 187 | [5, 8]] 188 | 189 | for p in predict_us: 190 | clf.predict(p) 191 | clf.visualise() 192 | -------------------------------------------------------------------------------- /classical_ml/svm/3_kernels_intro.py: -------------------------------------------------------------------------------- 1 | """ 2 | If the data is not linearly separable 3 | 4 | We can take another perspective and add a new dimension 5 | eg if the earlier features were x1, x2 6 | 7 | How to add the dimension? 8 | x3 = x1*x2 9 | But this increases the size of the data. 10 | 11 | The main downfall of SVM was training it on large data because of the optimisation problem. So this is not a good solution. 12 | 13 | 14 | So comes the use of kernels: 15 | A kernel is a similarity function: it takes two inputs and outputs their similarity. 16 | We can use kernels to transform the non-linear data into higher dimensions, creating a linearly 17 | separable situation. 18 | 19 | They are computed using the inner product, same as the dot product. 20 | 21 | If we earlier had x1, x2 we now have z1, z2, z3 and so on. We don't need to know the number of dimensions. 22 | 23 | Constraints earlier: 24 | Yi*[xi.w + b] >= 1 25 | Also, w = sum(Ai*Xi*Yi) using Lagrange 26 | 27 | We can replace X with Z since everywhere only a dot product is being used, which will return a scalar. 28 | 29 | K(X,X') = Z.Z' -> the dot product gives a scalar 30 | Z = f(X) 31 | Z' = f(X') 32 | 33 | We just need the dot product, not the actual values of Z and Z' 34 | 35 | Say we originally have [X1,X2] => convert to a second order polynomial. So 36 | X=[x1,x2] Z=[1,x1,x2,x1^2,x2^2,x1.x2] -> 6 dimensions 37 | 38 | So K(X,X') = Z.Z' = 1 + x1.x1' + x2.x2' + .... 39 | 40 | The kernel used here is the polynomial kernel. 41 | Here we first built Z explicitly. 42 | Now K(x,x') = (1+x.x')^p where X has n terms (see the small check further below) 43 | 44 | Another kernel is RBF: the radial basis function kernel. 45 | K(x,x') = e^(-gamma*|x-x'|^2). This is the default kernel. It works in infinite dimensions 46 | 47 | But it might be the case that the data is never linearly separable, i.e. there is no pattern for classification. 48 | But the RBF kernel will still separate it.
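A quick numeric check of the kernel trick claimed above, using two hypothetical 2-D points and p = 2 (the explicit feature map carries sqrt(2) factors on the cross terms so that Z.Z' matches the kernel exactly):

import numpy as np

def phi(x):
    # explicit 2nd-order feature map corresponding to K(x, y) = (1 + x.y)^2
    x1, x2 = x
    return np.array([1, np.sqrt(2) * x1, np.sqrt(2) * x2,
                     x1 ** 2, x2 ** 2, np.sqrt(2) * x1 * x2])

x = np.array([1.0, 2.0])   # hypothetical points
y = np.array([3.0, -1.0])

print(np.dot(phi(x), phi(y)))   # dot product computed in the 6-dimensional Z space
print((1 + np.dot(x, y)) ** 2)  # same value, computed without ever building Z

Both lines print 4.0: the kernel gives the Z-space similarity without ever constructing Z or Z'. The RBF kernel does the same in infinite dimensions, which is also why it can carve out a boundary even when the data has no real pattern.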
To detect this: 49 | check the number of support vectors. If almost all of the features are on support vectors, it is a major red flag. 50 | This means that there is overfitment. 51 | 52 | So if you are running a classifier and find accuracy to be say 53%, is my theory wrong or should i use diff kernel. 53 | Query to see percentage of SV is >20%. that means overfitment. So try different kernel. 54 | If percentage also low and accuracy also low, then our data will not work out. We can still try different 55 | kernel. 56 | 57 | Yi[xi.w+b]>= 1 - Slack, Otherwise overfitment may take place 58 | We want to minimise the slack as well 59 | Constraint: 60 | 1) min|w| or min (1/2 |w|^2 + c.sum(slacks) 61 | lower c means less important slacks is to our constraint 62 | 63 | The classifier now obtained is called soft margin classifier 64 | """ 65 | 66 | #just to visualise. Not coded by me 67 | 68 | import numpy as np 69 | from numpy import linalg 70 | import cvxopt 71 | import cvxopt.solvers 72 | 73 | 74 | def linear_kernel(x1, x2): 75 | return np.dot(x1, x2) 76 | 77 | 78 | def polynomial_kernel(x, y, p=3): 79 | return (1 + np.dot(x, y)) ** p 80 | 81 | 82 | def gaussian_kernel(x, y, sigma=5.0): 83 | return np.exp(-linalg.norm(x - y) ** 2 / (2 * (sigma ** 2))) 84 | 85 | 86 | class SVM(object): 87 | def __init__(self, kernel=linear_kernel, C=None): 88 | self.kernel = kernel 89 | self.C = C 90 | if self.C is not None: self.C = float(self.C) # C for soft margin 91 | 92 | def fit(self, X, y): 93 | n_samples, n_features = X.shape 94 | 95 | # Gram matrix 96 | K = np.zeros((n_samples, n_samples)) 97 | for i in range(n_samples): 98 | for j in range(n_samples): 99 | K[i, j] = self.kernel(X[i], X[j]) 100 | 101 | P = cvxopt.matrix(np.outer(y, y) * K) 102 | q = cvxopt.matrix(np.ones(n_samples) * -1) 103 | A = cvxopt.matrix(y, (1, n_samples)) 104 | b = cvxopt.matrix(0.0) 105 | 106 | if self.C is None: 107 | G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1)) 108 | h = cvxopt.matrix(np.zeros(n_samples)) 109 | else: 110 | tmp1 = np.diag(np.ones(n_samples) * -1) 111 | tmp2 = np.identity(n_samples) 112 | G = cvxopt.matrix(np.vstack((tmp1, tmp2))) 113 | tmp1 = np.zeros(n_samples) 114 | tmp2 = np.ones(n_samples) * self.C 115 | h = cvxopt.matrix(np.hstack((tmp1, tmp2))) 116 | 117 | # solve QP problem 118 | solution = cvxopt.solvers.qp(P, q, G, h, A, b) 119 | 120 | # Lagrange multipliers 121 | a = np.ravel(solution['x']) 122 | 123 | # Support vectors have non zero lagrange multipliers 124 | sv = a > 1e-5 125 | ind = np.arange(len(a))[sv] 126 | self.a = a[sv] 127 | self.sv = X[sv] 128 | self.sv_y = y[sv] 129 | print("%d support vectors out of %d points" % (len(self.a), n_samples)) 130 | 131 | # Intercept 132 | self.b = 0 133 | for n in range(len(self.a)): 134 | self.b += self.sv_y[n] 135 | self.b -= np.sum(self.a * self.sv_y * K[ind[n], sv]) 136 | self.b /= len(self.a) 137 | 138 | # Weight vector 139 | if self.kernel == linear_kernel: 140 | self.w = np.zeros(n_features) 141 | for n in range(len(self.a)): 142 | self.w += self.a[n] * self.sv_y[n] * self.sv[n] 143 | else: 144 | self.w = None 145 | 146 | def project(self, X): 147 | if self.w is not None: 148 | return np.dot(X, self.w) + self.b 149 | else: 150 | y_predict = np.zeros(len(X)) 151 | for i in range(len(X)): 152 | s = 0 153 | for a, sv_y, sv in zip(self.a, self.sv_y, self.sv): 154 | s += a * sv_y * self.kernel(X[i], sv) 155 | y_predict[i] = s 156 | return y_predict + self.b 157 | 158 | def predict(self, X): 159 | return np.sign(self.project(X)) 160 | 161 | 162 | if 
__name__ == "__main__": 163 | import pylab as pl 164 | 165 | 166 | def gen_lin_separable_data(): 167 | # generate training data in the 2-d case 168 | mean1 = np.array([0, 2]) 169 | mean2 = np.array([2, 0]) 170 | cov = np.array([[0.8, 0.6], [0.6, 0.8]]) 171 | X1 = np.random.multivariate_normal(mean1, cov, 100) 172 | y1 = np.ones(len(X1)) 173 | X2 = np.random.multivariate_normal(mean2, cov, 100) 174 | y2 = np.ones(len(X2)) * -1 175 | return X1, y1, X2, y2 176 | 177 | 178 | def gen_non_lin_separable_data(): 179 | mean1 = [-1, 2] 180 | mean2 = [1, -1] 181 | mean3 = [4, -4] 182 | mean4 = [-4, 4] 183 | cov = [[1.0, 0.8], [0.8, 1.0]] 184 | X1 = np.random.multivariate_normal(mean1, cov, 50) 185 | X1 = np.vstack((X1, np.random.multivariate_normal(mean3, cov, 50))) 186 | y1 = np.ones(len(X1)) 187 | X2 = np.random.multivariate_normal(mean2, cov, 50) 188 | X2 = np.vstack((X2, np.random.multivariate_normal(mean4, cov, 50))) 189 | y2 = np.ones(len(X2)) * -1 190 | return X1, y1, X2, y2 191 | 192 | 193 | def gen_lin_separable_overlap_data(): 194 | # generate training data in the 2-d case 195 | mean1 = np.array([0, 2]) 196 | mean2 = np.array([2, 0]) 197 | cov = np.array([[1.5, 1.0], [1.0, 1.5]]) 198 | X1 = np.random.multivariate_normal(mean1, cov, 100) 199 | y1 = np.ones(len(X1)) 200 | X2 = np.random.multivariate_normal(mean2, cov, 100) 201 | y2 = np.ones(len(X2)) * -1 202 | return X1, y1, X2, y2 203 | 204 | 205 | def split_train(X1, y1, X2, y2): 206 | X1_train = X1[:90] 207 | y1_train = y1[:90] 208 | X2_train = X2[:90] 209 | y2_train = y2[:90] 210 | X_train = np.vstack((X1_train, X2_train)) 211 | y_train = np.hstack((y1_train, y2_train)) 212 | return X_train, y_train 213 | 214 | 215 | def split_test(X1, y1, X2, y2): 216 | X1_test = X1[90:] 217 | y1_test = y1[90:] 218 | X2_test = X2[90:] 219 | y2_test = y2[90:] 220 | X_test = np.vstack((X1_test, X2_test)) 221 | y_test = np.hstack((y1_test, y2_test)) 222 | return X_test, y_test 223 | 224 | 225 | def plot_margin(X1_train, X2_train, clf): 226 | def f(x, w, b, c=0): 227 | # given x, return y such that [x,y] in on the line 228 | # w.x + b = c 229 | return (-w[0] * x - b + c) / w[1] 230 | 231 | pl.plot(X1_train[:, 0], X1_train[:, 1], "ro") 232 | pl.plot(X2_train[:, 0], X2_train[:, 1], "bo") 233 | pl.scatter(clf.sv[:, 0], clf.sv[:, 1], s=100, c="g") 234 | 235 | # w.x + b = 0 236 | a0 = -4; 237 | a1 = f(a0, clf.w, clf.b) 238 | b0 = 4; 239 | b1 = f(b0, clf.w, clf.b) 240 | pl.plot([a0, b0], [a1, b1], "k") 241 | 242 | # w.x + b = 1 243 | a0 = -4; 244 | a1 = f(a0, clf.w, clf.b, 1) 245 | b0 = 4; 246 | b1 = f(b0, clf.w, clf.b, 1) 247 | pl.plot([a0, b0], [a1, b1], "k--") 248 | 249 | # w.x + b = -1 250 | a0 = -4; 251 | a1 = f(a0, clf.w, clf.b, -1) 252 | b0 = 4; 253 | b1 = f(b0, clf.w, clf.b, -1) 254 | pl.plot([a0, b0], [a1, b1], "k--") 255 | 256 | pl.axis("tight") 257 | pl.show() 258 | 259 | 260 | def plot_contour(X1_train, X2_train, clf): 261 | pl.plot(X1_train[:, 0], X1_train[:, 1], "ro") 262 | pl.plot(X2_train[:, 0], X2_train[:, 1], "bo") 263 | pl.scatter(clf.sv[:, 0], clf.sv[:, 1], s=100, c="g") 264 | 265 | X1, X2 = np.meshgrid(np.linspace(-6, 6, 50), np.linspace(-6, 6, 50)) 266 | X = np.array([[x1, x2] for x1, x2 in zip(np.ravel(X1), np.ravel(X2))]) 267 | Z = clf.project(X).reshape(X1.shape) 268 | pl.contour(X1, X2, Z, [0.0], colors='k', linewidths=1, origin='lower') 269 | pl.contour(X1, X2, Z + 1, [0.0], colors='grey', linewidths=1, origin='lower') 270 | pl.contour(X1, X2, Z - 1, [0.0], colors='grey', linewidths=1, origin='lower') 271 | 272 | pl.axis("tight") 273 | 
pl.show() 274 | 275 | 276 | def test_linear(): 277 | X1, y1, X2, y2 = gen_lin_separable_data() 278 | X_train, y_train = split_train(X1, y1, X2, y2) 279 | X_test, y_test = split_test(X1, y1, X2, y2) 280 | 281 | clf = SVM() 282 | clf.fit(X_train, y_train) 283 | 284 | y_predict = clf.predict(X_test) 285 | correct = np.sum(y_predict == y_test) 286 | print("%d out of %d predictions correct" % (correct, len(y_predict))) 287 | 288 | plot_margin(X_train[y_train == 1], X_train[y_train == -1], clf) 289 | 290 | 291 | def test_non_linear(): 292 | X1, y1, X2, y2 = gen_non_lin_separable_data() 293 | X_train, y_train = split_train(X1, y1, X2, y2) 294 | X_test, y_test = split_test(X1, y1, X2, y2) 295 | 296 | clf = SVM(polynomial_kernel) 297 | clf.fit(X_train, y_train) 298 | 299 | y_predict = clf.predict(X_test) 300 | correct = np.sum(y_predict == y_test) 301 | print("%d out of %d predictions correct" % (correct, len(y_predict))) 302 | 303 | plot_contour(X_train[y_train == 1], X_train[y_train == -1], clf) 304 | 305 | 306 | def test_soft(): 307 | X1, y1, X2, y2 = gen_lin_separable_overlap_data() 308 | X_train, y_train = split_train(X1, y1, X2, y2) 309 | X_test, y_test = split_test(X1, y1, X2, y2) 310 | 311 | clf = SVM(C=1000.1) 312 | clf.fit(X_train, y_train) 313 | 314 | y_predict = clf.predict(X_test) 315 | correct = np.sum(y_predict == y_test) 316 | print("%d out of %d predictions correct" % (correct, len(y_predict))) 317 | 318 | plot_contour(X_train[y_train == 1], X_train[y_train == -1], clf) 319 | 320 | 321 | # test_linear() # hard margin 322 | # test_non_linear() 323 | test_soft() 324 | -------------------------------------------------------------------------------- /classical_ml/svm/4_kerenels_using_sklearn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Classifying with svm when more than 2 groups: 3 | 1) OVR : one vs rest: 4 | separate one group from rest of the data 5 | 2) OVO : One vs One: 6 | assume 3 groups : 1,2,3 7 | first make hyperplane for 1 vs 2 and 1 vs 3 8 | then 2 vs 3 9 | """ 10 | 11 | # check the documentation for svm.SVM 12 | import numpy as np 13 | import pandas as pd 14 | from sklearn import preprocessing, svm 15 | from sklearn.model_selection import train_test_split 16 | 17 | df = pd.read_csv("breast-cancer-wisconsin.data.txt") 18 | 19 | df.replace('?', -9999, inplace=True) 20 | df.drop(['id'], 1, inplace=True) 21 | 22 | X = np.array(df.drop(['class'], 1)) # here 1 means we want to drop the columns 23 | y = np.array(df['class']) 24 | 25 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) 26 | 27 | clf = svm.SVC() # SVC: support vector classifier 28 | clf.fit(X_train, y_train) 29 | 30 | accuracy = clf.score(X_test, y_test) 31 | print(accuracy) 32 | 33 | example_measures = np.array([4, 2, 1, 1, 1, 2, 3, 2, 1]) 34 | example_measures = example_measures.reshape(1, -1) 35 | prediction = clf.predict(example_measures) 36 | print(prediction) 37 | -------------------------------------------------------------------------------- /classical_ml/svm/breast-cancer-wisconsin.data.txt: -------------------------------------------------------------------------------- 1 | id,clump_thickness,unif_cell_size,uni_cell_shape,marg_adhesion,single_epi_cell_size,bare_nuclei,bland_chrom,norm_nucleoli,mitosis,class 2 | 1000025,5,1,1,1,2,1,3,1,1,2 3 | 1002945,5,4,4,5,7,10,3,2,1,2 4 | 1015425,3,1,1,1,2,2,3,1,1,2 5 | 1016277,6,8,8,1,3,4,3,7,1,2 6 | 1017023,4,1,1,3,2,1,3,1,1,2 7 | 1017122,8,10,10,8,7,10,9,7,1,4 8 | 1018099,1,1,1,1,2,10,3,1,1,2 
9 | 1018561,2,1,2,1,2,1,3,1,1,2 10 | 1033078,2,1,1,1,2,1,1,1,5,2 11 | 1033078,4,2,1,1,2,1,2,1,1,2 12 | 1035283,1,1,1,1,1,1,3,1,1,2 13 | 1036172,2,1,1,1,2,1,2,1,1,2 14 | 1041801,5,3,3,3,2,3,4,4,1,4 15 | 1043999,1,1,1,1,2,3,3,1,1,2 16 | 1044572,8,7,5,10,7,9,5,5,4,4 17 | 1047630,7,4,6,4,6,1,4,3,1,4 18 | 1048672,4,1,1,1,2,1,2,1,1,2 19 | 1049815,4,1,1,1,2,1,3,1,1,2 20 | 1050670,10,7,7,6,4,10,4,1,2,4 21 | 1050718,6,1,1,1,2,1,3,1,1,2 22 | 1054590,7,3,2,10,5,10,5,4,4,4 23 | 1054593,10,5,5,3,6,7,7,10,1,4 24 | 1056784,3,1,1,1,2,1,2,1,1,2 25 | 1057013,8,4,5,1,2,?,7,3,1,4 26 | 1059552,1,1,1,1,2,1,3,1,1,2 27 | 1065726,5,2,3,4,2,7,3,6,1,4 28 | 1066373,3,2,1,1,1,1,2,1,1,2 29 | 1066979,5,1,1,1,2,1,2,1,1,2 30 | 1067444,2,1,1,1,2,1,2,1,1,2 31 | 1070935,1,1,3,1,2,1,1,1,1,2 32 | 1070935,3,1,1,1,1,1,2,1,1,2 33 | 1071760,2,1,1,1,2,1,3,1,1,2 34 | 1072179,10,7,7,3,8,5,7,4,3,4 35 | 1074610,2,1,1,2,2,1,3,1,1,2 36 | 1075123,3,1,2,1,2,1,2,1,1,2 37 | 1079304,2,1,1,1,2,1,2,1,1,2 38 | 1080185,10,10,10,8,6,1,8,9,1,4 39 | 1081791,6,2,1,1,1,1,7,1,1,2 40 | 1084584,5,4,4,9,2,10,5,6,1,4 41 | 1091262,2,5,3,3,6,7,7,5,1,4 42 | 1096800,6,6,6,9,6,?,7,8,1,2 43 | 1099510,10,4,3,1,3,3,6,5,2,4 44 | 1100524,6,10,10,2,8,10,7,3,3,4 45 | 1102573,5,6,5,6,10,1,3,1,1,4 46 | 1103608,10,10,10,4,8,1,8,10,1,4 47 | 1103722,1,1,1,1,2,1,2,1,2,2 48 | 1105257,3,7,7,4,4,9,4,8,1,4 49 | 1105524,1,1,1,1,2,1,2,1,1,2 50 | 1106095,4,1,1,3,2,1,3,1,1,2 51 | 1106829,7,8,7,2,4,8,3,8,2,4 52 | 1108370,9,5,8,1,2,3,2,1,5,4 53 | 1108449,5,3,3,4,2,4,3,4,1,4 54 | 1110102,10,3,6,2,3,5,4,10,2,4 55 | 1110503,5,5,5,8,10,8,7,3,7,4 56 | 1110524,10,5,5,6,8,8,7,1,1,4 57 | 1111249,10,6,6,3,4,5,3,6,1,4 58 | 1112209,8,10,10,1,3,6,3,9,1,4 59 | 1113038,8,2,4,1,5,1,5,4,4,4 60 | 1113483,5,2,3,1,6,10,5,1,1,4 61 | 1113906,9,5,5,2,2,2,5,1,1,4 62 | 1115282,5,3,5,5,3,3,4,10,1,4 63 | 1115293,1,1,1,1,2,2,2,1,1,2 64 | 1116116,9,10,10,1,10,8,3,3,1,4 65 | 1116132,6,3,4,1,5,2,3,9,1,4 66 | 1116192,1,1,1,1,2,1,2,1,1,2 67 | 1116998,10,4,2,1,3,2,4,3,10,4 68 | 1117152,4,1,1,1,2,1,3,1,1,2 69 | 1118039,5,3,4,1,8,10,4,9,1,4 70 | 1120559,8,3,8,3,4,9,8,9,8,4 71 | 1121732,1,1,1,1,2,1,3,2,1,2 72 | 1121919,5,1,3,1,2,1,2,1,1,2 73 | 1123061,6,10,2,8,10,2,7,8,10,4 74 | 1124651,1,3,3,2,2,1,7,2,1,2 75 | 1125035,9,4,5,10,6,10,4,8,1,4 76 | 1126417,10,6,4,1,3,4,3,2,3,4 77 | 1131294,1,1,2,1,2,2,4,2,1,2 78 | 1132347,1,1,4,1,2,1,2,1,1,2 79 | 1133041,5,3,1,2,2,1,2,1,1,2 80 | 1133136,3,1,1,1,2,3,3,1,1,2 81 | 1136142,2,1,1,1,3,1,2,1,1,2 82 | 1137156,2,2,2,1,1,1,7,1,1,2 83 | 1143978,4,1,1,2,2,1,2,1,1,2 84 | 1143978,5,2,1,1,2,1,3,1,1,2 85 | 1147044,3,1,1,1,2,2,7,1,1,2 86 | 1147699,3,5,7,8,8,9,7,10,7,4 87 | 1147748,5,10,6,1,10,4,4,10,10,4 88 | 1148278,3,3,6,4,5,8,4,4,1,4 89 | 1148873,3,6,6,6,5,10,6,8,3,4 90 | 1152331,4,1,1,1,2,1,3,1,1,2 91 | 1155546,2,1,1,2,3,1,2,1,1,2 92 | 1156272,1,1,1,1,2,1,3,1,1,2 93 | 1156948,3,1,1,2,2,1,1,1,1,2 94 | 1157734,4,1,1,1,2,1,3,1,1,2 95 | 1158247,1,1,1,1,2,1,2,1,1,2 96 | 1160476,2,1,1,1,2,1,3,1,1,2 97 | 1164066,1,1,1,1,2,1,3,1,1,2 98 | 1165297,2,1,1,2,2,1,1,1,1,2 99 | 1165790,5,1,1,1,2,1,3,1,1,2 100 | 1165926,9,6,9,2,10,6,2,9,10,4 101 | 1166630,7,5,6,10,5,10,7,9,4,4 102 | 1166654,10,3,5,1,10,5,3,10,2,4 103 | 1167439,2,3,4,4,2,5,2,5,1,4 104 | 1167471,4,1,2,1,2,1,3,1,1,2 105 | 1168359,8,2,3,1,6,3,7,1,1,4 106 | 1168736,10,10,10,10,10,1,8,8,8,4 107 | 1169049,7,3,4,4,3,3,3,2,7,4 108 | 1170419,10,10,10,8,2,10,4,1,1,4 109 | 1170420,1,6,8,10,8,10,5,7,1,4 110 | 1171710,1,1,1,1,2,1,2,3,1,2 111 | 1171710,6,5,4,4,3,9,7,8,3,4 112 | 1171795,1,3,1,2,2,2,5,3,2,2 113 | 1171845,8,6,4,3,5,9,3,1,1,4 114 | 
1172152,10,3,3,10,2,10,7,3,3,4 115 | 1173216,10,10,10,3,10,8,8,1,1,4 116 | 1173235,3,3,2,1,2,3,3,1,1,2 117 | 1173347,1,1,1,1,2,5,1,1,1,2 118 | 1173347,8,3,3,1,2,2,3,2,1,2 119 | 1173509,4,5,5,10,4,10,7,5,8,4 120 | 1173514,1,1,1,1,4,3,1,1,1,2 121 | 1173681,3,2,1,1,2,2,3,1,1,2 122 | 1174057,1,1,2,2,2,1,3,1,1,2 123 | 1174057,4,2,1,1,2,2,3,1,1,2 124 | 1174131,10,10,10,2,10,10,5,3,3,4 125 | 1174428,5,3,5,1,8,10,5,3,1,4 126 | 1175937,5,4,6,7,9,7,8,10,1,4 127 | 1176406,1,1,1,1,2,1,2,1,1,2 128 | 1176881,7,5,3,7,4,10,7,5,5,4 129 | 1177027,3,1,1,1,2,1,3,1,1,2 130 | 1177399,8,3,5,4,5,10,1,6,2,4 131 | 1177512,1,1,1,1,10,1,1,1,1,2 132 | 1178580,5,1,3,1,2,1,2,1,1,2 133 | 1179818,2,1,1,1,2,1,3,1,1,2 134 | 1180194,5,10,8,10,8,10,3,6,3,4 135 | 1180523,3,1,1,1,2,1,2,2,1,2 136 | 1180831,3,1,1,1,3,1,2,1,1,2 137 | 1181356,5,1,1,1,2,2,3,3,1,2 138 | 1182404,4,1,1,1,2,1,2,1,1,2 139 | 1182410,3,1,1,1,2,1,1,1,1,2 140 | 1183240,4,1,2,1,2,1,2,1,1,2 141 | 1183246,1,1,1,1,1,?,2,1,1,2 142 | 1183516,3,1,1,1,2,1,1,1,1,2 143 | 1183911,2,1,1,1,2,1,1,1,1,2 144 | 1183983,9,5,5,4,4,5,4,3,3,4 145 | 1184184,1,1,1,1,2,5,1,1,1,2 146 | 1184241,2,1,1,1,2,1,2,1,1,2 147 | 1184840,1,1,3,1,2,?,2,1,1,2 148 | 1185609,3,4,5,2,6,8,4,1,1,4 149 | 1185610,1,1,1,1,3,2,2,1,1,2 150 | 1187457,3,1,1,3,8,1,5,8,1,2 151 | 1187805,8,8,7,4,10,10,7,8,7,4 152 | 1188472,1,1,1,1,1,1,3,1,1,2 153 | 1189266,7,2,4,1,6,10,5,4,3,4 154 | 1189286,10,10,8,6,4,5,8,10,1,4 155 | 1190394,4,1,1,1,2,3,1,1,1,2 156 | 1190485,1,1,1,1,2,1,1,1,1,2 157 | 1192325,5,5,5,6,3,10,3,1,1,4 158 | 1193091,1,2,2,1,2,1,2,1,1,2 159 | 1193210,2,1,1,1,2,1,3,1,1,2 160 | 1193683,1,1,2,1,3,?,1,1,1,2 161 | 1196295,9,9,10,3,6,10,7,10,6,4 162 | 1196915,10,7,7,4,5,10,5,7,2,4 163 | 1197080,4,1,1,1,2,1,3,2,1,2 164 | 1197270,3,1,1,1,2,1,3,1,1,2 165 | 1197440,1,1,1,2,1,3,1,1,7,2 166 | 1197510,5,1,1,1,2,?,3,1,1,2 167 | 1197979,4,1,1,1,2,2,3,2,1,2 168 | 1197993,5,6,7,8,8,10,3,10,3,4 169 | 1198128,10,8,10,10,6,1,3,1,10,4 170 | 1198641,3,1,1,1,2,1,3,1,1,2 171 | 1199219,1,1,1,2,1,1,1,1,1,2 172 | 1199731,3,1,1,1,2,1,1,1,1,2 173 | 1199983,1,1,1,1,2,1,3,1,1,2 174 | 1200772,1,1,1,1,2,1,2,1,1,2 175 | 1200847,6,10,10,10,8,10,10,10,7,4 176 | 1200892,8,6,5,4,3,10,6,1,1,4 177 | 1200952,5,8,7,7,10,10,5,7,1,4 178 | 1201834,2,1,1,1,2,1,3,1,1,2 179 | 1201936,5,10,10,3,8,1,5,10,3,4 180 | 1202125,4,1,1,1,2,1,3,1,1,2 181 | 1202812,5,3,3,3,6,10,3,1,1,4 182 | 1203096,1,1,1,1,1,1,3,1,1,2 183 | 1204242,1,1,1,1,2,1,1,1,1,2 184 | 1204898,6,1,1,1,2,1,3,1,1,2 185 | 1205138,5,8,8,8,5,10,7,8,1,4 186 | 1205579,8,7,6,4,4,10,5,1,1,4 187 | 1206089,2,1,1,1,1,1,3,1,1,2 188 | 1206695,1,5,8,6,5,8,7,10,1,4 189 | 1206841,10,5,6,10,6,10,7,7,10,4 190 | 1207986,5,8,4,10,5,8,9,10,1,4 191 | 1208301,1,2,3,1,2,1,3,1,1,2 192 | 1210963,10,10,10,8,6,8,7,10,1,4 193 | 1211202,7,5,10,10,10,10,4,10,3,4 194 | 1212232,5,1,1,1,2,1,2,1,1,2 195 | 1212251,1,1,1,1,2,1,3,1,1,2 196 | 1212422,3,1,1,1,2,1,3,1,1,2 197 | 1212422,4,1,1,1,2,1,3,1,1,2 198 | 1213375,8,4,4,5,4,7,7,8,2,2 199 | 1213383,5,1,1,4,2,1,3,1,1,2 200 | 1214092,1,1,1,1,2,1,1,1,1,2 201 | 1214556,3,1,1,1,2,1,2,1,1,2 202 | 1214966,9,7,7,5,5,10,7,8,3,4 203 | 1216694,10,8,8,4,10,10,8,1,1,4 204 | 1216947,1,1,1,1,2,1,3,1,1,2 205 | 1217051,5,1,1,1,2,1,3,1,1,2 206 | 1217264,1,1,1,1,2,1,3,1,1,2 207 | 1218105,5,10,10,9,6,10,7,10,5,4 208 | 1218741,10,10,9,3,7,5,3,5,1,4 209 | 1218860,1,1,1,1,1,1,3,1,1,2 210 | 1218860,1,1,1,1,1,1,3,1,1,2 211 | 1219406,5,1,1,1,1,1,3,1,1,2 212 | 1219525,8,10,10,10,5,10,8,10,6,4 213 | 1219859,8,10,8,8,4,8,7,7,1,4 214 | 1220330,1,1,1,1,2,1,3,1,1,2 215 | 1221863,10,10,10,10,7,10,7,10,4,4 
216 | 1222047,10,10,10,10,3,10,10,6,1,4 217 | 1222936,8,7,8,7,5,5,5,10,2,4 218 | 1223282,1,1,1,1,2,1,2,1,1,2 219 | 1223426,1,1,1,1,2,1,3,1,1,2 220 | 1223793,6,10,7,7,6,4,8,10,2,4 221 | 1223967,6,1,3,1,2,1,3,1,1,2 222 | 1224329,1,1,1,2,2,1,3,1,1,2 223 | 1225799,10,6,4,3,10,10,9,10,1,4 224 | 1226012,4,1,1,3,1,5,2,1,1,4 225 | 1226612,7,5,6,3,3,8,7,4,1,4 226 | 1227210,10,5,5,6,3,10,7,9,2,4 227 | 1227244,1,1,1,1,2,1,2,1,1,2 228 | 1227481,10,5,7,4,4,10,8,9,1,4 229 | 1228152,8,9,9,5,3,5,7,7,1,4 230 | 1228311,1,1,1,1,1,1,3,1,1,2 231 | 1230175,10,10,10,3,10,10,9,10,1,4 232 | 1230688,7,4,7,4,3,7,7,6,1,4 233 | 1231387,6,8,7,5,6,8,8,9,2,4 234 | 1231706,8,4,6,3,3,1,4,3,1,2 235 | 1232225,10,4,5,5,5,10,4,1,1,4 236 | 1236043,3,3,2,1,3,1,3,6,1,2 237 | 1241232,3,1,4,1,2,?,3,1,1,2 238 | 1241559,10,8,8,2,8,10,4,8,10,4 239 | 1241679,9,8,8,5,6,2,4,10,4,4 240 | 1242364,8,10,10,8,6,9,3,10,10,4 241 | 1243256,10,4,3,2,3,10,5,3,2,4 242 | 1270479,5,1,3,3,2,2,2,3,1,2 243 | 1276091,3,1,1,3,1,1,3,1,1,2 244 | 1277018,2,1,1,1,2,1,3,1,1,2 245 | 128059,1,1,1,1,2,5,5,1,1,2 246 | 1285531,1,1,1,1,2,1,3,1,1,2 247 | 1287775,5,1,1,2,2,2,3,1,1,2 248 | 144888,8,10,10,8,5,10,7,8,1,4 249 | 145447,8,4,4,1,2,9,3,3,1,4 250 | 167528,4,1,1,1,2,1,3,6,1,2 251 | 169356,3,1,1,1,2,?,3,1,1,2 252 | 183913,1,2,2,1,2,1,1,1,1,2 253 | 191250,10,4,4,10,2,10,5,3,3,4 254 | 1017023,6,3,3,5,3,10,3,5,3,2 255 | 1100524,6,10,10,2,8,10,7,3,3,4 256 | 1116116,9,10,10,1,10,8,3,3,1,4 257 | 1168736,5,6,6,2,4,10,3,6,1,4 258 | 1182404,3,1,1,1,2,1,1,1,1,2 259 | 1182404,3,1,1,1,2,1,2,1,1,2 260 | 1198641,3,1,1,1,2,1,3,1,1,2 261 | 242970,5,7,7,1,5,8,3,4,1,2 262 | 255644,10,5,8,10,3,10,5,1,3,4 263 | 263538,5,10,10,6,10,10,10,6,5,4 264 | 274137,8,8,9,4,5,10,7,8,1,4 265 | 303213,10,4,4,10,6,10,5,5,1,4 266 | 314428,7,9,4,10,10,3,5,3,3,4 267 | 1182404,5,1,4,1,2,1,3,2,1,2 268 | 1198641,10,10,6,3,3,10,4,3,2,4 269 | 320675,3,3,5,2,3,10,7,1,1,4 270 | 324427,10,8,8,2,3,4,8,7,8,4 271 | 385103,1,1,1,1,2,1,3,1,1,2 272 | 390840,8,4,7,1,3,10,3,9,2,4 273 | 411453,5,1,1,1,2,1,3,1,1,2 274 | 320675,3,3,5,2,3,10,7,1,1,4 275 | 428903,7,2,4,1,3,4,3,3,1,4 276 | 431495,3,1,1,1,2,1,3,2,1,2 277 | 432809,3,1,3,1,2,?,2,1,1,2 278 | 434518,3,1,1,1,2,1,2,1,1,2 279 | 452264,1,1,1,1,2,1,2,1,1,2 280 | 456282,1,1,1,1,2,1,3,1,1,2 281 | 476903,10,5,7,3,3,7,3,3,8,4 282 | 486283,3,1,1,1,2,1,3,1,1,2 283 | 486662,2,1,1,2,2,1,3,1,1,2 284 | 488173,1,4,3,10,4,10,5,6,1,4 285 | 492268,10,4,6,1,2,10,5,3,1,4 286 | 508234,7,4,5,10,2,10,3,8,2,4 287 | 527363,8,10,10,10,8,10,10,7,3,4 288 | 529329,10,10,10,10,10,10,4,10,10,4 289 | 535331,3,1,1,1,3,1,2,1,1,2 290 | 543558,6,1,3,1,4,5,5,10,1,4 291 | 555977,5,6,6,8,6,10,4,10,4,4 292 | 560680,1,1,1,1,2,1,1,1,1,2 293 | 561477,1,1,1,1,2,1,3,1,1,2 294 | 563649,8,8,8,1,2,?,6,10,1,4 295 | 601265,10,4,4,6,2,10,2,3,1,4 296 | 606140,1,1,1,1,2,?,2,1,1,2 297 | 606722,5,5,7,8,6,10,7,4,1,4 298 | 616240,5,3,4,3,4,5,4,7,1,2 299 | 61634,5,4,3,1,2,?,2,3,1,2 300 | 625201,8,2,1,1,5,1,1,1,1,2 301 | 63375,9,1,2,6,4,10,7,7,2,4 302 | 635844,8,4,10,5,4,4,7,10,1,4 303 | 636130,1,1,1,1,2,1,3,1,1,2 304 | 640744,10,10,10,7,9,10,7,10,10,4 305 | 646904,1,1,1,1,2,1,3,1,1,2 306 | 653777,8,3,4,9,3,10,3,3,1,4 307 | 659642,10,8,4,4,4,10,3,10,4,4 308 | 666090,1,1,1,1,2,1,3,1,1,2 309 | 666942,1,1,1,1,2,1,3,1,1,2 310 | 667204,7,8,7,6,4,3,8,8,4,4 311 | 673637,3,1,1,1,2,5,5,1,1,2 312 | 684955,2,1,1,1,3,1,2,1,1,2 313 | 688033,1,1,1,1,2,1,1,1,1,2 314 | 691628,8,6,4,10,10,1,3,5,1,4 315 | 693702,1,1,1,1,2,1,1,1,1,2 316 | 704097,1,1,1,1,1,1,2,1,1,2 317 | 704168,4,6,5,6,7,?,4,9,1,2 318 | 706426,5,5,5,2,5,10,4,3,1,4 319 
| 709287,6,8,7,8,6,8,8,9,1,4 320 | 718641,1,1,1,1,5,1,3,1,1,2 321 | 721482,4,4,4,4,6,5,7,3,1,2 322 | 730881,7,6,3,2,5,10,7,4,6,4 323 | 733639,3,1,1,1,2,?,3,1,1,2 324 | 733639,3,1,1,1,2,1,3,1,1,2 325 | 733823,5,4,6,10,2,10,4,1,1,4 326 | 740492,1,1,1,1,2,1,3,1,1,2 327 | 743348,3,2,2,1,2,1,2,3,1,2 328 | 752904,10,1,1,1,2,10,5,4,1,4 329 | 756136,1,1,1,1,2,1,2,1,1,2 330 | 760001,8,10,3,2,6,4,3,10,1,4 331 | 760239,10,4,6,4,5,10,7,1,1,4 332 | 76389,10,4,7,2,2,8,6,1,1,4 333 | 764974,5,1,1,1,2,1,3,1,2,2 334 | 770066,5,2,2,2,2,1,2,2,1,2 335 | 785208,5,4,6,6,4,10,4,3,1,4 336 | 785615,8,6,7,3,3,10,3,4,2,4 337 | 792744,1,1,1,1,2,1,1,1,1,2 338 | 797327,6,5,5,8,4,10,3,4,1,4 339 | 798429,1,1,1,1,2,1,3,1,1,2 340 | 704097,1,1,1,1,1,1,2,1,1,2 341 | 806423,8,5,5,5,2,10,4,3,1,4 342 | 809912,10,3,3,1,2,10,7,6,1,4 343 | 810104,1,1,1,1,2,1,3,1,1,2 344 | 814265,2,1,1,1,2,1,1,1,1,2 345 | 814911,1,1,1,1,2,1,1,1,1,2 346 | 822829,7,6,4,8,10,10,9,5,3,4 347 | 826923,1,1,1,1,2,1,1,1,1,2 348 | 830690,5,2,2,2,3,1,1,3,1,2 349 | 831268,1,1,1,1,1,1,1,3,1,2 350 | 832226,3,4,4,10,5,1,3,3,1,4 351 | 832567,4,2,3,5,3,8,7,6,1,4 352 | 836433,5,1,1,3,2,1,1,1,1,2 353 | 837082,2,1,1,1,2,1,3,1,1,2 354 | 846832,3,4,5,3,7,3,4,6,1,2 355 | 850831,2,7,10,10,7,10,4,9,4,4 356 | 855524,1,1,1,1,2,1,2,1,1,2 357 | 857774,4,1,1,1,3,1,2,2,1,2 358 | 859164,5,3,3,1,3,3,3,3,3,4 359 | 859350,8,10,10,7,10,10,7,3,8,4 360 | 866325,8,10,5,3,8,4,4,10,3,4 361 | 873549,10,3,5,4,3,7,3,5,3,4 362 | 877291,6,10,10,10,10,10,8,10,10,4 363 | 877943,3,10,3,10,6,10,5,1,4,4 364 | 888169,3,2,2,1,4,3,2,1,1,2 365 | 888523,4,4,4,2,2,3,2,1,1,2 366 | 896404,2,1,1,1,2,1,3,1,1,2 367 | 897172,2,1,1,1,2,1,2,1,1,2 368 | 95719,6,10,10,10,8,10,7,10,7,4 369 | 160296,5,8,8,10,5,10,8,10,3,4 370 | 342245,1,1,3,1,2,1,1,1,1,2 371 | 428598,1,1,3,1,1,1,2,1,1,2 372 | 492561,4,3,2,1,3,1,2,1,1,2 373 | 493452,1,1,3,1,2,1,1,1,1,2 374 | 493452,4,1,2,1,2,1,2,1,1,2 375 | 521441,5,1,1,2,2,1,2,1,1,2 376 | 560680,3,1,2,1,2,1,2,1,1,2 377 | 636437,1,1,1,1,2,1,1,1,1,2 378 | 640712,1,1,1,1,2,1,2,1,1,2 379 | 654244,1,1,1,1,1,1,2,1,1,2 380 | 657753,3,1,1,4,3,1,2,2,1,2 381 | 685977,5,3,4,1,4,1,3,1,1,2 382 | 805448,1,1,1,1,2,1,1,1,1,2 383 | 846423,10,6,3,6,4,10,7,8,4,4 384 | 1002504,3,2,2,2,2,1,3,2,1,2 385 | 1022257,2,1,1,1,2,1,1,1,1,2 386 | 1026122,2,1,1,1,2,1,1,1,1,2 387 | 1071084,3,3,2,2,3,1,1,2,3,2 388 | 1080233,7,6,6,3,2,10,7,1,1,4 389 | 1114570,5,3,3,2,3,1,3,1,1,2 390 | 1114570,2,1,1,1,2,1,2,2,1,2 391 | 1116715,5,1,1,1,3,2,2,2,1,2 392 | 1131411,1,1,1,2,2,1,2,1,1,2 393 | 1151734,10,8,7,4,3,10,7,9,1,4 394 | 1156017,3,1,1,1,2,1,2,1,1,2 395 | 1158247,1,1,1,1,1,1,1,1,1,2 396 | 1158405,1,2,3,1,2,1,2,1,1,2 397 | 1168278,3,1,1,1,2,1,2,1,1,2 398 | 1176187,3,1,1,1,2,1,3,1,1,2 399 | 1196263,4,1,1,1,2,1,1,1,1,2 400 | 1196475,3,2,1,1,2,1,2,2,1,2 401 | 1206314,1,2,3,1,2,1,1,1,1,2 402 | 1211265,3,10,8,7,6,9,9,3,8,4 403 | 1213784,3,1,1,1,2,1,1,1,1,2 404 | 1223003,5,3,3,1,2,1,2,1,1,2 405 | 1223306,3,1,1,1,2,4,1,1,1,2 406 | 1223543,1,2,1,3,2,1,1,2,1,2 407 | 1229929,1,1,1,1,2,1,2,1,1,2 408 | 1231853,4,2,2,1,2,1,2,1,1,2 409 | 1234554,1,1,1,1,2,1,2,1,1,2 410 | 1236837,2,3,2,2,2,2,3,1,1,2 411 | 1237674,3,1,2,1,2,1,2,1,1,2 412 | 1238021,1,1,1,1,2,1,2,1,1,2 413 | 1238464,1,1,1,1,1,?,2,1,1,2 414 | 1238633,10,10,10,6,8,4,8,5,1,4 415 | 1238915,5,1,2,1,2,1,3,1,1,2 416 | 1238948,8,5,6,2,3,10,6,6,1,4 417 | 1239232,3,3,2,6,3,3,3,5,1,2 418 | 1239347,8,7,8,5,10,10,7,2,1,4 419 | 1239967,1,1,1,1,2,1,2,1,1,2 420 | 1240337,5,2,2,2,2,2,3,2,2,2 421 | 1253505,2,3,1,1,5,1,1,1,1,2 422 | 1255384,3,2,2,3,2,3,3,1,1,2 423 | 
1257200,10,10,10,7,10,10,8,2,1,4 424 | 1257648,4,3,3,1,2,1,3,3,1,2 425 | 1257815,5,1,3,1,2,1,2,1,1,2 426 | 1257938,3,1,1,1,2,1,1,1,1,2 427 | 1258549,9,10,10,10,10,10,10,10,1,4 428 | 1258556,5,3,6,1,2,1,1,1,1,2 429 | 1266154,8,7,8,2,4,2,5,10,1,4 430 | 1272039,1,1,1,1,2,1,2,1,1,2 431 | 1276091,2,1,1,1,2,1,2,1,1,2 432 | 1276091,1,3,1,1,2,1,2,2,1,2 433 | 1276091,5,1,1,3,4,1,3,2,1,2 434 | 1277629,5,1,1,1,2,1,2,2,1,2 435 | 1293439,3,2,2,3,2,1,1,1,1,2 436 | 1293439,6,9,7,5,5,8,4,2,1,2 437 | 1294562,10,8,10,1,3,10,5,1,1,4 438 | 1295186,10,10,10,1,6,1,2,8,1,4 439 | 527337,4,1,1,1,2,1,1,1,1,2 440 | 558538,4,1,3,3,2,1,1,1,1,2 441 | 566509,5,1,1,1,2,1,1,1,1,2 442 | 608157,10,4,3,10,4,10,10,1,1,4 443 | 677910,5,2,2,4,2,4,1,1,1,2 444 | 734111,1,1,1,3,2,3,1,1,1,2 445 | 734111,1,1,1,1,2,2,1,1,1,2 446 | 780555,5,1,1,6,3,1,2,1,1,2 447 | 827627,2,1,1,1,2,1,1,1,1,2 448 | 1049837,1,1,1,1,2,1,1,1,1,2 449 | 1058849,5,1,1,1,2,1,1,1,1,2 450 | 1182404,1,1,1,1,1,1,1,1,1,2 451 | 1193544,5,7,9,8,6,10,8,10,1,4 452 | 1201870,4,1,1,3,1,1,2,1,1,2 453 | 1202253,5,1,1,1,2,1,1,1,1,2 454 | 1227081,3,1,1,3,2,1,1,1,1,2 455 | 1230994,4,5,5,8,6,10,10,7,1,4 456 | 1238410,2,3,1,1,3,1,1,1,1,2 457 | 1246562,10,2,2,1,2,6,1,1,2,4 458 | 1257470,10,6,5,8,5,10,8,6,1,4 459 | 1259008,8,8,9,6,6,3,10,10,1,4 460 | 1266124,5,1,2,1,2,1,1,1,1,2 461 | 1267898,5,1,3,1,2,1,1,1,1,2 462 | 1268313,5,1,1,3,2,1,1,1,1,2 463 | 1268804,3,1,1,1,2,5,1,1,1,2 464 | 1276091,6,1,1,3,2,1,1,1,1,2 465 | 1280258,4,1,1,1,2,1,1,2,1,2 466 | 1293966,4,1,1,1,2,1,1,1,1,2 467 | 1296572,10,9,8,7,6,4,7,10,3,4 468 | 1298416,10,6,6,2,4,10,9,7,1,4 469 | 1299596,6,6,6,5,4,10,7,6,2,4 470 | 1105524,4,1,1,1,2,1,1,1,1,2 471 | 1181685,1,1,2,1,2,1,2,1,1,2 472 | 1211594,3,1,1,1,1,1,2,1,1,2 473 | 1238777,6,1,1,3,2,1,1,1,1,2 474 | 1257608,6,1,1,1,1,1,1,1,1,2 475 | 1269574,4,1,1,1,2,1,1,1,1,2 476 | 1277145,5,1,1,1,2,1,1,1,1,2 477 | 1287282,3,1,1,1,2,1,1,1,1,2 478 | 1296025,4,1,2,1,2,1,1,1,1,2 479 | 1296263,4,1,1,1,2,1,1,1,1,2 480 | 1296593,5,2,1,1,2,1,1,1,1,2 481 | 1299161,4,8,7,10,4,10,7,5,1,4 482 | 1301945,5,1,1,1,1,1,1,1,1,2 483 | 1302428,5,3,2,4,2,1,1,1,1,2 484 | 1318169,9,10,10,10,10,5,10,10,10,4 485 | 474162,8,7,8,5,5,10,9,10,1,4 486 | 787451,5,1,2,1,2,1,1,1,1,2 487 | 1002025,1,1,1,3,1,3,1,1,1,2 488 | 1070522,3,1,1,1,1,1,2,1,1,2 489 | 1073960,10,10,10,10,6,10,8,1,5,4 490 | 1076352,3,6,4,10,3,3,3,4,1,4 491 | 1084139,6,3,2,1,3,4,4,1,1,4 492 | 1115293,1,1,1,1,2,1,1,1,1,2 493 | 1119189,5,8,9,4,3,10,7,1,1,4 494 | 1133991,4,1,1,1,1,1,2,1,1,2 495 | 1142706,5,10,10,10,6,10,6,5,2,4 496 | 1155967,5,1,2,10,4,5,2,1,1,2 497 | 1170945,3,1,1,1,1,1,2,1,1,2 498 | 1181567,1,1,1,1,1,1,1,1,1,2 499 | 1182404,4,2,1,1,2,1,1,1,1,2 500 | 1204558,4,1,1,1,2,1,2,1,1,2 501 | 1217952,4,1,1,1,2,1,2,1,1,2 502 | 1224565,6,1,1,1,2,1,3,1,1,2 503 | 1238186,4,1,1,1,2,1,2,1,1,2 504 | 1253917,4,1,1,2,2,1,2,1,1,2 505 | 1265899,4,1,1,1,2,1,3,1,1,2 506 | 1268766,1,1,1,1,2,1,1,1,1,2 507 | 1277268,3,3,1,1,2,1,1,1,1,2 508 | 1286943,8,10,10,10,7,5,4,8,7,4 509 | 1295508,1,1,1,1,2,4,1,1,1,2 510 | 1297327,5,1,1,1,2,1,1,1,1,2 511 | 1297522,2,1,1,1,2,1,1,1,1,2 512 | 1298360,1,1,1,1,2,1,1,1,1,2 513 | 1299924,5,1,1,1,2,1,2,1,1,2 514 | 1299994,5,1,1,1,2,1,1,1,1,2 515 | 1304595,3,1,1,1,1,1,2,1,1,2 516 | 1306282,6,6,7,10,3,10,8,10,2,4 517 | 1313325,4,10,4,7,3,10,9,10,1,4 518 | 1320077,1,1,1,1,1,1,1,1,1,2 519 | 1320077,1,1,1,1,1,1,2,1,1,2 520 | 1320304,3,1,2,2,2,1,1,1,1,2 521 | 1330439,4,7,8,3,4,10,9,1,1,4 522 | 333093,1,1,1,1,3,1,1,1,1,2 523 | 369565,4,1,1,1,3,1,1,1,1,2 524 | 412300,10,4,5,4,3,5,7,3,1,4 525 | 672113,7,5,6,10,4,10,5,3,1,4 
526 | 749653,3,1,1,1,2,1,2,1,1,2 527 | 769612,3,1,1,2,2,1,1,1,1,2 528 | 769612,4,1,1,1,2,1,1,1,1,2 529 | 798429,4,1,1,1,2,1,3,1,1,2 530 | 807657,6,1,3,2,2,1,1,1,1,2 531 | 8233704,4,1,1,1,1,1,2,1,1,2 532 | 837480,7,4,4,3,4,10,6,9,1,4 533 | 867392,4,2,2,1,2,1,2,1,1,2 534 | 869828,1,1,1,1,1,1,3,1,1,2 535 | 1043068,3,1,1,1,2,1,2,1,1,2 536 | 1056171,2,1,1,1,2,1,2,1,1,2 537 | 1061990,1,1,3,2,2,1,3,1,1,2 538 | 1113061,5,1,1,1,2,1,3,1,1,2 539 | 1116192,5,1,2,1,2,1,3,1,1,2 540 | 1135090,4,1,1,1,2,1,2,1,1,2 541 | 1145420,6,1,1,1,2,1,2,1,1,2 542 | 1158157,5,1,1,1,2,2,2,1,1,2 543 | 1171578,3,1,1,1,2,1,1,1,1,2 544 | 1174841,5,3,1,1,2,1,1,1,1,2 545 | 1184586,4,1,1,1,2,1,2,1,1,2 546 | 1186936,2,1,3,2,2,1,2,1,1,2 547 | 1197527,5,1,1,1,2,1,2,1,1,2 548 | 1222464,6,10,10,10,4,10,7,10,1,4 549 | 1240603,2,1,1,1,1,1,1,1,1,2 550 | 1240603,3,1,1,1,1,1,1,1,1,2 551 | 1241035,7,8,3,7,4,5,7,8,2,4 552 | 1287971,3,1,1,1,2,1,2,1,1,2 553 | 1289391,1,1,1,1,2,1,3,1,1,2 554 | 1299924,3,2,2,2,2,1,4,2,1,2 555 | 1306339,4,4,2,1,2,5,2,1,2,2 556 | 1313658,3,1,1,1,2,1,1,1,1,2 557 | 1313982,4,3,1,1,2,1,4,8,1,2 558 | 1321264,5,2,2,2,1,1,2,1,1,2 559 | 1321321,5,1,1,3,2,1,1,1,1,2 560 | 1321348,2,1,1,1,2,1,2,1,1,2 561 | 1321931,5,1,1,1,2,1,2,1,1,2 562 | 1321942,5,1,1,1,2,1,3,1,1,2 563 | 1321942,5,1,1,1,2,1,3,1,1,2 564 | 1328331,1,1,1,1,2,1,3,1,1,2 565 | 1328755,3,1,1,1,2,1,2,1,1,2 566 | 1331405,4,1,1,1,2,1,3,2,1,2 567 | 1331412,5,7,10,10,5,10,10,10,1,4 568 | 1333104,3,1,2,1,2,1,3,1,1,2 569 | 1334071,4,1,1,1,2,3,2,1,1,2 570 | 1343068,8,4,4,1,6,10,2,5,2,4 571 | 1343374,10,10,8,10,6,5,10,3,1,4 572 | 1344121,8,10,4,4,8,10,8,2,1,4 573 | 142932,7,6,10,5,3,10,9,10,2,4 574 | 183936,3,1,1,1,2,1,2,1,1,2 575 | 324382,1,1,1,1,2,1,2,1,1,2 576 | 378275,10,9,7,3,4,2,7,7,1,4 577 | 385103,5,1,2,1,2,1,3,1,1,2 578 | 690557,5,1,1,1,2,1,2,1,1,2 579 | 695091,1,1,1,1,2,1,2,1,1,2 580 | 695219,1,1,1,1,2,1,2,1,1,2 581 | 824249,1,1,1,1,2,1,3,1,1,2 582 | 871549,5,1,2,1,2,1,2,1,1,2 583 | 878358,5,7,10,6,5,10,7,5,1,4 584 | 1107684,6,10,5,5,4,10,6,10,1,4 585 | 1115762,3,1,1,1,2,1,1,1,1,2 586 | 1217717,5,1,1,6,3,1,1,1,1,2 587 | 1239420,1,1,1,1,2,1,1,1,1,2 588 | 1254538,8,10,10,10,6,10,10,10,1,4 589 | 1261751,5,1,1,1,2,1,2,2,1,2 590 | 1268275,9,8,8,9,6,3,4,1,1,4 591 | 1272166,5,1,1,1,2,1,1,1,1,2 592 | 1294261,4,10,8,5,4,1,10,1,1,4 593 | 1295529,2,5,7,6,4,10,7,6,1,4 594 | 1298484,10,3,4,5,3,10,4,1,1,4 595 | 1311875,5,1,2,1,2,1,1,1,1,2 596 | 1315506,4,8,6,3,4,10,7,1,1,4 597 | 1320141,5,1,1,1,2,1,2,1,1,2 598 | 1325309,4,1,2,1,2,1,2,1,1,2 599 | 1333063,5,1,3,1,2,1,3,1,1,2 600 | 1333495,3,1,1,1,2,1,2,1,1,2 601 | 1334659,5,2,4,1,1,1,1,1,1,2 602 | 1336798,3,1,1,1,2,1,2,1,1,2 603 | 1344449,1,1,1,1,1,1,2,1,1,2 604 | 1350568,4,1,1,1,2,1,2,1,1,2 605 | 1352663,5,4,6,8,4,1,8,10,1,4 606 | 188336,5,3,2,8,5,10,8,1,2,4 607 | 352431,10,5,10,3,5,8,7,8,3,4 608 | 353098,4,1,1,2,2,1,1,1,1,2 609 | 411453,1,1,1,1,2,1,1,1,1,2 610 | 557583,5,10,10,10,10,10,10,1,1,4 611 | 636375,5,1,1,1,2,1,1,1,1,2 612 | 736150,10,4,3,10,3,10,7,1,2,4 613 | 803531,5,10,10,10,5,2,8,5,1,4 614 | 822829,8,10,10,10,6,10,10,10,10,4 615 | 1016634,2,3,1,1,2,1,2,1,1,2 616 | 1031608,2,1,1,1,1,1,2,1,1,2 617 | 1041043,4,1,3,1,2,1,2,1,1,2 618 | 1042252,3,1,1,1,2,1,2,1,1,2 619 | 1057067,1,1,1,1,1,?,1,1,1,2 620 | 1061990,4,1,1,1,2,1,2,1,1,2 621 | 1073836,5,1,1,1,2,1,2,1,1,2 622 | 1083817,3,1,1,1,2,1,2,1,1,2 623 | 1096352,6,3,3,3,3,2,6,1,1,2 624 | 1140597,7,1,2,3,2,1,2,1,1,2 625 | 1149548,1,1,1,1,2,1,1,1,1,2 626 | 1174009,5,1,1,2,1,1,2,1,1,2 627 | 1183596,3,1,3,1,3,4,1,1,1,2 628 | 1190386,4,6,6,5,7,6,7,7,3,4 629 | 
1190546,2,1,1,1,2,5,1,1,1,2 630 | 1213273,2,1,1,1,2,1,1,1,1,2 631 | 1218982,4,1,1,1,2,1,1,1,1,2 632 | 1225382,6,2,3,1,2,1,1,1,1,2 633 | 1235807,5,1,1,1,2,1,2,1,1,2 634 | 1238777,1,1,1,1,2,1,1,1,1,2 635 | 1253955,8,7,4,4,5,3,5,10,1,4 636 | 1257366,3,1,1,1,2,1,1,1,1,2 637 | 1260659,3,1,4,1,2,1,1,1,1,2 638 | 1268952,10,10,7,8,7,1,10,10,3,4 639 | 1275807,4,2,4,3,2,2,2,1,1,2 640 | 1277792,4,1,1,1,2,1,1,1,1,2 641 | 1277792,5,1,1,3,2,1,1,1,1,2 642 | 1285722,4,1,1,3,2,1,1,1,1,2 643 | 1288608,3,1,1,1,2,1,2,1,1,2 644 | 1290203,3,1,1,1,2,1,2,1,1,2 645 | 1294413,1,1,1,1,2,1,1,1,1,2 646 | 1299596,2,1,1,1,2,1,1,1,1,2 647 | 1303489,3,1,1,1,2,1,2,1,1,2 648 | 1311033,1,2,2,1,2,1,1,1,1,2 649 | 1311108,1,1,1,3,2,1,1,1,1,2 650 | 1315807,5,10,10,10,10,2,10,10,10,4 651 | 1318671,3,1,1,1,2,1,2,1,1,2 652 | 1319609,3,1,1,2,3,4,1,1,1,2 653 | 1323477,1,2,1,3,2,1,2,1,1,2 654 | 1324572,5,1,1,1,2,1,2,2,1,2 655 | 1324681,4,1,1,1,2,1,2,1,1,2 656 | 1325159,3,1,1,1,2,1,3,1,1,2 657 | 1326892,3,1,1,1,2,1,2,1,1,2 658 | 1330361,5,1,1,1,2,1,2,1,1,2 659 | 1333877,5,4,5,1,8,1,3,6,1,2 660 | 1334015,7,8,8,7,3,10,7,2,3,4 661 | 1334667,1,1,1,1,2,1,1,1,1,2 662 | 1339781,1,1,1,1,2,1,2,1,1,2 663 | 1339781,4,1,1,1,2,1,3,1,1,2 664 | 13454352,1,1,3,1,2,1,2,1,1,2 665 | 1345452,1,1,3,1,2,1,2,1,1,2 666 | 1345593,3,1,1,3,2,1,2,1,1,2 667 | 1347749,1,1,1,1,2,1,1,1,1,2 668 | 1347943,5,2,2,2,2,1,1,1,2,2 669 | 1348851,3,1,1,1,2,1,3,1,1,2 670 | 1350319,5,7,4,1,6,1,7,10,3,4 671 | 1350423,5,10,10,8,5,5,7,10,1,4 672 | 1352848,3,10,7,8,5,8,7,4,1,4 673 | 1353092,3,2,1,2,2,1,3,1,1,2 674 | 1354840,2,1,1,1,2,1,3,1,1,2 675 | 1354840,5,3,2,1,3,1,1,1,1,2 676 | 1355260,1,1,1,1,2,1,2,1,1,2 677 | 1365075,4,1,4,1,2,1,1,1,1,2 678 | 1365328,1,1,2,1,2,1,2,1,1,2 679 | 1368267,5,1,1,1,2,1,1,1,1,2 680 | 1368273,1,1,1,1,2,1,1,1,1,2 681 | 1368882,2,1,1,1,2,1,1,1,1,2 682 | 1369821,10,10,10,10,5,10,10,10,7,4 683 | 1371026,5,10,10,10,4,10,5,6,3,4 684 | 1371920,5,1,1,1,2,1,3,2,1,2 685 | 466906,1,1,1,1,2,1,1,1,1,2 686 | 466906,1,1,1,1,2,1,1,1,1,2 687 | 534555,1,1,1,1,2,1,1,1,1,2 688 | 536708,1,1,1,1,2,1,1,1,1,2 689 | 566346,3,1,1,1,2,1,2,3,1,2 690 | 603148,4,1,1,1,2,1,1,1,1,2 691 | 654546,1,1,1,1,2,1,1,1,8,2 692 | 654546,1,1,1,3,2,1,1,1,1,2 693 | 695091,5,10,10,5,4,5,4,4,1,4 694 | 714039,3,1,1,1,2,1,1,1,1,2 695 | 763235,3,1,1,1,2,1,2,1,2,2 696 | 776715,3,1,1,1,3,2,1,1,1,2 697 | 841769,2,1,1,1,2,1,1,1,1,2 698 | 888820,5,10,10,3,7,3,8,10,2,4 699 | 897471,4,8,6,4,3,4,10,6,1,4 700 | 897471,4,8,8,5,4,5,10,4,1,4 701 | --------------------------------------------------------------------------------
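To round off the OVR/OVO note in 4_kerenels_using_sklearn.py above, a small sketch on a 3-class problem (using sklearn's built-in iris data purely as an assumed stand-in, since the breast cancer set has only 2 classes); decision_function_shape switches between the one-vs-one and one-vs-rest decision functions:

from sklearn import datasets, svm
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()  # 3 classes, so more than one separating hyperplane is needed
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2)

for shape in ('ovo', 'ovr'):
    clf = svm.SVC(kernel='rbf', decision_function_shape=shape)
    clf.fit(X_train, y_train)
    print(shape, clf.score(X_test, y_test))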