├── .gitignore ├── ArtificialNeuralNetworks ├── BusinessProblem │ ├── data.csv │ └── source.py ├── MultiLayerBasic │ └── source.py └── SingleLayerFeedforward │ └── source.py ├── ConvolutionalNeuralNetworks ├── HandwrittenDigitsRecognition │ └── source.py └── ObjectRecognition-10 │ ├── build_model.py │ ├── eval.py │ ├── read_input.py │ ├── read_input_test.py │ └── train.py ├── LICENSE ├── README.md └── Regression ├── Linear └── Basic │ ├── data.csv │ └── source.py ├── Logistic ├── Basic │ ├── data.csv │ └── source.py └── SocialNetworkAds │ ├── data.csv │ └── source.py └── Softmax └── HandwrittenDigitsRecognition └── source.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 2 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 3 | 4 | # User-specific stuff: 5 | .idea/**/workspace.xml 6 | .idea/**/tasks.xml 7 | .idea/dictionaries 8 | 9 | # Sensitive or high-churn files: 10 | .idea/**/dataSources/ 11 | .idea/**/dataSources.ids 12 | .idea/**/dataSources.xml 13 | .idea/**/dataSources.local.xml 14 | .idea/**/sqlDataSources.xml 15 | .idea/**/dynamic.xml 16 | .idea/**/uiDesigner.xml 17 | 18 | # Gradle: 19 | .idea/**/gradle.xml 20 | .idea/**/libraries 21 | 22 | # Mongo Explorer plugin: 23 | .idea/**/mongoSettings.xml 24 | 25 | ## File-based project format: 26 | *.iws 27 | 28 | ## Plugin-specific files: 29 | 30 | # IntelliJ 31 | /out/ 32 | 33 | # mpeltonen/sbt-idea plugin 34 | .idea_modules/ 35 | 36 | # JIRA plugin 37 | atlassian-ide-plugin.xml 38 | 39 | # Cursive Clojure plugin 40 | .idea/replstate.xml 41 | 42 | # Crashlytics plugin (for Android Studio and IntelliJ) 43 | com_crashlytics_export_strings.xml 44 | crashlytics.properties 45 | crashlytics-build.properties 46 | fabric.properties 47 | 48 | # Byte-compiled / optimized / DLL files 49 | __pycache__/ 50 | *.py[cod] 51 | *$py.class 52 | 
53 | # C extensions 54 | *.so 55 | 56 | # Distribution / packaging 57 | .Python 58 | env/ 59 | build/ 60 | develop-eggs/ 61 | dist/ 62 | downloads/ 63 | eggs/ 64 | .eggs/ 65 | lib/ 66 | lib64/ 67 | parts/ 68 | sdist/ 69 | var/ 70 | *.egg-info/ 71 | .installed.cfg 72 | *.egg 73 | 74 | # PyInstaller 75 | # Usually these files are written by a python script from a template 76 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 77 | *.manifest 78 | *.spec 79 | 80 | # Installer logs 81 | pip-log.txt 82 | pip-delete-this-directory.txt 83 | 84 | # Unit test / coverage reports 85 | htmlcov/ 86 | .tox/ 87 | .coverage 88 | .coverage.* 89 | .cache 90 | nosetests.xml 91 | coverage.xml 92 | *,cover 93 | .hypothesis/ 94 | 95 | # Translations 96 | *.mo 97 | *.pot 98 | 99 | # Django stuff: 100 | *.log 101 | local_settings.py 102 | 103 | # Flask stuff: 104 | instance/ 105 | .webassets-cache 106 | 107 | # Scrapy stuff: 108 | .scrapy 109 | 110 | # Sphinx documentation 111 | docs/_build/ 112 | 113 | # PyBuilder 114 | target/ 115 | 116 | # IPython Notebook 117 | .ipynb_checkpoints 118 | 119 | # pyenv 120 | .python-version 121 | 122 | # celery beat schedule file 123 | celerybeat-schedule 124 | 125 | # dotenv 126 | .env 127 | 128 | # virtualenv 129 | venv/ 130 | ENV/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | -------------------------------------------------------------------------------- /ArtificialNeuralNetworks/BusinessProblem/source.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder 4 | from sklearn.model_selection import train_test_split, GridSearchCV 5 | from sklearn.metrics import confusion_matrix 6 | from keras.models import Sequential 7 | from keras.layers import Dense, Dropout 8 | from keras.wrappers.scikit_learn 
import KerasClassifier 9 | 10 | class BusinessProblem: 11 | 12 | dataset = None 13 | X = X_train = X_test = None 14 | y = y_train = y_test = y_pred = None 15 | classifier = conf_mat = None 16 | best_params = best_accuracy = None 17 | 18 | 19 | def __init__(self): 20 | # Importing the dataset 21 | self.dataset = pd.read_csv('data.csv') 22 | self.X = self.dataset.iloc[:,3:13].values 23 | self.y = self.dataset.iloc[:,13].values 24 | 25 | # Encoding categorical data 26 | label_encoder_X_1 = LabelEncoder() 27 | self.X[:,1] = label_encoder_X_1.fit_transform(self.X[:,1]) 28 | label_encoder_X_2 = LabelEncoder() 29 | self.X[:,2] = label_encoder_X_2.fit_transform(self.X[:,2]) 30 | one_hot_encoder = OneHotEncoder(categorical_features=[1]) 31 | self.X = one_hot_encoder.fit_transform(self.X).toarray() 32 | self.X = self.X[:,1:] 33 | 34 | # Splitting the dataset into the training set and test set 35 | self.X_train, self.X_test, self.y_train, self.y_test = train_test_split( 36 | self.X, self.y, 37 | test_size=0.2, 38 | random_state=0) 39 | 40 | 41 | def scale_features(self): 42 | ''' 43 | Feature scaling 44 | ''' 45 | sc = StandardScaler() 46 | self.X_train = sc.fit_transform(self.X_train) 47 | self.X_test = sc.transform(self.X_test) 48 | 49 | 50 | def build_model(self): 51 | ''' 52 | Use grid search with k-Fold cross-validation to find the best 53 | hyperparameters and accuracy 54 | ''' 55 | classifier = KerasClassifier(build_fn=_build_classifier) 56 | params = {'batch_size': [25, 32], 57 | 'epochs': [100, 500], 58 | 'optimizer': ['adam', 'rmsprop']} 59 | grid_search = GridSearchCV(estimator=classifier, 60 | param_grid=params, 61 | scoring='accuracy', 62 | cv=10) 63 | grid_search = grid_search.fit(self.X_train, self.y_train) 64 | self.best_params = grid_search.best_params_ 65 | self.best_accuracy = grid_search.best_score_ 66 | 67 | 68 | def get_accuracy(self): 69 | ''' 70 | Retrieve the best accuracy after grid search 71 | ''' 72 | print("Accuracy: " + str(self.best_accuracy)) 
def activation(z, deriv=False):
    """Sigmoid activation, or its derivative.

    Args:
        z: Scalar or NumPy array. When ``deriv`` is false this is the
            pre-activation input. When ``deriv`` is true, ``z`` must already
            be a sigmoid *output*, because the derivative is evaluated as
            ``z * (1 - z)`` rather than from the raw input.
        deriv: If true, return the derivative instead of the activation.

    Returns:
        ``1 / (1 + exp(-z))``, or ``z * (1 - z)`` when ``deriv`` is true.
    """
    # Truthiness test instead of ``== True`` (PEP 8); same result for bools.
    if deriv:
        return z * (1 - z)
    return 1 / (1 + np.exp(-z))
def backpropagation(y, theta, output, it):
    """Compute the output delta for one training iteration.

    Args:
        y: Expected outputs.
        theta: Current weights. Not used by this function; kept so the
            signature stays unchanged for existing callers.
        output: Predictions from the forward pass (sigmoid outputs).
        it: Iteration counter, used only to throttle the error report.

    Returns:
        The prediction error scaled by the sigmoid derivative at ``output``.
    """
    err = y - output
    # Report the mean absolute error once every 10000 iterations.
    if it % 10000 == 0:
        # Call form of print with a single argument: identical output on
        # Python 2 and valid syntax on Python 3 (the statement form is not).
        print('Error: ' + str(np.mean(np.abs(err))))
    delta = err * activation(output, deriv=True)
    return delta
- 1 32 | 33 | print 'Starting training' 34 | 35 | for j in xrange(100000): 36 | output = forwardpropagation(X, theta) 37 | delta = backpropagation(y, theta, output, j) 38 | theta = gradientdescent(X, theta, delta) 39 | 40 | print 'Actual values' 41 | print y 42 | print 'Predictions after training' 43 | print output 44 | 45 | if __name__ == '__main__': 46 | run() 47 | 48 | -------------------------------------------------------------------------------- /ConvolutionalNeuralNetworks/HandwrittenDigitsRecognition/source.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import tensorflow as tf 4 | from tensorflow.examples.tutorials.mnist import input_data 5 | 6 | FLAGS = None 7 | 8 | 9 | def weight_variable(shape): 10 | ''' 11 | Properly initialize the weight 12 | ''' 13 | initial = tf.truncated_normal(shape, stddev=0.1) 14 | return tf.Variable(initial) 15 | 16 | 17 | def bias_variable(shape): 18 | ''' 19 | Properly initialize the bias 20 | ''' 21 | initial = tf.constant(0.1, shape=shape) 22 | return tf.Variable(initial) 23 | 24 | 25 | def conv2d(x, W): 26 | ''' 27 | Convolution with stride of one and zero padding 28 | ''' 29 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 30 | 31 | 32 | def max_pool_2x2(x): 33 | ''' 34 | Max pooling over 2x2 blocks 35 | ''' 36 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], 37 | strides=[1, 2, 2, 1], padding='SAME') 38 | 39 | 40 | def main(_): 41 | # Import data 42 | mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True) 43 | 44 | # Create the model 45 | x = tf.placeholder(tf.float32, [None, 784]) 46 | x_image = tf.reshape(x, [-1, 28, 28, 1]) 47 | 48 | # First convolutional layer - 32 features for each 5x5 patch 49 | W_conv1 = weight_variable([5, 5, 1, 32]) 50 | b_conv1 = bias_variable([32]) 51 | h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) 52 | h_pool1 = max_pool_2x2(h_conv1) 53 | 54 | # Second convolutional layer - 64 
features for each 5x5 patch 55 | W_conv2 = weight_variable([5, 5, 32, 64]) 56 | b_conv2 = bias_variable([64]) 57 | h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) 58 | h_pool2 = max_pool_2x2(h_conv2) 59 | 60 | # Densely connected layer - 1024 neurons for the 7x7 pixels image 61 | W_fc1 = weight_variable([7 * 7 * 64, 1024]) 62 | b_fc1 = bias_variable([1024]) 63 | h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64]) 64 | h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) 65 | 66 | # Dropout 67 | keep_prob = tf.placeholder(tf.float32) 68 | h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) 69 | 70 | # Readout layer 71 | W_fc2 = weight_variable([1024, 10]) 72 | b_fc2 = bias_variable([10]) 73 | y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2 74 | 75 | # Define loss and optimizer 76 | y_ = tf.placeholder(tf.float32, [None, 10]) 77 | 78 | # Compute the cross entropy 79 | cross_entropy = tf.reduce_mean( 80 | tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)) 81 | train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) 82 | correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) 83 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 84 | 85 | sess = tf.InteractiveSession() 86 | sess.run(tf.global_variables_initializer()) 87 | 88 | # Train 89 | for i in range(20000): 90 | batch_xs, batch_ys = mnist.train.next_batch(50) 91 | if i % 100 == 0: 92 | train_accuracy = accuracy.eval(feed_dict={ 93 | x: batch_xs, 94 | y_: batch_ys, 95 | keep_prob: 1.0 96 | }) 97 | print("Step %d: training accuracy %g" % (i, train_accuracy)) 98 | train_step.run(feed_dict={ 99 | x: batch_xs, 100 | y_: batch_ys, 101 | keep_prob: 0.5 102 | }) 103 | 104 | # Test 105 | print("Test accuracy %g" % accuracy.eval(feed_dict={ 106 | x: mnist.test.images, 107 | y_: mnist.test.labels, 108 | keep_prob: 1.0 109 | })) 110 | 111 | if __name__ == '__main__': 112 | parser = argparse.ArgumentParser() 113 | parser.add_argument('--data_dir', type=str, 
def _activation_summary(x):
    """Create summaries for an activation tensor.

    Adds a histogram of the activations and a scalar with the fraction of
    zero entries (sparsity), both named after the tensor's op.

    Args:
        x: Tensor whose activations should be summarised.
    """
    # Strip any '<TOWER_NAME>_<digits>/' prefix from the op name so the
    # summary tag does not depend on the tower the op was built under.
    # The previous pattern '%s_[0-9]&*/' contained a stray '&', which meant
    # only single-digit tower indices were ever matched.
    tensor_name = re.sub(r'%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
    tf.summary.histogram(tensor_name + '/activations', x)
    tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
with tf.device('/cpu:0'): 48 | dtype = tf.float16 if FLAGS.use_fp16 else tf.float32 49 | var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype) 50 | return var 51 | 52 | 53 | def _variable_with_weight_decay(name, shape, stddev, wd): 54 | ''' 55 | Helper to create an initialized variable with weight decay 56 | ''' 57 | dtype = tf.float16 if FLAGS.use_fp16 else tf.float32 58 | var = _variable_on_cpu( 59 | name, 60 | shape, 61 | tf.truncated_normal_initializer(stddev=stddev, dtype=dtype) 62 | ) 63 | if wd is not None: 64 | weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss') 65 | tf.add_to_collection('losses', weight_decay) 66 | return var 67 | 68 | 69 | def distorted_inputs(): 70 | ''' 71 | Construct distorted input for training using the Reader ops 72 | ''' 73 | if not FLAGS.data_dir: 74 | raise ValueError('Please supply a data_dir') 75 | data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin') 76 | images, labels = read_input.distorted_inputs(data_dir=data_dir, 77 | batch_size=FLAGS.batch_size) 78 | if FLAGS.use_fp16: 79 | images = tf.cast(images, tf.float16) 80 | labels = tf.cast(labels, tf.float16) 81 | return images, labels 82 | 83 | 84 | def inputs(eval_data): 85 | ''' 86 | Construct input for evaluation using the Reader ops 87 | ''' 88 | if not FLAGS.data_dir: 89 | raise ValueError('Please supply a data_dir') 90 | data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin') 91 | images, labels = read_input.inputs(eval_data=eval_data, 92 | data_dir=data_dir, 93 | batch_size=FLAGS.batch_size) 94 | if FLAGS.use_fp16: 95 | images = tf.cast(images, tf.float16) 96 | labels = tf.cast(labels, tf.float16) 97 | return images, labels 98 | 99 | 100 | def inference(images): 101 | ''' 102 | Build the model 103 | ''' 104 | # Convolutional Layer 1 105 | with tf.variable_scope('conv1') as scope: 106 | kernel = _variable_with_weight_decay('weights', 107 | shape=[5, 5, 3, 64], 108 | stddev=5e-2, 109 | wd=0.0) 110 | conv = 
tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME') 111 | biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0)) 112 | pre_activation = tf.nn.bias_add(conv, biases) 113 | conv1 = tf.nn.relu(pre_activation, name=scope.name) 114 | _activation_summary(conv1) 115 | 116 | # Pooling 1 117 | pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 118 | padding='SAME', name='pool1') 119 | 120 | # Normalization 1 121 | norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, 122 | name='norm1') 123 | 124 | # Convolutional Layer 2 125 | with tf.variable_scope('conv2') as scope: 126 | kernel = _variable_with_weight_decay('weights', 127 | shape=[5, 5, 64, 64], 128 | stddev=5e-2, 129 | wd=0.0) 130 | conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME') 131 | biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1)) 132 | pre_activation = tf.nn.bias_add(conv, biases) 133 | conv2 = tf.nn.relu(pre_activation, name=scope.name) 134 | _activation_summary(conv2) 135 | 136 | # Normalization 2 137 | norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, 138 | name='norm2') 139 | 140 | # Pooling 2 141 | pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], 142 | padding='SAME', name='pool2') 143 | 144 | # Local 3 145 | with tf.variable_scope('local3') as scope: 146 | # Move everything into depth so we can perform a single matrix multiply 147 | reshape = tf.reshape(pool2, [FLAGS.batch_size, -1]) 148 | dim = reshape.get_shape()[1].value 149 | weights = _variable_with_weight_decay('weights', shape=[dim, 384], 150 | stddev=0.04, wd=0.004) 151 | biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1)) 152 | local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name) 153 | _activation_summary(local3) 154 | 155 | # Local 4 156 | with tf.variable_scope('local4') as scope: 157 | weights = _variable_with_weight_decay('weights', shape=[384, 192], 158 | 
stddev=0.04, wd=0.004) 159 | biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1)) 160 | local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name) 161 | _activation_summary(local4) 162 | 163 | # Linear layer 164 | with tf.variable_scope('softmax_linear') as scope: 165 | weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES], 166 | stddev=1/192.0, wd=0.0) 167 | biases = _variable_on_cpu('biases', [NUM_CLASSES], tf.constant_initializer(0.0)) 168 | softmax_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name) 169 | _activation_summary(softmax_linear) 170 | 171 | return softmax_linear 172 | 173 | 174 | def loss(logits, labels): 175 | ''' 176 | Add L2 loss to all the trainable variables 177 | ''' 178 | # Calculate the average cross entropy loss across the batch 179 | labels = tf.cast(labels, tf.int64) 180 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 181 | labels=labels, logits=logits, name='cross_entropy_per_example') 182 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') 183 | tf.add_to_collection('losses', cross_entropy_mean) 184 | 185 | return tf.add_n(tf.get_collection('losses'), name='total_loss') 186 | 187 | 188 | def _add_loss_summaries(total_loss): 189 | ''' 190 | Add summaries for losses in model 191 | ''' 192 | # Compute the moving average of all individual losses and the total loss 193 | loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') 194 | losses = tf.get_collection('losses') 195 | loss_averages_op = loss_averages.apply(losses + [total_loss]) 196 | 197 | # Attach a scalar summary to all individual losses and the total loss 198 | # Do the same for the averaged version of the losses 199 | for l in losses + [total_loss]: 200 | tf.summary.scalar(l.op.name + ' (raw)', l) 201 | tf.summary.scalar(l.op.name, loss_averages.average(l)) 202 | 203 | return loss_averages_op 204 | 205 | 206 | def train(total_loss, global_step): 207 | ''' 208 | 
def maybe_download_and_extract():
    """Download the dataset tarball and extract it, skipping finished steps.

    The archive named by ``DATA_URL`` is stored in ``FLAGS.data_dir`` (created
    if missing). It is downloaded only when the file is not already present
    and extracted only when the extracted directory does not exist yet.
    """
    dest_directory = FLAGS.data_dir
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    filename = DATA_URL.split('/')[-1]
    filepath = os.path.join(dest_directory, filename)
    if not os.path.exists(filepath):
        def _progress(count, block_size, total_size):
            # Rewrite the same console line with the current percentage.
            sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
                float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()
        filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes')
    # Must match the directory the input functions read from
    # ('cifar-10-batches-bin'). The previous name never existed on disk, so
    # the archive was re-extracted on every call.
    extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin')
    if not os.path.exists(extracted_dir_path):
        # NOTE(review): extractall() trusts member paths inside the archive;
        # acceptable only while DATA_URL points at a trusted source.
        with tarfile.open(filepath, 'r:gz') as archive:
            archive.extractall(dest_directory)
saver.restore(sess, ckpt.model_checkpoint_path) 33 | global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1] 34 | else: 35 | print('No checkpoint file found') 36 | return 37 | 38 | # Start the queue runners 39 | coord = tf.train.Coordinator() 40 | try: 41 | threads = [] 42 | for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS): 43 | threads.extend(qr.create_threads(sess, coord=coord, daemon=True, 44 | start=True)) 45 | 46 | num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size)) 47 | true_count = 0 48 | total_sample_count = num_iter * FLAGS.batch_size 49 | step = 0 50 | while step < num_iter and not coord.should_stop(): 51 | predictions = sess.run([top_k_op]) 52 | true_count += np.sum(predictions) 53 | step += 1 54 | 55 | # Compute precision @ 1 56 | precision = true_count / total_sample_count 57 | print('%s: precision @ 1 = %.3f' % (datetime.now(), precision)) 58 | 59 | summary = tf.Summary() 60 | summary.ParseFromString(sess.run(summary_op)) 61 | summary.value.add(tag='Precision @ 1', simple_value=precision) 62 | summary_writer.add_summary(summary, global_step) 63 | except Exception as e: 64 | coord.request_stop(e) 65 | 66 | coord.request_stop() 67 | coord.join(threads, stop_grace_period_secs=10) 68 | 69 | 70 | def evaluate(): 71 | ''' 72 | Eval for a number of steps 73 | ''' 74 | with tf.Graph().as_default() as g: 75 | # Get images and labels for the dataset 76 | eval_data = FLAGS.eval_data == 'test' 77 | images, labels = build_model.inputs(eval_data=eval_data) 78 | 79 | # Build a Graph that computes the logits predictions from the 80 | # inference model 81 | logits = build_model.inference(images) 82 | 83 | # Calculate predictions 84 | top_k_op = tf.nn.in_top_k(logits, labels, 1) 85 | 86 | # Restore the moving average version of the learned variables for eval 87 | variable_averages = tf.train.ExponentialMovingAverage( 88 | build_model.MOVING_AVERAGE_DECAY) 89 | variables_to_restore = variable_averages.variables_to_restore() 90 | saver 
= tf.train.Saver(variables_to_restore) 91 | 92 | # Build the summary operation based on the TF collection of Summaries 93 | summary_op = tf.summary.merge_all() 94 | 95 | summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g) 96 | 97 | while True: 98 | eval_once(saver, summary_writer, top_k_op, summary_op) 99 | if FLAGS.run_once: 100 | break 101 | time.sleep(FLAGS.eval_interval_secs) 102 | 103 | 104 | def main(argv=None): 105 | build_model.maybe_download_and_extract() 106 | if tf.gfile.Exists(FLAGS.eval_dir): 107 | tf.gfile.DeleteRecursively(FLAGS.eval_dir) 108 | tf.gfile.MakeDirs(FLAGS.eval_dir) 109 | evaluate() 110 | 111 | 112 | if __name__ == '__main__': 113 | tf.app.run() 114 | -------------------------------------------------------------------------------- /ConvolutionalNeuralNetworks/ObjectRecognition-10/read_input.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | 4 | IMAGE_SIZE = 24 5 | NUM_CLASSES = 10 6 | NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000 7 | NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000 8 | 9 | 10 | def read_data(filename_queue): 11 | ''' 12 | Reads and parses examples from data files 13 | ''' 14 | class Record(object): 15 | pass 16 | result = Record() 17 | 18 | # Dimensions of the images in the dataset 19 | label_bytes = 1 20 | result.height = 32 21 | result.width = 32 22 | result.depth = 3 23 | 24 | image_bytes = result.height * result.width * result.depth 25 | record_bytes = label_bytes + image_bytes 26 | 27 | # Read a record 28 | reader = tf.FixedLengthRecordReader(record_bytes=record_bytes) 29 | result.key, value = reader.read(filename_queue) 30 | 31 | # Convert from str to a vector of uint8 that is record_bytes lng. 
32 | record_bytes = tf.decode_raw(value, tf.uint8) 33 | 34 | # The first bytes represent the label 35 | # which we convert from uint8 to uint32 36 | result.label = tf.cast( 37 | tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32 38 | ) 39 | 40 | # The remaining bytes after the label represent the image 41 | # which we reshape from [depth * height * width] to [depth, height, width] 42 | depth_major = tf.reshape( 43 | tf.strided_slice(record_bytes, [label_bytes], [label_bytes + image_bytes]), 44 | [result.depth, result.height, result.width] 45 | ) 46 | result.uint8image = tf.transpose(depth_major, [1, 2, 0]) 47 | 48 | return result 49 | 50 | 51 | def _generate_image_and_label_batch(image, label, min_queue_examples, batch_size, 52 | shuffle): 53 | ''' 54 | Construct a queued batch of images and labels 55 | ''' 56 | num_preprocess_threads = 16 57 | if shuffle: 58 | images, label_batch = tf.train.shuffle_batch( 59 | [image, label], 60 | batch_size=batch_size, 61 | num_threads=num_preprocess_threads, 62 | capacity=min_queue_examples + 3 * batch_size, 63 | min_after_dequeue= min_queue_examples 64 | ) 65 | else: 66 | images, label_batch = tf.train.batch( 67 | [image, label], 68 | batch_size=batch_size, 69 | num_threads=num_preprocess_threads, 70 | capacity=min_queue_examples + 3 * batch_size 71 | ) 72 | 73 | # Display the training images in the visualizer 74 | tf.summary.image('images', images) 75 | 76 | return images, tf.reshape(label_batch, [batch_size]) 77 | 78 | 79 | def distorted_inputs(data_dir, batch_size): 80 | ''' 81 | Construct distorted input for training using the Reader ops 82 | ''' 83 | filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i) 84 | for i in xrange(1, 6)] 85 | for f in filenames: 86 | if not tf.gfile.Exists(f): 87 | raise ValueError('Failed to find file: ' + f) 88 | 89 | # Create a queue that produces the filenames to read 90 | filename_queue = tf.train.string_input_producer(filenames) 91 | 92 | # Read examples from files in the 
filename queue 93 | read_input = read_data(filename_queue) 94 | reshaped_image = tf.cast(read_input.uint8image, tf.float32) 95 | 96 | height = IMAGE_SIZE 97 | width = IMAGE_SIZE 98 | 99 | # Image processing for training the network 100 | 101 | # Randomly crop a [height, width] section of the image 102 | distorted_image = tf.random_crop(reshaped_image, [height, width, 3]) 103 | 104 | # Randomly flip the image horizontally 105 | distorted_image = tf.image.random_flip_left_right(distorted_image) 106 | 107 | # These operations are not commutative 108 | distorted_image = tf.image.random_brightness(distorted_image, 109 | max_delta=63) 110 | distorted_image = tf.image.random_contrast(distorted_image, 111 | lower=0.2, upper=1.8) 112 | 113 | # Subtract off the mean and divide by the variance of the pixels 114 | float_image = tf.image.per_image_standardization(distorted_image) 115 | 116 | # Set the shapes of tensors 117 | float_image.set_shape([height, width, 3]) 118 | read_input.label.set_shape([1]) 119 | 120 | # Ensure that the random shuffling has good mixing properties 121 | min_fraction_of_examples_in_queue = 0.4 122 | min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN * 123 | min_fraction_of_examples_in_queue) 124 | print('Filling queue with %d images before starting to train. ' 125 | 'This will take a few minutes.' 
def inputs(eval_data, data_dir, batch_size):
    '''
    Construct input for evaluation using the Reader ops

    Args:
        eval_data: truthy to read test_batch.bin, falsy to read the five
            data_batch_N.bin training files
        data_dir: directory containing the .bin files
        batch_size: number of images per batch

    Returns:
        The (images, labels) pair built by _generate_image_and_label_batch
        with shuffling disabled

    Raises:
        ValueError: if one of the expected data files does not exist
    '''
    if not eval_data:
        # range (rather than xrange) works on both Python 2 and Python 3
        filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
                     for i in range(1, 6)]
        num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
    else:
        filenames = [os.path.join(data_dir, 'test_batch.bin')]
        num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_EVAL

    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)

    # Create a queue that produces the filenames to read
    filename_queue = tf.train.string_input_producer(filenames)

    # Read examples from files in the filename queue
    read_input = read_data(filename_queue)
    reshaped_image = tf.cast(read_input.uint8image, tf.float32)

    height = IMAGE_SIZE
    width = IMAGE_SIZE

    # Image processing for evaluation
    # Crop the central [height, width] of the image
    resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image,
                                                           height, width)

    # Subtract off the mean and divide by the standard deviation of the pixels
    float_image = tf.image.per_image_standardization(resized_image)

    # Set the shapes of tensors
    float_image.set_shape([height, width, 3])
    read_input.label.set_shape([1])

    # Size the example queue as a fraction of one epoch; batching below is
    # requested with shuffle=False
    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(num_examples_per_epoch *
                             min_fraction_of_examples_in_queue)

    # Generate a batch of images and labels by building up a queue of examples
    return _generate_image_and_label_batch(float_image, read_input.label,
                                           min_queue_examples, batch_size,
                                           shuffle=False)
class ReadInputTest(tf.test.TestCase):
    '''
    Checks that read_input.read_data decodes fixed-size binary records
    '''

    def _record(self, label, red, green, blue):
        '''
        Build one raw record and the image it should decode to

        The record is the label byte followed by 32 * 32 red bytes, then the
        green bytes, then the blue bytes; the expected image is a 32 x 32
        grid of [red, green, blue] pixels.
        '''
        image_size = 32 * 32
        record = bytes(bytearray([label] + [red] * image_size +
                                 [green] * image_size + [blue] * image_size))
        expected = [[[red, green, blue]] * 32] * 32
        return record, expected

    def testSimple(self):
        '''
        Write three records to a file and read them back one by one
        '''
        labels = [9, 3, 0]
        records = [self._record(labels[0], 0, 128, 255),
                   self._record(labels[1], 255, 0, 1),
                   self._record(labels[2], 254, 255, 0)]
        contents = b"".join([record for record, _ in records])
        expected = [expected for _, expected in records]
        filename = os.path.join(self.get_temp_dir(), "cnn-obj-rec-10")
        # Close (and therefore flush) the file before the reader opens it
        with open(filename, "wb") as record_file:
            record_file.write(contents)

        with self.test_session() as sess:
            q = tf.FIFOQueue(99, [tf.string], shapes=())
            q.enqueue([filename]).run()
            q.close().run()
            result = read_input.read_data(q)

            for i in range(3):
                key, label, uint8image = sess.run([
                    result.key, result.label, result.uint8image])
                self.assertEqual("%s:%d" % (filename, i), tf.compat.as_text(key))
                self.assertEqual(labels[i], label)
                self.assertAllEqual(expected[i], uint8image)

            # The queue is closed, so a fourth read must run out of input
            with self.assertRaises(tf.errors.OutOfRangeError):
                sess.run([result.key, result.uint8image])


if __name__ == "__main__":
    tf.test.main()
def train():
    '''
    Build the training graph and run it until a hook stops the session
    '''
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()

        # Input pipeline -> logits -> loss -> single training step
        images, labels = build_model.distorted_inputs()
        logits = build_model.inference(images)
        loss = build_model.loss(logits, labels)
        train_op = build_model.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            '''
            Prints the loss and throughput every FLAGS.log_frequency steps
            '''
            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                # Ask the session to also fetch the loss on this run
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency != 0:
                    return

                now = time.time()
                duration = now - self._start_time
                self._start_time = now

                loss_value = run_values.results
                # NOTE(review): FLAGS.batch_size is not among the flags
                # visible here; presumably build_model defines it
                examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                sec_per_batch = float(duration / FLAGS.log_frequency)

                message = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                           'sec/batch)')
                print(message % (datetime.now(), self._step, loss_value,
                                 examples_per_sec, sec_per_batch))

        hooks = [
            tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
            tf.train.NanTensorHook(loss),
            _LoggerHook(),
        ]
        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=hooks,
                config=session_config) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)


def main(argv=None):
    '''
    Fetch the dataset, reset the training directory and start training
    '''
    build_model.maybe_download_and_extract()

    # Start from an empty train_dir: remove whatever an earlier run left
    train_dir = FLAGS.train_dir
    if tf.gfile.Exists(train_dir):
        tf.gfile.DeleteRecursively(train_dir)
    tf.gfile.MakeDirs(train_dir)

    train()


if __name__ == '__main__':
    tf.app.run()
1 | MIT License 2 | 3 | Copyright (c) 2017 Raul Butuc 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MachineLearning 2 | Machine Learning Projects (will eventually contain every popular technique/model/algorithm). 3 | 4 | # Currently included 5 | 6 | ``` 7 | . 
8 | ├── Regression 9 | │   ├── Linear 10 | │   │   └── Basic 11 | │   ├── Logistic 12 | │   │   ├── Basic 13 | │   │   └── SocialNetworkAds 14 | │   └── Softmax 15 | │   └── HandwrittenDigitsRecognition 16 | ├── ArtificialNeuralNetworks 17 | │   ├── SingleLayerFeedforward 18 | │   ├── MultiLayerBasic 19 | │   └── BusinessProblem 20 | └── ConvolutionalNeuralNetworks 21 | ├── HandwrittenDigitsRecognition 22 | └── ObjectRecognition-10 23 | ``` 24 | 25 | # Coming soon 26 | 27 | Recurrent Neural Networks 28 | 29 | Reinforcement Learning 30 | 31 | Deep Reinforcement Learning 32 | 33 | Support Vector Machines 34 | 35 | Clustering 36 | 37 | Dimensionality Reduction 38 | 39 | Anomaly Detection 40 | 41 | Recommender Systems 42 | -------------------------------------------------------------------------------- /Regression/Linear/Basic/data.csv: -------------------------------------------------------------------------------- 1 | 32.502345269453031,31.70700584656992 2 | 53.426804033275019,68.77759598163891 3 | 61.530358025636438,62.562382297945803 4 | 47.475639634786098,71.546632233567777 5 | 59.813207869512318,87.230925133687393 6 | 55.142188413943821,78.211518270799232 7 | 52.211796692214001,79.64197304980874 8 | 39.299566694317065,59.171489321869508 9 | 48.10504169176825,75.331242297063056 10 | 52.550014442733818,71.300879886850353 11 | 45.419730144973755,55.165677145959123 12 | 54.351634881228918,82.478846757497919 13 | 44.164049496773352,62.008923245725825 14 | 58.16847071685779,75.392870425994957 15 | 56.727208057096611,81.43619215887864 16 | 48.955888566093719,60.723602440673965 17 | 44.687196231480904,82.892503731453715 18 | 60.297326851333466,97.379896862166078 19 | 45.618643772955828,48.847153317355072 20 | 38.816817537445637,56.877213186268506 21 | 66.189816606752601,83.878564664602763 22 | 65.41605174513407,118.59121730252249 23 | 47.48120860786787,57.251819462268969 24 | 41.57564261748702,51.391744079832307 25 | 51.84518690563943,75.380651665312357 26 | 
59.370822011089523,74.765564032151374 27 | 57.31000343834809,95.455052922574737 28 | 63.615561251453308,95.229366017555307 29 | 46.737619407976972,79.052406169565586 30 | 50.556760148547767,83.432071421323712 31 | 52.223996085553047,63.358790317497878 32 | 35.567830047746632,41.412885303700563 33 | 42.436476944055642,76.617341280074044 34 | 58.16454011019286,96.769566426108199 35 | 57.504447615341789,74.084130116602523 36 | 45.440530725319981,66.588144414228594 37 | 61.89622268029126,77.768482417793024 38 | 33.093831736163963,50.719588912312084 39 | 36.436009511386871,62.124570818071781 40 | 37.675654860850742,60.810246649902211 41 | 44.555608383275356,52.682983366387781 42 | 43.318282631865721,58.569824717692867 43 | 50.073145632289034,82.905981485070512 44 | 43.870612645218372,61.424709804339123 45 | 62.997480747553091,115.24415280079529 46 | 32.669043763467187,45.570588823376085 47 | 40.166899008703702,54.084054796223612 48 | 53.575077531673656,87.994452758110413 49 | 33.864214971778239,52.725494375900425 50 | 64.707138666121296,93.576118692658241 51 | 38.119824026822805,80.166275447370964 52 | 44.502538064645101,65.101711570560326 53 | 40.599538384552318,65.562301260400375 54 | 41.720676356341293,65.280886920822823 55 | 51.088634678336796,73.434641546324301 56 | 55.078095904923202,71.13972785861894 57 | 41.377726534895203,79.102829683549857 58 | 62.494697427269791,86.520538440347153 59 | 49.203887540826003,84.742697807826218 60 | 41.102685187349664,59.358850248624933 61 | 41.182016105169822,61.684037524833627 62 | 50.186389494880601,69.847604158249183 63 | 52.378446219236217,86.098291205774103 64 | 50.135485486286122,59.108839267699643 65 | 33.644706006191782,69.89968164362763 66 | 39.557901222906828,44.862490711164398 67 | 56.130388816875467,85.498067778840223 68 | 57.362052133238237,95.536686846467219 69 | 60.269214393997906,70.251934419771587 70 | 35.678093889410732,52.721734964774988 71 | 31.588116998132829,50.392670135079896 72 | 
def compute_error_for_given_points(theta_0, theta_1, points):
    '''
    Return the mean squared error of the line y = theta_0 + theta_1 * x

    Args:
        theta_0: intercept of the candidate line
        theta_1: slope of the candidate line
        points: array-like of (x, y) rows, e.g. a genfromtxt result or a
            plain list of pairs

    Returns:
        The sum of squared residuals divided by the number of points

    Raises:
        ZeroDivisionError: if points is empty (same as the loop it replaces)
    '''
    data = asarray(points, dtype=float)
    m = len(data)
    if m == 0:
        raise ZeroDivisionError('cannot average the error over zero points')

    # One vectorized pass instead of a Python-level loop over the rows
    residuals = data[:, 1] - (theta_0 + theta_1 * data[:, 0])
    return (residuals ** 2).sum() / float(m)
def step_gradient(current_theta_0, current_theta_1, points, learning_rate):
    '''
    Perform one gradient-descent step on the mean squared error

    Args:
        current_theta_0: current intercept
        current_theta_1: current slope
        points: array-like of (x, y) rows
        learning_rate: step size applied to both gradients

    Returns:
        (new_theta_0, new_theta_1) after the step; the inputs are returned
        unchanged when points is empty
    '''
    data = asarray(points, dtype=float)
    if len(data) == 0:
        # No rows means both gradients are zero
        return current_theta_0, current_theta_1

    m = float(len(data))
    x = data[:, 0]
    y = data[:, 1]

    # Partial derivatives of mean((y - (theta_0 + theta_1 * x)) ** 2)
    residuals = y - (current_theta_0 + current_theta_1 * x)
    gradient_theta_0 = -(2 / m) * residuals.sum()
    gradient_theta_1 = -(2 / m) * (x * residuals).sum()

    new_theta_0 = current_theta_0 - (learning_rate * gradient_theta_0)
    new_theta_1 = current_theta_1 - (learning_rate * gradient_theta_1)

    return new_theta_0, new_theta_1


def gradient_descent_runner(points, starting_theta_0, starting_theta_1, learning_rate, num_iterations):
    '''
    Run num_iterations gradient-descent steps from the starting parameters

    Returns:
        [theta_0, theta_1] after the last step
    '''
    theta_0 = starting_theta_0
    theta_1 = starting_theta_1

    # Convert once up front instead of copying the data on every iteration
    data = asarray(points, dtype=float)

    for _ in range(num_iterations):
        theta_0, theta_1 = step_gradient(theta_0, theta_1, data, learning_rate)
    return [theta_0, theta_1]


def run():
    '''
    Fit a line to data.csv (read from the working directory) and print it
    '''
    points = genfromtxt('data.csv', delimiter = ',')
    learning_rate = 0.0001
    initial_theta_0 = 0
    initial_theta_1 = 0
    num_iterations = 1000
    [theta_0, theta_1] = gradient_descent_runner(points, initial_theta_0, initial_theta_1, learning_rate, num_iterations)
    print(theta_0)
    print(theta_1)

if __name__ == '__main__':
    run()
69.07014406283025,52.74046973016765,1 18 | 67.94685547711617,46.67857410673128,0 19 | 70.66150955499435,92.92713789364831,1 20 | 76.97878372747498,47.57596364975532,1 21 | 67.37202754570876,42.83843832029179,0 22 | 89.67677575072079,65.79936592745237,1 23 | 50.534788289883,48.85581152764205,0 24 | 34.21206097786789,44.20952859866288,0 25 | 77.9240914545704,68.9723599933059,1 26 | 62.27101367004632,69.95445795447587,1 27 | 80.1901807509566,44.82162893218353,1 28 | 93.114388797442,38.80067033713209,0 29 | 61.83020602312595,50.25610789244621,0 30 | 38.78580379679423,64.99568095539578,0 31 | 61.379289447425,72.80788731317097,1 32 | 85.40451939411645,57.05198397627122,1 33 | 52.10797973193984,63.12762376881715,0 34 | 52.04540476831827,69.43286012045222,1 35 | 40.23689373545111,71.16774802184875,0 36 | 54.63510555424817,52.21388588061123,0 37 | 33.91550010906887,98.86943574220611,0 38 | 64.17698887494485,80.90806058670817,1 39 | 74.78925295941542,41.57341522824434,0 40 | 34.1836400264419,75.2377203360134,0 41 | 83.90239366249155,56.30804621605327,1 42 | 51.54772026906181,46.85629026349976,0 43 | 94.44336776917852,65.56892160559052,1 44 | 82.36875375713919,40.61825515970618,0 45 | 51.04775177128865,45.82270145776001,0 46 | 62.22267576120188,52.06099194836679,0 47 | 77.19303492601364,70.45820000180959,1 48 | 97.77159928000232,86.7278223300282,1 49 | 62.07306379667647,96.76882412413983,1 50 | 91.56497449807442,88.69629254546599,1 51 | 79.94481794066932,74.16311935043758,1 52 | 99.2725269292572,60.99903099844988,1 53 | 90.54671411399852,43.39060180650027,1 54 | 34.52451385320009,60.39634245837173,0 55 | 50.2864961189907,49.80453881323059,0 56 | 49.58667721632031,59.80895099453265,0 57 | 97.64563396007767,68.86157272420604,1 58 | 32.57720016809309,95.59854761387875,0 59 | 74.24869136721598,69.82457122657193,1 60 | 71.79646205863379,78.45356224515052,1 61 | 75.3956114656803,85.75993667331619,1 62 | 35.28611281526193,47.02051394723416,0 63 | 
56.25381749711624,39.26147251058019,0 64 | 30.05882244669796,49.59297386723685,0 65 | 44.66826172480893,66.45008614558913,0 66 | 66.56089447242954,41.09209807936973,0 67 | 40.45755098375164,97.53518548909936,1 68 | 49.07256321908844,51.88321182073966,0 69 | 80.27957401466998,92.11606081344084,1 70 | 66.74671856944039,60.99139402740988,1 71 | 32.72283304060323,43.30717306430063,0 72 | 64.0393204150601,78.03168802018232,1 73 | 72.34649422579923,96.22759296761404,1 74 | 60.45788573918959,73.09499809758037,1 75 | 58.84095621726802,75.85844831279042,1 76 | 99.82785779692128,72.36925193383885,1 77 | 47.26426910848174,88.47586499559782,1 78 | 50.45815980285988,75.80985952982456,1 79 | 60.45555629271532,42.50840943572217,0 80 | 82.22666157785568,42.71987853716458,0 81 | 88.9138964166533,69.80378889835472,1 82 | 94.83450672430196,45.69430680250754,1 83 | 67.31925746917527,66.58935317747915,1 84 | 57.23870631569862,59.51428198012956,1 85 | 80.36675600171273,90.96014789746954,1 86 | 68.46852178591112,85.59430710452014,1 87 | 42.0754545384731,78.84478600148043,0 88 | 75.47770200533905,90.42453899753964,1 89 | 78.63542434898018,96.64742716885644,1 90 | 52.34800398794107,60.76950525602592,0 91 | 94.09433112516793,77.15910509073893,1 92 | 90.44855097096364,87.50879176484702,1 93 | 55.48216114069585,35.57070347228866,0 94 | 74.49269241843041,84.84513684930135,1 95 | 89.84580670720979,45.35828361091658,1 96 | 83.48916274498238,48.38028579728175,1 97 | 42.2617008099817,87.10385094025457,1 98 | 99.31500880510394,68.77540947206617,1 99 | 55.34001756003703,64.9319380069486,1 100 | 74.77589300092767,89.52981289513276,1 101 | -------------------------------------------------------------------------------- /Regression/Logistic/Basic/source.py: -------------------------------------------------------------------------------- 1 | from numpy import loadtxt, where, zeros, e, array, log, ones, mean, where 2 | from pylab import scatter, show, legend, xlabel, ylabel, plot 3 | from scipy.optimize 
class MLHelper:
    '''
    Small collection of shared numeric helpers
    '''

    @staticmethod
    def sigmoid(X):
        '''
        Return the logistic function 1 / (1 + e^-X), element-wise for arrays
        '''
        return 1.0 / (1.0 + e ** (-1.0 * X))


class Admission:
    '''
    Logistic regression on data.csv: two exam scores and a 0/1 label
    '''

    # Populated by __init__
    data = None  # raw table loaded from data.csv
    X = None     # first two columns (exam scores)
    y = None     # third column (label), reshaped to (m, 1)
    pos = None   # indices where y == 1
    neg = None   # indices where y == 0
    m = None     # number of rows
    n = None     # number of feature columns
    it = None    # X with a leading column of ones (intercept term)

    def __init__(self):
        '''
        Load data.csv from the working directory and scatter-plot it
        '''
        self.data = loadtxt('data.csv', delimiter=',')
        self.X = self.data[:, 0:2]
        self.y = self.data[:, 2]
        self.pos = where(self.y == 1)
        self.neg = where(self.y == 0)
        scatter(self.X[self.pos, 0], self.X[self.pos, 1], marker='o', c='b')
        scatter(self.X[self.neg, 0], self.X[self.neg, 1], marker='x', c='r')
        xlabel('Exam 1 score')
        ylabel('Exam 2 score')
        legend(['Admitted', 'Not Admitted'])
        # The figure is not shown here; that is left to the caller
        self.m, self.n = self.X.shape
        self.y.shape = (self.m, 1)
        self.it = ones(shape=(self.m, 3))
        self.it[:, 1:3] = self.X

    @staticmethod
    def compute_cost(theta, X, y):
        '''
        Return the logistic-regression cost J(theta)

        J = (1 / m) * (-y' log(h) - (1 - y)' log(1 - h)), h = sigmoid(X theta)

        theta is not modified: a reshaped view is used instead of assigning
        to theta.shape, so the optimizer's array keeps its shape.
        '''
        theta_row = theta.reshape(1, -1)
        m = y.size
        h = MLHelper.sigmoid(X.dot(theta_row.T))
        J = (1.0 / m) * ((-y.T.dot(log(h))) - ((1.0 - y.T).dot(log(1.0 - h))))
        # Return J itself (not its negation) so that minimizing fits the data
        return J.sum()

    @staticmethod
    def compute_grad(theta, X, y, m):
        '''
        Return the gradient of the cost, (1 / m) * X' (h - y), as a 1-D array
        '''
        theta_row = theta.reshape(1, -1)
        h = MLHelper.sigmoid(X.dot(theta_row.T))
        # Force y into a column so h - y stays (m, 1) even for a 1-D y
        delta = h - y.reshape(-1, 1)
        return (1.0 / m) * X.T.dot(delta).ravel()

    def decorated_cost(self):
        '''
        Minimize the cost with BFGS starting from theta = 0

        Returns whatever fmin_bfgs returns for these arguments.
        '''
        def f(theta):
            return Admission.compute_cost(theta, self.it, self.y)

        def fprime(theta):
            return Admission.compute_grad(theta, self.it, self.y, self.m)

        theta = zeros(3)
        return fmin_bfgs(f, theta, fprime, disp=True, maxiter=400)

    def predict(self, theta, X):
        '''
        Return an (m, 1) array holding 1 where sigmoid(X theta) > 0.5, else 0
        '''
        h = MLHelper.sigmoid(X.dot(theta.T))
        return (h.reshape(-1, 1) > 0.5).astype(float)
if __name__ == '__main__':
    admission = Admission()
    admission.decorated_cost()
    # Fixed parameters used for the plot and the report below; the value
    # returned by decorated_cost() above is not used
    theta = [-25.161272, 0.206233, 0.201470]

    # The boundary is drawn against exam 1 (column 1 of `it`), so both ends
    # of the x-range come from that column
    exam_1 = admission.it[:, 1]
    plot_x = array([min(exam_1) - 2, max(exam_1) + 2])
    # Solve theta[0] + theta[1] * x1 + theta[2] * x2 = 0 for x2
    plot_y = (- 1.0 / theta[2]) * (theta[1] * plot_x + theta[0])
    plot(plot_x, plot_y)
    legend(['Decision Boundary', 'Admitted', 'Not Admitted'])
    show()

    prob = MLHelper.sigmoid(array([1.0, 45.0, 85.0]).dot(array(theta).T))
    # Parenthesized single-argument print behaves the same on Python 2 and 3
    print('For a student with scores 45 and 85, we predict an admission probability of %f' % prob)

    p = admission.predict(array(theta), admission.it)
    print('Training accuracy: %f' % ((admission.y[where(p == admission.y)].size / float(admission.y.size)) * 100.0))
15581198,Male,31,74000,0 33 | 15729054,Female,27,137000,1 34 | 15573452,Female,21,16000,0 35 | 15776733,Female,28,44000,0 36 | 15724858,Male,27,90000,0 37 | 15713144,Male,35,27000,0 38 | 15690188,Female,33,28000,0 39 | 15689425,Male,30,49000,0 40 | 15671766,Female,26,72000,0 41 | 15782806,Female,27,31000,0 42 | 15764419,Female,27,17000,0 43 | 15591915,Female,33,51000,0 44 | 15772798,Male,35,108000,0 45 | 15792008,Male,30,15000,0 46 | 15715541,Female,28,84000,0 47 | 15639277,Male,23,20000,0 48 | 15798850,Male,25,79000,0 49 | 15776348,Female,27,54000,0 50 | 15727696,Male,30,135000,1 51 | 15793813,Female,31,89000,0 52 | 15694395,Female,24,32000,0 53 | 15764195,Female,18,44000,0 54 | 15744919,Female,29,83000,0 55 | 15671655,Female,35,23000,0 56 | 15654901,Female,27,58000,0 57 | 15649136,Female,24,55000,0 58 | 15775562,Female,23,48000,0 59 | 15807481,Male,28,79000,0 60 | 15642885,Male,22,18000,0 61 | 15789109,Female,32,117000,0 62 | 15814004,Male,27,20000,0 63 | 15673619,Male,25,87000,0 64 | 15595135,Female,23,66000,0 65 | 15583681,Male,32,120000,1 66 | 15605000,Female,59,83000,0 67 | 15718071,Male,24,58000,0 68 | 15679760,Male,24,19000,0 69 | 15654574,Female,23,82000,0 70 | 15577178,Female,22,63000,0 71 | 15595324,Female,31,68000,0 72 | 15756932,Male,25,80000,0 73 | 15726358,Female,24,27000,0 74 | 15595228,Female,20,23000,0 75 | 15782530,Female,33,113000,0 76 | 15592877,Male,32,18000,0 77 | 15651983,Male,34,112000,1 78 | 15746737,Male,18,52000,0 79 | 15774179,Female,22,27000,0 80 | 15667265,Female,28,87000,0 81 | 15655123,Female,26,17000,0 82 | 15595917,Male,30,80000,0 83 | 15668385,Male,39,42000,0 84 | 15709476,Male,20,49000,0 85 | 15711218,Male,35,88000,0 86 | 15798659,Female,30,62000,0 87 | 15663939,Female,31,118000,1 88 | 15694946,Male,24,55000,0 89 | 15631912,Female,28,85000,0 90 | 15768816,Male,26,81000,0 91 | 15682268,Male,35,50000,0 92 | 15684801,Male,22,81000,0 93 | 15636428,Female,30,116000,0 94 | 15809823,Male,26,15000,0 95 | 15699284,Female,29,28000,0 96 | 
15786993,Female,29,83000,0 97 | 15709441,Female,35,44000,0 98 | 15710257,Female,35,25000,0 99 | 15582492,Male,28,123000,1 100 | 15575694,Male,35,73000,0 101 | 15756820,Female,28,37000,0 102 | 15766289,Male,27,88000,0 103 | 15593014,Male,28,59000,0 104 | 15584545,Female,32,86000,0 105 | 15675949,Female,33,149000,1 106 | 15672091,Female,19,21000,0 107 | 15801658,Male,21,72000,0 108 | 15706185,Female,26,35000,0 109 | 15789863,Male,27,89000,0 110 | 15720943,Male,26,86000,0 111 | 15697997,Female,38,80000,0 112 | 15665416,Female,39,71000,0 113 | 15660200,Female,37,71000,0 114 | 15619653,Male,38,61000,0 115 | 15773447,Male,37,55000,0 116 | 15739160,Male,42,80000,0 117 | 15689237,Male,40,57000,0 118 | 15679297,Male,35,75000,0 119 | 15591433,Male,36,52000,0 120 | 15642725,Male,40,59000,0 121 | 15701962,Male,41,59000,0 122 | 15811613,Female,36,75000,0 123 | 15741049,Male,37,72000,0 124 | 15724423,Female,40,75000,0 125 | 15574305,Male,35,53000,0 126 | 15678168,Female,41,51000,0 127 | 15697020,Female,39,61000,0 128 | 15610801,Male,42,65000,0 129 | 15745232,Male,26,32000,0 130 | 15722758,Male,30,17000,0 131 | 15792102,Female,26,84000,0 132 | 15675185,Male,31,58000,0 133 | 15801247,Male,33,31000,0 134 | 15725660,Male,30,87000,0 135 | 15638963,Female,21,68000,0 136 | 15800061,Female,28,55000,0 137 | 15578006,Male,23,63000,0 138 | 15668504,Female,20,82000,0 139 | 15687491,Male,30,107000,1 140 | 15610403,Female,28,59000,0 141 | 15741094,Male,19,25000,0 142 | 15807909,Male,19,85000,0 143 | 15666141,Female,18,68000,0 144 | 15617134,Male,35,59000,0 145 | 15783029,Male,30,89000,0 146 | 15622833,Female,34,25000,0 147 | 15746422,Female,24,89000,0 148 | 15750839,Female,27,96000,1 149 | 15749130,Female,41,30000,0 150 | 15779862,Male,29,61000,0 151 | 15767871,Male,20,74000,0 152 | 15679651,Female,26,15000,0 153 | 15576219,Male,41,45000,0 154 | 15699247,Male,31,76000,0 155 | 15619087,Female,36,50000,0 156 | 15605327,Male,40,47000,0 157 | 15610140,Female,31,15000,0 158 | 
15791174,Male,46,59000,0 159 | 15602373,Male,29,75000,0 160 | 15762605,Male,26,30000,0 161 | 15598840,Female,32,135000,1 162 | 15744279,Male,32,100000,1 163 | 15670619,Male,25,90000,0 164 | 15599533,Female,37,33000,0 165 | 15757837,Male,35,38000,0 166 | 15697574,Female,33,69000,0 167 | 15578738,Female,18,86000,0 168 | 15762228,Female,22,55000,0 169 | 15614827,Female,35,71000,0 170 | 15789815,Male,29,148000,1 171 | 15579781,Female,29,47000,0 172 | 15587013,Male,21,88000,0 173 | 15570932,Male,34,115000,0 174 | 15794661,Female,26,118000,0 175 | 15581654,Female,34,43000,0 176 | 15644296,Female,34,72000,0 177 | 15614420,Female,23,28000,0 178 | 15609653,Female,35,47000,0 179 | 15594577,Male,25,22000,0 180 | 15584114,Male,24,23000,0 181 | 15673367,Female,31,34000,0 182 | 15685576,Male,26,16000,0 183 | 15774727,Female,31,71000,0 184 | 15694288,Female,32,117000,1 185 | 15603319,Male,33,43000,0 186 | 15759066,Female,33,60000,0 187 | 15814816,Male,31,66000,0 188 | 15724402,Female,20,82000,0 189 | 15571059,Female,33,41000,0 190 | 15674206,Male,35,72000,0 191 | 15715160,Male,28,32000,0 192 | 15730448,Male,24,84000,0 193 | 15662067,Female,19,26000,0 194 | 15779581,Male,29,43000,0 195 | 15662901,Male,19,70000,0 196 | 15689751,Male,28,89000,0 197 | 15667742,Male,34,43000,0 198 | 15738448,Female,30,79000,0 199 | 15680243,Female,20,36000,0 200 | 15745083,Male,26,80000,0 201 | 15708228,Male,35,22000,0 202 | 15628523,Male,35,39000,0 203 | 15708196,Male,49,74000,0 204 | 15735549,Female,39,134000,1 205 | 15809347,Female,41,71000,0 206 | 15660866,Female,58,101000,1 207 | 15766609,Female,47,47000,0 208 | 15654230,Female,55,130000,1 209 | 15794566,Female,52,114000,0 210 | 15800890,Female,40,142000,1 211 | 15697424,Female,46,22000,0 212 | 15724536,Female,48,96000,1 213 | 15735878,Male,52,150000,1 214 | 15707596,Female,59,42000,0 215 | 15657163,Male,35,58000.0,0 216 | 15622478,Male,47.0,43000,0 217 | 15779529,Female,60,108000,1 218 | 15636023,Male,49,65000,0 219 | 15582066,Male,40,78000,0 
220 | 15666675,Female,46,96000,0 221 | 15732987,Male,59,143000,1 222 | 15789432,Female,41,80000,0 223 | 15663161,Male,35,91000,1 224 | 15694879,Male,37,144000,1 225 | 15593715,Male,60,102000,1 226 | 15575002,Female,35,60000,0 227 | 15622171,Male,37,53000,0 228 | 15795224,Female,36,126000,1 229 | 15685346,Male,56,133000,1 230 | 15691808,Female,40,72000,0 231 | 15721007,Female,42,80000,1 232 | 15794253,Female,35,147000,1 233 | 15694453,Male,39,42000,0 234 | 15813113,Male,40,107000,1 235 | 15614187,Male,49,86000,1 236 | 15619407,Female,38,112000,0 237 | 15646227,Male,46,79000,1 238 | 15660541,Male,40,57000,0 239 | 15753874,Female,37,80000,0 240 | 15617877,Female,46,82000,0 241 | 15772073,Female,53,143000,1 242 | 15701537,Male,42,149000,1 243 | 15736228,Male,38,59000,0 244 | 15780572,Female,50,88000,1 245 | 15769596,Female,56,104000,1 246 | 15586996,Female,41,72000,0 247 | 15722061,Female,51,146000,1 248 | 15638003,Female,35,50000,0 249 | 15775590,Female,57,122000,1 250 | 15730688,Male,41,52000,0 251 | 15753102,Female,35,97000,1 252 | 15810075,Female,44,39000,0 253 | 15723373,Male,37,52000,0 254 | 15795298,Female,48,134000,1 255 | 15584320,Female,37,146000,1 256 | 15724161,Female,50,44000,0 257 | 15750056,Female,52,90000,1 258 | 15609637,Female,41,72000,0 259 | 15794493,Male,40,57000,0 260 | 15569641,Female,58,95000,1 261 | 15815236,Female,45,131000,1 262 | 15811177,Female,35,77000,0 263 | 15680587,Male,36,144000,1 264 | 15672821,Female,55,125000,1 265 | 15767681,Female,35,72000,0 266 | 15600379,Male,48,90000,1 267 | 15801336,Female,42,108000,1 268 | 15721592,Male,40,75000,0 269 | 15581282,Male,37,74000,0 270 | 15746203,Female,47,144000,1 271 | 15583137,Male,40,61000,0 272 | 15680752,Female,43,133000,0 273 | 15688172,Female,59,76000,1 274 | 15791373,Male,60,42000,1 275 | 15589449,Male,39,106000,1 276 | 15692819,Female,57,26000,1 277 | 15727467,Male,57,74000,1 278 | 15734312,Male,38,71000,0 279 | 15764604,Male,49,88000,1 280 | 15613014,Female,52,38000,1 281 | 
15759684,Female,50,36000,1 282 | 15609669,Female,59,88000,1 283 | 15685536,Male,35,61000,0 284 | 15750447,Male,37,70000,1 285 | 15663249,Female,52,21000,1 286 | 15638646,Male,48,141000,0 287 | 15734161,Female,37,93000,1 288 | 15631070,Female,37,62000,0 289 | 15761950,Female,48,138000,1 290 | 15649668,Male,41,79000,0 291 | 15713912,Female,37,78000,1 292 | 15586757,Male,39,134000,1 293 | 15596522,Male,49,89000,1 294 | 15625395,Male,55,39000,1 295 | 15760570,Male,37,77000,0 296 | 15566689,Female,35,57000,0 297 | 15725794,Female,36,63000,0 298 | 15673539,Male,42,73000,1 299 | 15705298,Female,43,112000,1 300 | 15675791,Male,45,79000,0 301 | 15747043,Male,46,117000,1 302 | 15736397,Female,58,38000,1 303 | 15678201,Male,48,74000,1 304 | 15720745,Female,37,137000,1 305 | 15637593,Male,37,79000,1 306 | 15598070,Female,40,60000,0 307 | 15787550,Male,42,54000,0 308 | 15603942,Female,51,134000,0 309 | 15733973,Female,47,113000,1 310 | 15596761,Male,36,125000,1 311 | 15652400,Female,38,50000,0 312 | 15717893,Female,42,70000,0 313 | 15622585,Male,39,96000,1 314 | 15733964,Female,38,50000,0 315 | 15753861,Female,49,141000,1 316 | 15747097,Female,39,79000,0 317 | 15594762,Female,39,75000,1 318 | 15667417,Female,54,104000,1 319 | 15684861,Male,35,55000,0 320 | 15742204,Male,45,32000,1 321 | 15623502,Male,36,60000,0 322 | 15774872,Female,52,138000,1 323 | 15611191,Female,53,82000,1 324 | 15674331,Male,41,52000,0 325 | 15619465,Female,48,30000,1 326 | 15575247,Female,48,131000,1 327 | 15695679,Female,41,60000,0 328 | 15713463,Male,41,72000,0 329 | 15785170,Female,42,75000,0 330 | 15796351,Male,36,118000,1 331 | 15639576,Female,47,107000,1 332 | 15693264,Male,38,51000,0 333 | 15589715,Female,48,119000,1 334 | 15769902,Male,42,65000,0 335 | 15587177,Male,40,65000,0 336 | 15814553,Male,57,60000,1 337 | 15601550,Female,36,54000,0 338 | 15664907,Male,58,144000,1 339 | 15612465,Male,35,79000,0 340 | 15810800,Female,38,55000,0 341 | 15665760,Male,39,122000,1 342 | 
15588080,Female,53,104000,1 343 | 15776844,Male,35,75000,0 344 | 15717560,Female,38,65000,0 345 | 15629739,Female,47,51000,1 346 | 15729908,Male,47,105000,1 347 | 15716781,Female,41,63000,0 348 | 15646936,Male,53,72000,1 349 | 15768151,Female,54,108000,1 350 | 15579212,Male,39,77000,0 351 | 15721835,Male,38,61000,0 352 | 15800515,Female,38,113000,1 353 | 15591279,Male,37,75000,0 354 | 15587419,Female,42,90000,1 355 | 15750335,Female,37,57000,0 356 | 15699619,Male,36,99000,1 357 | 15606472,Male,60,34000,1 358 | 15778368,Male,54,70000,1 359 | 15671387,Female,41,72000,0 360 | 15573926,Male,40,71000,1 361 | 15709183,Male,42,54000,0 362 | 15577514,Male,43,129000,1 363 | 15778830,Female,53,34000,1 364 | 15768072,Female,47,50000,1 365 | 15768293,Female,42,79000,0 366 | 15654456,Male,42,104000,1 367 | 15807525,Female,59,29000,1 368 | 15574372,Female,58,47000,1 369 | 15671249,Male,46,88000,1 370 | 15779744,Male,38,71000,0 371 | 15624755,Female,54,26000,1 372 | 15611430,Female,60,46000,1 373 | 15774744,Male,60,83000,1 374 | 15629885,Female,39,73000,0 375 | 15708791,Male,59,130000,1 376 | 15793890,Female,37,80000,0 377 | 15646091,Female,46,32000,1 378 | 15596984,Female,46,74000,0 379 | 15800215,Female,42,53000,0 380 | 15577806,Male,41,87000,1 381 | 15749381,Female,58,23000,1 382 | 15683758,Male,42,64000,0 383 | 15670615,Male,48,33000,1 384 | 15715622,Female,44,139000,1 385 | 15707634,Male,49,28000,1 386 | 15806901,Female,57,33000,1 387 | 15775335,Male,56,60000,1 388 | 15724150,Female,49,39000,1 389 | 15627220,Male,39,71000,0 390 | 15672330,Male,47,34000,1 391 | 15668521,Female,48,35000,1 392 | 15807837,Male,48,33000,1 393 | 15592570,Male,47,23000,1 394 | 15748589,Female,45,45000,1 395 | 15635893,Male,60,42000,1 396 | 15757632,Female,39,59000,0 397 | 15691863,Female,46,41000,1 398 | 15706071,Male,51,23000,1 399 | 15654296,Female,50,20000,1 400 | 15755018,Male,36,33000,0 401 | 15594041,Female,49,36000,1 
# /Regression/Logistic/SocialNetworkAds/source.py
"""Logistic-regression classifier for the social-network-ads dataset.

Reads a CSV file, takes two feature columns and one target column, splits
them into training and test sets, fits a ``LogisticRegression`` model and
plots the resulting decision regions for both splits.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

try:
    # ``train_test_split`` lives here since scikit-learn 0.18.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for old releases; ``sklearn.cross_validation`` was removed
    # in scikit-learn 0.20, so it must not be the primary import.
    from sklearn.cross_validation import train_test_split


class SocialNetworkAds:
    """Train and visualize a logistic-regression classifier.

    Typical call order (see ``run``): construct, ``scale_features``,
    ``fit_classifier``, ``make_confusion_matrix``, then the plot methods.
    """

    # Class-level defaults; every one is overwritten on the instance by
    # ``__init__`` or by the corresponding pipeline method.
    dataset = None
    X = X_train = X_test = None
    y = y_train = y_test = y_pred = None
    classifier = conf_mat = None

    # Colors for the first and second distinct target value, used for both
    # the decision regions and the scatter points.
    _CLASS_COLORS = ('red', 'green')

    def __init__(self, data_path='data.csv'):
        """Load the dataset and split it into training and test sets.

        Args:
            data_path: Path of the CSV file to read. Defaults to
                ``'data.csv'`` in the current working directory.
        """
        # Importing the dataset: columns 2 and 3 are the features,
        # column 4 is the target.
        self.dataset = pd.read_csv(data_path)
        self.X = self.dataset.iloc[:, [2, 3]].values
        self.y = self.dataset.iloc[:, 4].values

        # Splitting the dataset into the training set and test set
        # (25% held out, fixed seed for reproducibility).
        self.X_train, self.X_test, self.y_train, self.y_test = \
            train_test_split(self.X, self.y,
                             test_size=0.25,
                             random_state=0)

    def scale_features(self):
        """Standardize the features.

        The scaler is fitted on the training set only and then applied to
        the test set, replacing ``X_train`` and ``X_test`` in place.
        """
        sc = StandardScaler()
        self.X_train = sc.fit_transform(self.X_train)
        self.X_test = sc.transform(self.X_test)

    def fit_classifier(self):
        """Fit the classifier on the training set and predict the test set.

        Stores the fitted model in ``classifier`` and its test-set
        predictions in ``y_pred``.
        """
        self.classifier = LogisticRegression(random_state=0)
        self.classifier.fit(self.X_train, self.y_train)
        self.y_pred = self.classifier.predict(self.X_test)

    def make_confusion_matrix(self):
        """Compute the confusion matrix of ``y_test`` against ``y_pred``."""
        self.conf_mat = confusion_matrix(self.y_test, self.y_pred)

    def _plot_decision_regions(self, X_set, y_set, title, step=0.01):
        """Draw the classifier's decision regions with the samples on top.

        Args:
            X_set: 2-column feature array to scatter.
            y_set: Target values matching the rows of ``X_set``.
            title: Title of the figure.
            step: Grid resolution along both axes. The default is only
                practical for features on a small numeric range (e.g.
                after ``scale_features``); unscaled features would produce
                a very large grid.
        """
        cmap = ListedColormap(self._CLASS_COLORS)

        # Grid covering the samples with a margin of 1 on every side.
        axis_1 = np.arange(start=X_set[:, 0].min() - 1,
                           stop=X_set[:, 0].max() + 1,
                           step=step)
        axis_2 = np.arange(start=X_set[:, 1].min() - 1,
                           stop=X_set[:, 1].max() + 1,
                           step=step)
        X1, X2 = np.meshgrid(axis_1, axis_2)

        # Classify every grid point and color the plane by predicted class.
        grid_points = np.column_stack((X1.ravel(), X2.ravel()))
        regions = self.classifier.predict(grid_points).reshape(X1.shape)
        plt.contourf(X1, X2, regions, alpha=0.75, cmap=cmap)
        plt.xlim(X1.min(), X1.max())
        plt.ylim(X2.min(), X2.max())

        for i, label in enumerate(np.unique(y_set)):
            mask = y_set == label
            # ``color=`` (not ``c=``): a single RGBA tuple passed as ``c``
            # is ambiguous with a sequence of values to be color-mapped.
            plt.scatter(X_set[mask, 0], X_set[mask, 1],
                        color=cmap(i), label=label)

        plt.title(title)
        plt.xlabel('Age')
        plt.ylabel('Estimated Salary')
        plt.legend()
        plt.show()

    def plot_training(self):
        """Visualize the training set results."""
        self._plot_decision_regions(self.X_train, self.y_train,
                                    'Classifier (Training set)')

    def plot_testing(self):
        """Visualize the testing set results."""
        self._plot_decision_regions(self.X_test, self.y_test,
                                    'Classifier (Test set)')


def run():
    """Run the whole pipeline: load, scale, fit, evaluate and plot."""
    social_network_ads = SocialNetworkAds()
    social_network_ads.scale_features()
    social_network_ads.fit_classifier()
    social_network_ads.make_confusion_matrix()
    social_network_ads.plot_training()
    social_network_ads.plot_testing()


if __name__ == '__main__':
    run()
# /Regression/Softmax/HandwrittenDigitsRecognition/source.py
"""Softmax regression on the MNIST handwritten-digit dataset.

Builds a single linear layer (``y = xW + b``), trains it with gradient
descent on the softmax cross-entropy loss and prints the accuracy reached
on the test images. Uses the TensorFlow 1.x graph/session API.
"""
import argparse
import sys
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Parsed command-line flags; populated in the ``__main__`` guard below.
FLAGS = None

# Model and training hyper-parameters.
IMAGE_PIXELS = 784     # length of one flattened input image
NUM_CLASSES = 10       # width of the one-hot label vector
LEARNING_RATE = 0.5
TRAINING_STEPS = 1000
BATCH_SIZE = 100


def main(_):
    """Train the softmax model and print its test-set accuracy.

    Args:
        _: Argument list forwarded by ``tf.app.run``; unused.
    """
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    # Create the model
    x = tf.placeholder(tf.float32, [None, IMAGE_PIXELS])
    W = tf.Variable(tf.zeros([IMAGE_PIXELS, NUM_CLASSES]))
    b = tf.Variable(tf.zeros([NUM_CLASSES]))
    y = tf.matmul(x, W) + b

    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, NUM_CLASSES])

    # Compute the cross entropy; the op applies softmax to the raw logits
    # itself, so ``y`` is passed unnormalized.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
    train_step = tf.train.GradientDescentOptimizer(
        LEARNING_RATE).minimize(cross_entropy)

    # Evaluation ops: fraction of samples whose arg-max prediction matches
    # the arg-max of the one-hot label.
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # The context manager closes the session (and frees its resources)
    # even if training raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Train
        for _step in range(TRAINING_STEPS):
            batch_xs, batch_ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_step, feed_dict={
                x: batch_xs,
                y_: batch_ys
            })

        # Test
        print(sess.run(accuracy, feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels
        }))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str,
                        default='/tmp/mnist/input_data',
                        help='Directory for storing input data')
    # Unrecognized arguments are passed through to ``tf.app.run``.
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)