├── Cybersecurity-Data-Science-on-Udemy-master ├── CAPTCHA Breaker │ ├── Processing a CAPTCHA Dataset.ipynb │ ├── Training a CAPTCHA Solver Neural Network.ipynb │ └── captcha_images.7z ├── Examining the PE Header │ └── Examining the PE Header.ipynb ├── Extracting N-grams Quickly using the Hash-Gram Algorithm │ └── Extracting N-grams Quickly.ipynb ├── Extracting N-grams │ └── Extracting N-grams.ipynb ├── Featurizing the PE Header │ └── Featurizing the PE Header.ipynb ├── Handling Type I and Type II Errors │ └── Handling Type I and Type II Errors.ipynb ├── MalConv - End-to-end Deep Learning for Malicious PE Detection │ └── MalConv - End-to-end Deep Learning for Malicious PE Detection.ipynb ├── Natural Language Processing (NLP) using Hashing Vectorizer and Tf-Idf with Scikit-Learn │ └── NLP.ipynb ├── Network Behavior Anomaly Detection │ ├── Network Behavior Anomaly Detection.ipynb │ └── kddcup_corrected_subset.7z ├── PE Samples Dataset │ ├── Benign PE Samples 1.7z │ ├── Benign PE Samples 2.7z │ ├── Benign PE Samples 3.7z │ ├── Benign PE Samples 4.7z │ ├── Benign PE Samples 5.7z │ ├── Benign PE Samples 6.7z │ ├── Malicious PE Samples 1.7z │ └── Malicious PE Samples 2.7z ├── README.md ├── Tackling Class Imbalance │ ├── Tackling Class Imbalance.ipynb │ ├── X_test.npz │ ├── X_train.npz │ ├── y_test.npy │ └── y_train.npy ├── Train-Test Splitting Your Data │ └── Train-Test Splitting Your Data.ipynb └── Training an XGBoost Classifier │ └── Training an XGBoost Classifier.ipynb └── README.md /Cybersecurity-Data-Science-on-Udemy-master/CAPTCHA Breaker/Training a CAPTCHA Solver Neural Network.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 11, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "LETTER_IMAGES_FOLDER = \"extracted_letter_images\"" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 12, 15 | "metadata": {}, 16 | "outputs": 
[], 17 | "source": [ 18 | "import cv2\n", 19 | "import imutils\n", 20 | "def resize_image_to_fit(image, width, height):\n", 21 | " (h, w) = image.shape[:2]\n", 22 | " if w > h:\n", 23 | " image = imutils.resize(image, width=width)\n", 24 | " else:\n", 25 | " image = imutils.resize(image, height=height)\n", 26 | " padW = int((width - image.shape[1]) / 2.0)\n", 27 | " padH = int((height - image.shape[0]) / 2.0)\n", 28 | " image = cv2.copyMakeBorder(image, padH, padH, padW, padW,\n", 29 | " cv2.BORDER_REPLICATE)\n", 30 | " image = cv2.resize(image, (width, height))\n", 31 | " return image" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 13, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import numpy as np\n", 41 | "import os\n", 42 | "from imutils import paths\n", 43 | "data = []\n", 44 | "labels = []\n", 45 | "\n", 46 | "for image_file in imutils.paths.list_images(LETTER_IMAGES_FOLDER):\n", 47 | " image = cv2.imread(image_file)\n", 48 | " image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)\n", 49 | " image = resize_image_to_fit(image, 20, 20)\n", 50 | " image = np.expand_dims(image, axis=2)\n", 51 | " label = image_file.split(os.path.sep)[-2]\n", 52 | " data.append(image)\n", 53 | " labels.append(label)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 31, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "data = np.array(data, dtype=\"float\") / 255.0\n", 63 | "labels = np.array(labels)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 32, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "from sklearn.model_selection import train_test_split\n", 73 | "(X_train, X_test, Y_train, Y_test) = train_test_split(data, labels, test_size=0.25, random_state=0)" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 33, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "from sklearn.preprocessing import LabelBinarizer\n", 83 | "lb = 
LabelBinarizer().fit(Y_train)\n", 84 | "Y_train = lb.transform(Y_train)\n", 85 | "Y_test = lb.transform(Y_test)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 34, 91 | "metadata": {}, 92 | "outputs": [ 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "_________________________________________________________________\n", 98 | "Layer (type) Output Shape Param # \n", 99 | "=================================================================\n", 100 | "conv2d_5 (Conv2D) (None, 20, 20, 20) 520 \n", 101 | "_________________________________________________________________\n", 102 | "max_pooling2d_5 (MaxPooling2 (None, 10, 10, 20) 0 \n", 103 | "_________________________________________________________________\n", 104 | "conv2d_6 (Conv2D) (None, 10, 10, 50) 25050 \n", 105 | "_________________________________________________________________\n", 106 | "max_pooling2d_6 (MaxPooling2 (None, 5, 5, 50) 0 \n", 107 | "_________________________________________________________________\n", 108 | "flatten_2 (Flatten) (None, 1250) 0 \n", 109 | "_________________________________________________________________\n", 110 | "dense_3 (Dense) (None, 500) 625500 \n", 111 | "_________________________________________________________________\n", 112 | "dense_4 (Dense) (None, 32) 16032 \n", 113 | "=================================================================\n", 114 | "Total params: 667,102\n", 115 | "Trainable params: 667,102\n", 116 | "Non-trainable params: 0\n", 117 | "_________________________________________________________________\n" 118 | ] 119 | } 120 | ], 121 | "source": [ 122 | "from keras.models import Sequential\n", 123 | "from keras.layers.convolutional import Conv2D, MaxPooling2D\n", 124 | "from keras.layers.core import Flatten, Dense\n", 125 | "num_classes = 32\n", 126 | "model = Sequential()\n", 127 | "model.add(Conv2D(20, (5, 5), padding=\"same\", input_shape=(20, 20, 1), activation=\"relu\"))\n", 128 | 
"model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))\n", 129 | "model.add(Conv2D(50, (5, 5), padding=\"same\", activation=\"relu\"))\n", 130 | "model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))\n", 131 | "model.add(Flatten())\n", 132 | "model.add(Dense(500, activation=\"relu\"))\n", 133 | "model.add(Dense(num_classes, activation=\"softmax\"))\n", 134 | "model.compile(loss=\"categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])\n", 135 | "model.summary()" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 36, 141 | "metadata": {}, 142 | "outputs": [ 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "Train on 29058 samples, validate on 9686 samples\n", 148 | "Epoch 1/1\n", 149 | "29058/29058 [==============================] - 58s 2ms/step - loss: 0.0669 - acc: 0.9850 - val_loss: 0.0192 - val_acc: 0.9946\n" 150 | ] 151 | }, 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "" 156 | ] 157 | }, 158 | "execution_count": 36, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "model.fit(X_train, Y_train, validation_data=(X_test, Y_test), batch_size=32, epochs=10, verbose=1)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 38, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "from keras.models import save_model\n", 174 | "import pickle\n", 175 | "model.save(\"CAPTCHA_NN.model\")\n", 176 | "with open(\"labels\", \"wb\") as f:\n", 177 | " pickle.dump(lb, f)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "Prediction" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 3, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "import pickle\n", 194 | "from keras.models import load_model\n", 195 | "\n", 196 | "with open(\"labels\", \"rb\") as f:\n", 197 | " lb = pickle.load(f)\n", 198 | "model = 
load_model(\"CAPTCHA_NN.model\")" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 4, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "CAPTCHA = \"captcha_images\\\\2CXM.png\"" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 5, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "def preprocessImage(img):\n", 217 | " gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n", 218 | " gray = cv2.copyMakeBorder(gray, 8, 8, 8, 8, cv2.BORDER_REPLICATE)\n", 219 | " thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]\n", 220 | " return gray, thresh\n", 221 | "\n", 222 | "def contoursToRectangles(contours):\n", 223 | " letter_image_regions = []\n", 224 | " for contour in contours:\n", 225 | " (x, y, w, h) = cv2.boundingRect(contour)\n", 226 | " if w / h > 1.25:\n", 227 | " half_width = int(w / 2)\n", 228 | " letter_image_regions.append((x, y, half_width, h))\n", 229 | " letter_image_regions.append((x + half_width, y, half_width, h))\n", 230 | " else:\n", 231 | " letter_image_regions.append((x, y, w, h))\n", 232 | " return letter_image_regions" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 19, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "image = cv2.imread(CAPTCHA)\n", 242 | "gray, preprocessedImage = preprocessImage(image)\n", 243 | "contours = cv2.findContours(preprocessedImage.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n", 244 | "contours = contours[0] if len(contours) == 2 else contours[1]\n", 245 | "letter_image_regions = contoursToRectangles(contours)\n", 246 | "letter_image_regions = sorted(letter_image_regions, key=lambda x: x[0])\n", 247 | "predictions = []\n", 248 | "for letter_bounding_box in letter_image_regions:\n", 249 | " x, y, w, h = letter_bounding_box\n", 250 | " letter_image = gray[y - 2:y + h + 2, x - 2:x + w + 2]\n", 251 | " letter_image = resize_image_to_fit(letter_image, 20, 20)\n", 252 | " letter_image = 
np.expand_dims(letter_image, axis=2)\n", 253 | " letter_image = np.expand_dims(letter_image, axis=0) / 255.0\n", 254 | " prediction = model.predict(letter_image)\n", 255 | " letter = lb.inverse_transform(prediction)[0]\n", 256 | " predictions.append(letter)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": 20, 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "name": "stdout", 266 | "output_type": "stream", 267 | "text": [ 268 | "Predicted CAPTCHA text is: 2CXM\n", 269 | "CAPTCHA text is: 2CXM\n" 270 | ] 271 | } 272 | ], 273 | "source": [ 274 | "predicted_captcha_text = \"\".join(predictions)\n", 275 | "print(\"Predicted CAPTCHA text is: {}\".format(predicted_captcha_text))\n", 276 | "print(\"CAPTCHA text is: {}\".format(CAPTCHA.split(\"\\\\\")[-1].split(\".\")[0]))" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": null, 282 | "metadata": {}, 283 | "outputs": [], 284 | "source": [] 285 | } 286 | ], 287 | "metadata": { 288 | "kernelspec": { 289 | "display_name": "Python 3", 290 | "language": "python", 291 | "name": "python3" 292 | }, 293 | "language_info": { 294 | "codemirror_mode": { 295 | "name": "ipython", 296 | "version": 3 297 | }, 298 | "file_extension": ".py", 299 | "mimetype": "text/x-python", 300 | "name": "python", 301 | "nbconvert_exporter": "python", 302 | "pygments_lexer": "ipython3", 303 | "version": "3.6.7" 304 | } 305 | }, 306 | "nbformat": 4, 307 | "nbformat_minor": 2 308 | } 309 | -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/CAPTCHA Breaker/captcha_images.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/CAPTCHA Breaker/captcha_images.7z -------------------------------------------------------------------------------- 
/Cybersecurity-Data-Science-on-Udemy-master/Extracting N-grams Quickly using the Hash-Gram Algorithm/Extracting N-grams Quickly.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 43, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from os import listdir\n", 10 | "from nltk import ngrams\n", 11 | "import hashlib\n", 12 | "directories = [\"Benign PE Samples\", \"Malicious PE Samples\"]\n", 13 | "N=2" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 44, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "def readFile(filePath):\n", 23 | " with open(filePath, \"rb\") as binary_file:\n", 24 | " data = binary_file.read()\n", 25 | " return data\n", 26 | "def byteSequenceToNgrams(byteSequence, n):\n", 27 | " return ngrams(byteSequence, n)\n", 28 | "def hashInput(inp):\n", 29 | " return int(hashlib.md5(inp).hexdigest(), 16)\n", 30 | "def makeNgramHashable(Ngram):\n", 31 | " return bytes(Ngram)\n", 32 | "def hashFileNgramsIntoDictionary(fileNgrams, T):\n", 33 | " for Ngram in fileNgrams:\n", 34 | " hashableNgram = makeNgramHashable(Ngram)\n", 35 | " hashedAndReduced = hashInput(hashableNgram) % B\n", 36 | " T[hashedAndReduced]=T.get(hashedAndReduced,0)+1" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 45, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "B = 65521\n", 46 | "T = {}\n", 47 | "for datasetPath in directories:\n", 48 | " samples = [f for f in listdir(datasetPath)]\n", 49 | " for file in samples:\n", 50 | " filePath = datasetPath+\"/\"+file\n", 51 | " fileByteSequence = readFile(filePath)\n", 52 | " fileNgrams = byteSequenceToNgrams(fileByteSequence,N)\n", 53 | " hashFileNgramsIntoDictionary(fileNgrams,T)\n", 54 | "K1 = 1000\n", 55 | "import heapq\n", 56 | "K1_most_common_Ngrams_Using_Hash_Grams = heapq.nlargest(K1, T, key=T.get)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 
| "execution_count": 56, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "def featurizeSample(file, K1_most_common_Ngrams_Using_Hash_Grams):\n", 66 | " K1 = len(K1_most_common_Ngrams_Using_Hash_Grams)\n", 67 | " fv = K1*[0]\n", 68 | " fileByteSequence = readFile(file)\n", 69 | " fileNgrams = byteSequenceToNgrams(fileByteSequence,N)\n", 70 | " for Ngram in fileNgrams:\n", 71 | " hashableNgram = makeNgramHashable(Ngram)\n", 72 | " hashedAndReduced = hashInput(hashableNgram) % B\n", 73 | " if hashedAndReduced in K1_most_common_Ngrams_Using_Hash_Grams:\n", 74 | " index = K1_most_common_Ngrams_Using_Hash_Grams.index(hashedAndReduced)\n", 75 | " fv[index]+=1\n", 76 | " return fv" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 57, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "X = []\n", 86 | "for datasetPath in directories:\n", 87 | " samples = [f for f in listdir(datasetPath)]\n", 88 | " for file in samples:\n", 89 | " filePath = datasetPath+\"/\"+file\n", 90 | " X.append(featurizeSample(filePath, K1_most_common_Ngrams_Using_Hash_Grams))" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 62, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/plain": [ 101 | "[3,\n", 102 | " 1,\n", 103 | " 3,\n", 104 | " 6,\n", 105 | " 3,\n", 106 | " 2,\n", 107 | " 7,\n", 108 | " 4,\n", 109 | " 2,\n", 110 | " 5,\n", 111 | " 0,\n", 112 | " 6,\n", 113 | " 1,\n", 114 | " 5,\n", 115 | " 8,\n", 116 | " 1,\n", 117 | " 1,\n", 118 | " 0,\n", 119 | " 5,\n", 120 | " 3,\n", 121 | " 3,\n", 122 | " 10,\n", 123 | " 3,\n", 124 | " 4,\n", 125 | " 0,\n", 126 | " 4,\n", 127 | " 2,\n", 128 | " 3,\n", 129 | " 0,\n", 130 | " 2,\n", 131 | " 4,\n", 132 | " 5,\n", 133 | " 6,\n", 134 | " 7,\n", 135 | " 4,\n", 136 | " 3,\n", 137 | " 3,\n", 138 | " 11,\n", 139 | " 2,\n", 140 | " 1,\n", 141 | " 4,\n", 142 | " 1,\n", 143 | " 0,\n", 144 | " 4,\n", 145 | " 4,\n", 146 | " 3,\n", 147 | " 3,\n", 148 | " 5,\n", 149 | " 
5,\n", 150 | " 3,\n", 151 | " 6,\n", 152 | " 1,\n", 153 | " 4,\n", 154 | " 0,\n", 155 | " 3,\n", 156 | " 2,\n", 157 | " 5,\n", 158 | " 3,\n", 159 | " 0,\n", 160 | " 2,\n", 161 | " 2,\n", 162 | " 5,\n", 163 | " 4,\n", 164 | " 2,\n", 165 | " 2,\n", 166 | " 9,\n", 167 | " 8,\n", 168 | " 4,\n", 169 | " 3,\n", 170 | " 1,\n", 171 | " 4,\n", 172 | " 2,\n", 173 | " 9,\n", 174 | " 3,\n", 175 | " 1,\n", 176 | " 5,\n", 177 | " 3,\n", 178 | " 3,\n", 179 | " 3,\n", 180 | " 3,\n", 181 | " 13,\n", 182 | " 3,\n", 183 | " 1,\n", 184 | " 1,\n", 185 | " 1,\n", 186 | " 4,\n", 187 | " 2,\n", 188 | " 6,\n", 189 | " 6,\n", 190 | " 4,\n", 191 | " 5,\n", 192 | " 7,\n", 193 | " 1,\n", 194 | " 2,\n", 195 | " 0,\n", 196 | " 2,\n", 197 | " 3,\n", 198 | " 0,\n", 199 | " 2,\n", 200 | " 5,\n", 201 | " 2,\n", 202 | " 6,\n", 203 | " 1,\n", 204 | " 4,\n", 205 | " 1,\n", 206 | " 4,\n", 207 | " 2,\n", 208 | " 8,\n", 209 | " 3,\n", 210 | " 0,\n", 211 | " 1,\n", 212 | " 5,\n", 213 | " 1,\n", 214 | " 2,\n", 215 | " 4,\n", 216 | " 8,\n", 217 | " 2,\n", 218 | " 0,\n", 219 | " 6,\n", 220 | " 8,\n", 221 | " 2,\n", 222 | " 6,\n", 223 | " 4,\n", 224 | " 0,\n", 225 | " 5,\n", 226 | " 6,\n", 227 | " 1,\n", 228 | " 6,\n", 229 | " 2,\n", 230 | " 4,\n", 231 | " 1,\n", 232 | " 4,\n", 233 | " 1,\n", 234 | " 3,\n", 235 | " 7,\n", 236 | " 9,\n", 237 | " 4,\n", 238 | " 3,\n", 239 | " 8,\n", 240 | " 0,\n", 241 | " 5,\n", 242 | " 4,\n", 243 | " 2,\n", 244 | " 2,\n", 245 | " 2,\n", 246 | " 0,\n", 247 | " 8,\n", 248 | " 2,\n", 249 | " 0,\n", 250 | " 3,\n", 251 | " 0,\n", 252 | " 1,\n", 253 | " 3,\n", 254 | " 2,\n", 255 | " 1,\n", 256 | " 1,\n", 257 | " 1,\n", 258 | " 6,\n", 259 | " 6,\n", 260 | " 1,\n", 261 | " 4,\n", 262 | " 13,\n", 263 | " 4,\n", 264 | " 5,\n", 265 | " 5,\n", 266 | " 3,\n", 267 | " 3,\n", 268 | " 2,\n", 269 | " 1,\n", 270 | " 7,\n", 271 | " 7,\n", 272 | " 2,\n", 273 | " 1,\n", 274 | " 5,\n", 275 | " 0,\n", 276 | " 3,\n", 277 | " 1,\n", 278 | " 1,\n", 279 | " 2,\n", 280 | " 3,\n", 281 | " 3,\n", 282 | " 
2,\n", 283 | " 2,\n", 284 | " 2,\n", 285 | " 3,\n", 286 | " 11,\n", 287 | " 4,\n", 288 | " 2,\n", 289 | " 1,\n", 290 | " 4,\n", 291 | " 4,\n", 292 | " 3,\n", 293 | " 5,\n", 294 | " 5,\n", 295 | " 1,\n", 296 | " 5,\n", 297 | " 6,\n", 298 | " 1,\n", 299 | " 4,\n", 300 | " 5,\n", 301 | " 1,\n", 302 | " 4,\n", 303 | " 1,\n", 304 | " 1,\n", 305 | " 1,\n", 306 | " 2,\n", 307 | " 1,\n", 308 | " 1,\n", 309 | " 2,\n", 310 | " 2,\n", 311 | " 6,\n", 312 | " 1,\n", 313 | " 9,\n", 314 | " 3,\n", 315 | " 1,\n", 316 | " 2,\n", 317 | " 2,\n", 318 | " 2,\n", 319 | " 0,\n", 320 | " 1,\n", 321 | " 1,\n", 322 | " 1,\n", 323 | " 1,\n", 324 | " 3,\n", 325 | " 3,\n", 326 | " 0,\n", 327 | " 8,\n", 328 | " 3,\n", 329 | " 0,\n", 330 | " 4,\n", 331 | " 0,\n", 332 | " 6,\n", 333 | " 3,\n", 334 | " 4,\n", 335 | " 1,\n", 336 | " 2,\n", 337 | " 3,\n", 338 | " 3,\n", 339 | " 4,\n", 340 | " 4,\n", 341 | " 1,\n", 342 | " 3,\n", 343 | " 3,\n", 344 | " 1,\n", 345 | " 7,\n", 346 | " 4,\n", 347 | " 1,\n", 348 | " 3,\n", 349 | " 1,\n", 350 | " 7,\n", 351 | " 6,\n", 352 | " 2,\n", 353 | " 6,\n", 354 | " 5,\n", 355 | " 1,\n", 356 | " 1,\n", 357 | " 0,\n", 358 | " 2,\n", 359 | " 3,\n", 360 | " 0,\n", 361 | " 1,\n", 362 | " 3,\n", 363 | " 1,\n", 364 | " 0,\n", 365 | " 2,\n", 366 | " 0,\n", 367 | " 2,\n", 368 | " 0,\n", 369 | " 10,\n", 370 | " 2,\n", 371 | " 6,\n", 372 | " 6,\n", 373 | " 1,\n", 374 | " 1,\n", 375 | " 0,\n", 376 | " 4,\n", 377 | " 1,\n", 378 | " 5,\n", 379 | " 4,\n", 380 | " 1,\n", 381 | " 5,\n", 382 | " 1,\n", 383 | " 1,\n", 384 | " 2,\n", 385 | " 3,\n", 386 | " 1,\n", 387 | " 5,\n", 388 | " 3,\n", 389 | " 3,\n", 390 | " 1,\n", 391 | " 2,\n", 392 | " 1,\n", 393 | " 0,\n", 394 | " 3,\n", 395 | " 2,\n", 396 | " 6,\n", 397 | " 2,\n", 398 | " 5,\n", 399 | " 7,\n", 400 | " 1,\n", 401 | " 6,\n", 402 | " 1,\n", 403 | " 5,\n", 404 | " 5,\n", 405 | " 2,\n", 406 | " 1,\n", 407 | " 8,\n", 408 | " 1,\n", 409 | " 7,\n", 410 | " 3,\n", 411 | " 2,\n", 412 | " 6,\n", 413 | " 4,\n", 414 | " 3,\n", 415 | " 
6,\n", 416 | " 3,\n", 417 | " 1,\n", 418 | " 13,\n", 419 | " 6,\n", 420 | " 3,\n", 421 | " 3,\n", 422 | " 0,\n", 423 | " 3,\n", 424 | " 1,\n", 425 | " 0,\n", 426 | " 3,\n", 427 | " 9,\n", 428 | " 6,\n", 429 | " 2,\n", 430 | " 5,\n", 431 | " 1,\n", 432 | " 1,\n", 433 | " 2,\n", 434 | " 2,\n", 435 | " 1,\n", 436 | " 6,\n", 437 | " 3,\n", 438 | " 2,\n", 439 | " 1,\n", 440 | " 1,\n", 441 | " 2,\n", 442 | " 0,\n", 443 | " 1,\n", 444 | " 4,\n", 445 | " 3,\n", 446 | " 3,\n", 447 | " 3,\n", 448 | " 1,\n", 449 | " 1,\n", 450 | " 4,\n", 451 | " 2,\n", 452 | " 0,\n", 453 | " 1,\n", 454 | " 3,\n", 455 | " 5,\n", 456 | " 5,\n", 457 | " 0,\n", 458 | " 2,\n", 459 | " 0,\n", 460 | " 2,\n", 461 | " 1,\n", 462 | " 5,\n", 463 | " 4,\n", 464 | " 1,\n", 465 | " 3,\n", 466 | " 4,\n", 467 | " 2,\n", 468 | " 3,\n", 469 | " 0,\n", 470 | " 2,\n", 471 | " 3,\n", 472 | " 5,\n", 473 | " 3,\n", 474 | " 6,\n", 475 | " 2,\n", 476 | " 5,\n", 477 | " 1,\n", 478 | " 1,\n", 479 | " 3,\n", 480 | " 4,\n", 481 | " 2,\n", 482 | " 1,\n", 483 | " 1,\n", 484 | " 0,\n", 485 | " 5,\n", 486 | " 2,\n", 487 | " 1,\n", 488 | " 0,\n", 489 | " 2,\n", 490 | " 3,\n", 491 | " 2,\n", 492 | " 1,\n", 493 | " 2,\n", 494 | " 2,\n", 495 | " 0,\n", 496 | " 2,\n", 497 | " 2,\n", 498 | " 0,\n", 499 | " 3,\n", 500 | " 1,\n", 501 | " 0,\n", 502 | " 7,\n", 503 | " 2,\n", 504 | " 0,\n", 505 | " 1,\n", 506 | " 6,\n", 507 | " 5,\n", 508 | " 0,\n", 509 | " 2,\n", 510 | " 4,\n", 511 | " 4,\n", 512 | " 3,\n", 513 | " 4,\n", 514 | " 5,\n", 515 | " 8,\n", 516 | " 1,\n", 517 | " 3,\n", 518 | " 1,\n", 519 | " 5,\n", 520 | " 3,\n", 521 | " 1,\n", 522 | " 2,\n", 523 | " 2,\n", 524 | " 2,\n", 525 | " 4,\n", 526 | " 5,\n", 527 | " 2,\n", 528 | " 5,\n", 529 | " 2,\n", 530 | " 7,\n", 531 | " 0,\n", 532 | " 2,\n", 533 | " 6,\n", 534 | " 8,\n", 535 | " 5,\n", 536 | " 2,\n", 537 | " 2,\n", 538 | " 2,\n", 539 | " 4,\n", 540 | " 10,\n", 541 | " 3,\n", 542 | " 5,\n", 543 | " 3,\n", 544 | " 3,\n", 545 | " 4,\n", 546 | " 1,\n", 547 | " 1,\n", 548 | " 
10,\n", 549 | " 3,\n", 550 | " 3,\n", 551 | " 1,\n", 552 | " 2,\n", 553 | " 2,\n", 554 | " 3,\n", 555 | " 5,\n", 556 | " 0,\n", 557 | " 8,\n", 558 | " 0,\n", 559 | " 2,\n", 560 | " 4,\n", 561 | " 3,\n", 562 | " 7,\n", 563 | " 4,\n", 564 | " 2,\n", 565 | " 2,\n", 566 | " 3,\n", 567 | " 5,\n", 568 | " 0,\n", 569 | " 3,\n", 570 | " 5,\n", 571 | " 1,\n", 572 | " 1,\n", 573 | " 2,\n", 574 | " 4,\n", 575 | " 3,\n", 576 | " 3,\n", 577 | " 2,\n", 578 | " 5,\n", 579 | " 1,\n", 580 | " 5,\n", 581 | " 2,\n", 582 | " 4,\n", 583 | " 1,\n", 584 | " 1,\n", 585 | " 1,\n", 586 | " 1,\n", 587 | " 5,\n", 588 | " 6,\n", 589 | " 3,\n", 590 | " 3,\n", 591 | " 13,\n", 592 | " 5,\n", 593 | " 2,\n", 594 | " 3,\n", 595 | " 5,\n", 596 | " 1,\n", 597 | " 4,\n", 598 | " 2,\n", 599 | " 3,\n", 600 | " 0,\n", 601 | " 7,\n", 602 | " 2,\n", 603 | " 2,\n", 604 | " 2,\n", 605 | " 3,\n", 606 | " 14,\n", 607 | " 3,\n", 608 | " 1,\n", 609 | " 9,\n", 610 | " 3,\n", 611 | " 1,\n", 612 | " 2,\n", 613 | " 1,\n", 614 | " 2,\n", 615 | " 1,\n", 616 | " 2,\n", 617 | " 4,\n", 618 | " 2,\n", 619 | " 1,\n", 620 | " 2,\n", 621 | " 4,\n", 622 | " 0,\n", 623 | " 3,\n", 624 | " 1,\n", 625 | " 2,\n", 626 | " 3,\n", 627 | " 2,\n", 628 | " 1,\n", 629 | " 0,\n", 630 | " 3,\n", 631 | " 1,\n", 632 | " 3,\n", 633 | " 0,\n", 634 | " 1,\n", 635 | " 1,\n", 636 | " 1,\n", 637 | " 3,\n", 638 | " 4,\n", 639 | " 1,\n", 640 | " 5,\n", 641 | " 3,\n", 642 | " 2,\n", 643 | " 0,\n", 644 | " 4,\n", 645 | " 1,\n", 646 | " 5,\n", 647 | " 2,\n", 648 | " 4,\n", 649 | " 1,\n", 650 | " 2,\n", 651 | " 2,\n", 652 | " 4,\n", 653 | " 0,\n", 654 | " 3,\n", 655 | " 3,\n", 656 | " 2,\n", 657 | " 1,\n", 658 | " 5,\n", 659 | " 1,\n", 660 | " 4,\n", 661 | " 6,\n", 662 | " 4,\n", 663 | " 1,\n", 664 | " 2,\n", 665 | " 2,\n", 666 | " 9,\n", 667 | " 1,\n", 668 | " 3,\n", 669 | " 1,\n", 670 | " 6,\n", 671 | " 2,\n", 672 | " 2,\n", 673 | " 0,\n", 674 | " 9,\n", 675 | " 3,\n", 676 | " 3,\n", 677 | " 1,\n", 678 | " 3,\n", 679 | " 1,\n", 680 | " 0,\n", 681 | " 
1,\n", 682 | " 1,\n", 683 | " 1,\n", 684 | " 2,\n", 685 | " 3,\n", 686 | " 1,\n", 687 | " 4,\n", 688 | " 1,\n", 689 | " 7,\n", 690 | " 1,\n", 691 | " 3,\n", 692 | " 6,\n", 693 | " 4,\n", 694 | " 8,\n", 695 | " 3,\n", 696 | " 0,\n", 697 | " 2,\n", 698 | " 0,\n", 699 | " 1,\n", 700 | " 0,\n", 701 | " 0,\n", 702 | " 2,\n", 703 | " 0,\n", 704 | " 2,\n", 705 | " 7,\n", 706 | " 0,\n", 707 | " 2,\n", 708 | " 5,\n", 709 | " 2,\n", 710 | " 7,\n", 711 | " 0,\n", 712 | " 3,\n", 713 | " 6,\n", 714 | " 0,\n", 715 | " 7,\n", 716 | " 0,\n", 717 | " 3,\n", 718 | " 0,\n", 719 | " 3,\n", 720 | " 2,\n", 721 | " 3,\n", 722 | " 0,\n", 723 | " 0,\n", 724 | " 11,\n", 725 | " 1,\n", 726 | " 5,\n", 727 | " 2,\n", 728 | " 5,\n", 729 | " 2,\n", 730 | " 4,\n", 731 | " 0,\n", 732 | " 4,\n", 733 | " 3,\n", 734 | " 0,\n", 735 | " 4,\n", 736 | " 2,\n", 737 | " 4,\n", 738 | " 3,\n", 739 | " 3,\n", 740 | " 7,\n", 741 | " 1,\n", 742 | " 2,\n", 743 | " 0,\n", 744 | " 1,\n", 745 | " 0,\n", 746 | " 4,\n", 747 | " 6,\n", 748 | " 3,\n", 749 | " 17,\n", 750 | " 1,\n", 751 | " 3,\n", 752 | " 4,\n", 753 | " 0,\n", 754 | " 3,\n", 755 | " 4,\n", 756 | " 3,\n", 757 | " 1,\n", 758 | " 2,\n", 759 | " 4,\n", 760 | " 5,\n", 761 | " 1,\n", 762 | " 2,\n", 763 | " 2,\n", 764 | " 3,\n", 765 | " 1,\n", 766 | " 4,\n", 767 | " 2,\n", 768 | " 2,\n", 769 | " 3,\n", 770 | " 10,\n", 771 | " 7,\n", 772 | " 0,\n", 773 | " 1,\n", 774 | " 1,\n", 775 | " 3,\n", 776 | " 2,\n", 777 | " 0,\n", 778 | " 3,\n", 779 | " 3,\n", 780 | " 1,\n", 781 | " 3,\n", 782 | " 6,\n", 783 | " 4,\n", 784 | " 3,\n", 785 | " 0,\n", 786 | " 3,\n", 787 | " 4,\n", 788 | " 4,\n", 789 | " 2,\n", 790 | " 3,\n", 791 | " 4,\n", 792 | " 2,\n", 793 | " 0,\n", 794 | " 8,\n", 795 | " 2,\n", 796 | " 4,\n", 797 | " 6,\n", 798 | " 0,\n", 799 | " 5,\n", 800 | " 4,\n", 801 | " 1,\n", 802 | " 2,\n", 803 | " 0,\n", 804 | " 3,\n", 805 | " 1,\n", 806 | " 2,\n", 807 | " 2,\n", 808 | " 2,\n", 809 | " 0,\n", 810 | " 0,\n", 811 | " 7,\n", 812 | " 7,\n", 813 | " 7,\n", 814 | " 
2,\n", 815 | " 0,\n", 816 | " 1,\n", 817 | " 3,\n", 818 | " 3,\n", 819 | " 3,\n", 820 | " 2,\n", 821 | " 2,\n", 822 | " 7,\n", 823 | " 1,\n", 824 | " 1,\n", 825 | " 2,\n", 826 | " 0,\n", 827 | " 1,\n", 828 | " 4,\n", 829 | " 4,\n", 830 | " 3,\n", 831 | " 4,\n", 832 | " 1,\n", 833 | " 3,\n", 834 | " 0,\n", 835 | " 4,\n", 836 | " 1,\n", 837 | " 1,\n", 838 | " 2,\n", 839 | " 2,\n", 840 | " 1,\n", 841 | " 1,\n", 842 | " 2,\n", 843 | " 3,\n", 844 | " 1,\n", 845 | " 1,\n", 846 | " 7,\n", 847 | " 0,\n", 848 | " 6,\n", 849 | " 3,\n", 850 | " 3,\n", 851 | " 3,\n", 852 | " 2,\n", 853 | " 3,\n", 854 | " 4,\n", 855 | " 1,\n", 856 | " 8,\n", 857 | " 1,\n", 858 | " 5,\n", 859 | " 4,\n", 860 | " 2,\n", 861 | " 2,\n", 862 | " 5,\n", 863 | " 4,\n", 864 | " 1,\n", 865 | " 0,\n", 866 | " 3,\n", 867 | " 1,\n", 868 | " 1,\n", 869 | " 2,\n", 870 | " 4,\n", 871 | " 3,\n", 872 | " 2,\n", 873 | " 2,\n", 874 | " 4,\n", 875 | " 5,\n", 876 | " 0,\n", 877 | " 5,\n", 878 | " 3,\n", 879 | " 2,\n", 880 | " 2,\n", 881 | " 3,\n", 882 | " 11,\n", 883 | " 3,\n", 884 | " 6,\n", 885 | " 6,\n", 886 | " 3,\n", 887 | " 5,\n", 888 | " 7,\n", 889 | " 3,\n", 890 | " 9,\n", 891 | " 2,\n", 892 | " 1,\n", 893 | " 2,\n", 894 | " 1,\n", 895 | " 6,\n", 896 | " 4,\n", 897 | " 3,\n", 898 | " 3,\n", 899 | " 3,\n", 900 | " 3,\n", 901 | " 4,\n", 902 | " 5,\n", 903 | " 10,\n", 904 | " 1,\n", 905 | " 1,\n", 906 | " 3,\n", 907 | " 1,\n", 908 | " 3,\n", 909 | " 3,\n", 910 | " 5,\n", 911 | " 2,\n", 912 | " 2,\n", 913 | " 5,\n", 914 | " 5,\n", 915 | " 0,\n", 916 | " 1,\n", 917 | " 1,\n", 918 | " 2,\n", 919 | " 1,\n", 920 | " 3,\n", 921 | " 4,\n", 922 | " 2,\n", 923 | " 1,\n", 924 | " 0,\n", 925 | " 4,\n", 926 | " 1,\n", 927 | " 4,\n", 928 | " 2,\n", 929 | " 1,\n", 930 | " 0,\n", 931 | " 1,\n", 932 | " 1,\n", 933 | " 1,\n", 934 | " 5,\n", 935 | " 7,\n", 936 | " 3,\n", 937 | " 1,\n", 938 | " 2,\n", 939 | " 4,\n", 940 | " 7,\n", 941 | " 0,\n", 942 | " 0,\n", 943 | " 2,\n", 944 | " 4,\n", 945 | " 4,\n", 946 | " 1,\n", 947 | " 
1,\n", 948 | " 2,\n", 949 | " 5,\n", 950 | " 2,\n", 951 | " 3,\n", 952 | " 0,\n", 953 | " 3,\n", 954 | " 1,\n", 955 | " 5,\n", 956 | " 3,\n", 957 | " 1,\n", 958 | " 3,\n", 959 | " 8,\n", 960 | " 0,\n", 961 | " 4,\n", 962 | " 0,\n", 963 | " 3,\n", 964 | " 0,\n", 965 | " 1,\n", 966 | " 6,\n", 967 | " 3,\n", 968 | " 7,\n", 969 | " 0,\n", 970 | " 4,\n", 971 | " 1,\n", 972 | " 3,\n", 973 | " 1,\n", 974 | " 2,\n", 975 | " 1,\n", 976 | " 2,\n", 977 | " 0,\n", 978 | " 1,\n", 979 | " 1,\n", 980 | " 0,\n", 981 | " 2,\n", 982 | " 3,\n", 983 | " 4,\n", 984 | " 3,\n", 985 | " 3,\n", 986 | " 3,\n", 987 | " 2,\n", 988 | " 11,\n", 989 | " 1,\n", 990 | " 7,\n", 991 | " 12,\n", 992 | " 4,\n", 993 | " 9,\n", 994 | " 0,\n", 995 | " 4,\n", 996 | " 0,\n", 997 | " 2,\n", 998 | " 3,\n", 999 | " 1,\n", 1000 | " 4,\n", 1001 | " 1,\n", 1002 | " 2,\n", 1003 | " 1,\n", 1004 | " 4,\n", 1005 | " 6,\n", 1006 | " 0,\n", 1007 | " 5,\n", 1008 | " 6,\n", 1009 | " 4,\n", 1010 | " 11,\n", 1011 | " 9,\n", 1012 | " 6,\n", 1013 | " 3,\n", 1014 | " 1,\n", 1015 | " 13,\n", 1016 | " 3,\n", 1017 | " 2,\n", 1018 | " 5,\n", 1019 | " 2,\n", 1020 | " 1,\n", 1021 | " 4,\n", 1022 | " 5,\n", 1023 | " 2,\n", 1024 | " 5,\n", 1025 | " 2,\n", 1026 | " 3,\n", 1027 | " 1,\n", 1028 | " 3,\n", 1029 | " 2,\n", 1030 | " 7,\n", 1031 | " 10,\n", 1032 | " 2,\n", 1033 | " 2,\n", 1034 | " 5,\n", 1035 | " 1,\n", 1036 | " 3,\n", 1037 | " 3,\n", 1038 | " 9,\n", 1039 | " 8,\n", 1040 | " 0,\n", 1041 | " 6,\n", 1042 | " 4,\n", 1043 | " 1,\n", 1044 | " 1,\n", 1045 | " 4,\n", 1046 | " 0,\n", 1047 | " 2,\n", 1048 | " 5,\n", 1049 | " 4,\n", 1050 | " 5,\n", 1051 | " 4,\n", 1052 | " 3,\n", 1053 | " 5,\n", 1054 | " 6,\n", 1055 | " 1,\n", 1056 | " 2,\n", 1057 | " 0,\n", 1058 | " 4,\n", 1059 | " 1,\n", 1060 | " 5,\n", 1061 | " 2,\n", 1062 | " 2,\n", 1063 | " 1,\n", 1064 | " 4,\n", 1065 | " 1,\n", 1066 | " 3,\n", 1067 | " 0,\n", 1068 | " 4,\n", 1069 | " 1,\n", 1070 | " 5,\n", 1071 | " 0,\n", 1072 | " 1,\n", 1073 | " 3,\n", 1074 | " 2,\n", 1075 | 
" 5,\n", 1076 | " 1,\n", 1077 | " 4,\n", 1078 | " 3,\n", 1079 | " 4,\n", 1080 | " 4,\n", 1081 | " 5,\n", 1082 | " 3,\n", 1083 | " 12,\n", 1084 | " 2,\n", 1085 | " 2,\n", 1086 | " 0,\n", 1087 | " 5,\n", 1088 | " 1,\n", 1089 | " 1,\n", 1090 | " 1,\n", 1091 | " 1,\n", 1092 | " 0,\n", 1093 | " 4,\n", 1094 | " 0,\n", 1095 | " 8,\n", 1096 | " 1,\n", 1097 | " 2,\n", 1098 | " 1,\n", 1099 | " 4,\n", 1100 | " 0]" 1101 | ] 1102 | }, 1103 | "execution_count": 62, 1104 | "metadata": {}, 1105 | "output_type": "execute_result" 1106 | } 1107 | ], 1108 | "source": [ 1109 | "X[0]" 1110 | ] 1111 | }, 1112 | { 1113 | "cell_type": "code", 1114 | "execution_count": null, 1115 | "metadata": {}, 1116 | "outputs": [], 1117 | "source": [] 1118 | } 1119 | ], 1120 | "metadata": { 1121 | "kernelspec": { 1122 | "display_name": "Python 3", 1123 | "language": "python", 1124 | "name": "python3" 1125 | }, 1126 | "language_info": { 1127 | "codemirror_mode": { 1128 | "name": "ipython", 1129 | "version": 3 1130 | }, 1131 | "file_extension": ".py", 1132 | "mimetype": "text/x-python", 1133 | "name": "python", 1134 | "nbconvert_exporter": "python", 1135 | "pygments_lexer": "ipython3", 1136 | "version": "3.6.7" 1137 | } 1138 | }, 1139 | "nbformat": 4, 1140 | "nbformat_minor": 2 1141 | } 1142 | -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/Extracting N-grams/Extracting N-grams.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import collections\n", 10 | "from nltk import ngrams\n", 11 | "file = \"python-3.7.2-amd64.exe\"" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "def readFile(filePath):\n", 21 | " with open(filePath, \"rb\") as binary_file:\n", 22 | " data = 
binary_file.read()\n", 23 | " return data\n", 24 | "\n", 25 | "def byteSequenceToNgrams(byteSequence, n):\n", 26 | " Ngrams = ngrams(byteSequence, n)\n", 27 | " return list(Ngrams)\n", 28 | " \n", 29 | "def extractNgramCounts(file, N):\n", 30 | " fileByteSequence = readFile(file)\n", 31 | " fileNgrams = byteSequenceToNgrams(fileByteSequence, N)\n", 32 | " return collections.Counter(fileNgrams)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "extractedNgrams = extractNgramCounts(file, 3)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "Counter({(77, 90, 144): 2,\n", 53 | " (90, 144, 0): 2,\n", 54 | " (144, 0, 3): 2,\n", 55 | " (0, 3, 0): 131,\n", 56 | " (3, 0, 0): 632,\n", 57 | " (0, 0, 0): 32126,\n", 58 | " (0, 0, 4): 85,\n", 59 | " (0, 4, 0): 265,\n", 60 | " (4, 0, 0): 638,\n", 61 | " (0, 0, 255): 566,\n", 62 | " (0, 255, 255): 214,\n", 63 | " (255, 255, 0): 394,\n", 64 | " (255, 0, 0): 370,\n", 65 | " (0, 0, 184): 65,\n", 66 | " (0, 184, 0): 14,\n", 67 | " (184, 0, 0): 99,\n", 68 | " (0, 0, 64): 115,\n", 69 | " (0, 64, 0): 84,\n", 70 | " (64, 0, 0): 91,\n", 71 | " (0, 0, 16): 137,\n", 72 | " (0, 16, 1): 17,\n", 73 | " (16, 1, 0): 27,\n", 74 | " (1, 0, 0): 1353,\n", 75 | " (0, 0, 14): 35,\n", 76 | " (0, 14, 31): 1,\n", 77 | " (14, 31, 186): 1,\n", 78 | " (31, 186, 14): 2,\n", 79 | " (186, 14, 0): 5,\n", 80 | " (14, 0, 180): 1,\n", 81 | " (0, 180, 9): 2,\n", 82 | " (180, 9, 205): 3,\n", 83 | " (9, 205, 33): 5,\n", 84 | " (205, 33, 184): 3,\n", 85 | " (33, 184, 1): 1,\n", 86 | " (184, 1, 76): 2,\n", 87 | " (1, 76, 205): 1,\n", 88 | " (76, 205, 33): 2,\n", 89 | " (205, 33, 84): 4,\n", 90 | " (33, 84, 104): 1,\n", 91 | " (84, 104, 105): 5,\n", 92 | " (104, 105, 115): 5,\n", 93 | " (105, 115, 32): 30,\n", 94 | " (115, 32, 112): 27,\n", 95 | " (32, 112, 114): 263,\n", 
96 | " (112, 114, 111): 285,\n", 97 | " (114, 111, 103): 28,\n", 98 | " (111, 103, 114): 30,\n", 99 | " (103, 114, 97): 20,\n", 100 | " (114, 97, 109): 10,\n", 101 | " (97, 109, 32): 22,\n", 102 | " (109, 32, 99): 21,\n", 103 | " (32, 99, 97): 148,\n", 104 | " (99, 97, 110): 12,\n", 105 | " (97, 110, 110): 24,\n", 106 | " (110, 110, 111): 14,\n", 107 | " (110, 111, 116): 45,\n", 108 | " (111, 116, 32): 52,\n", 109 | " (116, 32, 98): 37,\n", 110 | " (32, 98, 101): 63,\n", 111 | " (98, 101, 32): 10,\n", 112 | " (101, 32, 114): 70,\n", 113 | " (32, 114, 117): 19,\n", 114 | " (114, 117, 110): 21,\n", 115 | " (117, 110, 32): 16,\n", 116 | " (110, 32, 105): 22,\n", 117 | " (32, 105, 110): 159,\n", 118 | " (105, 110, 32): 68,\n", 119 | " (110, 32, 68): 1,\n", 120 | " (32, 68, 79): 4,\n", 121 | " (68, 79, 83): 4,\n", 122 | " (79, 83, 32): 13,\n", 123 | " (83, 32, 109): 1,\n", 124 | " (32, 109, 111): 32,\n", 125 | " (109, 111, 100): 28,\n", 126 | " (111, 100, 101): 155,\n", 127 | " (100, 101, 46): 44,\n", 128 | " (101, 46, 13): 6,\n", 129 | " (46, 13, 13): 4,\n", 130 | " (13, 13, 10): 2,\n", 131 | " (13, 10, 36): 2,\n", 132 | " (10, 36, 0): 4,\n", 133 | " (36, 0, 0): 77,\n", 134 | " (0, 0, 65): 74,\n", 135 | " (0, 65, 33): 2,\n", 136 | " (65, 33, 17): 5,\n", 137 | " (33, 17, 83): 2,\n", 138 | " (17, 83, 5): 1,\n", 139 | " (83, 5, 64): 4,\n", 140 | " (5, 64, 127): 7,\n", 141 | " (64, 127, 0): 19,\n", 142 | " (127, 0, 5): 5,\n", 143 | " (0, 5, 64): 8,\n", 144 | " (127, 0, 177): 5,\n", 145 | " (0, 177, 220): 3,\n", 146 | " (177, 220, 142): 3,\n", 147 | " (220, 142, 0): 2,\n", 148 | " (142, 0, 12): 2,\n", 149 | " (0, 12, 64): 9,\n", 150 | " (12, 64, 127): 4,\n", 151 | " (177, 220, 140): 2,\n", 152 | " (220, 140, 0): 4,\n", 153 | " (140, 0, 121): 3,\n", 154 | " (0, 121, 64): 2,\n", 155 | " (121, 64, 127): 1,\n", 156 | " (177, 220, 141): 4,\n", 157 | " (220, 141, 0): 1,\n", 158 | " (141, 0, 29): 5,\n", 159 | " (0, 29, 64): 5,\n", 160 | " (29, 64, 127): 2,\n", 161 | " (127, 0, 
220): 4,\n", 162 | " (0, 220, 34): 4,\n", 163 | " (220, 34, 124): 2,\n", 164 | " (34, 124, 1): 2,\n", 165 | " (124, 1, 22): 4,\n", 166 | " (1, 22, 64): 2,\n", 167 | " (22, 64, 127): 5,\n", 168 | " (220, 34, 123): 3,\n", 169 | " (34, 123, 1): 5,\n", 170 | " (123, 1, 22): 2,\n", 171 | " (220, 34, 122): 3,\n", 172 | " (34, 122, 1): 2,\n", 173 | " (122, 1, 35): 5,\n", 174 | " (1, 35, 64): 6,\n", 175 | " (35, 64, 127): 4,\n", 176 | " (127, 0, 12): 6,\n", 177 | " (0, 12, 56): 7,\n", 178 | " (12, 56, 252): 1,\n", 179 | " (56, 252, 0): 2,\n", 180 | " (252, 0, 0): 27,\n", 181 | " (0, 64, 127): 3,\n", 182 | " (12, 56, 236): 3,\n", 183 | " (56, 236, 0): 3,\n", 184 | " (236, 0, 20): 1,\n", 185 | " (0, 20, 64): 6,\n", 186 | " (20, 64, 127): 2,\n", 187 | " (5, 64, 126): 1,\n", 188 | " (64, 126, 0): 2,\n", 189 | " (126, 0, 80): 2,\n", 190 | " (0, 80, 65): 6,\n", 191 | " (80, 65, 127): 2,\n", 192 | " (65, 127, 0): 3,\n", 193 | " (127, 0, 161): 3,\n", 194 | " (0, 161, 35): 5,\n", 195 | " (161, 35, 122): 4,\n", 196 | " (35, 122, 1): 2,\n", 197 | " (122, 1, 78): 2,\n", 198 | " (1, 78, 64): 2,\n", 199 | " (78, 64, 127): 1,\n", 200 | " (161, 35, 128): 2,\n", 201 | " (35, 128, 0): 4,\n", 202 | " (128, 0, 4): 5,\n", 203 | " (0, 4, 64): 6,\n", 204 | " (4, 64, 127): 3,\n", 205 | " (5, 64, 232): 5,\n", 206 | " (64, 232, 0): 2,\n", 207 | " (232, 0, 7): 2,\n", 208 | " (0, 7, 64): 3,\n", 209 | " (7, 64, 127): 2,\n", 210 | " (161, 35, 125): 3,\n", 211 | " (35, 125, 1): 4,\n", 212 | " (125, 1, 4): 2,\n", 213 | " (1, 4, 64): 6,\n", 214 | " (127, 0, 82): 4,\n", 215 | " (0, 82, 105): 1,\n", 216 | " (82, 105, 99): 3,\n", 217 | " (105, 99, 104): 2,\n", 218 | " (99, 104, 5): 2,\n", 219 | " (104, 5, 64): 5,\n", 220 | " (127, 0, 0): 38,\n", 221 | " (0, 0, 80): 458,\n", 222 | " (0, 80, 69): 3,\n", 223 | " (80, 69, 0): 13,\n", 224 | " (69, 0, 0): 39,\n", 225 | " (0, 0, 76): 40,\n", 226 | " (0, 76, 1): 4,\n", 227 | " (76, 1, 6): 1,\n", 228 | " (1, 6, 0): 9,\n", 229 | " (6, 0, 134): 2,\n", 230 | " (0, 134, 
173): 5,\n", 231 | " (134, 173, 16): 7,\n", 232 | " (173, 16, 90): 6,\n", 233 | " (16, 90, 0): 5,\n", 234 | " (90, 0, 0): 35,\n", 235 | " (0, 0, 224): 60,\n", 236 | " (0, 224, 0): 17,\n", 237 | " (224, 0, 2): 6,\n", 238 | " (0, 2, 13): 2,\n", 239 | " (2, 13, 11): 2,\n", 240 | " (13, 11, 1): 5,\n", 241 | " (11, 1, 14): 2,\n", 242 | " (1, 14, 11): 4,\n", 243 | " (14, 11, 0): 1,\n", 244 | " (11, 0, 154): 2,\n", 245 | " (0, 154, 4): 5,\n", 246 | " (154, 4, 0): 3,\n", 247 | " (0, 0, 158): 12,\n", 248 | " (0, 158, 3): 3,\n", 249 | " (158, 3, 0): 3,\n", 250 | " (0, 0, 166): 13,\n", 251 | " (0, 166, 226): 2,\n", 252 | " (166, 226, 2): 4,\n", 253 | " (226, 2, 0): 4,\n", 254 | " (2, 0, 0): 708,\n", 255 | " (0, 16, 0): 150,\n", 256 | " (16, 0, 0): 140,\n", 257 | " (0, 0, 176): 33,\n", 258 | " (0, 176, 4): 7,\n", 259 | " (176, 4, 0): 33,\n", 260 | " (0, 0, 2): 87,\n", 261 | " (0, 2, 0): 168,\n", 262 | " (0, 0, 5): 33,\n", 263 | " (0, 5, 0): 87,\n", 264 | " (5, 0, 1): 4,\n", 265 | " (0, 1, 0): 344,\n", 266 | " (0, 0, 128): 197,\n", 267 | " (0, 128, 8): 23,\n", 268 | " (128, 8, 0): 13,\n", 269 | " (8, 0, 0): 159,\n", 270 | " (0, 0, 37): 39,\n", 271 | " (0, 37, 63): 1,\n", 272 | " (37, 63, 143): 2,\n", 273 | " (63, 143, 1): 5,\n", 274 | " (143, 1, 2): 3,\n", 275 | " (1, 2, 0): 12,\n", 276 | " (2, 0, 64): 7,\n", 277 | " (0, 64, 129): 9,\n", 278 | " (64, 129, 0): 2,\n", 279 | " (129, 0, 0): 32,\n", 280 | " (0, 0, 180): 11,\n", 281 | " (0, 180, 134): 5,\n", 282 | " (180, 134, 6): 5,\n", 283 | " (134, 6, 0): 13,\n", 284 | " (6, 0, 180): 7,\n", 285 | " (0, 180, 0): 5,\n", 286 | " (180, 0, 0): 59,\n", 287 | " (0, 0, 208): 40,\n", 288 | " (0, 208, 6): 5,\n", 289 | " (208, 6, 0): 7,\n", 290 | " (6, 0, 244): 10,\n", 291 | " (0, 244, 101): 5,\n", 292 | " (244, 101, 1): 4,\n", 293 | " (101, 1, 0): 7,\n", 294 | " (0, 0, 32): 157,\n", 295 | " (0, 32, 199): 3,\n", 296 | " (32, 199, 142): 4,\n", 297 | " (199, 142, 1): 1,\n", 298 | " (142, 1, 16): 2,\n", 299 | " (1, 16, 26): 3,\n", 300 | " (16, 
26, 0): 6,\n", 301 | " (26, 0, 0): 33,\n", 302 | " (0, 64, 8): 6,\n", 303 | " (64, 8, 0): 7,\n", 304 | " (8, 0, 252): 1,\n", 305 | " (0, 252, 61): 4,\n", 306 | " (252, 61, 0): 6,\n", 307 | " (61, 0, 0): 54,\n", 308 | " (0, 80, 118): 4,\n", 309 | " (80, 118, 6): 2,\n", 310 | " (118, 6, 0): 7,\n", 311 | " (6, 0, 84): 6,\n", 312 | " (0, 84, 0): 96,\n", 313 | " (84, 0, 0): 34,\n", 314 | " (0, 0, 164): 18,\n", 315 | " (0, 164, 118): 5,\n", 316 | " (164, 118, 6): 3,\n", 317 | " (6, 0, 24): 13,\n", 318 | " (0, 24, 0): 21,\n", 319 | " (24, 0, 0): 61,\n", 320 | " (0, 0, 48): 57,\n", 321 | " (0, 48, 112): 2,\n", 322 | " (48, 112, 6): 2,\n", 323 | " (112, 6, 0): 4,\n", 324 | " (6, 0, 64): 7,\n", 325 | " (4, 0, 224): 5,\n", 326 | " (0, 224, 3): 7,\n", 327 | " (224, 3, 0): 14,\n", 328 | " (0, 0, 52): 56,\n", 329 | " (0, 52, 130): 4,\n", 330 | " (52, 130, 6): 8,\n", 331 | " (130, 6, 0): 5,\n", 332 | " (6, 0, 0): 235,\n", 333 | " (0, 0, 1): 262,\n", 334 | " (0, 0, 46): 72,\n", 335 | " (0, 46, 116): 6,\n", 336 | " (46, 116, 101): 3,\n", 337 | " (116, 101, 120): 19,\n", 338 | " (101, 120, 116): 50,\n", 339 | " (120, 116, 0): 4,\n", 340 | " (116, 0, 0): 101,\n", 341 | " (0, 0, 55): 6,\n", 342 | " (0, 55, 153): 2,\n", 343 | " (55, 153, 4): 2,\n", 344 | " (153, 4, 0): 2,\n", 345 | " (0, 0, 154): 11,\n", 346 | " (0, 32, 0): 240,\n", 347 | " (32, 0, 0): 72,\n", 348 | " (0, 0, 96): 74,\n", 349 | " (0, 96, 46): 4,\n", 350 | " (96, 46, 114): 2,\n", 351 | " (46, 114, 100): 7,\n", 352 | " (114, 100, 97): 8,\n", 353 | " (100, 97, 116): 87,\n", 354 | " (97, 116, 97): 50,\n", 355 | " (116, 97, 0): 8,\n", 356 | " (97, 0, 0): 40,\n", 357 | " (0, 96, 237): 2,\n", 358 | " (96, 237, 1): 4,\n", 359 | " (237, 1, 0): 13,\n", 360 | " (0, 0, 238): 6,\n", 361 | " (0, 238, 1): 6,\n", 362 | " (238, 1, 0): 7,\n", 363 | " (0, 158, 4): 2,\n", 364 | " (158, 4, 0): 3,\n", 365 | " (0, 64, 46): 5,\n", 366 | " (64, 46, 100): 3,\n", 367 | " (46, 100, 97): 3,\n", 368 | " (0, 48, 23): 1,\n", 369 | " (48, 23, 0): 
3,\n", 370 | " (23, 0, 0): 25,\n", 371 | " (0, 0, 160): 74,\n", 372 | " (0, 160, 6): 7,\n", 373 | " (160, 6, 0): 9,\n", 374 | " (0, 0, 10): 20,\n", 375 | " (0, 10, 0): 21,\n", 376 | " (10, 0, 0): 156,\n", 377 | " (0, 0, 140): 38,\n", 378 | " (0, 140, 6): 1,\n", 379 | " (140, 6, 0): 24,\n", 380 | " (0, 0, 192): 91,\n", 381 | " (0, 192, 46): 2,\n", 382 | " (192, 46, 119): 4,\n", 383 | " (46, 119, 105): 9,\n", 384 | " (119, 105, 120): 5,\n", 385 | " (105, 120, 98): 4,\n", 386 | " (120, 98, 117): 5,\n", 387 | " (98, 117, 114): 14,\n", 388 | " (117, 114, 110): 28,\n", 389 | " (114, 110, 56): 4,\n", 390 | " (110, 56, 0): 4,\n", 391 | " (56, 0, 0): 30,\n", 392 | " (0, 192, 6): 4,\n", 393 | " (192, 6, 0): 5,\n", 394 | " (0, 0, 150): 10,\n", 395 | " (0, 150, 6): 2,\n", 396 | " (150, 6, 0): 29,\n", 397 | " (64, 46, 114): 3,\n", 398 | " (46, 114, 115): 5,\n", 399 | " (114, 115, 114): 4,\n", 400 | " (115, 114, 99): 3,\n", 401 | " (114, 99, 0): 3,\n", 402 | " (99, 0, 0): 21,\n", 403 | " (0, 0, 244): 18,\n", 404 | " (0, 0, 102): 189,\n", 405 | " (0, 102, 1): 7,\n", 406 | " (102, 1, 0): 8,\n", 407 | " (0, 0, 152): 26,\n", 408 | " (0, 152, 6): 3,\n", 409 | " (152, 6, 0): 32,\n", 410 | " (46, 114, 101): 2,\n", 411 | " (114, 101, 108): 62,\n", 412 | " (101, 108, 111): 2,\n", 413 | " (108, 111, 99): 131,\n", 414 | " (111, 99, 0): 9,\n", 415 | " (0, 0, 252): 14,\n", 416 | " (0, 0, 62): 17,\n", 417 | " (0, 62, 0): 11,\n", 418 | " (62, 0, 0): 24,\n", 419 | " (0, 0, 254): 58,\n", 420 | " (0, 254, 7): 3,\n", 421 | " (254, 7, 0): 4,\n", 422 | " (7, 0, 0): 159,\n", 423 | " (0, 0, 66): 94,\n", 424 | " (0, 66, 0): 83,\n", 425 | " (66, 0, 0): 33,\n", 426 | " (0, 0, 161): 79,\n", 427 | " (0, 161, 144): 4,\n", 428 | " (161, 144, 176): 5,\n", 429 | " (144, 176, 68): 1,\n", 430 | " (176, 68, 0): 465,\n", 431 | " (68, 0, 163): 17,\n", 432 | " (0, 163, 56): 5,\n", 433 | " (163, 56, 182): 4,\n", 434 | " (56, 182, 70): 5,\n", 435 | " (182, 70, 0): 157,\n", 436 | " (70, 0, 195): 34,\n", 437 | " (0, 
195, 161): 13,\n", 438 | " (195, 161, 140): 3,\n", 439 | " (161, 140, 176): 3,\n", 440 | " (140, 176, 68): 2,\n", 441 | " (0, 163, 64): 2,\n", 442 | " (163, 64, 182): 2,\n", 443 | " (64, 182, 70): 4,\n", 444 | " (195, 161, 32): 1,\n", 445 | " (161, 32, 176): 2,\n", 446 | " (32, 176, 68): 5,\n", 447 | " (0, 163, 88): 2,\n", 448 | " (163, 88, 182): 1,\n", 449 | " (88, 182, 70): 2,\n", 450 | " (195, 161, 136): 2,\n", 451 | " (161, 136, 176): 2,\n", 452 | " (136, 176, 68): 4,\n", 453 | " (0, 163, 68): 3,\n", 454 | " (163, 68, 182): 2,\n", 455 | " (68, 182, 70): 4,\n", 456 | " (195, 161, 132): 3,\n", 457 | " (161, 132, 176): 2,\n", 458 | " (132, 176, 68): 1,\n", 459 | " (0, 163, 72): 3,\n", 460 | " (163, 72, 182): 4,\n", 461 | " (72, 182, 70): 3,\n", 462 | " (195, 161, 4): 10,\n", 463 | " (161, 4, 176): 4,\n", 464 | " (4, 176, 68): 1,\n", 465 | " (0, 163, 60): 4,\n", 466 | " (163, 60, 182): 2,\n", 467 | " (60, 182, 70): 3,\n", 468 | " (195, 161, 128): 2,\n", 469 | " (161, 128, 176): 4,\n", 470 | " (128, 176, 68): 3,\n", 471 | " (0, 163, 76): 5,\n", 472 | " (163, 76, 182): 3,\n", 473 | " (76, 182, 70): 3,\n", 474 | " (195, 161, 28): 3,\n", 475 | " (161, 28, 176): 5,\n", 476 | " (28, 176, 68): 5,\n", 477 | " (0, 163, 80): 2,\n", 478 | " (163, 80, 182): 2,\n", 479 | " (80, 182, 70): 8,\n", 480 | " (195, 161, 124): 2,\n", 481 | " (161, 124, 176): 4,\n", 482 | " (124, 176, 68): 3,\n", 483 | " (0, 163, 84): 3,\n", 484 | " (163, 84, 182): 3,\n", 485 | " (84, 182, 70): 5,\n", 486 | " (195, 161, 84): 5,\n", 487 | " (161, 84, 179): 4,\n", 488 | " (84, 179, 68): 2,\n", 489 | " (179, 68, 0): 219,\n", 490 | " (0, 163, 220): 1,\n", 491 | " (163, 220, 182): 1,\n", 492 | " (220, 182, 70): 4,\n", 493 | " (0, 195, 204): 6,\n", 494 | " (195, 204, 204): 22,\n", 495 | " (204, 204, 85): 18,\n", 496 | " (204, 85, 139): 22,\n", 497 | " (85, 139, 236): 1249,\n", 498 | " (139, 236, 131): 331,\n", 499 | " (236, 131, 236): 302,\n", 500 | " (131, 236, 48): 11,\n", 501 | " (236, 48, 161): 6,\n", 502 
| " (48, 161, 4): 6,\n", 503 | " (161, 4, 160): 134,\n", 504 | " (4, 160, 70): 167,\n", 505 | " (160, 70, 0): 208,\n", 506 | " (70, 0, 51): 125,\n", 507 | " (0, 51, 197): 113,\n", 508 | " (51, 197, 137): 112,\n", 509 | " (197, 137, 69): 111,\n", 510 | " (137, 69, 252): 292,\n", 511 | " (69, 252, 83): 45,\n", 512 | " (252, 83, 139): 15,\n", 513 | " (83, 139, 93): 178,\n", 514 | " (139, 93, 8): 121,\n", 515 | " (93, 8, 51): 22,\n", 516 | " (8, 51, 192): 56,\n", 517 | " (51, 192, 86): 48,\n", 518 | " (192, 86, 139): 4,\n", 519 | " (86, 139, 117): 203,\n", 520 | " (139, 117, 16): 27,\n", 521 | " (117, 16, 87): 17,\n", 522 | " (16, 87, 80): 3,\n", 523 | " (87, 80, 137): 5,\n", 524 | " (80, 137, 69): 20,\n", 525 | " (137, 69, 208): 13,\n", 526 | " (69, 208, 131): 4,\n", 527 | " (208, 131, 207): 2,\n", 528 | " (131, 207, 255): 19,\n", 529 | " (207, 255, 137): 2,\n", 530 | " (255, 137, 69): 36,\n", 531 | " (137, 69, 212): 21,\n", 532 | " (69, 212, 141): 4,\n", 533 | " (212, 141, 69): 5,\n", 534 | " (141, 69, 212): 16,\n", 535 | " (69, 212, 80): 17,\n", 536 | " (212, 80, 199): 1,\n", 537 | " (80, 199, 69): 6,\n", 538 | " (199, 69, 216): 6,\n", 539 | " (69, 216, 96): 1,\n", 540 | " (216, 96, 180): 5,\n", 541 | " (96, 180, 68): 4,\n", 542 | " (180, 68, 0): 18,\n", 543 | " (68, 0, 199): 35,\n", 544 | " (0, 199, 69): 42,\n", 545 | " (199, 69, 220): 13,\n", 546 | " (69, 220, 120): 1,\n", 547 | " (220, 120, 180): 3,\n", 548 | " (120, 180, 68): 1,\n", 549 | " (199, 69, 224): 19,\n", 550 | " (69, 224, 136): 2,\n", 551 | " (224, 136, 180): 2,\n", 552 | " (136, 180, 68): 3,\n", 553 | " (199, 69, 228): 3,\n", 554 | " (69, 228, 160): 3,\n", 555 | " (228, 160, 180): 2,\n", 556 | " (160, 180, 68): 4,\n", 557 | " (199, 69, 232): 12,\n", 558 | " (69, 232, 184): 1,\n", 559 | " (232, 184, 180): 4,\n", 560 | " (184, 180, 68): 2,\n", 561 | " (199, 69, 236): 11,\n", 562 | " (69, 236, 208): 1,\n", 563 | " (236, 208, 180): 1,\n", 564 | " (208, 180, 68): 3,\n", 565 | " (199, 69, 240): 20,\n", 566 
| " (69, 240, 232): 4,\n", 567 | " (240, 232, 180): 1,\n", 568 | " (232, 180, 68): 2,\n", 569 | " (199, 69, 244): 26,\n", 570 | " (69, 244, 0): 7,\n", 571 | " (244, 0, 181): 2,\n", 572 | " (0, 181, 68): 3,\n", 573 | " (181, 68, 0): 63,\n", 574 | " (199, 69, 248): 20,\n", 575 | " (69, 248, 24): 3,\n", 576 | " (248, 24, 181): 2,\n", 577 | " (24, 181, 68): 4,\n", 578 | " (68, 0, 232): 100,\n", 579 | " (0, 232, 234): 2,\n", 580 | " (232, 234, 34): 3,\n", 581 | " (234, 34, 0): 2,\n", 582 | " (34, 0, 0): 80,\n", 583 | " (0, 0, 133): 208,\n", 584 | " (0, 133, 192): 479,\n", 585 | " (133, 192, 120): 101,\n", 586 | " (192, 120, 29): 3,\n", 587 | " (120, 29, 106): 2,\n", 588 | " (29, 106, 0): 4,\n", 589 | " (106, 0, 104): 37,\n", 590 | " (0, 104, 128): 13,\n", 591 | " (104, 128, 0): 24,\n", 592 | " (128, 0, 0): 172,\n", 593 | " (0, 0, 106): 237,\n", 594 | " (0, 106, 3): 22,\n", 595 | " (106, 3, 106): 15,\n", 596 | " (3, 106, 0): 9,\n", 597 | " (106, 0, 106): 139,\n", 598 | " (0, 106, 5): 10,\n", 599 | " (106, 5, 104): 10,\n", 600 | " (5, 104, 0): 10,\n", 601 | " (104, 0, 0): 117,\n", 602 | " (0, 128, 255): 682,\n", 603 | " (128, 255, 117): 17,\n", 604 | " (255, 117, 212): 15,\n", 605 | " (117, 212, 255): 3,\n", 606 | " (212, 255, 21): 4,\n", 607 | " (255, 21, 228): 23,\n", 608 | " (21, 228, 176): 19,\n", 609 | " (228, 176, 68): 22,\n", 610 | " (68, 0, 139): 465,\n", 611 | " (0, 139, 248): 90,\n", 612 | " (139, 248, 86): 9,\n", 613 | " (248, 86, 232): 5,\n", 614 | " (86, 232, 31): 8,\n", 615 | " (232, 31, 64): 4,\n", 616 | " (31, 64, 0): 1,\n", 617 | " (133, 192, 116): 514,\n", 618 | " (192, 116, 7): 16,\n", 619 | " (116, 7, 232): 10,\n", 620 | " (7, 232, 239): 1,\n", 621 | " (232, 239, 0): 5,\n", 622 | " (239, 0, 0): 9,\n", 623 | " (0, 0, 235): 281,\n", 624 | " (0, 235, 11): 8,\n", 625 | " (235, 11, 106): 4,\n", 626 | " (11, 106, 9): 1,\n", 627 | " (106, 9, 141): 3,\n", 628 | " (9, 141, 69): 3,\n", 629 | " (141, 69, 216): 25,\n", 630 | " (69, 216, 80): 20,\n", 631 | " (216, 
80, 232): 7,\n", 632 | " (80, 232, 91): 4,\n", 633 | " (232, 91, 0): 2,\n", 634 | " (91, 0, 0): 11,\n", 635 | " (0, 0, 141): 437,\n", 636 | " (0, 141, 69): 413,\n", 637 | " (141, 69, 208): 28,\n", 638 | " (69, 208, 80): 17,\n", 639 | " (208, 80, 255): 13,\n", 640 | " (80, 255, 117): 476,\n", 641 | " (255, 117, 20): 212,\n", 642 | " (117, 20, 86): 7,\n", 643 | " (20, 86, 87): 14,\n", 644 | " (86, 87, 83): 9,\n", 645 | " (87, 83, 232): 75,\n", 646 | " (83, 232, 108): 5,\n", 647 | " (232, 108, 64): 5,\n", 648 | " (108, 64, 0): 1,\n", 649 | " (0, 0, 139): 1033,\n", 650 | " (0, 139, 240): 1180,\n", 651 | " (139, 240, 131): 127,\n", 652 | " (240, 131, 255): 4,\n", 653 | " (131, 255, 255): 30,\n", 654 | " (255, 255, 116): 143,\n", 655 | " (255, 116, 7): 23,\n", 656 | " (116, 7, 87): 12,\n", 657 | " (7, 87, 255): 18,\n", 658 | " (87, 255, 21): 70,\n", 659 | " (255, 21, 224): 94,\n", 660 | " (21, 224, 176): 40,\n", 661 | " (224, 176, 68): 52,\n", 662 | " (68, 0, 131): 169,\n", 663 | " (0, 131, 125): 189,\n", 664 | " (131, 125, 212): 3,\n", 665 | " (125, 212, 0): 3,\n", 666 | " (212, 0, 116): 1,\n", 667 | " (0, 116, 8): 338,\n", 668 | " (116, 8, 255): 445,\n", 669 | " (8, 255, 117): 339,\n", 670 | " (117, 212, 232): 4,\n", 671 | " (212, 232, 241): 4,\n", 672 | " (232, 241, 68): 2,\n", 673 | " (241, 68, 4): 1,\n", 674 | " (68, 4, 0): 10,\n", 675 | " (4, 0, 133): 4,\n", 676 | " (0, 133, 246): 43,\n", 677 | " (133, 246, 120): 630,\n", 678 | " (246, 120, 3): 3,\n", 679 | " (120, 3, 139): 6,\n", 680 | " (3, 139, 117): 14,\n", 681 | " (139, 117, 208): 3,\n", 682 | " (117, 208, 139): 4,\n", 683 | " (208, 139, 77): 4,\n", 684 | " (139, 77, 252): 203,\n", 685 | " (77, 252, 139): 76,\n", 686 | " (252, 139, 198): 60,\n", 687 | " (139, 198, 95): 55,\n", 688 | " (198, 95, 94): 49,\n", 689 | " (95, 94, 51): 57,\n", 690 | " (94, 51, 205): 62,\n", 691 | " (51, 205, 91): 66,\n", 692 | " (205, 91, 232): 64,\n", 693 | " (91, 232, 20): 4,\n", 694 | " (232, 20, 207): 2,\n", 695 | " (20, 207, 2): 
6,\n", 696 | " (207, 2, 0): 12,\n", 697 | " (2, 0, 139): 305,\n", 698 | " (0, 139, 229): 50,\n", 699 | " (139, 229, 93): 681,\n", 700 | " (229, 93, 194): 528,\n", 701 | " (93, 194, 16): 115,\n", 702 | " (194, 16, 0): 117,\n", 703 | " (16, 0, 85): 102,\n", 704 | " (0, 85, 139): 808,\n", 705 | " (139, 236, 139): 206,\n", 706 | " (236, 139, 69): 138,\n", 707 | " (139, 69, 8): 393,\n", 708 | " (69, 8, 131): 68,\n", 709 | " (8, 131, 192): 25,\n", 710 | " (131, 192, 252): 2,\n", 711 | " (192, 252, 80): 3,\n", 712 | " (252, 80, 255): 122,\n", 713 | " (80, 255, 21): 173,\n", 714 | " (255, 21, 236): 11,\n", 715 | " (21, 236, 176): 6,\n", 716 | " (236, 176, 68): 7,\n", 717 | " (68, 0, 93): 20,\n", 718 | " (0, 93, 194): 38,\n", 719 | " (93, 194, 4): 173,\n", 720 | " (194, 4, 0): 175,\n", 721 | " (4, 0, 85): 138,\n", 722 | " (139, 236, 81): 320,\n", 723 | " (236, 81, 83): 55,\n", 724 | " (81, 83, 86): 56,\n", 725 | " (83, 86, 87): 172,\n", 726 | " (86, 87, 51): 67,\n", 727 | " (87, 51, 246): 17,\n", 728 | " (51, 246, 86): 31,\n", 729 | " (246, 86, 86): 7,\n", 730 | " (86, 86, 106): 8,\n", 731 | " (86, 106, 1): 13,\n", 732 | " (106, 1, 86): 21,\n", 733 | " (1, 86, 137): 4,\n", 734 | " (86, 137, 117): 3,\n", 735 | " (137, 117, 252): 73,\n", 736 | " (117, 252, 255): 171,\n", 737 | " (252, 255, 21): 77,\n", 738 | " (255, 21, 240): 10,\n", 739 | " (21, 240, 176): 3,\n", 740 | " (240, 176, 68): 2,\n", 741 | " (68, 0, 104): 28,\n", 742 | " (0, 104, 56): 5,\n", 743 | " (104, 56, 181): 3,\n", 744 | " (56, 181, 68): 4,\n", 745 | " (68, 0, 255): 190,\n", 746 | " (0, 255, 21): 108,\n", 747 | " (255, 21, 248): 20,\n", 748 | " (21, 248, 176): 7,\n", 749 | " (248, 176, 68): 7,\n", 750 | " (0, 139, 216): 39,\n", 751 | " (139, 216, 104): 3,\n", 752 | " (216, 104, 76): 2,\n", 753 | " (104, 76, 181): 2,\n", 754 | " (76, 181, 68): 3,\n", 755 | " (68, 0, 83): 36,\n", 756 | " (0, 83, 255): 37,\n", 757 | " (83, 255, 21): 43,\n", 758 | " (255, 21, 232): 18,\n", 759 | " (21, 232, 176): 16,\n", 760 | " 
(232, 176, 68): 18,\n", 761 | " (0, 139, 61): 11,\n", 762 | " (139, 61, 244): 3,\n", 763 | " (61, 244, 176): 3,\n", 764 | " (244, 176, 68): 267,\n", 765 | " (68, 0, 133): 231,\n", 766 | " (192, 116, 13): 19,\n", 767 | " (116, 13, 104): 3,\n", 768 | " (13, 104, 0): 5,\n", 769 | " (104, 0, 8): 8,\n", 770 | " (0, 8, 0): 100,\n", 771 | " (0, 255, 208): 6,\n", 772 | " (255, 208, 133): 6,\n", 773 | " (208, 133, 192): 7,\n", 774 | " (133, 192, 117): 410,\n", 775 | " (192, 117, 57): 2,\n", 776 | " (117, 57, 255): 18,\n", 777 | " (57, 255, 215): 3,\n", 778 | " (255, 215, 104): 6,\n", 779 | " (215, 104, 104): 3,\n", 780 | " (104, 104, 181): 3,\n", 781 | " (104, 181, 68): 1,\n", 782 | " (192, 116, 11): 14,\n", 783 | " (116, 11, 104): 5,\n", 784 | " (11, 104, 52): 3,\n", 785 | " (104, 52, 181): 9,\n", 786 | " (52, 181, 68): 38,\n", 787 | " (192, 117, 2): 9,\n", 788 | " (117, 2, 255): 2,\n", 789 | " (2, 255, 215): 2,\n", 790 | " (255, 215, 57): 1,\n", 791 | " (215, 57, 117): 4,\n", 792 | " (57, 117, 12): 12,\n", 793 | " (117, 12, 118): 5,\n", 794 | " (12, 118, 21): 1,\n", 795 | " (118, 21, 139): 3,\n", 796 | " (21, 139, 125): 5,\n", 797 | " (139, 125, 8): 165,\n", 798 | " (125, 8, 141): 24,\n", 799 | " (8, 141, 69): 51,\n", 800 | " (141, 69, 252): 614,\n", 801 | " (69, 252, 80): 468,\n", 802 | " (80, 255, 52): 5,\n", 803 | " (255, 52, 183): 3,\n", 804 | " (52, 183, 232): 2,\n", 805 | " (183, 232, 55): 2,\n", 806 | " (232, 55, 38): 2,\n", 807 | " (55, 38, 0): 2,\n", 808 | " (38, 0, 0): 35,\n", 809 | " (0, 0, 70): 1262,\n", 810 | " (0, 70, 59): 4,\n", 811 | " (70, 59, 117): 8,\n", 812 | " (59, 117, 12): 8,\n", 813 | " (117, 12, 114): 8,\n", 814 | " (12, 114, 238): 1,\n", 815 | " (114, 238, 95): 2,\n", 816 | " (238, 95, 94): 2,\n", 817 | " (95, 94, 91): 108,\n", 818 | " (94, 91, 139): 257,\n", 819 | " (91, 139, 229): 289,\n", 820 | " (93, 194, 8): 256,\n", 821 | " (194, 8, 0): 258,\n", 822 | " (8, 0, 51): 5,\n", 823 | " (0, 51, 192): 86,\n", 824 | " (51, 192, 80): 79,\n", 825 | " 
(192, 80, 80): 42,\n", 826 | " (80, 80, 106): 9,\n", 827 | " (80, 106, 1): 126,\n", 828 | " (106, 1, 80): 38,\n", 829 | " (1, 80, 255): 12,\n", 830 | " (68, 0, 195): 8,\n", 831 | " (0, 195, 85): 14,\n", 832 | " (195, 85, 139): 80,\n", 833 | " (236, 81, 81): 112,\n", 834 | " (81, 81, 86): 28,\n", 835 | " (81, 86, 87): 33,\n", 836 | " (87, 51, 255): 69,\n", 837 | " (51, 255, 141): 17,\n", 838 | " (255, 141, 69): 63,\n", 839 | " (69, 252, 87): 17,\n", 840 | " (252, 87, 104): 7,\n", 841 | " (87, 104, 124): 3,\n", 842 | " (104, 124, 181): 3,\n", 843 | " (124, 181, 68): 1,\n", 844 | " (68, 0, 80): 68,\n", 845 | " (0, 80, 137): 10,\n", 846 | " (80, 137, 125): 14,\n", 847 | " (137, 125, 252): 76,\n", 848 | " (125, 252, 137): 18,\n", 849 | " (252, 137, 125): 22,\n", 850 | " (137, 125, 248): 53,\n", 851 | " (125, 248, 232): 8,\n", 852 | " (248, 232, 189): 5,\n", 853 | " (232, 189, 12): 6,\n", 854 | " (189, 12, 0): 3,\n", 855 | " (12, 0, 0): 92,\n", 856 | " (139, 240, 133): 1929,\n", 857 | " (240, 133, 246): 1924,\n", 858 | " (246, 120, 98): 7,\n", 859 | " (120, 98, 87): 1,\n", 860 | " (98, 87, 255): 3,\n", 861 | " (87, 255, 117): 187,\n", 862 | " (255, 117, 8): 589,\n", 863 | " (117, 8, 141): 30,\n", 864 | " (252, 80, 232): 125,\n", 865 | " (80, 232, 170): 4,\n", 866 | " (232, 170, 12): 1,\n", 867 | " (170, 12, 0): 3,\n", 868 | " (246, 120, 79): 6,\n", 869 | " (120, 79, 141): 4,\n", 870 | " (79, 141, 69): 4,\n", 871 | " (141, 69, 248): 345,\n", 872 | " (69, 248, 80): 266,\n", 873 | " (248, 80, 255): 88,\n", 874 | " (255, 117, 252): 805,\n", 875 | " (255, 21, 76): 11,\n", 876 | " (21, 76, 179): 2,\n", 877 | " (76, 179, 68): 2,\n", 878 | " (192, 117, 45): 7,\n", 879 | " (117, 45, 255): 6,\n", 880 | " (45, 255, 21): 7,\n", 881 | " (255, 21, 244): 264,\n", 882 | " (21, 244, 176): 261,\n", 883 | " (133, 246, 126): 243,\n", 884 | " (246, 126, 11): 226,\n", 885 | " (126, 11, 15): 246,\n", 886 | " (11, 15, 183): 247,\n", 887 | " (15, 183, 246): 252,\n", 888 | " (183, 246, 129): 
250,\n", 889 | " (246, 129, 206): 248,\n", 890 | " (129, 206, 0): 293,\n", 891 | " (206, 0, 0): 308,\n", 892 | " (0, 0, 7): 375,\n", 893 | " (0, 7, 128): 750,\n", 894 | " (7, 128, 133): 303,\n", 895 | " (128, 133, 246): 275,\n", 896 | " (246, 120, 5): 246,\n", 897 | " (120, 5, 190): 246,\n", 898 | " (5, 190, 5): 253,\n", 899 | " (190, 5, 64): 268,\n", 900 | " (5, 64, 0): 294,\n", 901 | " (64, 0, 128): 323,\n", 902 | " (0, 128, 86): 268,\n", 903 | " (128, 86, 106): 56,\n", 904 | " (86, 106, 99): 4,\n", 905 | " (106, 99, 104): 4,\n", 906 | " (99, 104, 144): 3,\n", 907 | " (104, 144, 181): 1,\n", 908 | " (144, 181, 68): 4,\n", 909 | " (0, 232, 164): 3,\n", 910 | " (232, 164, 37): 3,\n", 911 | " (164, 37, 0): 4,\n", 912 | " (37, 0, 0): 40,\n", 913 | " (0, 235, 17): 8,\n", 914 | " (235, 17, 141): 3,\n", 915 | " (17, 141, 72): 2,\n", 916 | " (141, 72, 4): 2,\n", 917 | " (72, 4, 139): 8,\n", 918 | " (4, 139, 69): 38,\n", 919 | " (139, 69, 16): 227,\n", 920 | " (69, 16, 137): 62,\n", 921 | " (16, 137, 8): 11,\n", 922 | " (137, 8, 139): 25,\n", 923 | " (8, 139, 69): 75,\n", 924 | " (139, 69, 12): 287,\n", 925 | " (69, 12, 139): 43,\n", 926 | " (12, 139, 77): 26,\n", 927 | " (139, 77, 248): 77,\n", 928 | " (77, 248, 73): 4,\n", 929 | " (248, 73, 137): 1,\n", 930 | " (73, 137, 8): 3,\n", 931 | " (137, 8, 57): 2,\n", 932 | " (8, 57, 125): 3,\n", 933 | " (57, 125, 252): 19,\n", 934 | " (125, 252, 116): 21,\n", 935 | " (252, 116, 8): 35,\n", 936 | " (117, 252, 232): 380,\n", 937 | " (252, 232, 153): 5,\n", 938 | " (232, 153, 67): 2,\n", 939 | " (153, 67, 4): 3,\n", 940 | " (67, 4, 0): 6,\n", 941 | " (4, 0, 95): 9,\n", 942 | " (0, 95, 139): 143,\n", 943 | " (95, 139, 198): 366,\n", 944 | " (139, 198, 94): 553,\n", 945 | " (198, 94, 139): 189,\n", 946 | " (94, 139, 229): 222,\n", 947 | " (93, 194, 12): 229,\n", 948 | " (194, 12, 0): 243,\n", 949 | " (12, 0, 85): 197,\n", 950 | " (86, 87, 139): 206,\n", 951 | " (87, 139, 125): 269,\n", 952 | " (125, 8, 51): 53,\n", 953 | " (8, 51, 
219): 28,\n", 954 | " (51, 219, 51): 12,\n", 955 | " (219, 51, 246): 6,\n", 956 | " (51, 246, 137): 45,\n", 957 | " (246, 137, 93): 4,\n", 958 | " (137, 93, 252): 97,\n", 959 | " (93, 252, 57): 11,\n", 960 | " (252, 57, 31): 4,\n", 961 | " (57, 31, 116): 10,\n", 962 | " (31, 116, 49): 2,\n", 963 | " (116, 49, 255): 3,\n", 964 | " (49, 255, 55): 3,\n", 965 | " (255, 55, 232): 44,\n", 966 | " (55, 232, 16): 1,\n", 967 | " (232, 16, 41): 4,\n", 968 | " (16, 41, 0): 4,\n", 969 | " (41, 0, 0): 34,\n", 970 | " (240, 131, 254): 24,\n", 971 | " (131, 254, 255): 16,\n", 972 | " (254, 255, 117): 8,\n", 973 | " (255, 117, 10): 8,\n", 974 | " (117, 10, 184): 2,\n", 975 | " (10, 184, 87): 1,\n", 976 | " (184, 87, 0): 32,\n", 977 | " (87, 0, 7): 157,\n", 978 | " (7, 128, 233): 22,\n", 979 | " (128, 233, 132): 1,\n", 980 | " (233, 132, 0): 7,\n", 981 | " (132, 0, 0): 69,\n", 982 | " (69, 252, 209): 5,\n", 983 | " (252, 209, 238): 3,\n", 984 | " (209, 238, 80): 2,\n", 985 | " (238, 80, 104): 2,\n", 986 | " (80, 104, 255): 15,\n", 987 | " (104, 255, 255): 28,\n", 988 | " (255, 255, 255): 3445,\n", 989 | " (255, 255, 127): 64,\n", 990 | " (255, 127, 255): 22,\n", 991 | " (127, 255, 55): 3,\n", 992 | " (55, 232, 212): 2,\n", 993 | " (232, 212, 9): 1,\n", 994 | " (212, 9, 0): 2,\n", 995 | " (9, 0, 0): 93,\n", 996 | " (192, 120, 110): 3,\n", 997 | " (120, 110, 139): 4,\n", 998 | " (110, 139, 93): 1,\n", 999 | " (139, 93, 252): 26,\n", 1000 | " (93, 252, 139): 14,\n", 1001 | " (252, 139, 125): 12,\n", 1002 | " (139, 125, 16): 68,\n", 1003 | " (125, 16, 133): 16,\n", 1004 | " (16, 133, 255): 22,\n", 1005 | " (133, 255, 117): 89,\n", 1006 | " (255, 117, 24): 112,\n", 1007 | " (117, 24, 141): 13,\n", 1008 | " (24, 141, 69): 22,\n", 1009 | " (141, 69, 16): 24,\n", 1010 | " (69, 16, 80): 19,\n", 1011 | " (16, 80, 104): 11,\n", 1012 | " (127, 255, 117): 10,\n", 1013 | " (255, 117, 12): 577,\n", 1014 | " (117, 12, 232): 160,\n", 1015 | " (12, 232, 181): 1,\n", 1016 | " (232, 181, 9): 2,\n", 
1017 | " (181, 9, 0): 1,\n", 1018 | " (192, 120, 79): 3,\n", 1019 | " (120, 79, 139): 4,\n", 1020 | " (79, 139, 125): 4,\n", 1021 | " (125, 16, 139): 8,\n", 1022 | " (16, 139, 206): 8,\n", 1023 | " (139, 206, 141): 4,\n", 1024 | " (206, 141, 71): 2,\n", 1025 | " (141, 71, 1): 6,\n", 1026 | " (71, 1, 43): 4,\n", 1027 | " (1, 43, 203): 5,\n", 1028 | " (43, 203, 59): 3,\n", 1029 | " (203, 59, 200): 3,\n", 1030 | " (59, 200, 115): 6,\n", 1031 | " (200, 115, 29): 3,\n", 1032 | " (115, 29, 255): 3,\n", 1033 | " (29, 255, 117): 9,\n", 1034 | " (117, 20, 141): 22,\n", 1035 | " (20, 141, 4): 5,\n", 1036 | " (141, 4, 59): 5,\n", 1037 | " (4, 59, 139): 3,\n", 1038 | " (59, 139, 93): 2,\n", 1039 | " (93, 8, 141): 17,\n", 1040 | " (8, 141, 52): 3,\n", 1041 | " (141, 52, 69): 2,\n", 1042 | " (52, 69, 2): 2,\n", 1043 | " (69, 2, 0): 18,\n", 1044 | " (0, 0, 86): 81,\n", 1045 | " (0, 86, 83): 7,\n", 1046 | " (86, 83, 232): 22,\n", 1047 | " (83, 232, 15): 3,\n", 1048 | " (232, 15, 1): 4,\n", 1049 | " (15, 1, 0): 8,\n", 1050 | " (192, 120, 38): 3,\n", 1051 | " (120, 38, 235): 5,\n", 1052 | " ...})" 1053 | ] 1054 | }, 1055 | "execution_count": 4, 1056 | "metadata": {}, 1057 | "output_type": "execute_result" 1058 | } 1059 | ], 1060 | "source": [ 1061 | "extractedNgrams" 1062 | ] 1063 | }, 1064 | { 1065 | "cell_type": "code", 1066 | "execution_count": 5, 1067 | "metadata": {}, 1068 | "outputs": [ 1069 | { 1070 | "name": "stdout", 1071 | "output_type": "stream", 1072 | "text": [ 1073 | "[((0, 0, 0), 32126), ((255, 255, 255), 3445), ((139, 240, 133), 1929), ((240, 133, 246), 1924), ((101, 100, 32), 1892), ((32, 116, 111), 1815), ((116, 111, 32), 1802), ((105, 108, 101), 1642), ((100, 32, 116), 1551), ((108, 101, 100), 1537), ((97, 105, 108), 1517), ((70, 97, 105), 1506), ((0, 70, 97), 1505), ((1, 0, 0), 1353), ((133, 246, 121), 1279), ((0, 0, 70), 1262), ((85, 139, 236), 1249), ((255, 255, 139), 1234), ((0, 139, 240), 1180), ((46, 0, 0), 1067), ((255, 139, 240), 1054), ((0, 0, 139), 
1033), ((241, 239, 240), 984), ((239, 240, 255), 940), ((255, 241, 239), 927), ((128, 0, 128), 904), ((240, 255, 241), 856), ((0, 101, 0), 853), ((0, 128, 67), 850), ((128, 67, 75), 842), ((0, 85, 139), 808), ((255, 117, 252), 805), ((254, 255, 255), 789), ((0, 7, 128), 750), ((2, 0, 0), 708), ((0, 128, 255), 682), ((139, 229, 93), 681), ((1, 0, 128), 658), ((75, 1, 0), 652), ((128, 255, 127), 643), ((7, 128, 0), 643), ((67, 75, 1), 641), ((32, 112, 97), 639), ((4, 0, 0), 638), ((0, 86, 232), 636), ((3, 0, 0), 632), ((133, 246, 120), 630), ((141, 69, 252), 614), ((253, 255, 255), 598), ((255, 117, 8), 589)]\n" 1074 | ] 1075 | } 1076 | ], 1077 | "source": [ 1078 | "print(extractedNgrams.most_common(50))" 1079 | ] 1080 | }, 1081 | { 1082 | "cell_type": "code", 1083 | "execution_count": null, 1084 | "metadata": {}, 1085 | "outputs": [], 1086 | "source": [] 1087 | } 1088 | ], 1089 | "metadata": { 1090 | "kernelspec": { 1091 | "display_name": "Python 3", 1092 | "language": "python", 1093 | "name": "python3" 1094 | }, 1095 | "language_info": { 1096 | "codemirror_mode": { 1097 | "name": "ipython", 1098 | "version": 3 1099 | }, 1100 | "file_extension": ".py", 1101 | "mimetype": "text/x-python", 1102 | "name": "python", 1103 | "nbconvert_exporter": "python", 1104 | "pygments_lexer": "ipython3", 1105 | "version": "3.6.7" 1106 | } 1107 | }, 1108 | "nbformat": 4, 1109 | "nbformat_minor": 2 1110 | } 1111 | -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/Featurizing the PE Header/Featurizing the PE Header.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from os import listdir\n", 10 | "from os.path import isfile, join\n", 11 | "directories = [\"Benign PE Samples\", \"Malicious PE Samples\"]\n", 12 | "import pefile" 13 | ] 14 | }, 
def preprocessImports(listOfDLLs):
    """Normalize raw DLL names so they can be used as text features.

    Takes input such as [b'ADVAPI32.dll', b'KERNEL32.dll', b'msvcrt.dll'],
    lower-cases each name and keeps only the part before the first "."
    (which removes the ".dll" extension).

    Args:
        listOfDLLs: iterable of DLL names as bytes.

    Returns:
        list of str, one entry per input name, in input order.

    Raises:
        UnicodeDecodeError: if a name is not valid UTF-8. The error is left
            to propagate so the caller decides how to treat the sample.
    """
    return [x.decode().split(".")[0].lower() for x in listOfDLLs]


def getImports(pe):
    """Return the normalized names of the DLLs in a PE's import directory.

    Args:
        pe: object whose DIRECTORY_ENTRY_IMPORT attribute iterates over
            entries exposing a bytes `dll` attribute (presumably a
            pefile.PE instance -- confirm against the caller).

    Returns:
        list of str as produced by preprocessImports.

    Raises:
        AttributeError: if `pe` has no DIRECTORY_ENTRY_IMPORT attribute.
            Deliberately not swallowed here, so existing callers that
            catch and report the failure keep working unchanged.
        UnicodeDecodeError: if an imported DLL name is not valid UTF-8.
    """
    return preprocessImports([entry.dll for entry in pe.DIRECTORY_ENTRY_IMPORT])


def getSectionNames(pe):
    """Return the section names of a PE as lower-cased strings.

    Every NUL character is removed from each decoded name before it is
    lower-cased.

    Args:
        pe: object whose `sections` attribute iterates over sections
            exposing a bytes `Name` attribute.

    Returns:
        list of str, one entry per section, in section order.

    Raises:
        UnicodeDecodeError: if a section name is not valid UTF-8.
    """
    return [
        eachSection.Name.decode().replace('\x00', '').lower()
        for eachSection in pe.sections
    ]
Benign PE Samples/c2wtshost.exe\n", 66 | "'DOS Header magic not found.'\n", 67 | "Unable to obtain imports from Benign PE Samples/CCG.exe\n", 68 | "'DOS Header magic not found.'\n", 69 | "Unable to obtain imports from Benign PE Samples/CExecSvc.exe\n", 70 | "'DOS Header magic not found.'\n", 71 | "Unable to obtain imports from Benign PE Samples/cmak.exe\n", 72 | "'DOS Header magic not found.'\n", 73 | "Unable to obtain imports from Benign PE Samples/csvde.exe\n", 74 | "'DOS Header magic not found.'\n", 75 | "Unable to obtain imports from Benign PE Samples/dcdiag.exe\n", 76 | "'DOS Header magic not found.'\n", 77 | "Unable to obtain imports from Benign PE Samples/dplaysvr.exe\n", 78 | "'DOS Header magic not found.'\n", 79 | "Unable to obtain imports from Benign PE Samples/dpnsvr.exe\n", 80 | "'DOS Header magic not found.'\n", 81 | "Unable to obtain imports from Benign PE Samples/dsacls.exe\n", 82 | "'DOS Header magic not found.'\n", 83 | "Unable to obtain imports from Benign PE Samples/dsamain.exe\n", 84 | "'DOS Header magic not found.'\n", 85 | "Unable to obtain imports from Benign PE Samples/dsdbutil.exe\n", 86 | "'DOS Header magic not found.'\n", 87 | "Unable to obtain imports from Benign PE Samples/dsmgmt.exe\n", 88 | "'DOS Header magic not found.'\n", 89 | "Unable to obtain imports from Benign PE Samples/eshell.exe\n", 90 | "'DOS Header magic not found.'\n", 91 | "Unable to obtain imports from Benign PE Samples/evntcmd.exe\n", 92 | "'DOS Header magic not found.'\n", 93 | "Unable to obtain imports from Benign PE Samples/evntwin.exe\n", 94 | "'DOS Header magic not found.'\n", 95 | "Unable to obtain imports from Benign PE Samples/hcsdiag.exe\n", 96 | "'DOS Header magic not found.'\n", 97 | "Unable to obtain imports from Benign PE Samples/hvc.exe\n", 98 | "'DOS Header magic not found.'\n", 99 | "Unable to obtain imports from Benign PE Samples/hvsimgr.exe\n", 100 | "'DOS Header magic not found.'\n", 101 | "Unable to obtain imports from Benign PE 
Samples/hvsirdpclient.exe\n", 102 | "'DOS Header magic not found.'\n", 103 | "Unable to obtain imports from Benign PE Samples/hvsirpcd.exe\n", 104 | "'DOS Header magic not found.'\n", 105 | "Unable to obtain imports from Benign PE Samples/iisreset.exe\n", 106 | "'DOS Header magic not found.'\n", 107 | "Unable to obtain imports from Benign PE Samples/iisrstas.exe\n", 108 | "'DOS Header magic not found.'\n", 109 | "Unable to obtain imports from Benign PE Samples/iissetup.exe\n", 110 | "'DOS Header magic not found.'\n", 111 | "Unable to obtain imports from Benign PE Samples/inetinfo.exe\n", 112 | "'DOS Header magic not found.'\n", 113 | "Unable to obtain imports from Benign PE Samples/InetMgr.exe\n", 114 | "'DOS Header magic not found.'\n", 115 | "Unable to obtain imports from Benign PE Samples/InetMgr6.exe\n", 116 | "'DOS Header magic not found.'\n", 117 | "Unable to obtain imports from Benign PE Samples/InspectVhdDialog.exe\n", 118 | "'DOS Header magic not found.'\n", 119 | "Unable to obtain imports from Benign PE Samples/InspectVhdDialog6.2.exe\n", 120 | "'DOS Header magic not found.'\n", 121 | "Unable to obtain imports from Benign PE Samples/InspectVhdDialog6.3.exe\n", 122 | "'DOS Header magic not found.'\n", 123 | "Unable to obtain imports from Benign PE Samples/ldifde.exe\n", 124 | "'DOS Header magic not found.'\n", 125 | "Unable to obtain imports from Benign PE Samples/ldp.exe\n", 126 | "'DOS Header magic not found.'\n", 127 | "Unable to obtain imports from Benign PE Samples/LogCollector.exe\n", 128 | "'DOS Header magic not found.'\n", 129 | "Unable to obtain imports from Benign PE Samples/lpq.exe\n", 130 | "'DOS Header magic not found.'\n", 131 | "Unable to obtain imports from Benign PE Samples/lpr.exe\n", 132 | "'DOS Header magic not found.'\n", 133 | "Unable to obtain imports from Benign PE Samples/LxRun.exe\n", 134 | "'utf-8' codec can't decode byte 0xd2 in position 6: invalid continuation byte\n", 135 | "Unable to obtain imports from Malicious PE 
Samples/Build.exe\n", 136 | "'PE' object has no attribute 'DIRECTORY_ENTRY_IMPORT'\n", 137 | "Unable to obtain imports from Malicious PE Samples/malware.exe\n", 138 | "'utf-8' codec can't decode byte 0xff in position 1: invalid start byte\n", 139 | "Unable to obtain imports from Malicious PE Samples/wirelesskeyview.exe\n", 140 | "'Invalid NT Headers signature.'\n", 141 | "Unable to obtain imports from Malicious PE Samples/{71257279-042b-371d-a1d3-fbf8d2fadffa}.exe\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "importsCorpus = []\n", 147 | "numSections = []\n", 148 | "sectionNames = []\n", 149 | "for datasetPath in directories:\n", 150 | " samples = [f for f in listdir(datasetPath) if isfile(join(datasetPath,f))]\n", 151 | " for file in samples:\n", 152 | " filePath = datasetPath+\"/\"+file\n", 153 | " try:\n", 154 | " pe = pefile.PE(filePath)\n", 155 | " imports = getImports(pe)\n", 156 | " nSections = len(pe.sections)\n", 157 | " secNames = getSectionNames(pe)\n", 158 | " importsCorpus.append(imports)\n", 159 | " numSections.append(nSections)\n", 160 | " sectionNames.append(secNames)\n", 161 | " \n", 162 | " except Exception as e: \n", 163 | " print(e)\n", 164 | " print(\"Unable to obtain imports from \"+filePath)" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 4, 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "name": "stdout", 174 | "output_type": "stream", 175 | "text": [ 176 | "[['mscoree'], ['mscoree'], ['mscoree'], ['wincorlib', 'api-ms-win-eventing-provider-l1-1-0', 'api-ms-win-core-libraryloader-l1-2-0', 'api-ms-win-core-localization-l1-2-0', 'api-ms-win-core-processthreads-l1-1-0', 'api-ms-win-core-heap-l1-1-0', 'api-ms-win-core-debug-l1-1-0', 'api-ms-win-core-errorhandling-l1-1-0', 'api-ms-win-core-handle-l1-1-0', 'api-ms-win-core-synch-l1-1-0', 'api-ms-win-core-synch-l1-2-0', 'api-ms-win-core-com-l1-1-0', 'ext-ms-win-shell32-shellfolders-l1-1-0', 'api-ms-win-core-string-l1-1-0', 
'api-ms-win-core-registry-l1-1-0', 'api-ms-win-core-util-l1-1-0', 'api-ms-win-core-winrt-error-l1-1-0', 'api-ms-win-core-winrt-error-l1-1-1', 'api-ms-win-core-winrt-string-l1-1-0', 'msvcrt', 'ntdll', 'api-ms-win-core-profile-l1-1-0', 'api-ms-win-core-sysinfo-l1-1-0'], ['advapi32', 'kernel32', 'msvcrt', 'ntdll', 'ole32', 'oleaut32', 'wintrust', 'fltlib', 'shell32', 'version', 'activeds']]\n", 177 | "[3, 3, 3, 6, 6]\n", 178 | "[['.text', '.rsrc', '.reloc'], ['.text', '.rsrc', '.reloc'], ['.text', '.rsrc', '.reloc'], ['.text', '.rdata', '.data', '.pdata', '.rsrc', '.reloc'], ['.text', '.rdata', '.data', '.pdata', '.rsrc', '.reloc']]\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "print(importsCorpus[0:5])\n", 184 | "print(numSections[0:5])\n", 185 | "print(sectionNames[0:5])" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [] 194 | } 195 | ], 196 | "metadata": { 197 | "kernelspec": { 198 | "display_name": "Python 3", 199 | "language": "python", 200 | "name": "python3" 201 | }, 202 | "language_info": { 203 | "codemirror_mode": { 204 | "name": "ipython", 205 | "version": 3 206 | }, 207 | "file_extension": ".py", 208 | "mimetype": "text/x-python", 209 | "name": "python", 210 | "nbconvert_exporter": "python", 211 | "pygments_lexer": "ipython3", 212 | "version": "3.6.7" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 2 217 | } 218 | -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/Handling Type I and Type II Errors/Handling Type I and Type II Errors.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 49, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from sklearn.metrics import confusion_matrix\n", 10 | "import numpy as np\n", 11 | "import scipy\n", 12 | "X_train = 
scipy.sparse.load_npz(\"X_train.npz\")\n", 13 | "y_train = np.load(\"y_train.npy\")\n", 14 | "X_test = scipy.sparse.load_npz(\"X_test.npz\")\n", 15 | "y_test = np.load(\"y_test.npy\")\n", 16 | "desiredFPR = 0.05" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 12, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "def FPR(y_true, y_pred):\n", 26 | " CM = confusion_matrix(y_true, y_pred)\n", 27 | " TN = CM[0][0]\n", 28 | " FP = CM[0][1]\n", 29 | " FPR = FP/(FP+TN)\n", 30 | " return FPR\n", 31 | "\n", 32 | "def TPR(y_true, y_pred):\n", 33 | " CM = confusion_matrix(y_true, y_pred)\n", 34 | " TP = CM[1][1]\n", 35 | " FN = CM[1][0]\n", 36 | " TPR = TP/(TP+FN)\n", 37 | " return TPR" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 42, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "def thresholdVector(vector, threshold):\n", 47 | " return [0 if x>=threshold else 1 for x in vector]" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 53, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "Probabilities look like so:\n", 60 | "[[0.95251546 0.04748454]\n", 61 | " [0.98944238 0.01055762]\n", 62 | " [0.98989589 0.01010411]\n", 63 | " [0.7679705 0.2320295 ]\n", 64 | " [0.88519213 0.11480787]]\n", 65 | "\n", 66 | "Testing thresholds:\n", 67 | "99 0.9681818181818181 1.0\n", 68 | "98 0.4636363636363636 1.0\n", 69 | "97 0.41818181818181815 1.0\n", 70 | "96 0.37272727272727274 1.0\n", 71 | "95 0.2863636363636364 1.0\n", 72 | "94 0.18636363636363637 1.0\n", 73 | "93 0.12727272727272726 1.0\n", 74 | "92 0.09545454545454546 1.0\n", 75 | "91 0.07727272727272727 1.0\n", 76 | "90 0.07272727272727272 1.0\n", 77 | "89 0.07272727272727272 1.0\n", 78 | "88 0.06818181818181818 1.0\n", 79 | "87 0.06818181818181818 1.0\n", 80 | "86 0.06818181818181818 1.0\n", 81 | "85 0.06818181818181818 1.0\n", 82 | "84 0.06818181818181818 1.0\n", 
83 | "83 0.06818181818181818 1.0\n", 84 | "82 0.06818181818181818 1.0\n", 85 | "81 0.06818181818181818 1.0\n", 86 | "80 0.06818181818181818 1.0\n", 87 | "79 0.06818181818181818 1.0\n", 88 | "78 0.06363636363636363 1.0\n", 89 | "77 0.06363636363636363 1.0\n", 90 | "76 0.05909090909090909 1.0\n", 91 | "75 0.05909090909090909 1.0\n", 92 | "74 0.05909090909090909 1.0\n", 93 | "73 0.05909090909090909 1.0\n", 94 | "72 0.05909090909090909 1.0\n", 95 | "71 0.05909090909090909 1.0\n", 96 | "70 0.05909090909090909 1.0\n", 97 | "69 0.05909090909090909 1.0\n", 98 | "68 0.05909090909090909 1.0\n", 99 | "67 0.05909090909090909 1.0\n", 100 | "66 0.05909090909090909 1.0\n", 101 | "65 0.05909090909090909 1.0\n", 102 | "64 0.05909090909090909 1.0\n", 103 | "63 0.05909090909090909 1.0\n", 104 | "62 0.004545454545454545 0.7857142857142857\n", 105 | "Selected threshold: \n", 106 | "0.62\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "from sklearn.linear_model import LogisticRegression\n", 112 | "LR=LogisticRegression()\n", 113 | "LR.fit(X_train,y_train)\n", 114 | "LRPredProb = LR.predict_proba(X_train)\n", 115 | "print(\"Probabilities look like so:\")\n", 116 | "print(LRPredProb[0:5])\n", 117 | "print()\n", 118 | "M = 100\n", 119 | "print(\"Testing thresholds:\")\n", 120 | "for threshold in reversed(range(M)):\n", 121 | " thresholdScaled = float(threshold)/M\n", 122 | " thresholdedPrediction = thresholdVector(LRPredProb[:,0], thresholdScaled) \n", 123 | " print(threshold, FPR(y_train, thresholdedPrediction), TPR(y_train, thresholdedPrediction))\n", 124 | " if FPR(y_train,thresholdedPrediction)'\n", 53 | "\twith 4692106 stored elements in Compressed Sparse Row format>" 54 | ] 55 | }, 56 | "execution_count": 13, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [ 62 | "X_train_tf" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 26, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stdout", 72 | "output_type": 
"stream", 73 | "text": [ 74 | " (0, 772)\t-0.03499521127162594\n", 75 | " (0, 11631)\t-0.03500460052106256\n", 76 | " (0, 27191)\t0.019719620139643457\n", 77 | " (0, 38050)\t0.15335092929327068\n", 78 | " (0, 40893)\t0.06819812765137043\n", 79 | " (0, 43150)\t-0.030652868514715367\n", 80 | " (0, 45206)\t-0.07176014995016518\n", 81 | " (0, 46512)\t0.09043016550513941\n", 82 | " (0, 46826)\t-0.06397798297931095\n", 83 | " (0, 47514)\t-0.077684454241659\n", 84 | " (0, 48144)\t-0.03868564643766536\n", 85 | " (0, 51175)\t0.09236029043126265\n", 86 | " (0, 55881)\t-0.077684454241659\n", 87 | " (0, 59729)\t-0.023653464977688998\n", 88 | " (0, 64534)\t0.05954154148426631\n", 89 | " (0, 67784)\t-0.08401338274143716\n", 90 | " (0, 71498)\t0.08613775858631713\n", 91 | " (0, 75648)\t0.048875792068053855\n", 92 | " (0, 89279)\t-0.09776808359994123\n", 93 | " (0, 97216)\t0.07667546464663534\n", 94 | " (0, 97424)\t0.030449337354616923\n", 95 | " (0, 101490)\t0.13473479492888113\n", 96 | " (0, 106541)\t0.08738465432915987\n", 97 | " (0, 111842)\t0.049086823203040524\n", 98 | " (0, 112243)\t-0.03659275246766097\n", 99 | " :\t:\n", 100 | " (11313, 750493)\t-0.1160078779736303\n", 101 | " (11313, 771693)\t0.09086384516141675\n", 102 | " (11313, 784574)\t-0.05818237780963616\n", 103 | " (11313, 790269)\t-0.0551002906854262\n", 104 | " (11313, 811745)\t0.02060061619447049\n", 105 | " (11313, 822308)\t-0.0213155777629503\n", 106 | " (11313, 856299)\t-0.04013279738946367\n", 107 | " (11313, 856372)\t-0.09989136081614433\n", 108 | " (11313, 868771)\t-0.10574010616015751\n", 109 | " (11313, 884288)\t-0.0931614428956718\n", 110 | " (11313, 886526)\t0.1103825165109229\n", 111 | " (11313, 898773)\t-0.08058277963118607\n", 112 | " (11313, 905444)\t0.1103825165109229\n", 113 | " (11313, 920844)\t0.07330096727656096\n", 114 | " (11313, 922741)\t-0.02813881305030049\n", 115 | " (11313, 933061)\t-0.09451001068348226\n", 116 | " (11313, 936788)\t-0.19978272163228866\n", 117 | " (11313, 
958756)\t-0.03581718135240147\n", 118 | " (11313, 959146)\t0.022096111751659896\n", 119 | " (11313, 959571)\t0.1103825165109229\n", 120 | " (11313, 966864)\t-0.07579988171758481\n", 121 | " (11313, 992878)\t0.023775066719160458\n", 122 | " (11313, 1005413)\t-0.09989136081614433\n", 123 | " (11313, 1021257)\t-0.032641882467555394\n", 124 | " (11313, 1032344)\t-0.031622585584606254\n" 125 | ] 126 | } 127 | ], 128 | "source": [ 129 | "print(X_train_tf)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [] 138 | } 139 | ], 140 | "metadata": { 141 | "kernelspec": { 142 | "display_name": "Python 3", 143 | "language": "python", 144 | "name": "python3" 145 | }, 146 | "language_info": { 147 | "codemirror_mode": { 148 | "name": "ipython", 149 | "version": 3 150 | }, 151 | "file_extension": ".py", 152 | "mimetype": "text/x-python", 153 | "name": "python", 154 | "nbconvert_exporter": "python", 155 | "pygments_lexer": "ipython3", 156 | "version": "3.6.7" 157 | } 158 | }, 159 | "nbformat": 4, 160 | "nbformat_minor": 2 161 | } 162 | -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/Network Behavior Anomaly Detection/Network Behavior Anomaly Detection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "df = pd.read_csv(\"kddcup_corrected_subset.csv\", index_col=None)" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 3, 24 | "metadata": {}, 25 | "outputs": [ 26 | { 27 | "data": { 28 | "text/html": [ 29 | "
\n", 30 | "\n", 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | "
duration\"protocol_type\"\"flag\"\"src_bytes\"\"dst_bytes\"\"land\"\"wrong_fragment\"\"urgent\"hot\"num_failed_logins\"...\"dst_host_srv_count\"\"dst_host_same_srv_rate\"\"dst_host_diff_srv_rate\"\"dst_host_same_src_port_rate\"\"dst_host_srv_diff_host_rate\"dst_host_serror_rate\"dst_host_srv_serror_rate\"\"dst_host_rerror_rate\"\"dst_host_srv_rerror_rate\"label
00tcpSF22318500000...25510110000normal
10tcpSF23026000000...2551033733000normal
20tcpSF2971378700000...25510110000normal
30tcpSF291354200000...25510110000normal
40tcpSF29575300000...25510110000normal
\n", 193 | "

5 rows × 41 columns

\n", 194 | "
" 195 | ], 196 | "text/plain": [ 197 | " duration \"protocol_type\" \"flag\" \"src_bytes\" \"dst_bytes\" \"land\" \\\n", 198 | "0 0 tcp SF 223 185 0 \n", 199 | "1 0 tcp SF 230 260 0 \n", 200 | "2 0 tcp SF 297 13787 0 \n", 201 | "3 0 tcp SF 291 3542 0 \n", 202 | "4 0 tcp SF 295 753 0 \n", 203 | "\n", 204 | " \"wrong_fragment\" \"urgent\" hot \"num_failed_logins\" ... \\\n", 205 | "0 0 0 0 0 ... \n", 206 | "1 0 0 0 0 ... \n", 207 | "2 0 0 0 0 ... \n", 208 | "3 0 0 0 0 ... \n", 209 | "4 0 0 0 0 ... \n", 210 | "\n", 211 | " \"dst_host_srv_count\" \"dst_host_same_srv_rate\" \\\n", 212 | "0 255 1 \n", 213 | "1 255 1 \n", 214 | "2 255 1 \n", 215 | "3 255 1 \n", 216 | "4 255 1 \n", 217 | "\n", 218 | " \"dst_host_diff_srv_rate\" \"dst_host_same_src_port_rate\" \\\n", 219 | "0 0 1 \n", 220 | "1 0 33 \n", 221 | "2 0 1 \n", 222 | "3 0 1 \n", 223 | "4 0 1 \n", 224 | "\n", 225 | " \"dst_host_srv_diff_host_rate\" dst_host_serror_rate \\\n", 226 | "0 1 0 \n", 227 | "1 7 33 \n", 228 | "2 1 0 \n", 229 | "3 1 0 \n", 230 | "4 1 0 \n", 231 | "\n", 232 | " \"dst_host_srv_serror_rate\" \"dst_host_rerror_rate\" \\\n", 233 | "0 0 0 \n", 234 | "1 0 0 \n", 235 | "2 0 0 \n", 236 | "3 0 0 \n", 237 | "4 0 0 \n", 238 | "\n", 239 | " \"dst_host_srv_rerror_rate\" label \n", 240 | "0 0 normal \n", 241 | "1 0 normal \n", 242 | "2 0 normal \n", 243 | "3 0 normal \n", 244 | "4 0 normal \n", 245 | "\n", 246 | "[5 rows x 41 columns]" 247 | ] 248 | }, 249 | "execution_count": 3, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "df.head()" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 4, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "data": { 265 | "text/plain": [ 266 | "(41237, 41)" 267 | ] 268 | }, 269 | "execution_count": 4, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | "df.shape" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 
5, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/plain": [ 286 | "Counter({'normal': 39247,\n", 287 | " 'apache2': 794,\n", 288 | " 'phf': 2,\n", 289 | " 'back': 1098,\n", 290 | " 'neptune': 93,\n", 291 | " 'portsweep': 2,\n", 292 | " 'saint': 1})" 293 | ] 294 | }, 295 | "execution_count": 5, 296 | "metadata": {}, 297 | "output_type": "execute_result" 298 | } 299 | ], 300 | "source": [ 301 | "y = df[\"label\"].values\n", 302 | "from collections import Counter\n", 303 | "Counter(y)" 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "execution_count": 7, 309 | "metadata": {}, 310 | "outputs": [], 311 | "source": [ 312 | "from sklearn.preprocessing import LabelEncoder\n", 313 | "encodings = dict()\n", 314 | "for c in df.columns:\n", 315 | " if df[c].dtype == \"object\":\n", 316 | " encodings[c] = LabelEncoder()\n", 317 | " df[c] = encodings[c].fit_transform(df[c])" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 8, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "y = df.pop(\"label\").values\n", 327 | "X = df.values" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 9, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [ 336 | "from sklearn.model_selection import train_test_split\n", 337 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", 338 | "X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 10, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "from sklearn.ensemble import IsolationForest\n", 348 | "contaminationParameter = 1-sum(y_train==encodings[\"label\"].transform([\"normal\"])[0])/len(y_train)\n", 349 | "IF = IsolationForest(n_estimators=100, max_samples=256,contamination=contaminationParameter)" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 
| "execution_count": 11, 355 | "metadata": {}, 356 | "outputs": [ 357 | { 358 | "name": "stderr", 359 | "output_type": "stream", 360 | "text": [ 361 | "c:\\users\\etsukerman\\appdata\\local\\programs\\python\\python37\\lib\\site-packages\\sklearn\\ensemble\\iforest.py:223: FutureWarning: behaviour=\"old\" is deprecated and will be removed in version 0.22. Please use behaviour=\"new\", which makes the decision_function change to match other anomaly detection algorithm API.\n", 362 | " FutureWarning)\n" 363 | ] 364 | }, 365 | { 366 | "data": { 367 | "text/plain": [ 368 | "IsolationForest(behaviour='old', bootstrap=False,\n", 369 | " contamination=0.048987510609918794, max_features=1.0,\n", 370 | " max_samples=256, n_estimators=100, n_jobs=None, random_state=None,\n", 371 | " verbose=0)" 372 | ] 373 | }, 374 | "execution_count": 11, 375 | "metadata": {}, 376 | "output_type": "execute_result" 377 | } 378 | ], 379 | "source": [ 380 | "IF.fit(X_train)" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 12, 386 | "metadata": {}, 387 | "outputs": [], 388 | "source": [ 389 | "scores = IF.decision_function(X_val)" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": 13, 395 | "metadata": {}, 396 | "outputs": [ 397 | { 398 | "data": { 399 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABIcAAAJCCAYAAABahKemAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHpNJREFUeJzt3X+MZWd93/HPFw+GhjTY2Aula9NxxLYpqVSgW4cGNapwmgCOYksF1RUJTuLKikTbpPQHQ9IKKRKSUauaREppLBxqqqRAnR+4WRpEbOgPqbiswYKAS70YFy82sARDSAmkhqd/zFkxXc96787cO/fOfF8vyZp7zzn37nP97NmZfe9z7q0xRgAAAADo6UnLHgAAAAAAyyMOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0trbsASTJpZdeOtbX15c9DAAAAIAD45577vniGOPQuY5biTi0vr6e48ePL3sYAAAAAAdGVf3vWY5zWRkAAABAY+IQAAAAQGPiEAAAAEBj4hAAAABAY+IQAAAAQGPiEAAAAEBj4hAAAABAY+IQAAAAQGPiEAAAAEBj4hAAAABAY+IQAAAAQGPiEAAAAEBj4hAAAABAY+IQAAAAQGPiEAAAAEBj4hAAAABAY+IQAAAAQGPiEAAAAEBj4hAAAABAY+IQAAAAQGPiEAAAAEBj4hAAAABAY+IQAAAAQGPiEAAAAEBja8seAAAAAMCZ1jeOzXTcgzddveCRHHxWDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0NlMcqqp/WFUfr6rfr6p/X1VPraorquruqrq/qt5ZVRdOxz5lun9i2r++yBcAAAAAwM6dMw5V1eEk/yDJ0THGX0pyQZLrkrwpyc1jjCNJHk1yw/SQG5I8OsZ4bpKbp+MAAAAAWEGzXla2luRPVdVaku9I8kiSlyS5fdp/W5Jrp9vXTPcz7b+qqmo+wwUAAABgns4Zh8YYn03yL5N8JptR6CtJ7kny5THGY9NhJ5Mcnm4fTvLQ9NjHpuMvOfN5q+rGqjpeVcdPnTq129cBAAAAwA7MclnZxdlcDXRFkj+b5GlJXrbNoeP0Q55g37c3jHHLGOPoGOPooUOHZh8xAAAAAHMzy2VlP5jk02OMU2OM/5vkN5N8f5KLpsvMkuSyJA9Pt08muTxJpv1PT/KluY4aAAAAgLmYJQ59JsmLquo7pvcOuirJJ5K8P8krpmOuT/Lu6fYd0/1M++8aYzxu5RAAAAAAyzfLew7dnc03lv5wko9Nj7klyeuSvLaqTmTzPYVunR5ya5JLpu2vTbKxgHEDAAAAMAdr5z4kGWO8Ickbztj8QJIrtzn260leufuhAQAAALBoM8UhAAAAgP1sfePYTMc9eNPVCx7J6pnlPYcAAAAAOKDEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQA
AAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbWlj0AAAAAoI/1jWPLHgJnsHIIAAAAoDErhwAAAIB9y0qk3bNyCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKCxtWUPAAAAANj/1jeOLXsI7JCVQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI3NFIeq6qKqur2q/mdV3VdVf62qnlFV76uq+6evF0/HVlX9UlWdqKqPVtULF/sSAAAAANipWVcO/WKS3x1jfE+Sv5zkviQbSe4cYxxJcud0P0leluTI9N+NSd4y1xEDAAAAMDfnjENV9V1JfiDJrUkyxviTMcaXk1yT5LbpsNuSXDvdvibJ28emDya5qKqePfeRAwAAALBrs6wc+u4kp5K8rao+UlVvraqnJXnWGOORJJm+PnM6/nCSh7Y8/uS0DQAAAIAVM0scWkvywiRvGWO8IMn/ybcvIdtObbNtPO6gqhur6nhVHT916tRMgwUAAABgvmaJQyeTnBxj3D3dvz2bsejzpy8Xm75+Ycvxl295/GVJHj7zSccYt4wxjo4xjh46dGin4wcAAABgF84Zh8YYn0vyUFX9hWnTVUk+keSOJNdP265P8u7p9h1JXj19atmLknzl9OVnAAAAAKyWtRmP+/tJfq2qLkzyQJKfzGZYeldV3ZDkM0leOR37niQvT3IiydemYwEAAABYQTPFoTHGvUmObrPrqm2OHUles8txAQAAALAHZnnPIQAAAAAOKHEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKCxtWUPAAAAAFhN6xvHlj0E9oCVQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYl
DAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAja0tewAAAADA3lrfOLbsIbBCrBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABobOY4VFUXVNVHqup3pvtXVNXdVXV/Vb2zqi6ctj9lun9i2r++mKEDAAAAsFvns3LoZ5Lct+X+m5LcPMY4kuTRJDdM229I8ugY47lJbp6OAwAAAGAFzRSHquqyJFcneet0v5K8JMnt0yG3Jbl2un3NdD/T/qum4wEAAABYMbOuHHpzkn+a5FvT/UuSfHmM8dh0/2SSw9Ptw0keSpJp/1em4wEAAABYMeeMQ1X1I0m+MMa4Z+vmbQ4dM+zb+rw3VtXxqjp+6tSpmQYLAAAAwHzNsnLoxUl+tKoeTPKObF5O9uYkF1XV2nTMZUkenm6fTHJ5kkz7n57kS2c+6RjjljHG0THG0UOHDu3qRQAAAACwM+eMQ2OM148xLhtjrCe5LsldY4xXJXl/kldMh12f5N3T7Tum+5n23zXGeNzKIQAAAACW73w+rexMr0vy2qo6kc33FLp12n5rkkum7a9NsrG7IQIAAACwKGvnPuTbxhgfSPKB6fYDSa7c5pivJ3nlHMYGAAAAwILtZuUQAAAAAPvcea0cAgAAAFbX+saxZQ+BfcjKIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxtaWPQAAAADgia1vHFv2EDjArBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhsbdkDAAAAgK7WN44tewhg5RAAAABAZ+IQAAAAQGPiEAAAAEBj4hAAAABAY+IQAAAAQGPiEAAAAEBj4hAAAABAY+IQAAAAQGPiEAAAAEBj4hAAAABAY+IQAAAAQGPiEAAAAEBj4hAAAABAY2vLHgAAAAAcNOsbx5Y9BJiZlUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwA
AAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjZ0zDlXV5VX1/qq6r6o+XlU/M21/RlW9r6run75ePG2vqvqlqjpRVR+tqhcu+kUAAAAAsDOzrBx6LMk/GmP8xSQvSvKaqnpeko0kd44xjiS5c7qfJC9LcmT678Ykb5n7qAEAAACYi3PGoTHGI2OMD0+3v5rkviSHk1yT5LbpsNuSXDvdvibJ28emDya5qKqePfeRAwAAALBr5/WeQ1W1nuQFSe5O8qwxxiPJZkBK8szpsMNJHtrysJPTtjOf68aqOl5Vx0+dOnX+IwcAAABg19ZmPbCqvjPJbyT52THGH1bVWQ/dZtt43IYxbklyS5IcPXr0cfsBAABg1axvHFv2EGDuZlo5VFVPzmYY+rUxxm9Omz9/+nKx6esXpu0nk1y+5eGXJXl4PsMFAAAAYJ5m+bSySnJrkvvGGP9qy647klw/3b4+ybu3bH/19KllL0ryldOXnwEAAACwWma5rOzFSX48yceq6t5p288luSnJu6rqhiSfSfLKad97krw8yYkkX0vyk3MdMQAAAABzc844NMb4b9n+fYSS5Kptjh9JXrPLcQEAAACwB87r08oAAAAAOFjEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDGxCEAAACAxsQhAAAAgMbEIQAAAIDG1pY9AAAAAFim9Y1jyx4CLJWVQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI2tLXsAAAAAcD7WN47NdNyDN1294JHAwWDlEAAAAEBjVg4BAABwIM26wgi6s3IIAAAAoDFxCAAAAKAxl5UBAACwElwGBsshDgEAALAjYg4cDC4rAwAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaMxH2QMAADRwPh87/+BNVy9wJMCqsXIIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoDFxCAAAAKAxcQgAAACgMXEIAAAAoLG1ZQ8AAACAnVvfOLYvnhNYXVYOAQAAADQmDgEAAAA05rIyAACAFeTSLmCvWDkEAAAA0JiVQwAAAHvIiiBg1Vg5BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANCYOAQAAADQmDgEAAAA0Jg4BAAAANDY2rIHAAAAcBCsbxxb9hAAdkQcAgAAeAKiD3DQuawMAAAAoDFxCAAAAKAxl5UBAAAtuVwMYJOVQwAAAACNiUMAAAAAjYlDAAAAAI2JQwAAAACNiUMAAAAAjYlDAAAAAI35KHsAAGBpZv04+QdvunrBIwHoy8ohAAAAgMbEIQAAAIDGXFYGACycy0YAAFaXlUMAAAAAjYlDAAAAAI2JQwAAAACNec8hAABg7mZ9rzEAlk8cAgAADgxRCuD8uawMAAAAoDErhwAAgJVnRRDA4ohDAADAzEQagINHHALgwJn1Ly4P3nT1gkcCsH+IPgB9ec8hAAAAgMasHAJg6fxrNact4veCFWJ0589YAM5FHIIdcMkKcNAdpD/nDtJrYff8fgCAxxOH4ADygy/M1zLPKecz9OKcB2AZxCFgKfzwyypwqQVwNvP+88H3MwBWmTgEsA3ve7J7wsvZHaT/NwfptbB7+2GV3bLMe3yr/noB2F8WEoeq6qVJfjHJBUneOsa4aRG/ziqa9w9FVlfsnf3wQ9ayxrjM/zf7YV5mtern86q
Pjyd2kM4VVo/fXwBwsM09DlXVBUl+OcnfTHIyyYeq6o4xxifm/Wtx/jquhtgPP9DuhzGyf/nX6r3j/w1bLfN77kGJvc4pANgbi1g5dGWSE2OMB5Kkqt6R5Jok4tAWB+mHHdfkn92qz/Oqjw9gLx2UoLIf+P4DAKtlEXHocJKHttw/meT7FvDrcED5gZHunAOw2g7SOXqQXgsAsHOLiEO1zbbxuIOqbkxy43T3j6rqkwsYSxv1pl0/xaVJvrj7kbBPmf/ezH9f5n6fmcP3+63Mf2/mvy9z35v5n8Gcv98u25+b5aBFxKGTSS7fcv+yJA+fedAY45Yktyzg12cHqur4GOPossfBcpj/3sx/X+a+N/Pfm/nvy9z3Zv45myct4Dk/lORIVV1RVRcmuS7JHQv4dQAAAADYpbmvHBpjPFZVfy/Je7P5Ufa/Osb4+Lx/HQAAAAB2bxGXlWWM8Z4k71nEc7MwLvHrzfz3Zv77Mve9mf/ezH9f5r4388+2aozHvVc0AAAAAE0s4j2HAAAAANgnxKFGquoZVfW+qrp/+nrxNsc8v6r+e1V9vKo+WlV/e8u+K6rq7unx75zecJx9Ypb5n4773ar6clX9zhnb/21Vfbqq7p3+e/7ejJzdmsPcO/f3sfOY/+unY+6vquu3bP9AVX1yy7n/zL0bPTtRVS+d5uxEVW1ss/8p07l8Yjq317fse/20/ZNV9cN7OW7mY6fzX1XrVfXHW871f7PXY2f3Zpj/H6iqD1fVY1X1ijP2bft9gP1hl3P/zS3nvg+Takoc6mUjyZ1jjCNJ7pzun+lrSV49xvjeJC9N8uaqumja96YkN0+PfzTJDXswZuZnlvlPkn+R5MfPsu+fjDGeP/137yIGyULsdu6d+/vbOee/qp6R5A1Jvi/JlUnecEZEetWWc/8LezFodqaqLkjyy0leluR5Sf5OVT3vjMNuSPLoGOO5SW7O5jme6bjrkpz+GeBfT8/HPrGb+Z98asu5/tN7MmjmZsb5/0ySn0jy62c89lzfB1hhu5n7yR9vOfd/dKGDZWWJQ71ck+S26fZtSa4984Axxv8aY9w/3X44yReSHKqqSvKSJLc/0eNZaeec/yQZY9yZ5Kt7NSj2xI7n3rl/IMwy/z+c5H1jjC+NMR5N8r5sxgH2nyuTnBhjPDDG+JMk78jm74Gttv6euD3JVdO5fk2Sd4wxvjHG+HSSE9PzsX/sZv7Z/845/2OMB8cYH03yrTMe6/vA/rabuYck4lA3zxpjPJIk09cnvDSgqq5McmGSTyW5JMmXxxiPTbtPJjm8wLEyf+c1/2fxxulyw5ur6inzHR4LtJu5d+7vf7PM/+EkD225f+Y8v21aav7P/SVy5Z1rLv+/Y6Zz+yvZPNdneSyrbTfznyRXVNVHquo/V9VfX/RgmbvdnMPO//1tt/P31Ko6XlUfrCr/CNjUQj7KnuWpqt9L8me22fXz5/k8z07y75JcP8b41ln+MuCj7lbMvOb/LF6f5HPZDIa3JHldkl+Yw/MyBwuce+f+PjCH+X+ieX7VGOOzVfWnk/xGNi89fPv5j5I9Mss5e7ZjnO/7327m/5Ekzxlj/EFV/ZUkv11V3zvG+MN5D5KF2c057Pzf33Y7f88ZYzxcVd+d5K6q+tgY41NzGhv7hDh0wIwxfvBs+6rq81X17DHGI1P82fZ9I6rqu5IcS/LPxhgfnDZ/MclFVbU2/SvTZUkenvPw2aV5zP8TPPcj081vVNXbkvzjXQyVOVvg3Dv394E5zP/JJH9jy/3Lknxgeu7PTl+/WlW/ns2l6+LQ6jqZ5PIt97c7Z08fc7Kq1pI8PcmXZnwsq23H8z/GGEm+kSRjjHuq6lNJ/nyS4wsfNfOym3P4rN8H2Bd29ef39HYiGWM8UFUfSPKCbF49QiMuK+vljiSnP3ng+iTvPvOA2vwUot9K8vYxxn84vX36geH9SV7xRI9npZ1z/p/I9JfK0+9Bc22S35/r6FikHc+9c/9AmGX
+35vkh6rq4ukNSH8oyXuraq2qLk2Sqnpykh+Jc3/VfSjJkdr8lMELs/kG02d+8szW3xOvSHLXdK7fkeS66dOsrkhyJMn/2KNxMx87nv+qOnT6Dcin1QNHkjywR+NmPmaZ/7PZ9vvAgsbJ/O147qc5f8p0+9IkL07yiYWNlJVVmz8L0EFVXZLkXUmek813q3/lGONLVXU0yU+PMf5uVf1Ykrcl+fiWh/7EGOPe6QeFdyR5RpKPJPmxMcY39vZVsFOzzP903H9N8j1JvjPJHyS5YYzx3qq6K8mhbC5bvXd6zB8t4aVwnuYw9879few85v+nkvzc9LA3jjHeVlVPS/Jfkjw5yQVJfi/Ja8cY39zr18HsqurlSd6czTn71THGG6vqF5IcH2PcUVVPzeal4y/I5oqh68YYD0yP/fkkP5XksSQ/O8b4T0t5EezYTue/qv5WNi8XfyzJN5O8YYzxH5fzKtipGeb/r2bzH4IvTvL1JJ+bPqV42+8De/8K2Kmdzn1VfX+SX8nmG1U/Kcmbxxi3LudVsEziEAAAAEBjLisDAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGhMHAIAAABoTBwCAAAAaEwcAgAAAGjs/wGQ0dJm4bpogAAAAABJRU5ErkJggg==\n", 400 | "text/plain": [ 401 | "
" 402 | ] 403 | }, 404 | "metadata": { 405 | "needs_background": "light" 406 | }, 407 | "output_type": "display_data" 408 | } 409 | ], 410 | "source": [ 411 | "import matplotlib.pyplot as plt\n", 412 | "%matplotlib inline\n", 413 | "plt.figure(figsize=(20, 10))\n", 414 | "_ = plt.hist(scores, bins=100)" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 14, 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [ 423 | "cutoff = -0.07" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 15, 429 | "metadata": {}, 430 | "outputs": [ 431 | { 432 | "name": "stdout", 433 | "output_type": "stream", 434 | "text": [ 435 | "Counter({3: 7871, 1: 199, 0: 165, 2: 13})\n", 436 | "Counter({0: 158, 1: 64, 2: 13, 3: 8})\n" 437 | ] 438 | } 439 | ], 440 | "source": [ 441 | "print(Counter(y_val))\n", 442 | "print(Counter(y_val[cutoff>scores]))" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 16, 448 | "metadata": {}, 449 | "outputs": [ 450 | { 451 | "name": "stdout", 452 | "output_type": "stream", 453 | "text": [ 454 | "Counter({3: 7847, 1: 224, 0: 152, 2: 22, 6: 1, 5: 1, 4: 1})\n", 455 | "Counter({0: 148, 1: 83, 2: 22, 3: 3, 6: 1, 5: 1})\n" 456 | ] 457 | } 458 | ], 459 | "source": [ 460 | "scores_test = IF.decision_function(X_test)\n", 461 | "print(Counter(y_test))\n", 462 | "print(Counter(y_test[cutoff>scores_test]))" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": null, 468 | "metadata": {}, 469 | "outputs": [], 470 | "source": [] 471 | } 472 | ], 473 | "metadata": { 474 | "kernelspec": { 475 | "display_name": "Python 3", 476 | "language": "python", 477 | "name": "python3" 478 | }, 479 | "language_info": { 480 | "codemirror_mode": { 481 | "name": "ipython", 482 | "version": 3 483 | }, 484 | "file_extension": ".py", 485 | "mimetype": "text/x-python", 486 | "name": "python", 487 | "nbconvert_exporter": "python", 488 | "pygments_lexer": "ipython3", 489 | "version": "3.7.3" 
490 | } 491 | }, 492 | "nbformat": 4, 493 | "nbformat_minor": 2 494 | } 495 | -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/Network Behavior Anomaly Detection/kddcup_corrected_subset.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/Network Behavior Anomaly Detection/kddcup_corrected_subset.7z -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Benign PE Samples 1.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Benign PE Samples 1.7z -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Benign PE Samples 2.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Benign PE Samples 2.7z -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Benign PE Samples 3.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Benign PE Samples 3.7z -------------------------------------------------------------------------------- 
/Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Benign PE Samples 4.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Benign PE Samples 4.7z -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Benign PE Samples 5.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Benign PE Samples 5.7z -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Benign PE Samples 6.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Benign PE Samples 6.7z -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Malicious PE Samples 1.7z: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Malicious PE Samples 1.7z -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Malicious PE Samples 2.7z: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/PE Samples Dataset/Malicious PE Samples 2.7z -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/README.md: -------------------------------------------------------------------------------- 1 | # Repository for Cybersecurity Data Science on Udemy 2 | 3 | https://www.udemy.com/course/cybersecurity-data-science/?referralCode=C50C493F1D6EF98FBF8B 4 | -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/Tackling Class Imbalance/Tackling Class Imbalance.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 159, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from sklearn.ensemble import RandomForestClassifier\n", 10 | "from sklearn.metrics import balanced_accuracy_score\n", 11 | "import numpy as np\n", 12 | "import scipy.sparse\n", 13 | "import collections\n", 14 | "X_train = scipy.sparse.load_npz(\"X_train.npz\")\n", 15 | "y_train = np.load(\"y_train.npy\")\n", 16 | "X_test = scipy.sparse.load_npz(\"X_test.npz\")\n", 17 | "y_test = np.load(\"y_test.npy\")" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 160, 23 | "metadata": {}, 24 | "outputs": [ 25 | { 26 | "name": "stdout", 27 | "output_type": "stream", 28 | "text": [ 29 | "Counter({0: 120, 1: 11})\n", 30 | "0.8666666666666667\n" 31 | ] 32 | } 33 | ], 34 | "source": [ 35 | "rf=RandomForestClassifier(n_estimators=100)\n", 36 | "rf.fit(X_train,y_train)\n", 37 | "rfPred = rf.predict(X_test)\n", 38 | "print(collections.Counter(rfPred))\n", 39 | "print(balanced_accuracy_score(y_test, rfPred)) " 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 161, 45 | "metadata": 
{}, 46 | "outputs": [ 47 | { 48 | "name": "stdout", 49 | "output_type": "stream", 50 | "text": [ 51 | "Counter({0: 115, 1: 16})\n", 52 | "0.9580459770114942\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "rfWeighted=RandomForestClassifier(n_estimators=100, class_weight=\"balanced\")\n", 58 | "rfWeighted.fit(X_train,y_train)\n", 59 | "rfWeightedPred = rfWeighted.predict(X_test)\n", 60 | "print(collections.Counter(rfWeightedPred))\n", 61 | "print(balanced_accuracy_score(y_test, rfWeightedPred))" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 162, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "from sklearn.utils import resample\n", 71 | "X_train_np = X_train.toarray()\n", 72 | "class_0_indices = [i for i, x in enumerate(y_train==0) if x]\n", 73 | "class_1_indices = [i for i, x in enumerate(y_train==1) if x]\n", 74 | "size_class_0 = sum(y_train==0)\n", 75 | "X_train_class_0 = X_train_np[class_0_indices,:]\n", 76 | "y_train_class_0 = [0]*size_class_0\n", 77 | "X_train_class_1 = X_train_np[class_1_indices,:]" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 163, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "X_train_class_1_resampled = resample(X_train_class_1, replace=True, n_samples=size_class_0)\n", 87 | "y_train_class_1_resampled = [1]*size_class_0" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 164, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "X_train_resampled = np.concatenate([X_train_class_0,X_train_class_1_resampled])\n", 97 | "y_train_resampled = y_train_class_0+y_train_class_1_resampled" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 165, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "from scipy import sparse\n", 107 | "X_train_resampled = sparse.csr_matrix(X_train_resampled)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 166, 113 | "metadata": {}, 114 | 
"outputs": [ 115 | { 116 | "name": "stdout", 117 | "output_type": "stream", 118 | "text": [ 119 | "Counter({0: 114, 1: 17})\n", 120 | "0.9913793103448276\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "rfResampled=RandomForestClassifier(n_estimators=100)\n", 126 | "rfResampled.fit(X_train_resampled,y_train_resampled)\n", 127 | "rfResampledPred = rfResampled.predict(X_test)\n", 128 | "print(collections.Counter(rfResampledPred))\n", 129 | "print(balanced_accuracy_score(y_test, rfResampledPred))" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 167, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "X_train_np = X_train.toarray()\n", 139 | "class_0_indices = [i for i, x in enumerate(y_train==0) if x]\n", 140 | "class_1_indices = [i for i, x in enumerate(y_train==1) if x]\n", 141 | "size_class_1 = sum(y_train==1)\n", 142 | "X_train_class_1 = X_train_np[class_1_indices,:]\n", 143 | "y_train_class_1 = [1]*size_class_1\n", 144 | "X_train_class_0 = X_train_np[class_0_indices,:]\n", 145 | "X_train_class_0_downsampled = resample(X_train_class_0, replace=False, n_samples=size_class_1)\n", 146 | "y_train_class_0_downsampled = [0]*size_class_1" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 168, 152 | "metadata": {}, 153 | "outputs": [], 154 | "source": [ 155 | "X_train_downsampled = np.concatenate([X_train_class_1,X_train_class_0_downsampled])\n", 156 | "y_train_downsampled = y_train_class_1+y_train_class_0_downsampled" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 169, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "X_train_downsampled = sparse.csr_matrix(X_train_downsampled)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 170, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "Counter({0: 112, 1: 19})\n", 178 | "0.9827586206896552\n" 179 | ] 
180 | } 181 | ], 182 | "source": [ 183 | "rfDownsampled=RandomForestClassifier(n_estimators=100)\n", 184 | "rfDownsampled.fit(X_train_downsampled,y_train_downsampled)\n", 185 | "rfDownsampledPred = rfDownsampled.predict(X_test)\n", 186 | "print(collections.Counter(rfDownsampledPred))\n", 187 | "print(balanced_accuracy_score(y_test, rfDownsampledPred))" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 173, 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "name": "stdout", 197 | "output_type": "stream", 198 | "text": [ 199 | "Counter({0: 111, 1: 20})\n", 200 | "0.978448275862069\n" 201 | ] 202 | } 203 | ], 204 | "source": [ 205 | "from imblearn.ensemble import BalancedBaggingClassifier\n", 206 | "from sklearn.tree import DecisionTreeClassifier\n", 207 | "BBC = BalancedBaggingClassifier(base_estimator=DecisionTreeClassifier(),sampling_strategy='auto',replacement=False)\n", 208 | "BBC.fit(X_train, y_train) \n", 209 | "BBCPred = BBC.predict(X_test)\n", 210 | "print(collections.Counter(BBCPred))\n", 211 | "print(balanced_accuracy_score(y_test, BBCPred)) " 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [] 220 | } 221 | ], 222 | "metadata": { 223 | "kernelspec": { 224 | "display_name": "Python 3", 225 | "language": "python", 226 | "name": "python3" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 3 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython3", 238 | "version": "3.6.7" 239 | } 240 | }, 241 | "nbformat": 4, 242 | "nbformat_minor": 2 243 | } 244 | -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/Tackling Class Imbalance/X_test.npz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/Tackling Class Imbalance/X_test.npz -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/Tackling Class Imbalance/X_train.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/Tackling Class Imbalance/X_train.npz -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/Tackling Class Imbalance/y_test.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/Tackling Class Imbalance/y_test.npy -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/Tackling Class Imbalance/y_train.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ulookme/CyberSecurity-Data-Science/393c58655c5a902d7a69edfe0cbfbb4e231dcb4d/Cybersecurity-Data-Science-on-Udemy-master/Tackling Class Imbalance/y_train.npy -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/Train-Test Splitting Your Data/Train-Test Splitting Your Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from 
sklearn.model_selection import train_test_split\n", 10 | "import numpy as np\n", 11 | "n = 200\n", 12 | "X, y = np.arange(n).reshape((int(n/2), 2)), range(int(n/2)) # generate toy data set X with labels Y\n", 13 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # create a training and testing split" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/plain": [ 33 | "60" 34 | ] 35 | }, 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "output_type": "execute_result" 39 | } 40 | ], 41 | "source": [ 42 | "len(X_train)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/plain": [ 53 | "20" 54 | ] 55 | }, 56 | "execution_count": 4, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [ 62 | "len(X_val)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "20" 74 | ] 75 | }, 76 | "execution_count": 5, 77 | "metadata": {}, 78 | "output_type": "execute_result" 79 | } 80 | ], 81 | "source": [ 82 | "len(X_test)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "array([[144, 145],\n", 94 | " [ 2, 3],\n", 95 | " [ 30, 31],\n", 96 | " [136, 137],\n", 97 | " [ 16, 17],\n", 98 | " [ 74, 75],\n", 99 | " [108, 109],\n", 100 | " [196, 197],\n", 101 | " [132, 133],\n", 102 | " [178, 179],\n", 103 | " [150, 151],\n", 104 | " [102, 103],\n", 105 | " [192, 
193],\n", 106 | " [158, 159],\n", 107 | " [ 96, 97],\n", 108 | " [ 32, 33],\n", 109 | " [ 42, 43],\n", 110 | " [ 38, 39],\n", 111 | " [100, 101],\n", 112 | " [ 56, 57],\n", 113 | " [142, 143],\n", 114 | " [ 84, 85],\n", 115 | " [130, 131],\n", 116 | " [ 72, 73],\n", 117 | " [ 18, 19],\n", 118 | " [104, 105],\n", 119 | " [ 80, 81],\n", 120 | " [ 12, 13],\n", 121 | " [172, 173],\n", 122 | " [188, 189],\n", 123 | " [ 92, 93],\n", 124 | " [184, 185],\n", 125 | " [ 10, 11],\n", 126 | " [ 50, 51],\n", 127 | " [198, 199],\n", 128 | " [162, 163],\n", 129 | " [122, 123],\n", 130 | " [ 54, 55],\n", 131 | " [ 86, 87],\n", 132 | " [ 28, 29],\n", 133 | " [ 64, 65],\n", 134 | " [ 94, 95],\n", 135 | " [ 34, 35],\n", 136 | " [120, 121],\n", 137 | " [118, 119],\n", 138 | " [ 58, 59],\n", 139 | " [174, 175],\n", 140 | " [128, 129],\n", 141 | " [ 26, 27],\n", 142 | " [176, 177],\n", 143 | " [116, 117],\n", 144 | " [ 68, 69],\n", 145 | " [ 52, 53],\n", 146 | " [190, 191],\n", 147 | " [ 40, 41],\n", 148 | " [ 98, 99],\n", 149 | " [126, 127],\n", 150 | " [148, 149],\n", 151 | " [186, 187],\n", 152 | " [ 82, 83]])" 153 | ] 154 | }, 155 | "execution_count": 6, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "X_train" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 7, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "array([[ 48, 49],\n", 173 | " [110, 111],\n", 174 | " [ 14, 15],\n", 175 | " [ 6, 7],\n", 176 | " [ 70, 71],\n", 177 | " [124, 125],\n", 178 | " [ 22, 23],\n", 179 | " [182, 183],\n", 180 | " [138, 139],\n", 181 | " [170, 171],\n", 182 | " [194, 195],\n", 183 | " [ 76, 77],\n", 184 | " [ 4, 5],\n", 185 | " [156, 157],\n", 186 | " [ 46, 47],\n", 187 | " [134, 135],\n", 188 | " [164, 165],\n", 189 | " [168, 169],\n", 190 | " [114, 115],\n", 191 | " [112, 113]])" 192 | ] 193 | }, 194 | "execution_count": 7, 195 | "metadata": {}, 196 | "output_type": 
"execute_result" 197 | } 198 | ], 199 | "source": [ 200 | "X_val" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 8, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "data": { 210 | "text/plain": [ 211 | "array([[166, 167],\n", 212 | " [106, 107],\n", 213 | " [140, 141],\n", 214 | " [ 90, 91],\n", 215 | " [ 88, 89],\n", 216 | " [ 78, 79],\n", 217 | " [ 44, 45],\n", 218 | " [160, 161],\n", 219 | " [ 20, 21],\n", 220 | " [ 0, 1],\n", 221 | " [ 36, 37],\n", 222 | " [ 60, 61],\n", 223 | " [146, 147],\n", 224 | " [ 66, 67],\n", 225 | " [180, 181],\n", 226 | " [ 8, 9],\n", 227 | " [152, 153],\n", 228 | " [154, 155],\n", 229 | " [ 24, 25],\n", 230 | " [ 62, 63]])" 231 | ] 232 | }, 233 | "execution_count": 8, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "X_test" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [] 248 | } 249 | ], 250 | "metadata": { 251 | "kernelspec": { 252 | "display_name": "Python 3", 253 | "language": "python", 254 | "name": "python3" 255 | }, 256 | "language_info": { 257 | "codemirror_mode": { 258 | "name": "ipython", 259 | "version": 3 260 | }, 261 | "file_extension": ".py", 262 | "mimetype": "text/x-python", 263 | "name": "python", 264 | "nbconvert_exporter": "python", 265 | "pygments_lexer": "ipython3", 266 | "version": "3.6.7" 267 | } 268 | }, 269 | "nbformat": 4, 270 | "nbformat_minor": 2 271 | } 272 | -------------------------------------------------------------------------------- /Cybersecurity-Data-Science-on-Udemy-master/Training an XGBoost Classifier/Training an XGBoost Classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 9, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from xgboost import XGBClassifier\n", 10 | "from sklearn 
import datasets\n", 11 | "from sklearn.model_selection import train_test_split\n", 12 | "from sklearn.metrics import accuracy_score" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 14, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "iris = datasets.load_iris()\n", 22 | "X = iris['data']\n", 23 | "y = iris['target']\n", 24 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) " 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 15, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "data": { 34 | "text/plain": [ 35 | "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n", 36 | " colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,\n", 37 | " max_depth=3, min_child_weight=1, missing=None, n_estimators=100,\n", 38 | " n_jobs=1, nthread=None, objective='multi:softprob', random_state=0,\n", 39 | " reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n", 40 | " silent=True, subsample=1)" 41 | ] 42 | }, 43 | "execution_count": 15, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "model = XGBClassifier()\n", 50 | "model.fit(X_train, y_train)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 18, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "y_pred = model.predict(X_test)\n", 60 | "predictions = [round(prediction) for prediction in y_pred]" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 19, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "Accuracy: 96.67%\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "accuracy = accuracy_score(y_test, predictions)\n", 78 | "print(\"Accuracy: %.2f%%\" % (accuracy * 100))" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [] 87 | } 88 | ], 89 | "metadata": { 90 | 
"kernelspec": { 91 | "display_name": "Python 3", 92 | "language": "python", 93 | "name": "python3" 94 | }, 95 | "language_info": { 96 | "codemirror_mode": { 97 | "name": "ipython", 98 | "version": 3 99 | }, 100 | "file_extension": ".py", 101 | "mimetype": "text/x-python", 102 | "name": "python", 103 | "nbconvert_exporter": "python", 104 | "pygments_lexer": "ipython3", 105 | "version": "3.6.7" 106 | } 107 | }, 108 | "nbformat": 4, 109 | "nbformat_minor": 2 110 | } 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CyberSecurity-Data-Science 2 | Use machine learning to classify malware. Malware analysis 101. Set up a cybersecurity lab environment. Learn how to tackle data class imbalance. Unsupervised anomaly detection. End-to-end deep neural networks for malware classification. Create a machine learning Intrusion Detection System (IDS). Employ machine learning for offensive security. Learn how to address False Positive constraints. Break a CAPTCHA system using machine learning. 3 | --------------------------------------------------------------------------------