├── Mappilary scrape.ipynb ├── Object based selection.ipynb ├── README.md ├── VGG Image similarity.ipynb ├── VGG uncertainty.ipynb └── dockerfiles ├── Dockerfile ├── jupyter_notebook_config.py └── run_jupyter.sh /Mappilary scrape.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "scrolled": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import json, requests\n", 12 | "from IPython.display import display # import module to print an image\n", 13 | "import random\n", 14 | "from PIL import Image\n", 15 | "from io import BytesIO\n", 16 | "import os\n", 17 | "\n", 18 | "client_id = 'xxx' # client ID for your mappilary application\\n\",\n", 19 | "\n", 20 | "output_image_path = '/notebooks/mappilary scraped myself/images/'\n", 21 | "output_json_path = '/notebooks/mappilary scraped myself/jsons/'\n", 22 | "\n", 23 | "\n", 24 | "if not os.path.exists(output_image_path):\n", 25 | " os.makedirs(output_image_path)\n", 26 | "\n", 27 | "if not os.path.exists(output_json_path):\n", 28 | " os.makedirs(output_json_path)\n", 29 | " \n", 30 | " \n", 31 | "while True: \n", 32 | " try: \n", 33 | " ## Sample an area inside a specific area. \n", 34 | " ## These are the coordinates for the Munich area. \n", 35 | " ## For more fun: set them to your local area!\n", 36 | " long = random.uniform(11.32, 11.78)\n", 37 | " lat = random.uniform(48.01, 48.26)\n", 38 | "\n", 39 | " distance = 1500 # the maximum distance in meters that our image should be from the point it looks toward\n", 40 | "\n", 41 | " coordinates = \"{0:2f},{1:2f}\".format(long, lat)\n", 42 | " print(coordinates)\n", 43 | " \n", 44 | " # API call URL to get a selection of images close to that position\n", 45 | " url = ('https://a.mapillary.com/v3/images?client_id={}&per_page=200&lookat={}&closeto={}&radius={}').format(client_id,coordinates,coordinates,distance) \n", 46 | "\n", 47 | " # request a JSON showing the point location and metadata of the images looking at our coordinates\n", 48 | " resp = requests.get(url)\n", 49 | " data = resp.json()\n", 50 | "\n", 51 | " features = data['features']\n", 52 | " print(len(features))\n", 53 | " for feature in features: \n", 54 | " image_key = feature['properties']['key']\n", 55 | "\n", 56 | " # we will use a template link that shows a JPG from Mapillary, and insert the key\n", 57 | " image = 'https://images.mapillary.com/{}/thumb-1024.jpg'.format(image_key)\n", 58 | " \n", 59 | " # This is where you want to store the image\n", 60 | " image_filename = os.path.join(output_image_path,image_key+\".jpg\")\n", 61 | " \n", 62 | " if os.path.exists(image_filename):\n", 63 | " print('Image already exists')\n", 64 | " continue\n", 65 | " else:\n", 66 | " print('Downloading image', image_filename)\n", 67 | "\n", 68 | " # request the image URL, to get a displayable image\n", 69 | " r = requests.get(image,stream=all)\n", 70 | " b = BytesIO(r.content)\n", 71 | " img = Image.open(b)\n", 72 | " img.save(image_filename)\n", 73 | "\n", 74 | " # Also store the JSON, perhaps you might need it for something\n", 75 | " with open(os.path.join(output_json_path, image_key+\".json\"), 'w') as outfile:\n", 76 | " json.dump(feature, outfile)\n", 77 | " except Exception as e: \n", 78 | " print(e)\n", 79 | " continue" 80 | ] 81 | } 82 | ], 83 | "metadata": { 84 | "kernelspec": { 85 | "display_name": "Python 3", 86 | "language": "python", 87 | "name": "python3" 88 | }, 89 | "language_info": { 90 | "codemirror_mode": { 91 | "name": "ipython", 92 | "version": 3 93 | }, 94 | "file_extension": ".py", 95 | "mimetype": "text/x-python", 96 | "name": "python", 97 | "nbconvert_exporter": "python", 98 | "pygments_lexer": "ipython3", 99 | "version": "3.8.5" 100 | } 101 | }, 102 | "nbformat": 4, 103 | "nbformat_minor": 4 104 | } 105 | -------------------------------------------------------------------------------- /Object based selection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Under-represented class selection\n", 8 | "One way to do selection of images is to specifically search for 'under-represented classes'. Neural networks learn what you feed them, and actually struggle recognizing classes if the distribution in number of samples is tilted. More concretely: if you rarely show way more bikes than cars to a neural network you will have a better performance on cars than on bikes. \n", 9 | "\n", 10 | "It's best to keep the classes equal, but that begs the question: what images should you send to annotation, and how do you find these? \n", 11 | "\n", 12 | "An obvious solution is to use a trained neural network to find the images which contain these under-represented classes, and that's what I will demonstrate in this notebook. \n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## Download the neural network\n", 20 | "In this notebook I will use the ImageAI library. It's a simple library which makes working with images really simple. We will use a neural network which is pre-trained on the coco dataset to detect specific objects in images: \n", 21 | "`wget https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_coco_best_v2.1.0.h5`\n", 22 | "\n", 23 | "Unfortunately the predictions by this network are not going to be perfect (otherwise the self-driving car problem would be solved!), especially as the network we run inference with is not trained on this specific data. " 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# Download the model if it does not exist yet\n", 33 | "import requests\n", 34 | "from pathlib import Path\n", 35 | "\n", 36 | "model_path = Path(\"resnet50_coco_best_v2.1.0.h5\")\n", 37 | "if not model_path.is_file():\n", 38 | " url = 'https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_coco_best_v2.1.0.h5'\n", 39 | " r = requests.get(url, allow_redirects=True)\n", 40 | " open(model_path, 'wb').write(r.content)\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "scrolled": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "from imageai.Detection import ObjectDetection\n", 52 | "from collections import Counter, defaultdict\n", 53 | "import matplotlib.pyplot as plt\n", 54 | "import tqdm\n", 55 | "import os\n", 56 | "import cv2\n", 57 | "\n", 58 | "folder = '/notebooks/mappilary scraped myself/images/'\n", 59 | "\n", 60 | "execution_path = os.getcwd()\n", 61 | "\n", 62 | "detector = ObjectDetection()\n", 63 | "detector.setModelTypeAsRetinaNet()\n", 64 | "detector.setModelPath( os.path.join(execution_path , \"resnet50_coco_best_v2.1.0.h5\"))\n", 65 | "detector.loadModel()" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "## Data inspection\n", 73 | "Let's take a look at what objects are detected, and with what quality. We will then run inference on a large number of images to get a feeling for how underrepresented certain classes are. \n", 74 | "When I ran this on a few images I got the following class distribution: \n", 75 | "\n", 76 | "car 3921\n", 77 | "\n", 78 | "truck 717\n", 79 | "\n", 80 | "person 607\n", 81 | "\n", 82 | "bicycle 307\n", 83 | "\n", 84 | "bus 137\n", 85 | "\n", 86 | "motorcycle 84\n", 87 | "\n", 88 | "train 34" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "number_classes = defaultdict(int)\n", 98 | "\n", 99 | "for index, image in enumerate(tqdm.tqdm(os.listdir(folder)[:5000])):\n", 100 | " out_images, detections = detector.detectObjectsFromImage(input_image=os.path.join(folder , image), output_type=\"array\", minimum_percentage_probability=30)\n", 101 | "\n", 102 | " # Visualise the first few images\n", 103 | " if index < 3: \n", 104 | " im_rgb = cv2.cvtColor(out_images, cv2.COLOR_BGR2RGB)\n", 105 | " plt.figure(figsize=(10,10))\n", 106 | " plt.imshow(im_rgb)\n", 107 | " plt.show()\n", 108 | " \n", 109 | " # Count all object classes to get a good feeling for how well balanced the dataset would be if we would randomly select images\n", 110 | " for eachObject in detections:\n", 111 | " number_classes[eachObject[\"name\"]] += 1" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "number_classes" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "for class_name in number_classes:\n", 130 | " print(class_name, number_classes[class_name])" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "## Finding images with a specific class\n", 138 | "The code below defines a few rare classes, and shows you images with those rare classes present. " 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "scrolled": false 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "#rare_classes = [\"bicycle\", \"motorcycle\"]\n", 150 | "rare_classes = [\"dog\", \"horse\", \"cow\", \"sheep\", \"bear\", \"bird\"]\n", 151 | "\n", 152 | "number_classes = defaultdict(int)\n", 153 | "\n", 154 | "for image in os.listdir(folder):\n", 155 | " out_image, detections = detector.detectObjectsFromImage(input_image=os.path.join(folder , image), output_type=\"array\", minimum_percentage_probability=30)\n", 156 | "\n", 157 | " # Count all object classes to get a good feeling for how well balanced the dataset would be if we would randomly select images\n", 158 | " rare_detected = list()\n", 159 | " for eachObject in detections:\n", 160 | " if eachObject[\"name\"] in rare_classes: \n", 161 | " rare_detected.append(eachObject['name'])\n", 162 | "\n", 163 | " if len(rare_detected) > 0: \n", 164 | " print(\"Detected the following objects in this image: \", rare_detected)\n", 165 | " im_rgb = cv2.cvtColor(out_image, cv2.COLOR_BGR2RGB)\n", 166 | " plt.figure(figsize=(10,10))\n", 167 | " plt.imshow(im_rgb)\n", 168 | " plt.show()\n", 169 | " " 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## Conclusion and discussion\n", 177 | "As you can see not all the images contain the rare classes. However, quite a few do, so your dataset will be a bit more balanced if you use this method of data selection. \n", 178 | "\n", 179 | "One last remark is that there is one inherent danger in this way of selecting data. If there is an object which your neural network can not detect at all, this object will not be selected with this method at all. Hopefully your network learns to generalise from the selected images, but it is a danger to be mindful of!" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [] 188 | } 189 | ], 190 | "metadata": { 191 | "kernelspec": { 192 | "display_name": "Python 3", 193 | "language": "python", 194 | "name": "python3" 195 | }, 196 | "language_info": { 197 | "codemirror_mode": { 198 | "name": "ipython", 199 | "version": 3 200 | }, 201 | "file_extension": ".py", 202 | "mimetype": "text/x-python", 203 | "name": "python", 204 | "nbconvert_exporter": "python", 205 | "pygments_lexer": "ipython3", 206 | "version": "3.6.9" 207 | } 208 | }, 209 | "nbformat": 4, 210 | "nbformat_minor": 2 211 | } 212 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Data selection workshop 2 | 3 | The code in this repository belongs to a workshop I am giving. There is also a presentation, which you can find here: https://docs.google.com/presentation/d/1XPlKIggxb19U16k14vbnADxR1TT4zckS5vBeQOVizPM/edit?usp=sharing. 4 | Normally I give a presentation of about one hour, and then help people program for about one hour. This should be enough to get an understanding of one of the concepts, and explore it on your own a bit further. 5 | 6 | ## Set up your environment 7 | 8 | Before listening to any talks, download this repository to a computer with an NVIDIA GPU fit for deep learning applications. Then build the docker files: 9 | ``` 10 | docker build -t data_selection_workshop dockerfiles 11 | ``` 12 | 13 | Next run the docker container with port 8888 and 6006 open, and a drive of your current directory mounted. 14 | 15 | ``` 16 | docker run -it -p 8888:8888 -p 6006:6006 -v $(pwd):/notebooks data_selection_workshop 17 | ``` 18 | 19 | 20 | ## Download a dataset 21 | 22 | You can either bring your own data, or scrape part of a location of 'streetview-like' data from Mappilary. To do this, register an application on the Mappilary developer website: https://www.mapillary.com/dashboard/developers and get the API key. 23 | 24 | Put the API key in the 'Mappilary scrape' notebook to download your own dataset. You can choose to change the latitude and longitude to have your own local dataset! 25 | 26 | -------------------------------------------------------------------------------- /VGG Image similarity.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Select images based on similarity\n", 8 | "One way to select more data is taking an image which caused a mistake in your neural network, and finding similar images to that one. Although you can browse a lot of recordings yourself, you can also try to find images which elicit a similar response in your neural network. \n", 9 | "\n", 10 | "In this notebook I will show a simple way of doing this with a VGG16 neural network. " 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# Get the imports needed for this project. \n", 20 | "import tensorflow as tf\n", 21 | "from tensorflow.keras.applications.vgg16 import VGG16\n", 22 | "from tensorflow.keras.preprocessing import image\n", 23 | "from tensorflow.keras.applications.vgg16 import preprocess_input\n", 24 | "from sklearn.neighbors import NearestNeighbors\n", 25 | "import numpy as np\n", 26 | "import scipy.spatial\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "import cv2\n", 29 | "import os\n", 30 | "import tqdm" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Create a neural network with pre-trained weights\n", 38 | "As a neural network I will take a VGG16 neural network with a global maxpooling operator on top. The global max pooling ensures we have a feature vector of 512 features as output of the neural network. " 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "vgg16 = VGG16(weights='imagenet', include_top=False)\n", 48 | "model = tf.keras.Sequential(layers=[vgg16, tf.keras.layers.GlobalMaxPool2D() ])" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "## Analyse your unannotated data\n", 56 | "To select from your annotated data it's important to get inference results from the neural network we built, and store those in a list which links the filenames to the features. " 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "# Build a database with all imags, except for the last 1000 \n", 66 | "folder = '/notebooks/mappilary scraped myself/images/'\n", 67 | "filename_features = list()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "images_to_select = os.listdir(folder)[:len(os.listdir(folder)) - 1000]\n", 77 | "already_existing = set([x[0] for x in filename_features])\n", 78 | "images_to_select = [x for x in images_to_select if not x in already_existing]\n", 79 | "\n", 80 | "print(\"To go: \", len(images_to_select))\n", 81 | "\n", 82 | "def get_features_from_image(filename):\n", 83 | " img = image.load_img(img_path, target_size=(224, 224))\n", 84 | " x = image.img_to_array(img)\n", 85 | " x = np.expand_dims(x, axis=0)\n", 86 | " x = preprocess_input(x)\n", 87 | "\n", 88 | " features = model.predict(x) \n", 89 | " return features, img\n", 90 | "\n", 91 | "\n", 92 | "# Get the features for the images and store both the filename and features in a list\n", 93 | "\n", 94 | "for image_filename in tqdm.tqdm(images_to_select):\n", 95 | " img_path = os.path.join(folder , image_filename)\n", 96 | " try: \n", 97 | " features, img = get_features_from_image(img_path)\n", 98 | " filename_features.append((image_filename, features[0,:]))\n", 99 | " except Exception as e: \n", 100 | " print(\"Error at file name\", img_path)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "## Build an index with the metrics\n", 108 | "Now that we have the features for all unannotated images we can build a datastructure with all features. Here I offer two choices: a nearest neighbors algorithm using euclidian distance with an efficient datastructure, and a brute-force algorithm with cosine similarity between images. Experiment away!" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "X = np.array([x[1] for x in filename_features])\n", 118 | "#nbrs = NearestNeighbors(n_neighbors=10, algorithm='ball_tree').fit(X)\n", 119 | "nbrs = NearestNeighbors(n_neighbors=4, algorithm='brute', metric='cosine').fit(X)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "## Data selection" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "scrolled": false 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "for image_filename in os.listdir(folder)[-3:]:\n", 138 | " img_path = os.path.join(folder , image_filename)\n", 139 | " features, img = get_features_from_image(img_path)\n", 140 | " distances, indices = nbrs.kneighbors([features[0,:]])\n", 141 | " \n", 142 | " ## Show the query image\n", 143 | " plt.figure(figsize=(10,10))\n", 144 | " plt.imshow(img)\n", 145 | " plt.show()\n", 146 | "\n", 147 | " ## Show the closest matching results\n", 148 | " result_index = 0\n", 149 | " f, axarr = plt.subplots(len(distances[0])//2,2, figsize=(10,10))\n", 150 | " for distance, index in zip(distances[0], indices[0]):\n", 151 | " img_path = os.path.join(folder , filename_features[index][0])\n", 152 | " result = image.load_img(img_path, target_size=(224, 224))\n", 153 | " axarr[result_index//2,result_index%2].imshow(result)\n", 154 | " axarr[result_index//2,result_index%2].set_title('distance ' + str(distance))\n", 155 | " result_index += 1\n", 156 | " plt.show()" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "## What does the algorithm select? \n", 164 | "The last question now is 'what is the algorithm actually looking at'? That's a difficult question. You might like an image because it contains a specific traffic sign, but the algorithm mostly pays attention to a bike in front of the car. \n", 165 | "\n", 166 | "One way of doing this is showing the 'saliency' of the neural network. It's a bit hard to understand what the neural network pays attention to, because the output is a feature vector. What we can do in this case is visualise the saliency for individual features. Below I do that for multiple features. " 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "scrolled": true 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "for image_filename in os.listdir(folder)[-3:]:\n", 178 | " img_path = os.path.join(folder , image_filename)\n", 179 | " img = image.load_img(img_path, target_size=(224, 224))\n", 180 | "\n", 181 | " images = tf.Variable([np.array(img)], dtype=float)\n", 182 | "\n", 183 | " with tf.GradientTape(persistent=True) as tape:\n", 184 | " pred = model(images, training=False)\n", 185 | " class_idxs_sorted = np.argsort(pred.numpy().flatten())[::-1]\n", 186 | "\n", 187 | " # Show the reason for the largest N features\n", 188 | " for i in range(10):\n", 189 | " loss = pred[0][class_idxs_sorted[i]]\n", 190 | "\n", 191 | " grads = tape.gradient(loss, images)\n", 192 | "\n", 193 | " dgrad_abs = tf.math.abs(grads)\n", 194 | " dgrad_max_ = np.max(dgrad_abs, axis=3)[0]\n", 195 | "\n", 196 | " ## normalize to range between 0 and 1\n", 197 | " arr_min, arr_max = np.min(dgrad_max_), np.max(dgrad_max_)\n", 198 | " grad_eval = (dgrad_max_ - arr_min) / (arr_max - arr_min + 1e-18)\n", 199 | "\n", 200 | " plt.imshow(img)\n", 201 | " plt.imshow(grad_eval, cmap='jet', alpha=0.5)\n", 202 | " plt.show()" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [] 225 | } 226 | ], 227 | "metadata": { 228 | "kernelspec": { 229 | "display_name": "Python 3", 230 | "language": "python", 231 | "name": "python3" 232 | }, 233 | "language_info": { 234 | "codemirror_mode": { 235 | "name": "ipython", 236 | "version": 3 237 | }, 238 | "file_extension": ".py", 239 | "mimetype": "text/x-python", 240 | "name": "python", 241 | "nbconvert_exporter": "python", 242 | "pygments_lexer": "ipython3", 243 | "version": "3.6.9" 244 | } 245 | }, 246 | "nbformat": 4, 247 | "nbformat_minor": 2 248 | } 249 | -------------------------------------------------------------------------------- /dockerfiles/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:latest-gpu-jupyter 2 | 3 | RUN apt-get update ##[edited] 4 | RUN apt-get install ffmpeg libsm6 libxext6 -y 5 | 6 | RUN pip install sklearn 7 | RUN pip install keras==2.4.3 numpy==1.19.3 pillow==7.0.0 scipy==1.4.1 h5py==2.10.0 matplotlib==3.3.2 opencv-python keras-resnet==0.2.0 8 | RUN pip install imageai --upgrade 9 | RUN pip install tqdm 10 | 11 | COPY jupyter_notebook_config.py /root/.jupyter/ 12 | COPY run_jupyter.sh / 13 | RUN chmod +x /run_jupyter.sh 14 | 15 | WORKDIR "/notebooks" 16 | CMD ["/run_jupyter.sh"] 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /dockerfiles/jupyter_notebook_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from IPython.lib import passwd 3 | 4 | c.NotebookApp.ip = '*' 5 | c.NotebookApp.port = int(os.getenv('PORT', 8888)) 6 | c.NotebookApp.open_browser = False 7 | c.MultiKernelManager.default_kernel_name = 'python2' 8 | c.NotebookApp.password = '' 9 | c.NotebookApp.token = '' 10 | 11 | c.NotebookApp.allow_remote_access = True 12 | c.NotebookApp.allow_origin = '*' 13 | -------------------------------------------------------------------------------- /dockerfiles/run_jupyter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | jupyter notebook "$@" --allow-root 3 | 4 | 5 | --------------------------------------------------------------------------------