├── p2j ├── __init__.py ├── templates │ ├── cell_markdown.json │ ├── cell_code.json │ └── metadata.json ├── j2p.py ├── utils.py ├── main.py └── p2j.py ├── MANIFEST.in ├── screenshot.png ├── .gitignore ├── LICENSE ├── setup.py ├── examples ├── example3.py ├── example2.py ├── example.py ├── example3.ipynb ├── example2.ipynb └── example.ipynb └── README.md /p2j/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md LICENSE p2j/templates/*.json -------------------------------------------------------------------------------- /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/remykarem/python2jupyter/HEAD/screenshot.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .ipynb_checkpoints 3 | build 4 | p2j.egg-info 5 | dist 6 | .pyc 7 | .vscode 8 | __pycache__ -------------------------------------------------------------------------------- /p2j/templates/cell_markdown.json: -------------------------------------------------------------------------------- 1 | { 2 | "cell_type": "markdown", 3 | "metadata": {}, 4 | "source": [] 5 | } -------------------------------------------------------------------------------- /p2j/templates/cell_code.json: -------------------------------------------------------------------------------- 1 | { 2 | "cell_type": "code", 3 | "execution_count": null, 4 | "metadata": {}, 5 | "outputs": [], 6 | "source": [] 7 | } -------------------------------------------------------------------------------- /p2j/templates/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Python 3", 5 | "language": "python", 6 | "name": "python3" 7 | }, 8 | "language_info": { 9 | "codemirror_mode": { 10 | "name": "ipython", 11 | "version": 3 12 | }, 13 | "file_extension": ".py", 14 | "mimetype": "text/x-python", 15 | "name": "python", 16 | "nbconvert_exporter": "python", 17 | "pygments_lexer": "ipython3", 18 | "version": "3.6.4" 19 | } 20 | }, 21 | "nbformat": 4, 22 | "nbformat_minor": 2 23 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Raimi bin Karim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /p2j/j2p.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | from p2j.utils import _check_files 4 | 5 | 6 | def jupyter2python(source_filename: str, target_filename: str, overwrite: bool = False): 7 | """Convert Jupyter notebooks to Python scripts 8 | 9 | Args: 10 | source_filename (str): Path to Jupyter notebook. 11 | target_filename (str): Path to name of Python script. Optional. 12 | overwrite (bool): Whether to overwrite an existing Python script. 13 | with_markdown (bool, optional): Whether to include markdown. Defaults to False. 14 | """ 15 | 16 | target_filename = _check_files( 17 | source_filename, target_filename, overwrite, conversion="j2p") 18 | 19 | # Check if source file exists and read 20 | try: 21 | with open(source_filename, "r", encoding="utf-8") as infile: 22 | myfile = json.load(infile) 23 | except FileNotFoundError: 24 | print("Source file not found. Specify a valid source file.") 25 | sys.exit(1) 26 | 27 | final = ["".join(["# " + line.lstrip() for line in cell["source"] if not line.strip() == ""]) 28 | if cell["cell_type"] == "markdown" else "".join(cell["source"]) 29 | for cell in myfile["cells"]] 30 | final = "\n\n".join(final) 31 | final = final.replace("
", "") 32 | 33 | with open(target_filename, "a", encoding="utf-8") as outfile: 34 | outfile.write(final) 35 | print("Python script {} written.".format(target_filename)) 36 | -------------------------------------------------------------------------------- /p2j/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | def _check_files(source_file: str, target_file: str, overwrite: bool, conversion: str): 6 | """File path checking 7 | 8 | Check if 9 | 1) Name of source file is valid. 10 | 2) Target file already exists. If not, create. 11 | 12 | Does not check if source file exists. That will be done 13 | together when opening the file. 14 | """ 15 | 16 | if conversion == "p2j": 17 | expected_src_file_ext = ".py" 18 | expected_tgt_file_ext = ".ipynb" 19 | else: 20 | expected_src_file_ext = ".ipynb" 21 | expected_tgt_file_ext = ".py" 22 | 23 | file_base = os.path.splitext(source_file)[0] 24 | file_ext = os.path.splitext(source_file)[-1] 25 | 26 | if file_ext != expected_src_file_ext: 27 | print("Wrong file type specified. Expected {} ".format(expected_src_file_ext) + 28 | "extension but got {} instead.".format(file_ext)) 29 | sys.exit(1) 30 | 31 | # Check if target file is specified and exists. If not specified, create 32 | if target_file is None: 33 | target_file = file_base + expected_tgt_file_ext 34 | if not overwrite and os.path.isfile(target_file): 35 | # FileExistsError 36 | print("File {} exists. ".format(target_file) + 37 | "Add -o flag to overwrite this file, " + 38 | "or specify a different target filename using -t.") 39 | sys.exit(1) 40 | 41 | return target_file 42 | -------------------------------------------------------------------------------- /p2j/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from p2j.p2j import python2jupyter 3 | from p2j.j2p import jupyter2python 4 | 5 | 6 | def main(): 7 | 8 | parser = argparse.ArgumentParser( 9 | description="Convert a Python script to Jupyter notebook and vice versa", 10 | usage="p2j myfile.py") 11 | parser.add_argument("source_filename", 12 | help="Python script to parse") 13 | parser.add_argument("-r", "--reverse", 14 | action="store_true", 15 | help="To convert Jupyter to Python scripto") 16 | parser.add_argument("-t", "--target_filename", 17 | help="Target filename of Jupyter notebook. If not specified, " + 18 | "it will use the filename of the Python script and append .ipynb") 19 | parser.add_argument("-o", "--overwrite", 20 | action="store_true", 21 | help="Flag whether to overwrite existing target file.") 22 | args = parser.parse_args() 23 | 24 | if args.reverse: 25 | jupyter2python(source_filename=args.source_filename, 26 | target_filename=args.target_filename, 27 | overwrite=args.overwrite) 28 | else: 29 | python2jupyter(source_filename=args.source_filename, 30 | target_filename=args.target_filename, 31 | overwrite=args.overwrite) 32 | 33 | 34 | if __name__ == "__main__": 35 | main() 36 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import io 4 | import os 5 | from setuptools import setup 6 | 7 | # Package meta-data. 8 | VERSION = "1.3.3" 9 | DESCRIPTION = "p2j: Convert Python scripts to Jupyter notebook with minimal intervention" 10 | 11 | # Import the README and use it as the long-description. 
12 | # Note: this will only work if README.md is present in your MANIFEST.in file! 13 | HERE = os.path.abspath(os.path.dirname(__file__)) 14 | try: 15 | with io.open(os.path.join(HERE, "README.md"), encoding="utf-8") as f: 16 | LONG_DESCRIPTION = "\n" + f.read() 17 | except FileNotFoundError: 18 | LONG_DESCRIPTION = DESCRIPTION 19 | 20 | # This call to setup() does all the work 21 | setup( 22 | name="p2j", 23 | version=VERSION, 24 | description=DESCRIPTION, 25 | long_description=LONG_DESCRIPTION, 26 | long_description_content_type="text/markdown", 27 | url="https://github.com/raibosome/python2jupyter", 28 | author="Raimi bin Karim", 29 | author_email="raimi.bkarim@gmail.com", 30 | python_requires=">=3.6.0", 31 | license="MIT", 32 | entry_points={ 33 | "console_scripts": [ 34 | "p2j=p2j.main:main", 35 | ], 36 | }, 37 | classifiers=[ 38 | # Trove classifiers 39 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 40 | "License :: OSI Approved :: MIT License", 41 | "Programming Language :: Python", 42 | "Programming Language :: Python :: 3", 43 | "Programming Language :: Python :: 3.6" 44 | ], 45 | keywords="convert python jupyter notebook script", 46 | packages=["p2j"], 47 | include_package_data=True 48 | ) 49 | -------------------------------------------------------------------------------- /examples/example3.py: -------------------------------------------------------------------------------- 1 | print(__doc__) 2 | 3 | # Author: Gael Varoquaux 4 | # License: BSD 3 clause 5 | 6 | # Standard scientific Python imports 7 | import matplotlib.pyplot as plt 8 | 9 | # Import datasets, classifiers and performance metrics 10 | from sklearn import datasets, svm, metrics 11 | 12 | # The digits dataset 13 | digits = datasets.load_digits() 14 | 15 | # The data that we are interested in is made of 8x8 images of digits, let's 16 | # have a look at the first 4 images, stored in the `images` attribute of the 17 | # dataset. If we were working from image files, we could load them using 18 | # matplotlib.pyplot.imread. Note that each image must have the same size. For these 19 | # images, we know which digit they represent: it is given in the 'target' of 20 | # the dataset. 
21 | images_and_labels = list(zip(digits.images, digits.target)) 22 | for index, (image, label) in enumerate(images_and_labels[:4]): 23 | plt.subplot(2, 4, index + 1) 24 | plt.axis('off') 25 | plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest') 26 | plt.title('Training: %i' % label) 27 | 28 | # To apply a classifier on this data, we need to flatten the image, to 29 | # turn the data in a (samples, feature) matrix: 30 | n_samples = len(digits.images) 31 | data = digits.images.reshape((n_samples, -1)) 32 | 33 | # Create a classifier: a support vector classifier 34 | classifier = svm.SVC(gamma=0.001) 35 | 36 | # We learn the digits on the first half of the digits 37 | classifier.fit(data[:n_samples // 2], digits.target[:n_samples // 2]) 38 | 39 | # Now predict the value of the digit on the second half: 40 | expected = digits.target[n_samples // 2:] 41 | predicted = classifier.predict(data[n_samples // 2:]) 42 | 43 | print("Classification report for classifier %s:\n%s\n" 44 | % (classifier, metrics.classification_report(expected, predicted))) 45 | print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted)) 46 | 47 | images_and_predictions = list(zip(digits.images[n_samples // 2:], predicted)) 48 | for index, (image, prediction) in enumerate(images_and_predictions[:4]): 49 | plt.subplot(2, 4, index + 5) 50 | plt.axis('off') 51 | plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest') 52 | plt.title('Prediction: %i' % prediction) 53 | 54 | plt.show() -------------------------------------------------------------------------------- /examples/example2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # -*- coding: utf-8 -*- 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | # First we set up the computational graph: 9 | 10 | # N is batch size; D_in is input dimension; 11 | # H is hidden dimension; D_out is output dimension. 12 | N, D_in, H, D_out = 64, 1000, 100, 10 13 | 14 | # Create placeholders for the input and target data; these will be filled 15 | # with real data when we execute the graph. 16 | x = tf.placeholder(tf.float32, shape=(None, D_in)) 17 | y = tf.placeholder(tf.float32, shape=(None, D_out)) 18 | 19 | # Create Variables for the weights and initialize them with random data. 20 | # A TensorFlow Variable persists its value across executions of the graph. 21 | w1 = tf.Variable(tf.random_normal((D_in, H))) 22 | w2 = tf.Variable(tf.random_normal((H, D_out))) 23 | 24 | # Forward pass: Compute the predicted y using operations on TensorFlow Tensors. 25 | # Note that this code does not actually perform any numeric operations; it 26 | # merely sets up the computational graph that we will later execute. 27 | h = tf.matmul(x, w1) 28 | h_relu = tf.maximum(h, tf.zeros(1)) 29 | y_pred = tf.matmul(h_relu, w2) 30 | 31 | # Compute loss using operations on TensorFlow Tensors 32 | loss = tf.reduce_sum((y - y_pred) ** 2.0) 33 | 34 | # Compute gradient of the loss with respect to w1 and w2. 35 | grad_w1, grad_w2 = tf.gradients(loss, [w1, w2]) 36 | 37 | # Update the weights using gradient descent. To actually update the weights 38 | # we need to evaluate new_w1 and new_w2 when executing the graph. Note that 39 | # in TensorFlow the the act of updating the value of the weights is part of 40 | # the computational graph; in PyTorch this happens outside the computational 41 | # graph. 
42 | learning_rate = 1e-6 43 | new_w1 = w1.assign(w1 - learning_rate * grad_w1) 44 | new_w2 = w2.assign(w2 - learning_rate * grad_w2) 45 | 46 | # Now we have built our computational graph, so we enter a TensorFlow session to 47 | # actually execute the graph. 48 | with tf.Session() as sess: 49 | # Run the graph once to initialize the Variables w1 and w2. 50 | sess.run(tf.global_variables_initializer()) 51 | 52 | # Create numpy arrays holding the actual data for the inputs x and targets 53 | # y 54 | x_value = np.random.randn(N, D_in) 55 | y_value = np.random.randn(N, D_out) 56 | for _ in range(500): 57 | # Execute the graph many times. Each time it executes we want to bind 58 | # x_value to x and y_value to y, specified with the feed_dict argument. 59 | # Each time we execute the graph we want to compute the values for loss, 60 | # new_w1, and new_w2; the values of these Tensors are returned as numpy 61 | # arrays. 62 | loss_value, _, _ = sess.run([loss, new_w1, new_w2], 63 | feed_dict={x: x_value, y: y_value}) 64 | print(loss_value) -------------------------------------------------------------------------------- /examples/example.py: -------------------------------------------------------------------------------- 1 | '''Trains a denoising autoencoder on MNIST dataset. 2 | 3 | Denoising is one of the classic applications of autoencoders. 4 | The denoising process removes unwanted noise that corrupted the 5 | true signal. 6 | 7 | Noise + Data ---> Denoising Autoencoder ---> Data 8 | 9 | Given a training dataset of corrupted data as input and 10 | true signal as output, a denoising autoencoder can recover the 11 | hidden structure to generate clean data. 12 | 13 | This example has modular design. The encoder, decoder and autoencoder 14 | are 3 models that share weights. For example, after training the 15 | autoencoder, the encoder can be used to generate latent vectors 16 | of input data for low-dim visualization like PCA or TSNE. 17 | ''' 18 | 19 | 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | import keras 24 | from keras.layers import Activation, Dense, Input 25 | from keras.layers import Conv2D, Flatten 26 | from keras.layers import Reshape, Conv2DTranspose 27 | from keras.models import Model 28 | from keras import backend as K 29 | from keras.datasets import mnist 30 | import numpy as np 31 | import matplotlib.pyplot as plt 32 | from PIL import Image 33 | 34 | np.random.seed(1337) 35 | 36 | # MNIST dataset 37 | (x_train, _), (x_test, _) = mnist.load_data() 38 | 39 | image_size = x_train.shape[1] 40 | x_train = np.reshape(x_train, [-1, image_size, image_size, 1]) 41 | x_test = np.reshape(x_test, [-1, image_size, image_size, 1]) 42 | x_train = x_train.astype('float32') / 255 43 | x_test = x_test.astype('float32') / 255 44 | 45 | # Generate corrupted MNIST images by adding noise with normal dist 46 | # centered at 0.5 and std=0.5 47 | noise = np.random.normal(loc=0.5, scale=0.5, size=x_train.shape) 48 | x_train_noisy = x_train + noise 49 | noise = np.random.normal(loc=0.5, scale=0.5, size=x_test.shape) 50 | x_test_noisy = x_test + noise 51 | 52 | x_train_noisy = np.clip(x_train_noisy, 0., 1.) 53 | x_test_noisy = np.clip(x_test_noisy, 0., 1.) 
54 | 55 | # Network parameters 56 | input_shape = (image_size, image_size, 1) 57 | batch_size = 128 58 | kernel_size = 3 59 | latent_dim = 16 60 | # Encoder/Decoder number of CNN layers and filters per layer 61 | layer_filters = [32, 64] 62 | 63 | # Build the Autoencoder Model 64 | # First build the Encoder Model 65 | inputs = Input(shape=input_shape, name='encoder_input') 66 | x = inputs 67 | # Stack of Conv2D blocks 68 | # Notes: 69 | # 1) Use Batch Normalization before ReLU on deep networks 70 | # 2) Use MaxPooling2D as alternative to strides>1 71 | # - faster but not as good as strides>1 72 | for filters in layer_filters: 73 | x = Conv2D(filters=filters, 74 | kernel_size=kernel_size, 75 | strides=2, 76 | activation='relu', 77 | padding='same')(x) 78 | 79 | # Shape info needed to build Decoder Model 80 | shape = K.int_shape(x) 81 | 82 | # Generate the latent vector 83 | x = Flatten()(x) 84 | latent = Dense(latent_dim, name='latent_vector')(x) 85 | 86 | # Instantiate Encoder Model 87 | encoder = Model(inputs, latent, name='encoder') 88 | encoder.summary() 89 | 90 | # Build the Decoder Model 91 | latent_inputs = Input(shape=(latent_dim,), name='decoder_input') 92 | x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs) 93 | x = Reshape((shape[1], shape[2], shape[3]))(x) 94 | 95 | # Stack of Transposed Conv2D blocks 96 | # Notes: 97 | # 1) Use Batch Normalization before ReLU on deep networks 98 | # 2) Use UpSampling2D as alternative to strides>1 99 | # - faster but not as good as strides>1 100 | for filters in layer_filters[::-1]: 101 | x = Conv2DTranspose(filters=filters, 102 | kernel_size=kernel_size, 103 | strides=2, 104 | activation='relu', 105 | padding='same')(x) 106 | 107 | x = Conv2DTranspose(filters=1, 108 | kernel_size=kernel_size, 109 | padding='same')(x) 110 | 111 | outputs = Activation('sigmoid', name='decoder_output')(x) 112 | 113 | # Instantiate Decoder Model 114 | decoder = Model(latent_inputs, outputs, name='decoder') 115 | decoder.summary() 116 | 117 | # Autoencoder = Encoder + Decoder 118 | # Instantiate Autoencoder Model 119 | autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder') 120 | autoencoder.summary() 121 | 122 | autoencoder.compile(loss='mse', optimizer='adam') 123 | 124 | # Train the autoencoder 125 | autoencoder.fit(x_train_noisy, 126 | x_train, 127 | validation_data=(x_test_noisy, x_test), 128 | epochs=30, 129 | batch_size=batch_size) 130 | 131 | # Predict the Autoencoder output from corrupted test images 132 | x_decoded = autoencoder.predict(x_test_noisy) 133 | 134 | # Display the 1st 8 corrupted and denoised images 135 | rows, cols = 10, 30 136 | num = rows * cols 137 | imgs = np.concatenate([x_test[:num], x_test_noisy[:num], x_decoded[:num]]) 138 | imgs = imgs.reshape((rows * 3, cols, image_size, image_size)) 139 | imgs = np.vstack(np.split(imgs, rows, axis=1)) 140 | imgs = imgs.reshape((rows * 3, -1, image_size, image_size)) 141 | imgs = np.vstack([np.hstack(i) for i in imgs]) 142 | imgs = (imgs * 255).astype(np.uint8) 143 | plt.figure() 144 | plt.axis('off') 145 | plt.title('Original images: top rows, ' 146 | 'Corrupted Input: middle rows, ' 147 | 'Denoised Input: third rows') 148 | plt.imshow(imgs, interpolation='none', cmap='gray') 149 | Image.fromarray(imgs).save('corrupted_and_denoised.png') 150 | plt.show() 151 | -------------------------------------------------------------------------------- /examples/example3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "print(__doc__)" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Author: Gael Varoquaux
\n", 17 | "License: BSD 3 clause" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "Standard scientific Python imports" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import matplotlib.pyplot as plt" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "Import datasets, classifiers and performance metrics" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "from sklearn import datasets, svm, metrics" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "The digits dataset" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "digits = datasets.load_digits()" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "The data that we are interested in is made of 8x8 images of digits, let's
\n", 73 | "have a look at the first 4 images, stored in the `images` attribute of the
\n", 74 | "dataset. If we were working from image files, we could load them using
\n", 75 | "matplotlib.pyplot.imread. Note that each image must have the same size. For these
\n", 76 | "images, we know which digit they represent: it is given in the 'target' of
\n", 77 | "the dataset." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "images_and_labels = list(zip(digits.images, digits.target))\n", 87 | "for index, (image, label) in enumerate(images_and_labels[:4]):\n", 88 | " plt.subplot(2, 4, index + 1)\n", 89 | " plt.axis('off')\n", 90 | " plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')\n", 91 | " plt.title('Training: %i' % label)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "To apply a classifier on this data, we need to flatten the image, to
\n", 99 | "turn the data in a (samples, feature) matrix:" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "n_samples = len(digits.images)\n", 109 | "data = digits.images.reshape((n_samples, -1))" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "Create a classifier: a support vector classifier" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "classifier = svm.SVC(gamma=0.001)" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "We learn the digits on the first half of the digits" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "classifier.fit(data[:n_samples // 2], digits.target[:n_samples // 2])" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "Now predict the value of the digit on the second half:" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "expected = digits.target[n_samples // 2:]\n", 158 | "predicted = classifier.predict(data[n_samples // 2:])" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "print(\"Classification report for classifier %s:\\n%s\\n\"\n", 168 | " % (classifier, metrics.classification_report(expected, predicted)))\n", 169 | "print(\"Confusion matrix:\\n%s\" % metrics.confusion_matrix(expected, predicted))" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "images_and_predictions = list(zip(digits.images[n_samples // 2:], predicted))\n", 179 | "for index, (image, prediction) in enumerate(images_and_predictions[:4]):\n", 180 | " plt.subplot(2, 4, index + 5)\n", 181 | " plt.axis('off')\n", 182 | " plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')\n", 183 | " plt.title('Prediction: %i' % prediction)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "plt.show()" 193 | ] 194 | } 195 | ], 196 | "metadata": { 197 | "kernelspec": { 198 | "display_name": "Python 3", 199 | "language": "python", 200 | "name": "python3" 201 | }, 202 | "language_info": { 203 | "codemirror_mode": { 204 | "name": "ipython", 205 | "version": 3 206 | }, 207 | "file_extension": ".py", 208 | "mimetype": "text/x-python", 209 | "name": "python", 210 | "nbconvert_exporter": "python", 211 | "pygments_lexer": "ipython3", 212 | "version": "3.6.0" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 2 217 | } 218 | -------------------------------------------------------------------------------- /examples/example2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#!/usr/bin/env python3" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# -*- coding: utf-8 -*-" 19 | ] 20 
| }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import tensorflow as tf\n", 28 | "import numpy as np" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "First we set up the computational graph:" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "N is batch size; D_in is input dimension;<br>
\n", 43 | "H is hidden dimension; D_out is output dimension." 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "N, D_in, H, D_out = 64, 1000, 100, 10" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "Create placeholders for the input and target data; these will be filled
\n", 60 | "with real data when we execute the graph." 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "x = tf.placeholder(tf.float32, shape=(None, D_in))\n", 70 | "y = tf.placeholder(tf.float32, shape=(None, D_out))" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "Create Variables for the weights and initialize them with random data.
\n", 78 | "A TensorFlow Variable persists its value across executions of the graph." 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "w1 = tf.Variable(tf.random_normal((D_in, H)))\n", 88 | "w2 = tf.Variable(tf.random_normal((H, D_out)))" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": {}, 94 | "source": [ 95 | "Forward pass: Compute the predicted y using operations on TensorFlow Tensors.
\n", 96 | "Note that this code does not actually perform any numeric operations; it
\n", 97 | "merely sets up the computational graph that we will later execute." 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "h = tf.matmul(x, w1)\n", 107 | "h_relu = tf.maximum(h, tf.zeros(1))\n", 108 | "y_pred = tf.matmul(h_relu, w2)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "Compute loss using operations on TensorFlow Tensors" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "loss = tf.reduce_sum((y - y_pred) ** 2.0)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "Compute gradient of the loss with respect to w1 and w2." 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "Update the weights using gradient descent. To actually update the weights
\n", 148 | "we need to evaluate new_w1 and new_w2 when executing the graph. Note that
\n", 149 | "in TensorFlow the the act of updating the value of the weights is part of
\n", 150 | "the computational graph; in PyTorch this happens outside the computational
\n", 151 | "graph." 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "learning_rate = 1e-6\n", 161 | "new_w1 = w1.assign(w1 - learning_rate * grad_w1)\n", 162 | "new_w2 = w2.assign(w2 - learning_rate * grad_w2)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "Now we have built our computational graph, so we enter a TensorFlow session to
\n", 170 | "actually execute the graph." 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "with tf.Session() as sess:\n", 180 | " # Run the graph once to initialize the Variables w1 and w2.\n", 181 | " sess.run(tf.global_variables_initializer())\n", 182 | "\n", 183 | " # Create numpy arrays holding the actual data for the inputs x and targets\n", 184 | " # y\n", 185 | " x_value = np.random.randn(N, D_in)\n", 186 | " y_value = np.random.randn(N, D_out)\n", 187 | " for _ in range(500):\n", 188 | " # Execute the graph many times. Each time it executes we want to bind\n", 189 | " # x_value to x and y_value to y, specified with the feed_dict argument.\n", 190 | " # Each time we execute the graph we want to compute the values for loss,\n", 191 | " # new_w1, and new_w2; the values of these Tensors are returned as numpy\n", 192 | " # arrays.\n", 193 | " loss_value, _, _ = sess.run([loss, new_w1, new_w2],\n", 194 | " feed_dict={x: x_value, y: y_value})\n", 195 | " print(loss_value)" 196 | ] 197 | } 198 | ], 199 | "metadata": { 200 | "kernelspec": { 201 | "display_name": "Python 3", 202 | "language": "python", 203 | "name": "python3" 204 | }, 205 | "language_info": { 206 | "codemirror_mode": { 207 | "name": "ipython", 208 | "version": 3 209 | }, 210 | "file_extension": ".py", 211 | "mimetype": "text/x-python", 212 | "name": "python", 213 | "nbconvert_exporter": "python", 214 | "pygments_lexer": "ipython3", 215 | "version": "3.6.8" 216 | } 217 | }, 218 | "nbformat": 4, 219 | "nbformat_minor": 2 220 | } 221 | -------------------------------------------------------------------------------- /p2j/p2j.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module translates .py files to .ipynb and vice versa 3 | """ 4 | from typing import Optional 5 | import os 6 | import sys 7 | import json 8 | 9 | from p2j.utils import _check_files 10 | 11 | # Path to directory 12 | HERE = os.path.abspath(os.path.dirname(__file__)) 13 | 14 | TRIPLE_QUOTES = ["\"\"\"", "\'\'\'"] 15 | FOUR_SPACES = "{:<4}".format("") 16 | EIGHT_SPACES = "{:<8}".format("") 17 | TWELVE_SPACES = "{:<12}".format("") 18 | 19 | 20 | def python2jupyter(source_filename: str, target_filename: str, overwrite: bool = False): 21 | """Convert Python scripts to Jupyter notebooks. 22 | 23 | Args: 24 | source_filename (str): Path to Python script. 25 | target_filename (str): Path to name of Jupyter notebook. Optional. 26 | overwrite (bool): Whether to overwrite an existing Jupyter notebook. 27 | """ 28 | 29 | target_filename = _check_files( 30 | source_filename, target_filename, overwrite, conversion="p2j") 31 | 32 | # Check if source file exists and read 33 | try: 34 | with open(source_filename, "r", encoding="utf-8") as infile: 35 | data = [l.rstrip("\n") for l in infile] 36 | except FileNotFoundError: 37 | print("Source file not found. 
Specify a valid source file.") 38 | sys.exit(1) 39 | 40 | # Read JSON files for .ipynb template 41 | with open(HERE + "/templates/cell_code.json", encoding="utf-8") as file: 42 | CODE = json.load(file) 43 | with open(HERE + "/templates/cell_markdown.json", encoding="utf-8") as file: 44 | MARKDOWN = json.load(file) 45 | with open(HERE + "/templates/metadata.json", encoding="utf-8") as file: 46 | MISC = json.load(file) 47 | 48 | # Initialise variables 49 | final = {} # the dictionary/json of the final notebook 50 | cells = [] # an array of all markdown and code cells 51 | arr = [] # an array to store individual lines for a cell 52 | num_lines = len(data) # no. of lines of code 53 | 54 | # Initialise variables for checks 55 | is_block_comment = False 56 | is_running_code = False 57 | is_running_comment = False 58 | next_is_code = False 59 | next_is_nothing = False 60 | next_is_function = False 61 | 62 | # Read source code line by line 63 | for i, line in enumerate(data): 64 | 65 | # Skip if line is empty 66 | if line == "": 67 | continue 68 | 69 | buffer = "" 70 | 71 | # Labels for current line 72 | contains_triple_quotes = TRIPLE_QUOTES[0] in line or TRIPLE_QUOTES[1] in line 73 | is_code = line.startswith("# pylint") or line.startswith("#pylint") or \ 74 | line.startswith("#!") or line.startswith("# -*- coding") or \ 75 | line.startswith("# coding=") or line.startswith("##") or \ 76 | line.startswith("# FIXME") or line.startswith("#FIXME") or \ 77 | line.startswith("# TODO") or line.startswith("#TODO") or \ 78 | line.startswith("# This Python file uses the following encoding:") 79 | is_end_of_code = i == num_lines-1 80 | starts_with_hash = line.startswith("#") 81 | 82 | # Labels for next line 83 | try: 84 | next_is_code = not data[i+1].startswith("#") 85 | except IndexError: 86 | pass 87 | try: 88 | next_is_nothing = data[i+1] == "" 89 | except IndexError: 90 | pass 91 | try: 92 | next_is_function = data[i+1].startswith(FOUR_SPACES) or ( 93 | next_is_nothing and data[i+2].startswith(FOUR_SPACES)) 94 | except IndexError: 95 | pass 96 | 97 | # Sub-paragraph is a comment but not a running code 98 | if not is_running_code and (is_running_comment or 99 | (starts_with_hash and not is_code) or 100 | contains_triple_quotes): 101 | 102 | if contains_triple_quotes: 103 | is_block_comment = not is_block_comment 104 | 105 | buffer = line.replace(TRIPLE_QUOTES[0], "\n").\ 106 | replace(TRIPLE_QUOTES[1], "\n") 107 | 108 | if not is_block_comment: 109 | if len(buffer) > 1: 110 | buffer = buffer[2:] if buffer[1].isspace() else buffer[1:] 111 | else: 112 | buffer = "" 113 | 114 | # Wrap this sub-paragraph as a markdown cell if 115 | # next line is end of code OR 116 | # (next line is a code but not a block comment) OR 117 | # (next line is nothing but not a block comment) 118 | if is_end_of_code or (next_is_code and not is_block_comment) or \ 119 | (next_is_nothing and not is_block_comment): 120 | arr.append("{}".format(buffer)) 121 | MARKDOWN["source"] = arr 122 | cells.append(dict(MARKDOWN)) 123 | arr = [] 124 | is_running_comment = False 125 | else: 126 | buffer = buffer + "
\n" 127 | arr.append("{}".format(buffer)) 128 | is_running_comment = True 129 | continue 130 | else: # Sub-paragraph is a comment but not a running code 131 | buffer = line 132 | 133 | # Wrap this sub-paragraph as a code cell if 134 | # (next line is end of code OR next line is nothing) AND NOT 135 | # (next line is nothing AND next line is part of a function) 136 | if (is_end_of_code or next_is_nothing) and not (next_is_nothing and next_is_function): 137 | arr.append("{}".format(buffer)) 138 | CODE["source"] = arr 139 | cells.append(dict(CODE)) 140 | arr = [] 141 | is_running_code = False 142 | else: 143 | buffer = buffer + "\n" 144 | 145 | # Put another newline character if in a function 146 | try: 147 | if data[i+1] == "" and (data[i+2].startswith(" #") or 148 | data[i+2].startswith(" #") or 149 | data[i+2].startswith(" #")): 150 | buffer = buffer + "\n" 151 | except IndexError: 152 | pass 153 | 154 | arr.append("{}".format(buffer)) 155 | is_running_code = True 156 | continue 157 | 158 | # Finalise the contents of notebook 159 | final["cells"] = cells 160 | final.update(MISC) 161 | 162 | # Write JSON to target file 163 | with open(target_filename, "w", encoding="utf-8") as outfile: 164 | json.dump(final, outfile, indent=1, ensure_ascii=False) 165 | print("Notebook {} written.".format(target_filename)) 166 | -------------------------------------------------------------------------------- /examples/example.ipynb: -------------------------------------------------------------------------------- 1 | {"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["\nTrains a denoising autoencoder on MNIST dataset.
\n", "Denoising is one of the classic applications of autoencoders.
\n", "The denoising process removes unwanted noise that corrupted the
\n", "true signal.
\n", "Noise + Data ---> Denoising Autoencoder ---> Data
\n", "Given a training dataset of corrupted data as input and
\n", "true signal as output, a denoising autoencoder can recover the
\n", "hidden structure to generate clean data.
\n", "This example has modular design. The encoder, decoder and autoencoder
\n", "are 3 models that share weights. For example, after training the
\n", "autoencoder, the encoder can be used to generate latent vectors
\n", "of input data for low-dim visualization like PCA or TSNE.
\n", ""]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["from __future__ import absolute_import\n", "from __future__ import division\n", "from __future__ import print_function\n", "import keras\n", "from keras.layers import Activation, Dense, Input\n", "from keras.layers import Conv2D, Flatten\n", "from keras.layers import Reshape, Conv2DTranspose\n", "from keras.models import Model\n", "from keras import backend as K\n", "from keras.datasets import mnist\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from PIL import Image"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["np.random.seed(1337)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["MNIST dataset"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["(x_train, _), (x_test, _) = mnist.load_data()"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["image_size = x_train.shape[1]\n", "x_train = np.reshape(x_train, [-1, image_size, image_size, 1])\n", "x_test = np.reshape(x_test, [-1, image_size, image_size, 1])\n", "x_train = x_train.astype('float32') / 255\n", "x_test = x_test.astype('float32') / 255"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Generate corrupted MNIST images by adding noise with normal dist
\n", "centered at 0.5 and std=0.5"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["noise = np.random.normal(loc=0.5, scale=0.5, size=x_train.shape)\n", "x_train_noisy = x_train + noise\n", "noise = np.random.normal(loc=0.5, scale=0.5, size=x_test.shape)\n", "x_test_noisy = x_test + noise"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["x_train_noisy = np.clip(x_train_noisy, 0., 1.)\n", "x_test_noisy = np.clip(x_test_noisy, 0., 1.)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Network parameters"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["input_shape = (image_size, image_size, 1)\n", "batch_size = 128\n", "kernel_size = 3\n", "latent_dim = 16\n", "# Encoder/Decoder number of CNN layers and filters per layer\n", "layer_filters = [32, 64]"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Build the Autoencoder Model
\n", "First build the Encoder Model"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["inputs = Input(shape=input_shape, name='encoder_input')\n", "x = inputs\n", "# Stack of Conv2D blocks\n", "# Notes:\n", "# 1) Use Batch Normalization before ReLU on deep networks\n", "# 2) Use MaxPooling2D as alternative to strides>1\n", "# - faster but not as good as strides>1\n", "for filters in layer_filters:\n", " x = Conv2D(filters=filters,\n", " kernel_size=kernel_size,\n", " strides=2,\n", " activation='relu',\n", " padding='same')(x)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Shape info needed to build Decoder Model"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["shape = K.int_shape(x)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Generate the latent vector"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["x = Flatten()(x)\n", "latent = Dense(latent_dim, name='latent_vector')(x)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Instantiate Encoder Model"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["encoder = Model(inputs, latent, name='encoder')\n", "encoder.summary()"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Build the Decoder Model"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["latent_inputs = Input(shape=(latent_dim,), name='decoder_input')\n", "x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs)\n", "x = Reshape((shape[1], shape[2], shape[3]))(x)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Stack of Transposed Conv2D blocks
\n", "Notes:
\n", "1) Use Batch Normalization before ReLU on deep networks
\n", "2) Use UpSampling2D as alternative to strides>1
\n", "- faster but not as good as strides>1"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["for filters in layer_filters[::-1]:\n", " x = Conv2DTranspose(filters=filters,\n", " kernel_size=kernel_size,\n", " strides=2,\n", " activation='relu',\n", " padding='same')(x)"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["x = Conv2DTranspose(filters=1,\n", " kernel_size=kernel_size,\n", " padding='same')(x)"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["outputs = Activation('sigmoid', name='decoder_output')(x)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Instantiate Decoder Model"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["decoder = Model(latent_inputs, outputs, name='decoder')\n", "decoder.summary()"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Autoencoder = Encoder + Decoder
\n", "Instantiate Autoencoder Model"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')\n", "autoencoder.summary()"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["autoencoder.compile(loss='mse', optimizer='adam')"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Train the autoencoder"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["autoencoder.fit(x_train_noisy,\n", " x_train,\n", " validation_data=(x_test_noisy, x_test),\n", " epochs=30,\n", " batch_size=batch_size)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Predict the Autoencoder output from corrupted test images"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["x_decoded = autoencoder.predict(x_test_noisy)"]}, {"cell_type": "markdown", "metadata": {}, "source": ["Display the 1st 8 corrupted and denoised images"]}, {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["rows, cols = 10, 30\n", "num = rows * cols\n", "imgs = np.concatenate([x_test[:num], x_test_noisy[:num], x_decoded[:num]])\n", "imgs = imgs.reshape((rows * 3, cols, image_size, image_size))\n", "imgs = np.vstack(np.split(imgs, rows, axis=1))\n", "imgs = imgs.reshape((rows * 3, -1, image_size, image_size))\n", "imgs = np.vstack([np.hstack(i) for i in imgs])\n", "imgs = (imgs * 255).astype(np.uint8)\n", "plt.figure()\n", "plt.axis('off')\n", "plt.title('Original images: top rows, '\n", " 'Corrupted Input: middle rows, '\n", " 'Denoised Input: third rows')\n", "plt.imshow(imgs, interpolation='none', cmap='gray')\n", "Image.fromarray(imgs).save('corrupted_and_denoised.png')\n", "plt.show()"]}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4"}}, "nbformat": 4, "nbformat_minor": 2} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # p2j - Python-to-Jupyter parser with zero intervention 2 | [![PyPI version](https://badge.fury.io/py/p2j.svg)](https://badge.fury.io/py/p2j) 3 | 4 | Convert your Python source code to Jupyter notebook with **zero intervention**. 5 | 6 | Convert this source Python file: 7 | 8 | ```python 9 | # Evaluate the model 10 | model.evaluate() 11 | 12 | # Run the model for a while. 13 | # Then we hide the model. 14 | run() 15 | hide() 16 | 17 | print(type(data)) 18 | 19 | # This is considered as a paragraph too 20 | # It has 2 lines of comments 21 | 22 | # The data that we are interested in is made of 8x8 images of digits. 23 | # Let's have a look at the first 4 images, which is of course 24 | # stored in the `images` attribute of the dataset. 25 | images = list(zip(mnist.images)) 26 | ``` 27 | 28 | to the following Jupyter notebook: 29 | 30 | ![example](screenshot.png) 31 | 32 | The purpose of this package is to be able to run a code on Jupyter notebook without having to copy each paragraph of the code into every cell. It's also useful if we want to run our code in Google Colab. 
This parser isn't perfect, but you would be satisfactorily pleased with what you get. 33 | 34 | Contents of this README: 35 | 36 | - [Installation](#installation) 37 | - [Converting](#converting) 38 | - [Tests](#tests) 39 | - [Requirements](#requirements) 40 | - [Code format](#code-Format) 41 | - [How it works](#how-it-works) 42 | - [Feedback and pull requests](#feedback-and-pull-requests) 43 | 44 | ## Installation 45 | 46 | PyPI 47 | 48 | ```bash 49 | pip install p2j 50 | ``` 51 | 52 | Clone this repository and run Python's setup.py 53 | 54 | ```bash 55 | git clone https://github.com/remykarem/python2jupyter.git 56 | python setup.py install 57 | ``` 58 | 59 | or 60 | 61 | ```bash 62 | pip install git+https://github.com/remykarem/python2jupyter#egg=p2j 63 | ``` 64 | 65 | ## Converting 66 | 67 | There are 3 main ways you can get your Jupyter notebook: 68 | 69 | ### Converting a Python script 70 | 71 | ```bash 72 | p2j train.py 73 | ``` 74 | 75 | and you will get a `train.ipynb` Jupyter notebook. 76 | 77 | ### Converting a script from the Internet (you need to have curl) 78 | 79 | Specify the target filename with a `-t`. 80 | 81 | ```bash 82 | p2j <(curl https://raw.githubusercontent.com/keras-team/keras/master/examples/mnist_cnn.py) -t myfile.ipynb 83 | ``` 84 | 85 | ### Converting an in-line Python script 86 | 87 | ```bash 88 | p2j <(echo "# boilerplate code \n import os") -t myfile2.ipynb 89 | ``` 90 | 91 | Note: 92 | 93 | To run examples from this repository, first clone this repo 94 | 95 | ```bash 96 | git clone https://github.com/raibosome/python2jupyter.git 97 | ``` 98 | 99 | and after you `cd` into the project, run 100 | 101 | ```bash 102 | p2j examples/example.py 103 | ``` 104 | 105 | The `p2j/examples/example.py` is a Keras tutorial on building an autoencoder for the MNIST dataset, found [here](https://github.com/keras-team/keras/blob/master/examples/mnist_denoising_autoencoder.py). 106 | 107 | #### Command line usage 108 | 109 | To see the command line usage, run `p2j -h` and you will get something like this: 110 | 111 | ```txt 112 | usage: p2j [-h] [-r] [-t TARGET_FILENAME] [-o] source_filename 113 | 114 | Convert a Python script to Jupyter notebook 115 | 116 | positional arguments: 117 | source_filename Python script to parse 118 | 119 | optional arguments: 120 | -h, --help show this help message and exit 121 | -r, --reverse To convert Jupyter to Python script 122 | -t TARGET_FILENAME, --target_filename TARGET_FILENAME 123 | Target filename of Jupyter notebook. If not specified, 124 | it will use the filename of the Python script and 125 | append .ipynb 126 | -o, --overwrite Flag whether to overwrite existing target file. 127 | Defaults to false 128 | ``` 129 | 130 | ## Requirements 131 | 132 | - Python >= 3.6 133 | 134 | No third party libraries are used. 135 | 136 | ## Tests 137 | 138 | Tested on macOS 10.14.3 with Python 3.6. 139 | 140 | ## Code format 141 | 142 | There is no specific format that you should follow, but generally the parser assumes a format where your code is paragraphed. 
Check out some examples of well-documented code (and from which you can test!): 143 | 144 | - [PyTorch Tutorials](https://pytorch.org/tutorials/beginner/pytorch_with_examples.html) 145 | - [Keras Examples](https://github.com/keras-team/keras/tree/master/examples) 146 | - [Scikit Learn Example](https://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html#sphx-glr-auto-examples-classification-plot-digits-classification-py) 147 | 148 | ## How it works 149 | 150 | Jupyter notebooks are just JSON files, like below. A Python script is read line by line and a dictionary of key-value pairs is generated along the way, using a set of rules. Finally, this dictionary is dumped as a JSON file whose file extension is `.ipynb`. 151 | 152 | ```json 153 | { 154 | "cells": [ 155 | { 156 | "cell_type": "markdown", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "# Import standard functions" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "metadata": {}, 167 | "source": [ 168 | "import os" 169 | ] 170 | }, 171 | ], 172 | "metadata": {}, 173 | "nbformat": 4, 174 | "nbformat_minor": 2 175 | } 176 | ``` 177 | 178 | There are 4 basic rules (and exceptions) that I follow to parse the Python script. 179 | 180 | ### 1. Code or comment 181 | 182 | Firstly, any line that starts with a `#` is marked as a comment. So this will be a **markdown cell** in the Jupyter notebook. Everything else that does not start with this character is considered code, so this goes to the **code cell**. There are of course exceptions. 183 | 184 | This is a comment 185 | 186 | ```python 187 | # Train for 4 epochs 188 | ``` 189 | 190 | and this is code 191 | 192 | ```python 193 | model.train(4) 194 | ``` 195 | 196 | ### 2. Blocks of code and comment 197 | 198 | Secondly, code or comment can occur in blocks. A block of comment is several *consecutive* lines of comments that start with `#`. Similarly, several *consecutive* lines of codes that do not start with `#` will be considered as 'a block of code'. This rule is important because we want to ensure that a block of code or comment stays in one cell. 199 | 200 | This is a block of comment 201 | 202 | ```python 203 | # Load the model and 204 | # train for 4 epochs and 205 | # lastly we save the model 206 | ``` 207 | 208 | and this is a block of code 209 | 210 | ```python 211 | model.load() 212 | model.train(4) 213 | model.save() 214 | ``` 215 | 216 | By default, a commented code will not be converted to Markdown if it is placed directly (no newline space) below a block of code. Elsewhere, it will get converted to Markdown. You should preprend the code with two hashes `##` (instead of one) to prevent it from being converted to Markdown. 217 | 218 | ### 3. Paragraph 219 | 220 | Thirdly, I assume that everyone writes his/her script in paragraphs, where each paragraph represents an idea. In a paragraph, there can be code or comments or both. 221 | 222 | The following are 5 examples of paragraphs. 223 | 224 | ```python 225 | # Evaluate the model 226 | model.evaluate() 227 | 228 | # Run the model for a while. 229 | # Then we hide the model. 230 | run() 231 | hide() 232 | 233 | print(type(data)) 234 | 235 | # This is considered as a paragraph too 236 | # It has 2 lines of comments 237 | 238 | # The data that we are interested in is made of 8x8 images of digits. 239 | # Let's have a look at the first 4 images, which is of course 240 | # stored in the `images` attribute of the dataset. 
241 | images = list(zip(mnist.images)) 242 | ``` 243 | 244 | which translates to the following: 245 | 246 | ![example](screenshot.png) 247 | 248 | ### 4. Indentation 249 | 250 | Any line of code or comment that is indented by a multiple of 4 spaces is considered code, and will stay in the same code cell as the previous non-empty line. This ensures that function and class definitions, loops and multi-line code stay in one cell. 251 | 252 | ### 5. Exceptions 253 | 254 | Now we handle the exceptions to the above-mentioned rules. 255 | 256 | - Docstrings are considered as **markdown cells**, only if they are not indented. 257 | 258 | - Lines that begin with `#pylint` or `# pylint` are Pylint directives and are kept as **code cells**. 259 | 260 | - Lines that begin with `#FIXME`, `# FIXME`, `#TODO` or `# TODO` are kept as **code cells**. 261 | 262 | - Shebang is considered as a **code cell**, eg. `#!/usr/bin/env python3`. 263 | 264 | - Encodings like `# -*- coding: utf-8 -*-` are also considered as **code cells**. 265 | 266 | ## Feedback and pull requests 267 | 268 | If you do like this, star me maybe? Pull requests are very much encouraged! 269 | --------------------------------------------------------------------------------
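The README's "How it works" section above describes the conversion flow: read the script line by line, group consecutive comment lines into markdown cells and everything else into code cells, then dump the resulting dictionary as nbformat-4 JSON. Below is a minimal, self-contained sketch of that idea. It is not the package's implementation (the real rules, including the docstring, shebang, pylint/TODO and indentation handling, live in `p2j/p2j.py` above), and the function name `tiny_py2nb` is invented for this illustration.

```python
import json


def tiny_py2nb(source_path: str, target_path: str) -> None:
    """Convert a Python script into a bare-bones Jupyter notebook (sketch only)."""
    with open(source_path, "r", encoding="utf-8") as infile:
        lines = [line.rstrip("\n") for line in infile]

    cells = []               # finished markdown/code cells
    block = []               # lines of the paragraph currently being collected
    block_is_comment = None  # type of the current paragraph

    def flush():
        # Wrap the collected paragraph as a markdown or code cell
        if not block:
            return
        if block_is_comment:
            source = [line.lstrip("# ") + "\n" for line in block]
            cells.append({"cell_type": "markdown", "metadata": {}, "source": source})
        else:
            source = [line + "\n" for line in block]
            cells.append({"cell_type": "code", "execution_count": None,
                          "metadata": {}, "outputs": [], "source": source})
        block.clear()

    for line in lines:
        if not line.strip():   # a blank line ends the current paragraph
            flush()
            block_is_comment = None
            continue
        is_comment = line.startswith("#")
        if block and is_comment != block_is_comment:
            flush()            # switching between comment and code starts a new cell
        block_is_comment = is_comment
        block.append(line)
    flush()

    notebook = {"cells": cells, "metadata": {}, "nbformat": 4, "nbformat_minor": 2}
    with open(target_path, "w", encoding="utf-8") as outfile:
        json.dump(notebook, outfile, indent=1, ensure_ascii=False)
```

Calling `tiny_py2nb("train.py", "train.ipynb")` produces a notebook that Jupyter will open; `p2j` itself layers the additional rules described in the README on top of this basic comment/code grouping.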