├── pip.conf ├── .gitmodules ├── 90assumeyes ├── content ├── start_jupyer.sh └── run.sh ├── jupyter_notebook_config.py ├── README.md ├── ipython.py ├── requirements.txt ├── pymultiplexer └── Dockerfile /pip.conf: -------------------------------------------------------------------------------- 1 | [list] 2 | format = columns 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "colabtools"] 2 | path = colabtools 3 | url = https://github.com/googlecolab/colabtools.git 4 | -------------------------------------------------------------------------------- /90assumeyes: -------------------------------------------------------------------------------- 1 | APT::Get::Assume-Yes "true"; 2 | APT::Get::allow-downgrades "true"; 3 | APT::Get::allow-remove-essential "true"; 4 | APT::Get::allow-change-help-packages "true"; 5 | -------------------------------------------------------------------------------- /content/start_jupyer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Enable http_over_ws (It was enabled in the Dockerfile, but re-enabling here is necessary for an unknown reason) 4 | jupyter serverextension enable --py jupyter_http_over_ws 5 | 6 | # Run jupyter notebook 7 | cd /content 8 | jupyter notebook --NotebookApp.allow_origin='https://colab.research.google.com' --port=8081 9 | -------------------------------------------------------------------------------- /jupyter_notebook_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | c = get_config() 16 | c.NotebookApp.allow_root = True 17 | c.NotebookApp.disable_check_xsrf = True 18 | c.NotebookApp.token = '' 19 | c.NotebookApp.password = '' 20 | c.NotebookApp.ip = '*' 21 | c.NotebookApp.open_browser = False 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Docker image of Colaboratory local runtime 2 | ======================================== 3 | 4 | Install and RUN 5 | ============== 6 | 7 | ``` 8 | $ docker run --runtime=nvidia -it --rm -p 8081:8081 --cap-add SYS_ADMIN --device /dev/fuse \ 9 | --security-opt apparmor=unconfined ikeyasu/colab-local:latest 10 | ``` 11 | 12 | LICENSE 13 | ======= 14 | 15 | Copyright 2018 ikeyasu. All rights reserved. 16 | Licensed under the Apache License, Version 2.0. 17 | 18 | This repository is forked from 19 | https://github.com/googlecolab/backend-container/blob/bcbbf44/containers/Dockerfile 20 | 21 | The original license is as follows. 22 | 23 | ``` 24 | Copyright 2017 Google Inc. All rights reserved. 25 | 26 | Licensed under the Apache License, Version 2.0 (the "License"); 27 | you may not use this file except in compliance with the License. 
28 | You may obtain a copy of the License at 29 | 30 | http://www.apache.org/licenses/LICENSE-2.0 31 | 32 | Unless required by applicable law or agreed to in writing, software 33 | distributed under the License is distributed on an "AS IS" BASIS, 34 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 35 | See the License for the specific language governing permissions and 36 | limitations under the License. 37 | ``` 38 | -------------------------------------------------------------------------------- /ipython.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Google Inc. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """IPython configuration for Colab.""" 15 | 16 | c = get_config() # pylint:disable=undefined-variable 17 | 18 | # Register a custom kernel_class. 19 | c.IPKernelApp.kernel_class = 'google.colab._kernel.Kernel' 20 | 21 | # Implicitly imported packages. 22 | c.InteractiveShellApp.extensions = [ 23 | 'matplotlib', 24 | 'seaborn', 25 | ] 26 | 27 | # Startup code. 28 | c.InteractiveShellApp.exec_lines = [ 29 | 'from google.colab import _shell_customizations', 30 | '_shell_customizations.initialize()', 31 | # TODO(b/72409705): Remove this extra import. 32 | 'import h5py', 33 | ] 34 | 35 | # Enable matplotlib renderings to show up inline in the notebook. 
36 | c.InteractiveShellApp.matplotlib = 'inline' 37 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | PyYAML~=3.12 2 | # Because we enable altair's colab renderer in the ipython.py startup script, 3 | # we pin this to an exact version so that colab startup can't be broken by 4 | # external package updates 5 | altair~=2.2.2 6 | beautifulsoup4~=4.6.0 7 | crcmod~=1.7 8 | future~=0.16.0 9 | google-api-python-client~=1.6.6 10 | google-auth~=1.4.0 11 | google-auth-httplib2~=0.0.3 12 | google-cloud-bigquery~=1.1.0 13 | google-cloud-language~=1.0.0 14 | google-cloud-storage~=1.8.0 15 | google-cloud-translate~=1.3.0 16 | grpcio~=1.15.0 17 | h5py~=2.8.0 18 | httplib2~=0.11.0 19 | ipykernel~=4.6.0 20 | ipython~=5.5.0 21 | jinja2~=2.10 22 | jsonschema~=2.6.0 23 | keras~=2.2.0 24 | matplotlib~=2.1.0 25 | nltk~=3.2.0 26 | notebook~=5.2.0 27 | numpy~=1.14.0 28 | oauth2client~=4.0 29 | opencv-python~=3.4.0 30 | pandas~=0.22.0 31 | pandas-gbq~=0.4.1 32 | pillow~=4.0.0 33 | plotly~=1.12.0 34 | portpicker~=1.2.0 35 | psutil~=5.4.0 36 | pygments~=2.1.0 37 | pymc3~=3.5 38 | pystache~=0.5.4 39 | python-dateutil~=2.5.0 40 | pytz~=2018.4 41 | # <17.1.0 in order to avoid startup crash. (Details in review log.) 
42 | pyzmq>=17.0.0,<17.1.0 43 | requests~=2.18.0 44 | scikit-image~=0.13.0 45 | scikit-learn~=0.19.0 46 | scipy~=0.19.0 47 | seaborn~=0.7.0 48 | six~=1.11.0 49 | spacy~=2.0.0 50 | statsmodels~=0.8.0 51 | sympy~=1.1.0 52 | tensorflow-hub~=0.1.0 53 | tornado~=4.5.0 54 | vega_datasets~=0.5.0 55 | xgboost~=0.7.0 56 | -------------------------------------------------------------------------------- /pymultiplexer: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # -*- mode: bash -*- 3 | # 4 | # Wrapper for multiplexing a python or pip to use the same version as the most 5 | # recent caller in its process hierarchy. 6 | # 7 | # This lets us have a single command (pip/python) work for both python2 and 8 | # python3 notebooks, eg 9 | # !pip install ... 10 | # will install for the version of python currently in use. 11 | 12 | # This is the default: assume python2. 13 | SUFFIX=2 14 | 15 | # We know *this* process isn't python, so we start with the parent. 16 | NEXT_PID=${PPID} 17 | 18 | while true; do 19 | # Determine where to look up the parent of this process (if it exists). 20 | PID_STAT="/proc/${NEXT_PID}/stat" 21 | if [[ ! -f ${PID_STAT} ]]; then 22 | break 23 | fi 24 | 25 | # Figure out the parent PID: proc(5) says this 26 | # is the fourth field in /proc/${NEXT_PID}/stat. 27 | PARENT_PID=$(cut -d\ -f4 <${PID_STAT}) 28 | PARENT_EXE="/proc/${PARENT_PID}/exe" 29 | if [[ ! -f ${PARENT_EXE} ]]; then 30 | break 31 | fi 32 | 33 | # We want to know the real filename, so resolve any symlinks. 34 | CALLER=$(readlink -fnq ${PARENT_EXE}) 35 | if [[ ${CALLER} == *"/bin/python3"* ]]; then 36 | SUFFIX=3 37 | break 38 | elif [[ ${CALLER} == *"/bin/python2"* ]]; then 39 | SUFFIX=2 40 | break 41 | fi 42 | 43 | # Repeat with the parent process. 
44 | NEXT_PID=${PARENT_PID} 45 | done 46 | 47 | COMMAND="$(basename $0)${SUFFIX}" 48 | exec "${COMMAND}" "$@" 49 | -------------------------------------------------------------------------------- /content/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Copyright 2017 Google Inc. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Install the correct tensorflow variant depending on availability of GPU. 18 | # 19 | # pip install requires the wheel file name to match the name of the package 20 | # inside it, so we need to copy the gpu-blah.whl files to blah.whl. The mess 21 | # below conditionally (if VM has a GPU) creates a temporary directory, symlinks 22 | # the wheels into the temporary directory, and cd's into that directory, so that 23 | # the subsequent unconditional "pip install"s from the current directory do the 24 | # right thing. Finally, the temporary directory is removed, if it was created. 
25 | cd / 26 | T="" 27 | if [ -d /usr/lib64-nvidia ]; then 28 | T="$(mktemp -d)" 29 | for f in tensorflow*whl; do 30 | ln -s "/gpu-${f}" "${T}/${f}" 31 | done 32 | cd "$T" 33 | fi 34 | 35 | pip3 install -f /tf_deps -U --upgrade-strategy=only-if-needed tensorflow-*-*36*whl 36 | pip2 install -f /tf_deps -U --upgrade-strategy=only-if-needed tensorflow-*-*cp27*whl 37 | 38 | if [ -n "$T" ]; then 39 | rm -rf "$T" 40 | fi 41 | cd / 42 | 43 | # Create the notebook notary secret if one does not already exist 44 | if [ ! -f /content/datalab/.config/notary_secret ] 45 | then 46 | mkdir -p /content/datalab/.config 47 | openssl rand -base64 128 > /content/datalab/.config/notary_secret 48 | fi 49 | 50 | # Start the DataLab server 51 | #forever --minUptime 1000 --spinSleepTime 1000 /datalab/web/app.js 52 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2018 ikeyasu@gmail.com. All rights reserved. 2 | # -----original license is below---- 3 | # Copyright 2017 Google Inc. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | FROM ubuntu:17.10 18 | MAINTAINER ikeyasu 19 | 20 | # Container configuration 21 | EXPOSE 8081 22 | 23 | # Path configuration 24 | ENV PATH $PATH:/tools/node/bin:/tools/google-cloud-sdk/bin 25 | ENV PYTHONPATH /env/python 26 | 27 | # We need to set the gcloud path before disabling the components check below. 28 | # TODO(b/70862907): Drop this custom gcloud directory. 29 | ENV CLOUDSDK_CONFIG /content/datalab/.config 30 | 31 | # Assume yes to all apt commands, to avoid user confusion around stdin. 32 | COPY 90assumeyes /etc/apt/apt.conf.d/ 33 | 34 | # Setup OS and core packages 35 | ENV DEBIAN_FRONTEND=noninteractive 36 | RUN apt-get update && \ 37 | apt-get install --no-install-recommends -y -q \ 38 | apt-utils \ 39 | build-essential \ 40 | ca-certificates \ 41 | curl \ 42 | gfortran \ 43 | git \ 44 | google-perftools \ 45 | libatlas-base-dev \ 46 | libcublas8.0 \ 47 | libcudart8.0 \ 48 | libcufft8.0 \ 49 | libcufftw8.0 \ 50 | libcurand8.0 \ 51 | libcusolver8.0 \ 52 | libfreetype6-dev \ 53 | libhdf5-dev \ 54 | liblapack-dev \ 55 | libpng-dev \ 56 | libsm6 \ 57 | libxext6 \ 58 | libxft-dev \ 59 | libxml2-dev \ 60 | openssh-client \ 61 | pkg-config \ 62 | python \ 63 | python-dev \ 64 | python-pip \ 65 | python-setuptools \ 66 | python-zmq \ 67 | python3 \ 68 | python3-dev \ 69 | python3-pip \ 70 | python3-setuptools \ 71 | python3-zmq \ 72 | rsync \ 73 | ttf-liberation \ 74 | unzip \ 75 | wget \ 76 | zip && \ 77 | mkdir -p /tools 78 | 79 | # Setup Google Cloud SDK 80 | # Also apply workaround for gsutil failure brought by this version of Google Cloud. 81 | # (https://code.google.com/p/google-cloud-sdk/issues/detail?id=538) in final step. 
82 | RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ 83 | unzip -qq google-cloud-sdk.zip -d tools && \ 84 | rm google-cloud-sdk.zip && \ 85 | tools/google-cloud-sdk/install.sh --usage-reporting=false \ 86 | --path-update=false --bash-completion=false && \ 87 | tools/google-cloud-sdk/bin/gcloud -q components update \ 88 | gcloud core bq gsutil compute preview alpha beta && \ 89 | # disable the gcloud update message 90 | tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true 91 | 92 | # Fetch tensorflow wheels. 93 | RUN gsutil cp gs://colab-tensorflow/2018-03-01T15:50:49-08:00/*whl / && \ 94 | alias pip2=pip && for v in 2 3; do for f in /*tensorflow*-cp${v}*.whl; do pip${v} download -d /tf_deps $f; done; done && unalias pip2 95 | 96 | # Update pip and pip3 to avoid noisy warnings for users, and install wheel for 97 | # use below. 98 | RUN pip3 install --upgrade pip wheel && \ 99 | pip2 install --upgrade pip wheel 100 | 101 | # Add a global pip.conf to avoid warnings on `pip list` and friends. 102 | COPY pip.conf /etc/ 103 | 104 | # TODO(b/69087391): Clean up the ordering of the RUN commands below. 105 | 106 | # Setup Python packages. One package isn't available from PyPA, so we 107 | # install it manually to save on install time. 108 | # 109 | # Order is important here: we always do the python3 variants *before* the 110 | # python2 ones, so that installed scripts still default to python2. 111 | COPY requirements.txt / 112 | RUN pip2 install -U http://wheels.scipy.org/subprocess32-3.5.0-cp27-cp27mu-manylinux1_x86_64.whl 113 | RUN pip3 install -U --upgrade-strategy only-if-needed --ignore-installed --no-cache-dir -r /requirements.txt && \ 114 | pip2 install -U --upgrade-strategy only-if-needed --ignore-installed --no-cache-dir -r /requirements.txt 115 | 116 | # Set up Jupyter kernels for python2 and python3. 
117 | RUN python3 -m ipykernel install 118 | RUN python2 -m ipykernel install 119 | 120 | # Setup Node.js using LTS 6.10 121 | # mkdir -p /tools/node && \ 122 | # wget -nv https://nodejs.org/dist/v6.10.0/node-v6.10.0-linux-x64.tar.gz -O node.tar.gz && \ 123 | # tar xzf node.tar.gz -C /tools/node --strip-components=1 && \ 124 | # rm node.tar.gz && \ 125 | 126 | # Set our locale to en_US.UTF-8. 127 | RUN apt-get install -y locales && \ 128 | locale-gen en_US.UTF-8 && \ 129 | update-locale LANG=en_US.UTF-8 130 | 131 | # Build a copy of the datalab node app. 132 | # git clone https://github.com/googlecolab/backend-container /backend-container && \ 133 | # /backend-container/sources/build.sh && \ 134 | # mkdir /datalab && \ 135 | # cp -a /backend-container/build/web/nb /datalab/web && \ 136 | 137 | # Add some unchanging bits - specifically node modules (that need to be kept in sync 138 | # with packages.json manually, but help save build time, by preincluding them in an 139 | # earlier layer). 140 | # /tools/node/bin/npm install \ 141 | # ws@1.1.4 \ 142 | # http-proxy@1.13.2 \ 143 | # mkdirp@0.5.1 \ 144 | # node-uuid@1.4.7 \ 145 | # bunyan@1.7.1 \ 146 | # tcp-port-used@0.1.2 \ 147 | # node-cache@3.2.0 && \ 148 | # cd / && \ 149 | # /tools/node/bin/npm install -g forever && \ 150 | 151 | # Clean up 152 | RUN apt-get autoremove -y && \ 153 | rm -rf /tmp/* && \ 154 | rm -rf /root/.cache/* && \ 155 | cd / 156 | 157 | ENV LANG en_US.UTF-8 158 | 159 | ADD ipython.py /etc/ipython/ipython_config.py 160 | 161 | # Do IPython configuration and install build artifacts 162 | # Then link stuff needed for nbconvert to a location where Jinja will find it. 163 | # I'd prefer to just use absolute path in Jinja imports but those don't work. 
164 | RUN ipython profile create default && \ 165 | jupyter notebook --generate-config && \ 166 | mkdir /etc/jupyter 167 | ADD jupyter_notebook_config.py /etc/jupyter 168 | 169 | # Add and install build artifacts 170 | ADD content/ /datalab 171 | #RUN cd /datalab/web && /tools/node/bin/npm install && cd / 172 | 173 | # Install colabtools. 174 | COPY colabtools /colabtools 175 | RUN cd /colabtools && \ 176 | python2 setup.py sdist && \ 177 | pip3 install /colabtools/dist/google-colab-0.0.1a1.tar.gz && \ 178 | pip2 install /colabtools/dist/google-colab-0.0.1a1.tar.gz && \ 179 | jupyter nbextension install --py google.colab 180 | 181 | RUN pip install jupyter_http_over_ws && \ 182 | jupyter serverextension enable --py jupyter_http_over_ws 183 | 184 | # Set up our pip/python aliases. We just copy the same file to two places 185 | # rather than play games with symlinks. 186 | ADD pymultiplexer /usr/local/bin/pip 187 | ADD pymultiplexer /usr/local/bin/python 188 | 189 | # We customize the chunksize used by googleapiclient for file transfers. 190 | # TODO(b/74067588): Drop this customization. 191 | RUN sed -i -e 's/DEFAULT_CHUNK_SIZE = 512\*1024/DEFAULT_CHUNK_SIZE = 100 * 1024 * 1024/' /usr/local/lib/*/dist-packages/googleapiclient/http.py 192 | 193 | # Startup 194 | ENV ENV /root/.bashrc 195 | ENV SHELL /bin/bash 196 | # TensorFlow uses less than half the RAM with tcmalloc relative to (the default) 197 | # jemalloc, so we use it. 198 | ENV LD_PRELOAD /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4 199 | # setup tensorflow 200 | RUN /datalab/run.sh 201 | ENV HOME /content 202 | 203 | # Add Tini 204 | ENV TINI_VERSION v0.17.0 205 | ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini 206 | RUN chmod +x /tini 207 | ENTRYPOINT ["/tini", "--"] 208 | CMD [ "/datalab/start_jupyer.sh" ] 209 | --------------------------------------------------------------------------------