# Repository layout:
#
# ├── README.md
# ├── python-spark-flask
# │   └── Dockerfile
# ├── LICENSE
# └── data-science-at-the-command-line
#     └── Dockerfile
#
# /README.md:
#
#   # dockerfiles
#
# /python-spark-flask/Dockerfile:
#
# Extends the jupyter/pyspark-notebook image with Flask and a set of Flask
# extensions installed through pip.

FROM jupyter/pyspark-notebook

# NOTE(review): the maintainer value ends with a trailing space; a contact
# address may have been lost from it. Confirm against the upstream repository.
LABEL maintainer="Jeroen Janssens "

# The packages are installed as root.
USER root

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir \
    flask \
    flask-restful \
    flask-rest-jsonapi \
    flask-themes \
    flask-user \
    flask-wtf \
    flask-uploads

# Drop root again. NB_USER is not defined in this file; presumably it is
# provided by the base image -- TODO confirm.
USER $NB_USER

# /LICENSE:
#
# MIT License
#
# Copyright (c) 2017 Data Science Workshops
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# /data-science-at-the-command-line/Dockerfile
#
# Data Science at the Command Line
#
# Alpine 3.8 image that installs a large set of command-line tools (from apk,
# CRAN, pip, npm, go get, and several source builds), copies the book's tools,
# data and content into the image, and starts bash in /data.

FROM alpine:3.8

# NOTE(review): the maintainer value ends with a trailing space; a contact
# address may have been lost from it. Confirm against the upstream repository.
LABEL maintainer="Jeroen Janssens "

# System packages. `apk --no-cache add` fetches a fresh package index itself
# and does not store it, so a separate `apk update` layer is not needed.
RUN apk --no-cache add \
    R \
    R-dev \
    R-doc \
    arpack-dev \
    bash \
    bash-doc \
    bc \
    bc-doc \
    boost-dev \
    cmake \
    coreutils \
    coreutils-doc \
    curl \
    curl-doc \
    curl-dev \
    findutils \
    findutils-doc \
    font-adobe-100dpi \
    g++ \
    git \
    git-doc \
    gnuplot \
    go \
    grep \
    grep-doc \
    groff \
    jpeg-dev \
    jq \
    jq-doc \
    less \
    less-doc \
    libxml2-dev \
    m4 \
    man \
    man-pages \
    mdocml-apropos \
    ncurses \
    nodejs-lts \
    nodejs-npm \
    openjdk7 \
    openssl \
    p7zip \
    p7zip-doc \
    parallel \
    parallel-doc \
    perl-dev \
    py-lxml \
    py-pip \
    python3 \
    python3-dev \
    sed \
    sed-doc \
    sudo \
    sudo-doc \
    tar \
    tar-doc \
    tree \
    tree-doc \
    unrar \
    unrar-doc \
    unzip \
    unzip-doc \
    xmlstarlet \
    xmlstarlet-doc \
    zlib-dev

# R packages from CRAN.
RUN echo "install.packages(c('tidyverse','ggmap'),repos='https://cloud.r-project.org')" | R --slave --no-save --no-restore-history

# Python 3 packages.
RUN easy_install-3.6 pip && \
    pip3 install --upgrade pip && \
    pip3 install \
        awscli \
        bigmler \
        csvkit \
        numpy \
        scipy \
        nose

# skll is installed in its own step, after numpy/scipy are already present.
RUN pip3 install skll

# Python 2 packages.
RUN pip2 install --upgrade pip && \
    pip2 install cssselect

# Node.js command-line tools.
RUN npm install -g \
    cowsay \
    xml2json-command

# tapkee: Eigen 3.2.9 is built and installed from source first ...
# (download over https; `cd /tmp` uses a forward slash -- the previous `\tmp`
# only worked because the working directory happened to be /).
RUN curl -sL https://bitbucket.org/eigen/eigen/get/3.2.9.tar.gz > /tmp/eigen.tar.gz && \
    cd /tmp && \
    mkdir eigen && tar -xzvf eigen.tar.gz -C eigen --strip-components=1 && \
    cd eigen && \
    mkdir build && cd build && cmake .. && make && make install

# ... then tapkee itself is built and its binary copied to /usr/bin.
RUN cd /tmp && \
    git clone https://github.com/lisitsyn/tapkee.git && \
    cd tapkee && mkdir build && cd build && cmake .. && make && \
    cp -v /tmp/tapkee/bin/tapkee /usr/bin


# feedgnuplot (`yes` answers cpan's interactive prompts).
RUN yes | cpan List::MoreUtils && \
    git clone https://github.com/dkogan/feedgnuplot.git && \
    cd feedgnuplot && \
    perl Makefile.PL && \
    make && \
    make install && \
    cd .. && \
    rm -r feedgnuplot

# pup and json2csv, fetched with `go get` using GOPATH=/usr.
RUN export GOPATH=/usr && \
    go get github.com/ericchiang/pup && \
    go get github.com/jehiah/json2csv


# csvfix
RUN curl https://bitbucket.org/neilb/csvfix/get/version-1.6.zip > /tmp/csvfix.zip && \
    cd /tmp && \
    unzip csvfix.zip && \
    mv neilb* csvfix && \
    cd csvfix && \
    make lin && \
    mv csvfix/bin/csvfix /bin


# weka
RUN cd /tmp && \
    curl -L https://sourceforge.net/projects/weka/files/weka-3-8/3.8.1/weka-3-8-1.zip > weka.zip && \
    unzip weka.zip && \
    mv weka-3-8-1/weka.jar /bin


# curlicue
RUN cd /tmp && \
    curl -L https://github.com/decklin/curlicue/archive/master.zip > curlicue.zip && \
    unzip curlicue.zip && \
    mv curlicue-master/curl* /bin


# drake and drip
RUN curl -L https://raw.githubusercontent.com/Factual/drake/master/bin/drake > /usr/bin/drake && \
    chmod 755 /usr/bin/drake
# Run drake once; its exit status is deliberately ignored.
RUN SHELL=/bin/bash drake || true
ENV JAVA_HOME=/usr/lib/jvm/default-jvm
RUN ln -sf "${JAVA_HOME}/bin/"* "/usr/bin/"
# Only drip's own exit status is ignored; a failed download or chmod now
# fails the build instead of being masked by a trailing `exit 0`.
RUN curl -L https://raw.githubusercontent.com/ninjudd/drip/master/bin/drip > /usr/bin/drip && \
    chmod 755 /usr/bin/drip && \
    (drip || true)


# csvquote
RUN cd /tmp && \
    git clone https://github.com/dbro/csvquote.git && \
    cd csvquote && \
    make && make BINDIR=/usr/bin/ install


# vowpal wabbit (cloned over https; GitHub no longer serves git://).
RUN cd /tmp && \
    git clone --depth 1 --branch master --single-branch https://github.com/JohnLangford/vowpal_wabbit.git && \
    cd vowpal_wabbit && \
    make && \
    make install


# crush tools
# NOTE(review): the sed expression inserts a bare `#include ` at line 12 of
# fieldsplit.c; the header name appears to be missing from the expression.
# Confirm the intended header against the upstream repository.
RUN cd /tmp && \
    curl -L https://github.com/google/crush-tools/releases/download/20150716/crush-tools-20150716.tar.gz > crush-tools.tar.gz && \
    tar -xzvf crush-tools.tar.gz && \
    cd crush-tools-20150716/ && \
    sed -i '12i#include ' src/fieldsplit/fieldsplit.c && \
    ./configure --prefix=/usr && \
    make && \
    make install


# data science at the command line tools, book content, and example data
RUN cd /tmp && \
    git clone https://github.com/jeroenjanssens/data-science-at-the-command-line.git && \
    cp -v data-science-at-the-command-line/tools/* /usr/bin/ && \
    cp -rv data-science-at-the-command-line/data /home/ && \
    cp -rv data-science-at-the-command-line/book /home/


# Remove build leftovers and any apk cache.
RUN rm -rf /tmp/* /var/cache/apk/*

# Shell defaults for interactive use: pager, shell, aliases and prompt.
RUN echo "export PAGER='less'" >>~/.bashrc && \
    echo "export SHELL='/bin/bash'" >>~/.bashrc && \
    echo "alias l='ls -lph --group-directories-first'" >>~/.bashrc && \
    echo "alias parallel='parallel --will-cite'" >>~/.bashrc && \
    echo 'export PS1="\[\033[38;5;6m\][\w]$\[$(tput sgr0)\] "' >>~/.bashrc

# Append the section of the `weka` script between the WEKAPATH= and
# `complete ` lines to ~/.bashrc, dropping the first two characters of each
# line and the /home/joe path prefix.
RUN cat $(which weka) | sed -ne '/WEKAPATH=/,/complete /p' | cut -c3- | sed -e 's|/home/joe||' >>~/.bashrc

# Microsoft core fonts, followed by a font cache rebuild.
RUN apk --no-cache add msttcorefonts-installer fontconfig && \
    update-ms-fonts && fc-cache -f
RUN rm -rf /tmp/* /var/cache/apk/*

WORKDIR /data
# Exec form: bash is started directly rather than through `/bin/sh -c`.
CMD ["bash"]