#!/bin/bash
#
# Container entry point: replaces this shell with OpenRefine, listening on
# 0.0.0.0 and using /mnt/refine as its data directory.
#
# Environment:
#   REFINE_MEMORY  Value passed to `refine -m`. When unset or empty it is
#                  computed below from the machine's total RAM, in bytes.
#
# Exit status: 1 when REFINE_MEMORY is not provided and the total memory
# cannot be read from `free`; otherwise whatever the exec'd program returns.

set -euo pipefail

# Only size the heap automatically when the caller did not pick a value.
if [[ -z "${REFINE_MEMORY:-}" ]]; then
  # Total RAM in bytes: second field of the "Mem" row printed by `free -b`.
  # NOTE(review): inside a container this is presumably the host's RAM, not
  # a cgroup limit -- confirm before relying on it with `docker run -m`.
  total_bytes=$(free -b | awk '/Mem/ {print $2}') || total_bytes=""

  # Without a plain integer the arithmetic below would fail and `-m` would
  # be passed without a value, so stop here with an actionable message.
  if [[ ! "$total_bytes" =~ ^[0-9]+$ ]]; then
    printf '%s\n' \
      'start.sh: cannot determine total memory; set REFINE_MEMORY explicitly' >&2
    exit 1
  fi

  # Two candidates: everything except 3 GiB, and 60% of the total.
  all_but_reserve=$(( total_bytes - 3 * 1024 * 1024 * 1024 ))
  REFINE_MEMORY=$(( total_bytes * 6 / 10 ))

  # Keep the larger one. On machines with less than 7.5 GiB the 60% figure
  # wins (and the first candidate may even be negative).
  if (( REFINE_MEMORY < all_but_reserve )); then
    REFINE_MEMORY=$all_but_reserve
  fi
fi

# Relative path: resolved against the current working directory.
exec OpenRefine/refine -i 0.0.0.0 -d /mnt/refine -m "$REFINE_MEMORY"
# Builds an image that compiles GEOS, GDAL and PROJ from source, builds
# OpenRefine from its master branch together with four extensions
# (extraCTU, geoXtension, Refine-NER, RDF), and starts it via /start.sh.
# OpenRefine's port 3333 is exposed.
FROM ubuntu:trusty

# The instruction name takes no trailing colon; "MAINTAINER:" is not a
# recognised instruction.
MAINTAINER giuliano.tortoreto@outlook.com

# Create the mysql system group/user before mysql-server gets installed.
RUN groupadd -r mysql && useradd -r -g mysql mysql
# Pre-seed debconf so the Oracle Java and MySQL packages install unattended.
RUN echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections
RUN echo debconf shared/accepted-oracle-license-v1-1 seen true | /usr/bin/debconf-set-selections
# NOTE(review): the MySQL root password is hard-coded in this file; consider
# supplying it at build or run time instead.
RUN echo mysql-server mysql-server/root_password password 18473TYG | /usr/bin/debconf-set-selections
RUN echo mysql-server mysql-server/root_password_again password 18473TYG | /usr/bin/debconf-set-selections

RUN apt-get -y -q update; \
    apt-get -y -q install wget make ant g++ software-properties-common

# NOTE(review): the PPA line targets "precise" although the base image is
# trusty -- confirm this is intentional.
RUN add-apt-repository 'deb http://ppa.launchpad.net/webupd8team/java/ubuntu precise main'; \
    apt-get -y -q update

# NOTE(review): --force-yes installs packages without authentication checks.
RUN apt-get install -y --force-yes -q oracle-java7-installer

# a mounted file systems table to make MySQL happy
#RUN cat /proc/mounts > /etc/mtab

# Install gdal dependencies provided by Ubuntu repositories
RUN apt-get install -y -q \
    mysql-server \
    mysql-client \
    python-numpy \
    libpq-dev \
    libpng12-dev \
    libjpeg-dev \
    libgif-dev \
    liblzma-dev \
    libcurl4-gnutls-dev \
    libxml2-dev \
    libexpat-dev \
    libxerces-c-dev \
    libnetcdf-dev \
    netcdf-bin \
    libpoppler-dev \
    gpsbabel \
    swig \
    libhdf4-alt-dev \
    libhdf5-serial-dev \
    libpodofo-dev \
    poppler-utils \
    libfreexl-dev \
    unixodbc-dev \
    libwebp-dev \
    libepsilon-dev \
    liblcms2-2 \
    libpcre3-dev \
    python-dev

#install geos
RUN wget -O - http://download.osgeo.org/geos/geos-3.4.2.tar.bz2 | tar -jx
RUN cd /geos-3.4.2; ./configure -enable-python && make && make install

#install gdal (with Java bindings; the JNI libraries are copied to /usr/lib)
RUN wget -O - http://download.osgeo.org/gdal/1.11.0/gdal-1.11.0.tar.gz | tar -xz
RUN cd gdal-1.11.0 ; ./configure --with-xerces --with-java=/usr/lib/jvm/java-7-oracle --with-jvm-lib=/usr/lib/jvm/java-7-oracle/jre/lib/amd64/server --with-jvm-lib-add-rpath=yes --with-mdb=yes --with-geos=yes && make && make install; cd swig/java; make ; cp libgdalconstjni.so libgdaljni.so libogrjni.so libosrjni.so /usr/lib/; cd ../../.libs; cp libgdal.so /usr/lib

#install proj
RUN wget -O - http://download.osgeo.org/proj/proj-4.8.0.tar.gz | tar -xz
RUN cd ./proj-4.8.0; ./configure && make && make install

# download and "mount" OpenRefine
# NOTE(review): --no-check-certificate disables TLS verification for every
# GitHub download below.
RUN wget -O - --no-check-certificate https://github.com/OpenRefine/OpenRefine/archive/master.tar.gz | tar -xz
RUN mv OpenRefine-master OpenRefine; cd ./OpenRefine ; ant clean build;

# -y keeps the install non-interactive, like the other apt-get calls.
RUN apt-get install -y -q unzip;

#download extensions
RUN cd ./OpenRefine/extensions; wget -O - --no-check-certificate https://github.com/giTorto/extraCTU-plugin/archive/master.tar.gz | tar -xz; cd ./extraCTU-plugin-master; ant clean build
# gdal.jar is taken from the GDAL build tree by absolute path: this command
# runs from OpenRefine/extensions, where a relative ./gdal-1.11.0 does not exist.
RUN cd ./OpenRefine/extensions; wget -O - --no-check-certificate https://github.com/giTorto/geoXtension/archive/master.tar.gz | tar -xz; cp /gdal-1.11.0/swig/java/gdal.jar ./geoXtension-master/module/MOD-INF/lib; cd ./geoXtension-master ; ant clean build
RUN cd ./OpenRefine/extensions; wget -O - --no-check-certificate https://github.com/giTorto/Refine-NER-Extension/archive/master.tar.gz | tar -xz; cd Refine-NER-Extension-master; ant clean build
RUN cd ./OpenRefine/extensions; \
    wget -O rdf-extension.tar.gz https://github.com/SpazioDati/grefine-rdf-extension/tarball/export-stream; \
    tar -xzf rdf-extension.tar.gz && rm rdf-extension.tar.gz; \
    mv SpazioDati-grefine-rdf-extension-* rdf-extension; \
    cd ./rdf-extension; \
    JAVA_TOOL_OPTIONS='-Dfile.encoding=UTF-8' ant build

#setting ldpath
# ENV makes the variable visible to the CMD process; ~/.bashrc alone is only
# read by interactive shells, so /start.sh would never see it.
ENV LD_LIBRARY_PATH=/usr/lib
RUN echo "LD_LIBRARY_PATH=/usr/lib" >> ~/.bashrc && echo "export LD_LIBRARY_PATH" >> ~/.bashrc

# Copy the PROJ and GEOS libraries installed under /usr/local/lib into
# /usr/lib and refresh the linker cache.
RUN cd /usr/local/lib; cp libproj.so libproj.a libproj.la libproj.so.0 libgeos.a libgeos_c.a libgeos_c.la libgeos_c.so libgeos_c.so.1.8.2 libgeos.la libgeos.so /usr/lib; ldconfig

#test gdal and geos
#RUN cd ./gdal-1.11.0/swig/java; make test;

ADD ./start.sh /start.sh
RUN chmod +x /start.sh

EXPOSE 3333
CMD ["/start.sh"]