├── .dockerignore ├── .editorconfig ├── LICENSE ├── build.sh ├── push.sh ├── src ├── shell │ ├── py2 │ │ └── Dockerfile │ └── py3 │ │ └── Dockerfile └── spark │ ├── 0.9 │ └── Dockerfile │ └── 1.0 │ ├── py2 │ └── Dockerfile │ └── py3 │ └── Dockerfile └── start.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | *.md -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | 11 | [*.md] 12 | trim_trailing_whitespace = false -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Webysther Nunes 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is furnished 8 | to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # Build and tag every webysther/aws-glue image variant.
4 | #
5 | # Order matters: the shell Dockerfiles start FROM the spark-1.0-py2 and
6 | # spark-1.0-py3 tags, so the spark images are built first.
7 | #
8 | # `set -e` stops at the first failed build and exits with its non-zero status.
9 | # (A bare `exit` inside `if ! cmd; then ...` returns 0, because the last
10 | # command evaluated is the negated pipeline, which hides the failure.)
11 | set -e
12 |
13 | # spark 1.0, python 3 (also carries the generic spark tags)
14 | docker build --rm -t webysther/aws-glue:spark-1.0-py3 \
15 |     -t webysther/aws-glue:spark-py3 \
16 |     -t webysther/aws-glue:spark-1.0 \
17 |     -t webysther/aws-glue:spark \
18 |     src/spark/1.0/py3
19 |
20 | # spark 1.0, python 2
21 | docker build --rm -t webysther/aws-glue:spark-1.0-py2 \
22 |     -t webysther/aws-glue:spark-py2 \
23 |     src/spark/1.0/py2
24 |
25 | # spark 0.9 (python 2 only)
26 | docker build --rm -t webysther/aws-glue:spark-0.9-py2 \
27 |     -t webysther/aws-glue:spark-0.9 \
28 |     src/spark/0.9
29 |
30 | # shell, python 3 (also carries the default tags, including latest)
31 | docker build --rm -t webysther/aws-glue:shell-1.0-py3 \
32 |     -t webysther/aws-glue:shell-1.0 \
33 |     -t webysther/aws-glue:shell-py3 \
34 |     -t webysther/aws-glue:shell \
35 |     -t webysther/aws-glue:py3 \
36 |     -t webysther/aws-glue:1.0 \
37 |     -t webysther/aws-glue:1 \
38 |     -t webysther/aws-glue:latest \
39 |     src/shell/py3
40 |
41 | # shell, python 2
42 | docker build --rm -t webysther/aws-glue:shell-1.0-py2 \
43 |     -t webysther/aws-glue:shell-py2 \
44 |     -t webysther/aws-glue:py2 \
45 |     src/shell/py2
--------------------------------------------------------------------------------
/push.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | # Push every tag produced by build.sh, one tag per command
4 | # (see https://github.com/docker/cli/issues/267).
5 | #
6 | # Stop at the first failed push so the script's exit status reflects it.
7 | set -e
8 |
9 | # spark 0.9
10 | docker image push webysther/aws-glue:spark-0.9-py2
11 | docker image push webysther/aws-glue:spark-0.9
12 |
13 | # spark python 2
14 | docker image push webysther/aws-glue:spark-1.0-py2
15 | docker image push webysther/aws-glue:spark-py2
16 |
17 | # spark python 3
18 | docker image push webysther/aws-glue:spark-1.0-py3
19 | docker image push webysther/aws-glue:spark-py3
20 | docker image push webysther/aws-glue:spark-1.0
21 | docker image push webysther/aws-glue:spark
22 |
23 | # shell python 2
24 | docker image push webysther/aws-glue:shell-1.0-py2
25 | docker image push webysther/aws-glue:shell-py2
26 | docker image push webysther/aws-glue:py2
27 |
28 | # shell python 3
29 | docker image push webysther/aws-glue:shell-1.0-py3
30 | docker image push webysther/aws-glue:shell-1.0
31 | docker image push webysther/aws-glue:shell-py3
32 | docker image push webysther/aws-glue:shell
33 | docker image push webysther/aws-glue:py3
34 | docker image push webysther/aws-glue:1.0
35 | docker image push webysther/aws-glue:1
36 | docker image push webysther/aws-glue:latest
--------------------------------------------------------------------------------
/src/shell/py2/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM webysther/aws-glue:spark-1.0-py2
2 |
3 | USER root
4 |
5 | RUN apt-get -q update -y && \
6 |     apt-get -qq install -y libpq-dev gcc && \
7 |     rm -rf /var/lib/apt/lists/*
8 |
9 | RUN python -m pip install colorama docutils futures jmespath numpy pandas pip pyasn1
10 | RUN python -m pip
install pygresql python-dateutil pytz pyyaml rsa s3transfer
11 | RUN python -m pip install scikit-learn scipy setuptools six virtualenv wheel
12 | # Switch back from root to the docker user
13 | USER docker
--------------------------------------------------------------------------------
/src/shell/py3/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM webysther/aws-glue:spark-1.0-py3
2 | # The package installation below runs as root
3 | USER root
4 | # System packages: libpq-dev and gcc (presumably needed to build pygresql -- TODO confirm)
5 | RUN apt-get -q update -y && \
6 |     apt-get -qq install -y libpq-dev gcc && \
7 |     rm -rf /var/lib/apt/lists/*
8 | # Python packages for the shell image, installed in three pip invocations
9 | RUN python -m pip install colorama docutils futures jmespath numpy pandas pip pyasn1
10 | RUN python -m pip install pygresql python-dateutil pytz pyyaml rsa s3transfer
11 | RUN python -m pip install scikit-learn scipy setuptools six virtualenv wheel
12 | # Hand /opt and /usr/local to the docker user
13 | RUN chown -R docker:docker /opt
14 | RUN chown -R docker:docker /usr/local
15 | # Switch back from root to the docker user
16 | USER docker
--------------------------------------------------------------------------------
/src/spark/0.9/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM svajiraya/glue-dev-0.9:20200513_105653UTC as glue
2 | FROM openjdk:8-jdk-slim-buster as spark
3 |
4 | # Dependencies: curl and git, used by the download and clone steps below
5 | RUN apt-get -q update -y && \
6 |     apt-get -qq install -y curl git && \
7 |     rm -rf /var/lib/apt/lists/*
8 |
9 | # Maven: unpacked under /opt
10 | RUN curl -SsL https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-common/apache-maven-3.6.0-bin.tar.gz | tar -C /opt --warning=no-unknown-keyword -xzf -
11 |
12 | # Spark: unpacked under /opt
13 | RUN curl -SsL https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-0.9/spark-2.2.1-bin-hadoop2.7.tgz | tar -C /opt --warning=no-unknown-keyword -xzf -
14 |
15 | # AWS glue scripts (taken from a frozen fork)
16 | RUN git clone --depth 1 https://github.com/webysther/aws-glue-libs.git /opt/aws-glue-libs
17 |
18 | # Python: runtime stage; artifacts are copied in from the "spark" and "glue" stages
19 | FROM python:2.7-slim-buster
20 | COPY --from=spark /usr/local/openjdk-8 /usr/local/openjdk-8
21 | COPY --from=spark /opt/spark-2.2.1-bin-hadoop2.7
/opt/spark-2.2.1-bin-hadoop2.7
22 | COPY --from=spark /opt/apache-maven-3.6.0 /opt/apache-maven-3.6.0
23 | COPY --from=spark /opt/aws-glue-libs /opt/aws-glue-libs
24 | COPY --from=glue /glue/jars /opt/aws-glue-libs/jars
25 | COPY --from=glue /glue/PyGlue.zip /opt/aws-glue-libs/PyGlue.zip
26 |
27 | # Dependencies: CLI tools plus Node.js 12 from the NodeSource setup script
28 | RUN apt-get update -q -y && \
29 |     apt-get install -q -y curl git zip vim wget && \
30 |     curl -sL https://deb.nodesource.com/setup_12.x | bash - && \
31 |     apt-get -y install nodejs && \
32 |     rm -rf /var/lib/apt/lists/*
33 |
34 | # pip: upgrade the system-wide pip
35 | RUN python -m pip install --upgrade pip
36 |
37 | # aws cli and pytest
38 | RUN python -m pip install awscli && python -m pip install -U pytest
39 |
40 | # cdk: installed globally through npm
41 | RUN npm install -g aws-cdk
42 |
43 | # glue samples, exposed as /opt/samples/glue (ln -s takes TARGET first, then LINK_NAME)
44 | RUN git clone --depth 1 https://github.com/aws-samples/aws-glue-samples /opt/aws-glue-samples && \
45 |     mkdir -p /opt/samples && ln -s /opt/aws-glue-samples /opt/samples/glue
46 |
47 | # cdk samples, exposed as /opt/samples/cdk
48 | RUN git clone --depth 1 https://github.com/aws-samples/aws-cdk-examples.git /opt/aws-cdk-samples && \
49 |     mkdir -p /opt/samples && ln -s /opt/aws-cdk-samples /opt/samples/cdk
50 |
51 | # cloudformation samples, exposed as /opt/samples/cloudformation
52 | RUN git clone --depth 1 https://github.com/awslabs/aws-cloudformation-templates.git /opt/aws-cloudformation-samples && \
53 |     mkdir -p /opt/samples && ln -s /opt/aws-cloudformation-samples /opt/samples/cloudformation
54 |
55 | # User: unprivileged docker user and group, both with id 1000
56 | RUN addgroup --gid 1000 docker && \
57 |     adduser --uid 1000 --ingroup docker --home /home/docker --shell /bin/sh --disabled-password --gecos "" docker
58 |
59 | RUN chown -R docker:docker /opt
60 | RUN chown -R docker:docker /usr/local
61 |
62 | USER docker:docker
63 |
64 | # Env: install locations of the tools copied in above
65 | ENV JAVA_HOME=/usr/local/openjdk-8
66 | ENV M2_HOME=/opt/apache-maven-3.6.0
67 | ENV SPARK_HOME=/opt/spark-2.2.1-bin-hadoop2.7
68 | ENV GLUE_HOME=/opt/aws-glue-libs
69 |
70 | ENV PATH="${JAVA_HOME}/bin:${PATH}:${M2_HOME}/bin:${GLUE_HOME}/bin/"
71 |
72 | ENV GLUE_PY_FILES=$GLUE_HOME/PyGlue.zip
73 | ENV SPARK_CONF_DIR=$GLUE_HOME/conf
74 | ENV
GLUE_JARS_DIR=$GLUE_HOME/jars
75 | # py4j: 0.10.4 is presumably the archive Spark 2.2.x bundles (TODO confirm); 0.10.7 is kept, and missing PYTHONPATH entries are ignored
76 | ENV PYTHONPATH="${SPARK_HOME}/python/:${PYTHONPATH}"
77 | ENV PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:${PYTHONPATH}"
78 | ENV PYTHONPATH="${GLUE_PY_FILES}:${PYTHONPATH}"
79 |
80 | ENV PATH="/home/docker/.poetry/bin:$PATH"
81 | ENV PATH="/home/docker/.local/bin:$PATH"
82 |
83 | # Generate spark-defaults.conf: put the Glue jars on the driver and executor classpaths
84 | RUN mkdir $SPARK_CONF_DIR
85 | RUN echo "spark.driver.extraClassPath $GLUE_JARS_DIR/*" >> $SPARK_CONF_DIR/spark-defaults.conf
86 | RUN echo "spark.executor.extraClassPath $GLUE_JARS_DIR/*" >> $SPARK_CONF_DIR/spark-defaults.conf
87 |
88 | # Changed scripts: write minimal bash wrappers into $GLUE_HOME/bin
89 | RUN echo '#!/usr/bin/env bash \n\n exec "${SPARK_HOME}/bin/spark-submit" --py-files "${GLUE_PY_FILES}" "$@"' > $GLUE_HOME/bin/gluesparksubmit
90 | RUN echo '#!/usr/bin/env bash \n\n exec "${SPARK_HOME}/bin/pyspark" "$@"' > $GLUE_HOME/bin/gluepyspark
91 | RUN echo '#!/usr/bin/env bash \n\n exec pytest "$@"' > $GLUE_HOME/bin/gluepytest
92 | RUN cp $GLUE_HOME/bin/gluepytest $GLUE_HOME/bin/pytest && \
93 |     cp $GLUE_HOME/bin/gluepyspark $GLUE_HOME/bin/pyspark && \
94 |     cp $GLUE_HOME/bin/gluesparksubmit $GLUE_HOME/bin/sparksubmit
95 | RUN rm $GLUE_HOME/bin/glue-setup.sh
96 |
97 | RUN wget -O $SPARK_CONF_DIR/log4j.properties https://gist.githubusercontent.com/svajiraya/aecb45c038e7bba86429646a68b542bb/raw/0cc6229d3b745a0092be75bbbf9476fa17318004/log4j.properties
98 |
99 | # pip: upgrade pip and install boto3 with --user, as the docker user
100 | RUN python -m pip install --user --upgrade pip && \
101 |     python -m pip install --user boto3
102 |
103 | # poetry: install through get-poetry.py, then turn off virtualenv creation
104 | RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python && \
105 |     poetry config virtualenvs.create false
106 |
107 | WORKDIR /app
108 | CMD ["bash"]
109 |
--------------------------------------------------------------------------------
/src/spark/1.0/py2/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM svajiraya/glue-dev-1.0:20200513_151648UTC as glue
2 | FROM
openjdk:8-jdk-slim-buster as spark
3 |
4 | # Dependencies: curl and git, used by the download and clone steps below
5 | RUN apt-get -q update -y && \
6 |     apt-get -qq install -y curl git && \
7 |     rm -rf /var/lib/apt/lists/*
8 |
9 | # Maven: unpacked under /opt
10 | RUN curl -SsL https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-common/apache-maven-3.6.0-bin.tar.gz | tar -C /opt --warning=no-unknown-keyword -xzf -
11 |
12 | # Spark: unpacked under /opt
13 | RUN curl -SsL https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-1.0/spark-2.4.3-bin-hadoop2.8.tgz | tar -C /opt --warning=no-unknown-keyword -xzf -
14 |
15 | # AWS glue scripts (glue-1.0 branch of a frozen fork)
16 | RUN git clone --depth 1 -b glue-1.0 --single-branch https://github.com/webysther/aws-glue-libs.git /opt/aws-glue-libs
17 |
18 | # Python: runtime stage; artifacts are copied in from the "spark" and "glue" stages
19 | FROM python:2.7-slim-buster
20 | COPY --from=spark /usr/local/openjdk-8 /usr/local/openjdk-8
21 | COPY --from=spark /opt/spark-2.4.3-bin-spark-2.4.3-bin-hadoop2.8 /opt/spark-2.4.3-bin-spark-2.4.3-bin-hadoop2.8
22 | COPY --from=spark /opt/apache-maven-3.6.0 /opt/apache-maven-3.6.0
23 | COPY --from=spark /opt/aws-glue-libs /opt/aws-glue-libs
24 | COPY --from=glue /glue/jarsv1 /opt/aws-glue-libs/jarsv1
25 | COPY --from=glue /glue/PyGlue.zip /opt/aws-glue-libs/PyGlue.zip
26 |
27 | # Dependencies: CLI tools plus Node.js 12 from the NodeSource setup script
28 | RUN apt-get update -q -y && \
29 |     apt-get install -q -y curl git zip vim wget && \
30 |     curl -sL https://deb.nodesource.com/setup_12.x | bash - && \
31 |     apt-get -y install nodejs && \
32 |     rm -rf /var/lib/apt/lists/*
33 |
34 | # pip: upgrade the system-wide pip
35 | RUN python -m pip install --upgrade pip
36 |
37 | # aws cli and pytest
38 | RUN python -m pip install awscli && python -m pip install -U pytest
39 |
40 | # cdk: installed globally through npm
41 | RUN npm install -g aws-cdk
42 |
43 | # glue samples, exposed as /opt/samples/glue (ln -s takes TARGET first, then LINK_NAME)
44 | RUN git clone --depth 1 https://github.com/aws-samples/aws-glue-samples /opt/aws-glue-samples && \
45 |     mkdir -p /opt/samples && ln -s /opt/aws-glue-samples /opt/samples/glue
46 |
47 | # cdk samples, exposed as /opt/samples/cdk
48 | RUN git clone --depth 1 https://github.com/aws-samples/aws-cdk-examples.git /opt/aws-cdk-samples && \
49 |     mkdir -p /opt/samples && ln -s /opt/aws-cdk-samples /opt/samples/cdk
50 |
51 | # cloudformation samples, exposed as /opt/samples/cloudformation
52 | RUN git clone --depth 1 https://github.com/awslabs/aws-cloudformation-templates.git /opt/aws-cloudformation-samples && \
53 |     mkdir -p /opt/samples && ln -s /opt/aws-cloudformation-samples /opt/samples/cloudformation
54 |
55 | # User: unprivileged docker user and group, both with id 1000
56 | RUN addgroup --gid 1000 docker && \
57 |     adduser --uid 1000 --ingroup docker --home /home/docker --shell /bin/sh --disabled-password --gecos "" docker
58 |
59 | RUN chown -R docker:docker /opt
60 | RUN chown -R docker:docker /usr/local
61 |
62 | USER docker:docker
63 |
64 | # Env: install locations of the tools copied in above
65 | ENV JAVA_HOME=/usr/local/openjdk-8
66 | ENV M2_HOME=/opt/apache-maven-3.6.0
67 | ENV SPARK_HOME=/opt/spark-2.4.3-bin-spark-2.4.3-bin-hadoop2.8
68 | ENV GLUE_HOME=/opt/aws-glue-libs
69 |
70 | ENV PATH="${JAVA_HOME}/bin:${PATH}:${M2_HOME}/bin:${GLUE_HOME}/bin/"
71 |
72 | ENV GLUE_PY_FILES=$GLUE_HOME/PyGlue.zip
73 | ENV SPARK_CONF_DIR=$GLUE_HOME/conf
74 | ENV GLUE_JARS_DIR=$GLUE_HOME/jarsv1
75 |
76 | ENV PYTHONPATH="${SPARK_HOME}/python/:${PYTHONPATH}"
77 | ENV PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:${PYTHONPATH}"
78 | ENV PYTHONPATH="${GLUE_PY_FILES}:${PYTHONPATH}"
79 |
80 | ENV PATH="/home/docker/.poetry/bin:$PATH"
81 | ENV PATH="/home/docker/.local/bin:$PATH"
82 |
83 | # Generate spark-defaults.conf: put the Glue jars on the driver and executor classpaths
84 | RUN mkdir $SPARK_CONF_DIR
85 | RUN echo "spark.driver.extraClassPath $GLUE_JARS_DIR/*" >> $SPARK_CONF_DIR/spark-defaults.conf
86 | RUN echo "spark.executor.extraClassPath $GLUE_JARS_DIR/*" >> $SPARK_CONF_DIR/spark-defaults.conf
87 |
88 | # Changed scripts: write minimal bash wrappers into $GLUE_HOME/bin
89 | RUN echo '#!/usr/bin/env bash \n\n exec "${SPARK_HOME}/bin/spark-submit" --py-files "${GLUE_PY_FILES}" "$@"' > $GLUE_HOME/bin/gluesparksubmit
90 | RUN echo '#!/usr/bin/env bash \n\n exec "${SPARK_HOME}/bin/pyspark" "$@"' > $GLUE_HOME/bin/gluepyspark
91 | RUN echo '#!/usr/bin/env bash \n\n exec pytest "$@"' > $GLUE_HOME/bin/gluepytest
92 | RUN cp $GLUE_HOME/bin/gluepytest $GLUE_HOME/bin/pytest && \
93 |     cp $GLUE_HOME/bin/gluepyspark $GLUE_HOME/bin/pyspark && \
94 |     cp $GLUE_HOME/bin/gluesparksubmit $GLUE_HOME/bin/sparksubmit
95 | RUN rm $GLUE_HOME/bin/glue-setup.sh
96 | # Download log4j.properties into the Spark conf dir from a gist URL that includes a revision hash
97 | RUN wget -O $SPARK_CONF_DIR/log4j.properties https://gist.githubusercontent.com/svajiraya/aecb45c038e7bba86429646a68b542bb/raw/0cc6229d3b745a0092be75bbbf9476fa17318004/log4j.properties
98 |
99 | # pip: upgrade pip and install boto3 with --user
100 | RUN python -m pip install --user --upgrade pip && \
101 |     python -m pip install --user boto3
102 |
103 | # poetry: install through get-poetry.py, then turn off virtualenv creation
104 | RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python && \
105 |     poetry config virtualenvs.create false
106 |
107 | WORKDIR /app
108 | CMD ["bash"]
109 |
--------------------------------------------------------------------------------
/src/spark/1.0/py3/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM svajiraya/glue-dev-1.0:20200513_151648UTC as glue
2 | FROM openjdk:8-jdk-slim-buster as spark
3 |
4 | # Dependencies: curl and git, used by the download and clone steps below
5 | RUN apt-get -q update -y && \
6 |     apt-get -qq install -y curl git && \
7 |     rm -rf /var/lib/apt/lists/*
8 |
9 | # Maven: unpacked under /opt
10 | RUN curl -SsL https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-common/apache-maven-3.6.0-bin.tar.gz | tar -C /opt --warning=no-unknown-keyword -xzf -
11 |
12 | # Spark: unpacked under /opt
13 | RUN curl -SsL https://aws-glue-etl-artifacts.s3.amazonaws.com/glue-1.0/spark-2.4.3-bin-hadoop2.8.tgz | tar -C /opt --warning=no-unknown-keyword -xzf -
14 |
15 | # AWS glue scripts (glue-1.0 branch of a frozen fork)
16 | RUN git clone --depth 1 -b glue-1.0 --single-branch https://github.com/webysther/aws-glue-libs.git /opt/aws-glue-libs
17 |
18 | # Python: runtime stage; artifacts are copied in from the "spark" and "glue" stages
19 | FROM python:3.6.9-slim-buster
20 | COPY --from=spark /usr/local/openjdk-8 /usr/local/openjdk-8
21 | COPY --from=spark /opt/spark-2.4.3-bin-spark-2.4.3-bin-hadoop2.8 /opt/spark-2.4.3-bin-spark-2.4.3-bin-hadoop2.8
22 | COPY --from=spark /opt/apache-maven-3.6.0 /opt/apache-maven-3.6.0
23 | COPY --from=spark /opt/aws-glue-libs /opt/aws-glue-libs
24 | COPY --from=glue /glue/jarsv1
/opt/aws-glue-libs/jarsv1
25 | COPY --from=glue /glue/PyGlue.zip /opt/aws-glue-libs/PyGlue.zip
26 |
27 | # Dependencies: CLI tools plus Node.js 12 from the NodeSource setup script
28 | RUN apt-get update -q -y && \
29 |     apt-get install -q -y curl git zip vim wget && \
30 |     curl -sL https://deb.nodesource.com/setup_12.x | bash - && \
31 |     apt-get -y install nodejs && \
32 |     rm -rf /var/lib/apt/lists/*
33 |
34 | # pip: upgrade the system-wide pip
35 | RUN python -m pip install --upgrade pip
36 |
37 | # aws cli and pytest
38 | RUN python -m pip install awscli && python -m pip install -U pytest
39 |
40 | # cdk: installed globally through npm
41 | RUN npm install -g aws-cdk
42 |
43 | # glue samples, exposed as /opt/samples/glue (ln -s takes TARGET first, then LINK_NAME)
44 | RUN git clone --depth 1 https://github.com/aws-samples/aws-glue-samples /opt/aws-glue-samples && \
45 |     mkdir -p /opt/samples && ln -s /opt/aws-glue-samples /opt/samples/glue
46 |
47 | # cdk samples, exposed as /opt/samples/cdk
48 | RUN git clone --depth 1 https://github.com/aws-samples/aws-cdk-examples.git /opt/aws-cdk-samples && \
49 |     mkdir -p /opt/samples && ln -s /opt/aws-cdk-samples /opt/samples/cdk
50 |
51 | # cloudformation samples, exposed as /opt/samples/cloudformation
52 | RUN git clone --depth 1 https://github.com/awslabs/aws-cloudformation-templates.git /opt/aws-cloudformation-samples && \
53 |     mkdir -p /opt/samples && ln -s /opt/aws-cloudformation-samples /opt/samples/cloudformation
54 |
55 | # User: unprivileged docker user and group, both with id 1000
56 | RUN addgroup --gid 1000 docker && \
57 |     adduser --uid 1000 --ingroup docker --home /home/docker --shell /bin/sh --disabled-password --gecos "" docker
58 |
59 | RUN chown -R docker:docker /opt
60 | RUN chown -R docker:docker /usr/local
61 |
62 | USER docker:docker
63 |
64 | # Env: install locations of the tools copied in above
65 | ENV JAVA_HOME=/usr/local/openjdk-8
66 | ENV M2_HOME=/opt/apache-maven-3.6.0
67 | ENV SPARK_HOME=/opt/spark-2.4.3-bin-spark-2.4.3-bin-hadoop2.8
68 | ENV GLUE_HOME=/opt/aws-glue-libs
69 |
70 | ENV PATH="${JAVA_HOME}/bin:${PATH}:${M2_HOME}/bin:${GLUE_HOME}/bin/"
71 |
72 | ENV GLUE_PY_FILES=$GLUE_HOME/PyGlue.zip
73 | ENV SPARK_CONF_DIR=$GLUE_HOME/conf
74 | ENV GLUE_JARS_DIR=$GLUE_HOME/jarsv1
75 |
76 | ENV PYTHONPATH="${SPARK_HOME}/python/:${PYTHONPATH}"
77 | ENV PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:${PYTHONPATH}"
78 | ENV
PYTHONPATH="${GLUE_PY_FILES}:${PYTHONPATH}"
79 | # poetry and pip --user install locations go in front of PATH
80 | ENV PATH="/home/docker/.poetry/bin:$PATH"
81 | ENV PATH="/home/docker/.local/bin:$PATH"
82 |
83 | # Generate spark-defaults.conf: put the Glue jars on the driver and executor classpaths
84 | RUN mkdir $SPARK_CONF_DIR
85 | RUN echo "spark.driver.extraClassPath $GLUE_JARS_DIR/*" >> $SPARK_CONF_DIR/spark-defaults.conf
86 | RUN echo "spark.executor.extraClassPath $GLUE_JARS_DIR/*" >> $SPARK_CONF_DIR/spark-defaults.conf
87 |
88 | # Changed scripts: write minimal bash wrappers into $GLUE_HOME/bin
89 | RUN echo '#!/usr/bin/env bash \n\n exec "${SPARK_HOME}/bin/spark-submit" --py-files "${GLUE_PY_FILES}" "$@"' > $GLUE_HOME/bin/gluesparksubmit
90 | RUN echo '#!/usr/bin/env bash \n\n exec "${SPARK_HOME}/bin/pyspark" "$@"' > $GLUE_HOME/bin/gluepyspark
91 | RUN echo '#!/usr/bin/env bash \n\n exec pytest "$@"' > $GLUE_HOME/bin/gluepytest
92 | RUN cp $GLUE_HOME/bin/gluepytest $GLUE_HOME/bin/pytest && \
93 |     cp $GLUE_HOME/bin/gluepyspark $GLUE_HOME/bin/pyspark && \
94 |     cp $GLUE_HOME/bin/gluesparksubmit $GLUE_HOME/bin/sparksubmit
95 | RUN rm $GLUE_HOME/bin/glue-setup.sh
96 | # Download log4j.properties into the Spark conf dir from a gist URL that includes a revision hash
97 | RUN wget -O $SPARK_CONF_DIR/log4j.properties https://gist.githubusercontent.com/svajiraya/aecb45c038e7bba86429646a68b542bb/raw/0cc6229d3b745a0092be75bbbf9476fa17318004/log4j.properties
98 |
99 | # pip: upgrade pip and install boto3 with --user
100 | RUN python -m pip install --user --upgrade pip && \
101 |     python -m pip install --user boto3
102 |
103 | # poetry: install through get-poetry.py, then turn off virtualenv creation
104 | RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python && \
105 |     poetry config virtualenvs.create false
106 |
107 | WORKDIR /app
108 | CMD ["bash"]
109 |
--------------------------------------------------------------------------------
/start.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e
3 |
4 | # install docker when no docker executable is found on PATH
5 | if !
[ -x "$(command -v docker)" ]; then
6 |     curl -sSL https://get.docker.com/ | sh
7 |     # Quoted, with a fallback for environments where $USER is unset
8 |     sudo usermod -aG docker "${USER:-$(id -un)}"
9 | fi
10 |
11 | # Resolve the invoking user's uid/gid now: they are baked into the alias text
12 | CURRENT_USER=${USER:-$(id -un)}
13 | HOST_UID=$(id -u "$CURRENT_USER")
14 | HOST_GID=$(id -g "$CURRENT_USER")
15 |
16 | # Alias definitions with real newlines. \$PWD and \$@ stay literal here, so
17 | # they are expanded when the alias runs rather than while this script runs.
18 | GLUE_DOCKER_ALIAS=$(cat <<END
19 |
20 | # glue-docker alias
21 | alias glue='docker run -v \$PWD:/app -v ~/.aws:/home/docker/.aws -u ${HOST_UID}:${HOST_GID} -it webysther/aws-glue "\$@"'
22 | alias glue-spark='docker run -v \$PWD:/app -v ~/.aws:/home/docker/.aws -u ${HOST_UID}:${HOST_GID} -it webysther/aws-glue:spark "\$@"'
23 | END
24 | )
25 |
26 | # Pick the rc file, and the literal hint shown to the user, from $SHELL
27 | case $SHELL in
28 |     */zsh)  RC_FILE=$HOME/.zshrc;  RC_HINT='~/.zshrc' ;;
29 |     */bash) RC_FILE=$HOME/.bashrc; RC_HINT='~/.bashrc' ;;
30 |     *)      RC_FILE=;              RC_HINT='~/.*rc' ;;
31 | esac
32 |
33 | if [ -n "$RC_FILE" ]; then
34 |     # Append only once: re-running the script must not duplicate the aliases
35 |     if ! grep -qsF '# glue-docker alias' "$RC_FILE"; then
36 |         # printf instead of echo: echo's handling of "\n" differs between sh implementations
37 |         printf '%s\n' "$GLUE_DOCKER_ALIAS" >> "$RC_FILE"
38 |     fi
39 | else
40 |     printf '%s\n\n' 'Shell not detected, create this alias inside your shell:'
41 |     printf '%s\n' "$GLUE_DOCKER_ALIAS"
42 | fi
43 |
44 | # Usage summary goes to stderr
45 | cat >&2 <<'EOF'
46 | Use:
47 |     - glue: Glue Python Shell
48 |     - glue-spark: Glue Pyspark
49 |
50 | EOF
51 |
52 | echo "Execute source ${RC_HINT} to register the alias"
--------------------------------------------------------------------------------