├── .gitignore
├── .travis.yml
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── Test-spark.ipynb
├── Test.ipynb
├── deploy.sh
└── passwd-helper.py

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other info into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

*~
_site
Gemfile.lock

.*
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
branches:
  only:
    - master

sudo: required
services:
  - docker

env:
  global:
    - DATE=$(date '+%Y%m%d')
    - REPO="threathuntproj"
    - IMAGE_NAME="hunting"
    - JUPYTER_NB_PASS=""

before_script:
  - docker images

script:
  - docker build --pull --build-arg JUPYTER_NB_PASS=$JUPYTER_NB_PASS -t $REPO/$IMAGE_NAME:$DATE.$TRAVIS_BUILD_NUMBER .
  - docker images
  - docker run -v `pwd`:/home/jovyan/work $REPO/$IMAGE_NAME:$DATE.$TRAVIS_BUILD_NUMBER jupyter nbconvert --to notebook --execute --ExecutePreprocessor.timeout=60 --output /tmp/testoutput-py3.ipynb /home/jovyan/work/Test.ipynb
  - docker run -v `pwd`:/home/jovyan/work $REPO/$IMAGE_NAME:$DATE.$TRAVIS_BUILD_NUMBER jupyter nbconvert --to notebook --execute --ExecutePreprocessor.timeout=60 --output /tmp/testoutput-spark-py3.ipynb /home/jovyan/work/Test-spark.ipynb
  - docker images

before_deploy:
  - docker tag $REPO/$IMAGE_NAME:$DATE.$TRAVIS_BUILD_NUMBER $REPO/$IMAGE_NAME:latest
  - docker tag $REPO/$IMAGE_NAME $REPO/$IMAGE_NAME:$DATE
  - docker images
  - echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USER" --password-stdin

deploy:
  provider: script
  script: bash ./deploy.sh
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# We're built on the official Jupyter image for PySpark notebooks
FROM jupyter/pyspark-notebook:latest

# But if it all breaks, blame us instead
LABEL maintainer="The ThreatHunting Project "

# Switch back to the jovyan user to do module installs, or this will fail
# due to directory ownership on the cache
USER $NB_USER

# Install Python packages.
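# Keeping the list in an ENV (rather than hard-coding it into the RUN line)
# also records it in the image metadata, so you can recover the installed set
# from a built image later. For example (a sketch; adjust the image tag to
# whatever you built):
#
#   docker run --rm threathuntproj/hunting printenv INSTALL_PACKAGES_PIP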
ENV INSTALL_PACKAGES_PIP plotly elasticsearch-dsl seaborn scikit-learn ipywidgets tqdm requests dask pyspark splunk-sdk huntlib
RUN pip install jupyter_contrib_nbextensions ${INSTALL_PACKAGES_PIP}

# Set up some Jupyter Notebook extensions
RUN jupyter nbextension enable toc2/main && \
    jupyter nbextension enable execute_time/ExecuteTime && \
    jupyter nbextension enable python-markdown/main

# Set up some useful Jupyter Lab extensions
RUN jupyter labextension install --minimize=false jupyterlab-plotly @jupyterlab/toc

# The first time you 'import plotly' on a new system, it has to build the
# font cache. This takes a while and also causes spurious warnings, so
# we can just do that during the build process and the user never has to
# see it.
RUN /opt/conda/bin/python -c 'import plotly'

# Set the notebook default password
ADD passwd-helper.py /tmp
ARG JUPYTER_NB_PASS
RUN mkdir -p /home/jovyan/.jupyter ; JUPYTER_NB_PASS=${JUPYTER_NB_PASS} python /tmp/passwd-helper.py >> /home/jovyan/.jupyter/jupyter_notebook_config.py

# Add "/home/jovyan/work/lib" to the PYTHONPATH. Since "/home/jovyan/work"
# is typically a mounted volume, this gives the user a convenient place to
# drop their own Python modules that will be available in all notebooks.
ENV PYTHONPATH "/home/jovyan/work/lib:$PYTHONPATH"
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 ThreatHuntingProject

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
REPO=threathuntproj
IMAGE_NAME=hunting

# Point this to the full path of a directory you want to mount as your "work"
# volume inside the container. This will become "/home/jovyan/work" (Jupyter
# default value)
DATAVOL=$(HOME)

# Change this to the local port on which you'd like to connect to the
# container. Usually you can just leave this as-is.
LOCALPORT=8888

build: Dockerfile refresh
	docker build --build-arg JUPYTER_NB_PASS=$$JUPYTER_NB_PASS -t $(REPO)/$(IMAGE_NAME):dev .
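
# Example (a sketch, assuming a bash-like shell): to bake a default notebook
# password into a dev build, matching the README's example:
#
#   JUPYTER_NB_PASS=notelling make build
#
# The doubled '$$' in the recipe above is deliberate: make collapses '$$' to a
# single '$', so the shell (not make) expands JUPYTER_NB_PASS at build time.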

refresh:
	docker pull jupyter/pyspark-notebook

test:
	@echo "\n************************"
	@echo "Testing basic functionality with Python 3..."
	docker run -v `pwd`:/home/jovyan/work $(REPO)/$(IMAGE_NAME):dev jupyter nbconvert --to notebook --execute --ExecutePreprocessor.timeout=60 --output /tmp/testoutput-py3.ipynb /home/jovyan/work/Test.ipynb
	@echo "Testing Apache Spark support with Python 3..."
	docker run -v `pwd`:/home/jovyan/work $(REPO)/$(IMAGE_NAME):dev jupyter nbconvert --to notebook --execute --ExecutePreprocessor.timeout=60 --output /tmp/testoutput-spark-py3.ipynb /home/jovyan/work/Test-spark.ipynb

run:
	docker run -it -p $(LOCALPORT):8888 --user root -e GRANT_SUDO=yes -e GEN_CERT=yes -v $(DATAVOL):/home/jovyan/work $(REPO)/$(IMAGE_NAME):dev

run-lab:
	docker run -it -p $(LOCALPORT):8888 --user root -e GRANT_SUDO=yes -e GEN_CERT=yes -e JUPYTER_ENABLE_LAB=yes -v $(DATAVOL):/home/jovyan/work $(REPO)/$(IMAGE_NAME):dev
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Hunter
A threat hunting / data analysis environment based on Python, Pandas, PySpark and Jupyter Notebook.

[![Build Status](https://travis-ci.com/ThreatHuntingProject/hunter.svg?branch=master)](https://travis-ci.com/ThreatHuntingProject/hunter)

The following is a partial list of the major features:

* Support for either the traditional Notebook or the new Lab interface
* Built-in extensions manager for the Notebook interface
* Python 3 (default) and Python 2 kernels
* Preinstalled libraries useful for data search, analysis and visualization, including:
    * elasticsearch-dsl
    * splunk-sdk
    * pandas
    * pyspark
    * numpy
    * matplotlib
    * seaborn
    * plotly (with cufflinks support)
    * scikit-learn
    * ipywidgets
    * huntlib
    * tqdm

# Docker Image
This repo contains all the files and instructions necessary to build your own Docker image from scratch. If you just want to *use* the hunting environment, though, I recommend using the pre-built image available on Docker Hub:

> docker pull threathuntproj/hunting

This will download the image to your local cache. Once you've downloaded the image, skip directly to the *Running the Container* section of this README and continue from there.

See [the instructions](https://hub.docker.com/r/threathuntproj/hunting/) for details.

# Prerequisites
You will need to have Docker installed and running on your system.

To build the container, you'll also need _make_. I typically use the version included with Xcode's command line tools, but other versions may work as well, as the Makefile is not very complex.

Be sure your build system is connected to the Internet, as the build process downloads and installs the latest versions of many packages. Internet access is not required to simply run the container.

I have only tested the build with Docker running on OS X, though I believe it should work pretty well with most Linux environments, too. Of course, the host OS is irrelevant if you are just running the resulting container.
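
If you'd like to sanity-check those prerequisites first, the following should print version information for both Docker and make (exact output varies by system):

> docker --version && make --version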

# Building
To build the container, simply do:

> make

This generally takes several minutes, and may take significantly longer if the upstream notebook image we use needs refreshing.

## Build Options
* If the JUPYTER_NB_PASS variable is set in your environment when you run 'make', the build script will use its value as the default password for accessing the notebook server. For example, the following will set the password to _notelling_:
> export JUPYTER_NB_PASS="notelling"

# Running the Container
To run the container for the first time, simply do:

> make run

This will start a new container with the Jupyter Notebook interface, which you can stop/start as you like.

If you prefer the Jupyter Lab interface, just do:

> make run-lab

_Each time you use `make run` or `make run-lab`, you'll create a new persistent container._ Be careful not to run `make run` more than once unless you actually intend to create multiple containers.

If you want to have a bit more control, try something like:

> docker run -it -p 8888:8888 -e GEN_CERT=yes -v $HOME:/home/jovyan/work threathuntproj/hunting

This is essentially the same as the _make run_ method. As written, the example will mount your home directory as the filesystem the notebooks see, but you can change this to fit your needs and preferences. To do the same with Jupyter Lab, just set the `JUPYTER_ENABLE_LAB` environment variable in the container, like so:

> docker run -it -p 8888:8888 -e GEN_CERT=yes -e JUPYTER_ENABLE_LAB=yes -v $HOME:/home/jovyan/work threathuntproj/hunting

## Adding Custom Python Libraries

As a special feature, the image adds _/home/jovyan/work/lib_ to the PYTHONPATH in the container. This is especially handy when you mount _/home/jovyan/work_ as a volume, as in the examples above. If you install a Python module into that _lib_ directory, your notebooks will automatically be able to find it when they're running in the container. This provides a convenient way for you to add your own modules without having to rebuild the entire image.

# Accessing the Jupyter Environment
By default, when the container runs, it will print the UI URL to the console. This URL contains a randomly-generated access token, which serves instead of a password to authenticate you to the Jupyter server. Click that URL, or cut and paste it into your browser, and you'll be logged in.

If you've set a password at build time (via the JUPYTER_NB_PASS variable), then there will be no default URL. Instead, just browse to _https://localhost:8888_.

**NOTE: This image uses SSL, so all access URLs must start with _https://_.**
--------------------------------------------------------------------------------
/Test-spark.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Simple Test Suite for Hunting Platform - Spark Version\n",
    "\n",
    "This notebook is just a simple test to make sure that basic Apache Spark functions are working. It tries to estimate the value of pi, but it's just an estimate, so don't worry if the value is off. The important thing is that Spark completes the job and gives an answer without error.\n",
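    "\n",
    "(For the curious: this is the standard Monte Carlo estimate. We sample random points in the unit square and count how many land inside the unit quarter-circle; that fraction converges to pi/4, so multiplying by 4 recovers pi.)\n",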
    "\n",
    "**Spark is only installed for the Python 3 kernel, so if you try to execute this with Python 2, it will fail.** If there are no errors, everything is probably installed correctly."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check Python Modules\n",
    "For the modules we install, just do a very basic import to make sure they are installed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pyspark\n",
    "import random"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A basic SparkContext()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "outputs": [],
   "source": [
    "spark_config = (pyspark.SparkConf().setAppName('test').setMaster(\"local[2]\"))\n",
    "sc = pyspark.SparkContext(conf=spark_config)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compute Pi\n",
    "This is based on the Apache sample code at https://spark.apache.org/examples.html. I've slightly modified it to work with Python 3, but it's basically identical to their version."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "outputs": [],
   "source": [
    "NUM_SAMPLES=10000000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "outputs": [],
   "source": [
    "def inside(p):\n",
    "    x, y = random.random(), random.random()\n",
    "    return x*x + y*y < 1\n",
    "\n",
    "count = sc.parallelize(range(0, NUM_SAMPLES)) \\\n",
    "          .filter(inside).count()\n",
    "print(\"Pi is roughly %f\" % (4.0 * count / NUM_SAMPLES))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------
/Test.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Simple Test Suite for Hunting Platform\n",
    "\n",
    "This notebook is just a simple test suite for some _very_ basic functions of the hunting platform.\n",
    "\n",
    "Run it once for each Python kernel (_Kernel -> Change kernel_). If there are no errors, everything is probably installed correctly."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check Python Modules and Version\n",
    "For the modules we install, just do a very basic import to make sure they are installed. Also check the exact Python version we're using."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "import plotly as py\n",
    "import plotly.offline as pyo\n",
    "import plotly.express as px\n",
    "import elasticsearch\n",
    "import seaborn\n",
    "import sklearn\n",
    "import ipywidgets\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "import sys\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(sys.version)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## List of Python 3 Modules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "outputs": [],
   "source": [
    "!conda list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## List of Python 2 Modules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "outputs": [],
   "source": [
    "!conda list --name python2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame([[1,2,3],[4,5,6]], columns=[\"col1\",\"col2\",\"col3\"])\n",
    "df[\"result\"] = df.apply(lambda row: row[\"col1\"] + row[\"col2\"] + row[\"col3\"], axis=1)\n",
    "display(df)\n",
    "\n",
    "result_df = pd.DataFrame([[1,2,3,6],[4,5,6,15]], columns=[\"col1\",\"col2\",\"col3\",\"result\"])\n",
    "display(result_df)\n",
    "\n",
    "pd.testing.assert_frame_equal(df, result_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = px.data.iris()\n",
    "\n",
    "px.scatter(data, x='sepal_width', y='sepal_length')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Simple script called by Travis CI to push the Docker images we just built to
# Docker Hub.

# Note: this script assumes $DOCKERHUB_USER and $DOCKERHUB_PASS come from the
# CI environment's secret settings, and that $REPO, $IMAGE_NAME, $DATE and
# $TRAVIS_BUILD_NUMBER are set by .travis.yml / Travis itself. If any of these
# are unset, the pushes below will fail.

echo "Logging into Docker Hub"
echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USER" --password-stdin
echo "Pushing $REPO/$IMAGE_NAME:latest"
docker push $REPO/$IMAGE_NAME:latest
echo "Pushing $REPO/$IMAGE_NAME:$DATE.$TRAVIS_BUILD_NUMBER"
docker push $REPO/$IMAGE_NAME:$DATE.$TRAVIS_BUILD_NUMBER
echo "Pushing $REPO/$IMAGE_NAME:$DATE"
docker push $REPO/$IMAGE_NAME:$DATE
--------------------------------------------------------------------------------
/passwd-helper.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
from __future__ import print_function

from IPython.lib import passwd
import os

# If a password was supplied via the JUPYTER_NB_PASS environment variable,
# hash it and emit the config lines Jupyter needs. Using os.environ.get()
# means an unset variable simply produces no output instead of a KeyError.
if os.environ.get('JUPYTER_NB_PASS'):
    password = passwd(os.environ.get('JUPYTER_NB_PASS'))

    print("\n\n# Set the default password for the notebook")
    print("c.NotebookApp.password = u'%s'" % password)
    print("\n\n")
--------------------------------------------------------------------------------
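
A quick way to exercise the helper outside an image build (a sketch; "notelling" is just an example password):

    JUPYTER_NB_PASS=notelling python passwd-helper.py

This should print something like a `c.NotebookApp.password = u'sha1:...'` line, which the Dockerfile build appends to /home/jovyan/.jupyter/jupyter_notebook_config.py.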