├── .gitignore
├── Dockerfile
├── README.md
├── docker-compose.yml
├── notebooks
│   └── test_db_connection.ipynb
└── requirements.txt

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM jupyter/datascience-notebook

COPY requirements.txt ./
RUN pip install -U pip && \
    pip install --no-cache-dir -r requirements.txt

ENV JUPYTER_ENABLE_LAB=yes

COPY --chown=${NB_UID}:${NB_GID} . /home/jovyan/work
WORKDIR /home/jovyan/work

CMD ["jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--notebook-dir=/home/jovyan/work", "--allow-root"]

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# data_science_neo4j_docker

Docker template for basic data science packages to interface with Neo4j

## Introduction

This container is based on the Jupyter data science container and runs alongside a Neo4j database on your local machine. This container does NOT include support for GPU-based deep learning packages (that will be a separate repo). There are a lot of Python packages in this container that you might not use, so it might be a bit bloated; it is more of a generic container to get you started.

## To run this container

First, build the image from the CLI:

```
docker-compose build
```

Then start the container:

```
docker-compose up
```

This will start up both a Jupyter Lab server at `localhost:8888` (note that there will be a token associated with this, which you will need to grab from the logs as that container is starting) as well as the Neo4j browser at `localhost:7474`. These are both configurable to whatever ports you want.

When you are done with the container, stop it via `CTRL-C` and then run:

```
docker-compose down
```

## Some notes

- There are two different Python packages that can be used to connect to Neo4j from within Python. It is probably easiest to just pick one and go with it (see the connectivity sketch after this list):
  - [`neo4j`](https://neo4j.com/docs/api/python-driver/current/): the official, Neo4j-supported Python driver
  - [`py2neo`](https://py2neo.org/2021.0/): a community-developed driver with lots of solid documentation and examples out there
- The container is set up to run Neo4j with the username `neo4j` and the password `1234`. You will want to change the password in particular.
- There is a notebook in `notebooks/` that tests that Jupyter can properly connect to Neo4j. Run it to verify your setup.
- The `docker-compose.yml` is set to create and read data into Neo4j (including creating the database itself) in the directory `$HOME/graph_data/my_data`. You should change this to whatever directory you want to store the database in. You will also use this directory should you want to read `.csv` files into the database (see the `LOAD CSV` sketch at the end of this document).
- The `docker-compose.yml` file includes some optional environment variables for setting memory values. Since this container includes the Graph Data Science (GDS) library, it is a good idea to add some heap memory. The exact values will depend on your machine; you will want to experiment with tuning them based on [these instructions](https://neo4j.com/docs/operations-manual/current/performance/memory-configuration/).
- The Neo4j portion of the container includes two libraries that are very helpful for doing data science with graphs: APOC and GDS. The latest versions should be pulled when this container starts. Links to their use are provided below.
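As a point of reference, here is a minimal connectivity check using the official `neo4j` driver (the `py2neo` equivalent lives in `notebooks/test_db_connection.ipynb`). This is just a sketch assuming the default credentials and ports from `docker-compose.yml`; note that from inside the Jupyter container the host is the Compose service name `neo4j`, while from your host machine it would be `localhost`:

```python
from neo4j import GraphDatabase

# Defaults from docker-compose.yml; update these if you change the password
uri = "bolt://neo4j:7687"  # use bolt://localhost:7687 from outside the containers
driver = GraphDatabase.driver(uri, auth=("neo4j", "1234"))

with driver.session() as session:
    # An empty database should report 0 nodes
    count = session.run("MATCH (n) RETURN count(n) AS count").single()["count"]
    print(f"Number of nodes in graph: {count}")

driver.close()
```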

## Some helpful links

- [Neo4j](https://neo4j.com)
- [Awesome Procedures on Cypher (APOC)](https://neo4j.com/labs/apoc/)
- [Cypher Manual](https://neo4j.com/docs/cypher-manual/current/)
- [Cypher Reference Card](https://neo4j.com/docs/pdf/neo4j-cypher-refcard-stable.pdf)
- [Graph Data Science (GDS) Library](https://neo4j.com/developer/graph-data-science/)
- [Python Driver API Docs](https://neo4j.com/docs/api/python-driver/current/)

--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: '3.7'

services:
  neo4j:
    image: neo4j:4.2.3-enterprise
    container_name: "neo-gds"
    volumes:
      - $HOME/graph_data/my_data:/data
      - $HOME/graph_data/my_data:/var/lib/neo4j/import
    ports:
      - "7474:7474"
      - "7687:7687"
    environment:
      - NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
      - NEO4J_AUTH=neo4j/1234
      - NEO4JLABS_PLUGINS=["apoc", "graph-data-science"]
      - NEO4J_apoc_import_file_enabled=true
      - NEO4J_apoc_export_file_enabled=true
      - NEO4J_apoc_export_json_all=true
      # Comment these lines out if you do not have the memory available
      - NEO4J_dbms_memory_pagecache_size=4G
      - NEO4J_dbms_memory_heap_initial__size=4G
      - NEO4J_dbms_memory_heap_max__size=8G
    networks:
      - neo_net

  jupyterlab:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8888:8888"
    volumes:
      - ./notebooks:/home/jovyan/work
    links:
      - neo4j
    networks:
      - neo_net

networks:
  neo_net:

--------------------------------------------------------------------------------
/notebooks/test_db_connection.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "modular-leone",
   "metadata": {},
   "source": [
    "## This notebook checks that Python is able to connect to the graph. Assuming this block runs (on an empty database) and returns 0 nodes in the graph, you are good to go!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "preliminary-virginia",
   "metadata": {},
   "outputs": [],
   "source": [
    "from py2neo import Graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "prescription-basket",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of nodes in graph: 0\n"
     ]
    }
   ],
   "source": [
    "# Match with no label so that all nodes in the graph are counted\n",
    "graph = Graph(\"bolt://neo4j:7687\", name=\"neo4j\", password=\"1234\")\n",
    "print('Number of nodes in graph:', graph.nodes.match().count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "awful-vertical",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
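The notebook above only verifies read access. To also confirm that Jupyter can write to the graph, a cell along these lines could be added; this is a sketch only, and the `TestNode` label and `purpose` property are arbitrary placeholders:

```python
from py2neo import Graph, Node

graph = Graph("bolt://neo4j:7687", name="neo4j", password="1234")

# Create a throwaway node, confirm it is visible, then clean it up
node = Node("TestNode", purpose="connectivity-check")
graph.create(node)
print("TestNode count:", graph.nodes.match("TestNode").count())  # expect 1
graph.delete(node)
```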
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
jupyterlab==3.0.7
neo4j==4.2.1
py2neo==2021.0.1

--------------------------------------------------------------------------------
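As noted in the README, `$HOME/graph_data/my_data` is mounted as Neo4j's import directory, which is where `LOAD CSV` looks for files. A minimal sketch of reading a file into the graph, assuming a hypothetical `people.csv` with a `name` header column has been copied into that directory:

```python
from neo4j import GraphDatabase

driver = GraphDatabase.driver("bolt://neo4j:7687", auth=("neo4j", "1234"))

# people.csv is a hypothetical file placed in the mounted import directory
query = """
LOAD CSV WITH HEADERS FROM 'file:///people.csv' AS row
CREATE (:Person {name: row.name})
"""
with driver.session() as session:
    session.run(query)

driver.close()
```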