├── .DS_Store
├── .gitignore
├── Dockerfile
├── Dockerfile.md
├── LICENSE
├── License.md
├── README.md
├── aerospike
├── aerospike.conf
├── aerospike.template.conf
├── bin
    └── build-docker-image.sh
├── binder-pre
    └── binder_run_first.ipynb
├── entrypoint.sh
├── features.conf
├── grep
├── jupyter_notebook_config.py
├── logo.png
├── notebooks
    ├── .DS_Store
    ├── README.md
    ├── java
    │   ├── README.md
    │   ├── SimplePutGetExample.ipynb
    │   ├── add_namespace.sh
    │   ├── async_ops.ipynb
    │   ├── batch_ops.ipynb
    │   ├── cdt_indexing.ipynb
    │   ├── doc_api.ipynb
    │   ├── doc_api_example_store.json
    │   ├── doc_api_example_tommyleejones.json
    │   ├── expressions.ipynb
    │   ├── hello_world.ipynb
    │   ├── java-advanced_collection_data_types.ipynb
    │   ├── java-intro_to_data_modeling.ipynb
    │   ├── java-intro_to_transactions.ipynb
    │   ├── java-modeling_using_lists.ipynb
    │   ├── java-modeling_using_maps.ipynb
    │   ├── java-working_with_lists.ipynb
    │   ├── java-working_with_maps.ipynb
    │   ├── look_aside_cache_mongo.ipynb
    │   ├── nobel_prizes.json
    │   ├── object_mapper.ipynb
    │   ├── query_splits.ipynb
    │   ├── query_streams.ipynb
    │   ├── query_udf.ipynb
    │   ├── space_companies.json
    │   ├── sql_aggregates_1.ipynb
    │   ├── sql_aggregates_2.ipynb
    │   ├── sql_select.ipynb
    │   ├── sql_update.ipynb
    │   └── tweetaspike.ipynb
    ├── presto
    │   ├── AerospikePrestoDemo.ipynb
    │   └── AerospikePython.ipynb
    ├── python
    │   ├── README.md
    │   ├── basic_operations.ipynb
    │   ├── hello_world.ipynb
    │   ├── local_cache.ipynb
    │   ├── look_aside_cache.ipynb
    │   ├── query.ipynb
    │   ├── readme_tips.ipynb
    │   ├── simple_put_get_example.ipynb
    │   └── transactions_rmw_pattern.ipynb
    ├── readme_tips.ipynb
    ├── spark
    │   ├── .gitignore
    │   ├── AerospikeSparkPython.ipynb
    │   ├── AerospikeSparkScala.ipynb
    │   ├── feature-store-feature-eng.ipynb
    │   ├── feature-store-model-serving.ipynb
    │   ├── feature-store-model-training.ipynb
    │   ├── other_notebooks
    │   │   ├── AerospikeSparkH2ODemo.ipynb
    │   │   ├── AerospikeSparkMLDemo.ipynb
    │   │   ├── AerospikeSparkMLLinearRegression.ipynb
    │   │   ├── AerospikeSparkPythonJSONSQL.ipynb
    │   │   ├── AerospikeSparkPythonParquet.ipynb
    │   │   ├── AerospikeSparkSQLSyntaxDemo.ipynb
    │   │   └── nested_data.json
    │   ├── resources
    │   │   ├── creditcard_small.csv
    │   │   ├── fs-arch.jpg
    │   │   ├── fs-model-ws.py
    │   │   ├── fs_model_rf.tar.gz
    │   │   ├── install.txt
    │   │   ├── nested_data.json
    │   │   └── pushdown-expressions.ipynb
    │   └── simple-load-store.ipynb
    └── udf
    │   ├── aggregate_fns.lua
    │   └── update_example.lua
├── update.sh
└── update_readme.sh


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerospike-examples/interactive-notebooks/0e582d4305974f6cadd390e2086e8550f1b3ecf7/.DS_Store


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.class
 2 | *.log
 3 | *.iml
 4 | *.ipr
 5 | *.iws
 6 | *.pyc
 7 | *.pyo
 8 | *.swp
 9 | .DS_Store
10 | .cache
11 | #Spark related stuff
12 | spark-warehouse/
13 | .ipynb_checkpoints/
14 | #Idea realted stuff
15 | .idea/
16 | .idea_modules/
17 | target
18 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | #
 2 | # Aerospike Server Dockerfile
 3 | #
 4 | # http://github.com/aerospike/aerospike-server.docker
 5 | #
 6 | # This docker file is compatible with Aerospike Community Edition. It provides Java and Python environments and access to the Aerospike DB.
 7 | FROM jupyter/base-notebook:python-3.8.6
 8 | 
 9 | USER root
10 | 
11 | ENV AEROSPIKE_VERSION 7.2.0.4
12 | ENV AEROSPIKE_SHA256 f742ad19d6a75901134e8a6a9a8c9bba9830b019c06053145ad56d8d1b189af8
13 | ENV LOGFILE /var/log/aerospike/aerospike.log
14 | ARG AEROSPIKE_TOOLS_VERSION=11.1.1
15 | 
16 | ARG NB_USER=jovyan
17 | ARG NB_UID=1000
18 | ENV USER ${NB_USER}
19 | ENV NB_UID ${NB_UID}
20 | ENV HOME /home/${NB_USER}
21 | USER root
22 | RUN chown -R ${NB_UID} ${HOME}
23 | 
24 | # spark notebook
25 | RUN mkdir /opt/spark-nb; cd /opt/spark-nb\
26 |   && wget -qO- "https://archive.apache.org/dist/spark/spark-3.4.0/spark-3.4.0-bin-hadoop3-scala2.13.tgz" | tar -xvz \
27 |   && ln -s spark-3.4.0-bin-hadoop3-scala2.13 spark-dir-link \
28 |   && pip install findspark numpy pandas matplotlib sklearn \
29 |   && wget "https://download.aerospike.com/artifacts/aerospike-spark/4.5.1/aerospike-spark-4.5.1-spark3.4-scala2.13-clientunshaded.jar" \
30 |   && ln -s aerospike-spark-4.5.1-spark3.4-scala2.13-clientunshaded.jar aerospike-jar-link
31 |   
32 |   # install jupyter notebook extensions, and enable these extensions by default: table of content, collapsible headers, and scratchpad
33 | RUN pip install jupyter_contrib_nbextensions\
34 |   && jupyter contrib nbextension install --sys-prefix\
35 |   && jupyter nbextension enable toc2/main --sys-prefix\
36 |   && jupyter nbextension enable collapsible_headings/main --sys-prefix\
37 |   && jupyter nbextension enable scratchpad/main --sys-prefix
38 | 
39 | RUN  mkdir /var/run/aerospike\
40 |   && apt-get update -y \
41 |   && apt-get install software-properties-common dirmngr gpg-agent -y --no-install-recommends\
42 |   && apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 0xB1998361219BD9C9 \
43 |   && apt-add-repository 'deb http://repos.azulsystems.com/ubuntu stable main' \
44 |   && apt-get install -y --no-install-recommends build-essential wget lua5.2 gettext-base libldap-dev curl unzip python python3-pip python3-dev python3 zulu-11\
45 |   && wget "https://www.aerospike.com/artifacts/aerospike-server-enterprise/${AEROSPIKE_VERSION}/aerospike-server-enterprise_${AEROSPIKE_VERSION}_tools-${AEROSPIKE_TOOLS_VERSION}_ubuntu20.04_x86_64.tgz" -O aerospike-server.tgz \  
46 |   && echo "$AEROSPIKE_SHA256 *aerospike-server.tgz" | sha256sum -c - \
47 |   && wget "https://github.com/aerospike/aerospike-loader/releases/download/4.0.3/aerospike-load-4.0.3-jar-with-dependencies.jar" \
48 |   && mkdir aerospike \
49 |   && tar xzf aerospike-server.tgz --strip-components=1 -C aerospike \
50 |   && dpkg -i aerospike/aerospike-server*.deb \
51 |   && dpkg -i aerospike/aerospike-tools*.deb \
52 |   && mkdir -p /opt/aerospike/lib/java \
53 |   && mv aerospike-load-*-jar-with-dependencies.jar /opt/aerospike/lib/java/ \
54 |   && pip install --no-cache-dir aerospike\
55 |   && pip install --no-cache-dir pymongo\
56 |   && wget "https://github.com/SpencerPark/IJava/releases/download/v1.3.0/ijava-1.3.0.zip" -O ijava-kernel.zip\
57 |   && unzip ijava-kernel.zip -d ijava-kernel \
58 |   && python3 ijava-kernel/install.py --sys-prefix\
59 |   && rm ijava-kernel.zip\
60 |   && rm -rf aerospike-server.tgz aerospike /var/lib/apt/lists/* \
61 |   && apt-get purge -y \
62 |   && apt autoremove -y \
63 |   && mkdir -p /var/log/aerospike 
64 | 
65 | COPY aerospike /etc/init.d/
66 | RUN usermod -a -G aerospike ${NB_USER}
67 | 
68 | # Add the Aerospike configuration specific to this dockerfile
69 | COPY aerospike.template.conf /etc/aerospike/aerospike.template.conf
70 | COPY aerospike.conf /etc/aerospike/aerospike.conf
71 | COPY features.conf /etc/aerospike/features.conf
72 | 
73 | RUN chown -R ${NB_UID} /etc/aerospike
74 | RUN chown -R ${NB_UID} /opt/aerospike
75 | RUN chown -R ${NB_UID} /var/log/aerospike
76 | RUN chown -R ${NB_UID} /var/run/aerospike
77 | 
78 | #RUN fix-permissions /etc/aerospike/
79 | #RUN fix-permissions /var/log/aerospike
80 | 
81 | COPY notebooks* /home/${NB_USER}/notebooks
82 | RUN echo "Versions:" > /home/${NB_USER}/notebooks/README.md
83 | RUN python -V >> /home/${NB_USER}/notebooks/README.md
84 | RUN java -version 2>> /home/${NB_USER}/notebooks/README.md
85 | RUN asd --version >> /home/${NB_USER}/notebooks/README.md
86 | RUN echo -e "Aerospike Python Client `pip show aerospike|grep Version|sed -e 's/Version://g'`" >> /home/${NB_USER}/notebooks/README.md
87 | #RUN echo -e "Aerospike Java Client 5.0.0" >> /home/${NB_USER}/notebooks/README.md
88 | 
89 | COPY jupyter_notebook_config.py /home/${NB_USER}/
90 | RUN  fix-permissions /home/${NB_USER}/
91 | 
92 | # I don't know why this has to be like this 
93 | # rather than overiding
94 | COPY entrypoint.sh /usr/local/bin/start-notebook.sh
95 | WORKDIR /home/${NB_USER}/notebooks  
96 | USER ${NB_USER}
97 | 


--------------------------------------------------------------------------------
/Dockerfile.md:
--------------------------------------------------------------------------------
 1 | ## Aerospike Development Notebooks Dockerfile
 2 | 
 3 | This repository contains the Dockerfile for building a Docker image for running [Aerospike](http://aerospike.com). 
 4 | 
 5 | ## Installation
 6 | 
 7 | 1. Install [Docker](https://www.docker.io/).
 8 | 
 9 | 2. Download from public [Docker Registry](https://index.docker.io/):
10 | 
11 | 		docker pull aerospike-examples/interactive-notebooks
12 | 
13 | 	_Alternatively, you can build an image from Dockerfile:_
14 |    
15 | 		docker build -t="aerospike-examples/interactive-notebooks" github.com/aerospike-examples/interactive-notebooks
16 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 aerospike-examples/interactive-notebooks
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/License.md:
--------------------------------------------------------------------------------
 1 | # Aerospike Evaluation License Agreement
 2 | PLEASE READ THIS EVALUATION LICENSE AGREEMENT (THE “AGREEMENT”) CAREFULLY BEFORE USING THE SOFTWARE AND SERVICES OFFERED BY AEROSPIKE, INC. (“AEROSPIKE”). BY DOWNLOADING THE AEROSPIKE SOFTWARE ON A TRIAL OR EVALUATION BASIS (AN “EVALUATION”), YOU OR THE ENTITY THAT YOU REPRESENT (“LICENSEE”) ARE UNCONDITIONALLY CONSENTING TO BE BOUND BY AND ARE BECOMING A PARTY TO THE MASTER LICENSE AGREEMENT CONSISTING OF THIS PARAGRAPH AND THE FOLLOWING TERMS. PROVISION OF THE PRODUCT IS CONDITIONED ON, AND LICENSEE’S INSTALLATION OR USE OF THE PRODUCT SHALL CONSTITUTE, LICENSEE’S ASSENT TO THE TERMS OF THIS AGREEMENT TO THE EXCLUSION OF ALL OTHER TERMS.
 3 | 
 4 |  1. <ins>Grant of License and Restrictions.</ins> Subject to the terms hereof and any applicable user/use limitations, Aerospike grants Licensee a personal, nonsublicensable, nonexclusive, limited right to use the licensed product downloaded for an Evaluation in object code form only (the “Product”) subject to any of the limitations herein and only in accordance with Aerospike’s applicable user documentation. Licensee may possess only the number of copies of the Product that you download for an Evaluation (an “Evaluation Product”), may be used only during the Evaluation Term (defined below) and only for purposes of internal evaluation, and not for any production use. Aerospike retains ownership of all Products and rights therein and Licensee will maintain the copyright notice and any other notices that appear on the Product on any copies and any media. Licensee will not (and will not allow any third party to) (i) reverse engineer or attempt to discover any source code or underlying ideas or algorithms of any Product (except to the extent that applicable law prohibits reverse engineering restrictions), (ii) provide, lease, lend, disclose, use for timesharing or service bureau purposes, or otherwise use or allow others to use for the benefit of any third party, any Product (except as expressly and specifically authorized by Aerospike), (iii) possess or use any Product, or allow the transfer, transmission, export, or re-export of any Product or portion thereof in violation of any export control laws or regulations administered by the U.S. Commerce Department, U.S. Treasury Department’s Office of Foreign Assets Control, or any other government agency, (iv) disclose to any third party any benchmarking or comparative study involving any Product, (v) modify any Product, or (vi) run any production instance of an enterprise edition version of a Product with any community edition version of a Product. Prior to disposing of any media or apparatus containing any part of the Product, Licensee shall completely destroy any Product contained therein. All the limitations and restrictions on Products in this Agreement also apply to documentation.
 5 |  2. <ins>Support and Maintenance.</ins> Aerospike will use reasonable commercial efforts to provide the support and maintenance services for the Evaluation Product as and to the extent described in Aerospike’s then-current Support and Maintenance Terms. Licensee shall not use any Support Services for any unsupported application, including, without limitation, any open source or community edition of any Aerospike product, without paying Aerospike’s then-current enterprise subscription fees. Aerospike shall be entitled to invoice Licensee (and Licensee shall pay) Aerospike’s then-current enterprise subscription fees for every instance of any Aerospike product (including, without limitation, any open source or community edition thereof) in connection with which Licensee uses (or otherwise benefits from) any Support Services.
 6 |  3. <ins>Indemnification.</ins> Aerospike shall defend, indemnify and hold Licensee harmless from liability to third parties resulting from infringement by a Product of any United States patent or any copyright or misappropriation of any trade secret, provided Aerospike is promptly notified of any and all threats, claims and proceedings related thereto and given reasonable assistance and the opportunity to assume sole control over defense and settlement; Aerospike will not be responsible for any settlement it does not approve. The foregoing obligations do not apply with respect to a Product or portions or components thereof to the extent (i) not created by Aerospike, (ii) made in whole or in part in accordance to Licensee specifications, (iii) that are modified after delivery by Aerospike, (iv) combined with other products, processes or materials where the alleged infringement relates to such combination, (v) where Licensee continues allegedly infringing activity after being notified thereof or after being informed of modifications that would have avoided the alleged infringement, or (vi) where Licensee’s use of such Product is not strictly in accordance herewith. Licensee will indemnify Aerospike from all damages, costs, settlements, attorneys’ fees and expenses related to any claim of infringement or misappropriation excluded from Aerospike’s indemnity obligation by the preceding sentence.
 7 |  4. <ins>Embedded Reporting/Compliance Routine; Data Access and Use; Feedback.</ins> Licensee acknowledges that Products (excluding the Enterprise Edition of the Product) may contain automated reporting routines that will automatically identify and analyze certain aspects of use and performance of Products and/or the systems on which they are installed, as well as the operator and operating environment (including problems and issues that arise in connection therewith), and provide e-mail and other reports to Aerospike; this includes, without limitation, information on usage that Aerospike uses for billing purposes. Aerospike will be entitled to inspect the installation and configuration of such Products and systems from time to time on reasonable notice. Provided it does not identify Licensee, Aerospike will be free to use for development, diagnostic and corrective purposes any data and information it so collects relating to diagnosis, problems, systems, performance, use or functionality, and may allow others to do so. Notwithstanding anything else, Licensee agrees that Aerospike may freely exploit and make available any and all feedback, suggestions, ideas, enhancement requests, recommendations or other information provided by Licensee any other party relating to the Products.
 8 |  5. <ins>No Warranty.</ins> ALL PRODUCTS AND SERVICES (INCLUDING, WITHOUT LIMITATION, EVALUATION PRODUCTS) ARE PROVIDED “AS IS” WITHOUT WARRANTY OF ANY KIND FROM ANYONE, INCLUDING WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE OR NONINFRINGEMENT. FURTHER, AEROSPIKE DOES NOT WARRANT RESULTS OF USE OR THAT THE PRODUCTS ARE BUG FREE OR THAT THE PRODUCT’S USE WILL BE UNINTERRUPTED.
 9 |  6. <ins>Limitation of Liability.</ins> NOTWITHSTANDING ANYTHING ELSE HEREIN OR OTHERWISE, AND EXCEPT FOR BODILY INJURY, NEITHER AEROSPIKE NOR ANY LICENSOR SHALL BE LIABLE OR OBLIGATED WITH RESPECT TO THE SUBJECT MATTER HEREOF OR UNDER ANY CONTRACT, NEGLIGENCE, STRICT LIABILITY OR OTHER LEGAL OR EQUITABLE THEORY (I) FOR ANY DOLLAR AMOUNTS OR (II) FOR ANY COST OF PROCUREMENT OF SUBSTITUTE GOODS, TECHNOLOGY, SERVICES OR RIGHTS; (III) FOR ANY INCIDENTAL OR CONSEQUENTIAL DAMAGES; (IV) FOR INTERRUPTION OF USE OR LOSS OR CORRUPTION OF DATA; OR (V) FOR ANY MATTER BEYOND ITS REASONABLE CONTROL. THE PRODUCT IS NOT DESIGNED, MANUFACTURED, OR INTENDED FOR USE IN HAZARDOUS ENVIRONMENTS REQUIRING FAIL-SAFE PERFORMANCE WHERE THE FAILURE OF THE PRODUCT COULD LEAD DIRECTLY TO DEATH, PERSONAL INJURY, OR SIGNIFICANT PHYSICAL OR ENVIRONMENTAL DAMAGE (“HIGH RISK ACTIVITIES”). USE OF THE PRODUCT IN HIGH RISK ACTIVITIES IS NOT AUTHORIZED. THE PARTIES AGREE THAT THIS SECTION 8 REPRESENTS A REASONABLE ALLOCATION OF RISK AND THAT AEROSPIKE WOULD NOT PROCEED IN THE ABSENCE OF SUCH ALLOCATION.
10 |  7. <ins>Confidentiality.</ins> Licensee understands that Company has disclosed or may disclose information relating to Company’s technology or business, including, without limitation, the Products and any other software, documentation, updates, modifications, or new releases thereof and the existence, terms and conditions of this Agreement (hereinafter collectively referred to as “Proprietary Information”). Licensee agrees: (i) not to divulge to any third person any such Proprietary Information, (ii) to give access to such Proprietary information solely to those employees with a need to have access thereto for purposes of this Agreement, and (iii) to take the same security precautions to protect against disclosure or unauthorized use of such Proprietary information that Licensee takes with its own proprietary information, but in no event will Licensee apply less than reasonable precautions to protect such Proprietary Information. Nothing in this Agreement will prevent Licensee from disclosing the Proprietary Information pursuant to any judicial or governmental order, provided that Licensee gives Company reasonable prior notice of such disclosure to contest such order.
11 |  8. <ins>Miscellaneous.</ins> Neither this Agreement nor the licenses granted hereunder are assignable or transferable (and any attempt to do so shall be void); provided that either party may assign and transfer the foregoing to a successor to substantially all of (i) in the case of Aerospike, Aerospike’s Product business or assets or, (ii) in the case of Licensee, Licensee’s business for which Products are licensed (but if the authorized use is not limited, the assignee is not licensed to expand use beyond Licensee’s bona fide pre-assignment use plus reasonably expected growth assuming the assignment and related transactions had not occurred). The provisions hereof are for the benefit of the parties only and not for any other person or entity. No failure or delay in exercising any right hereunder will operate as a waiver thereof, nor will any partial exercise of any right or power hereunder preclude further exercise. If any provision shall be adjudged by any court of competent jurisdiction to be unenforceable or invalid, that provision shall be limited or eliminated to the minimum extent necessary so that this Agreement shall otherwise remain in full force and effect and enforceable. This Agreement shall be deemed to have been made in, and shall be construed pursuant to the laws of the State of California and the United States without regard to conflicts of laws provisions thereof, and without regard to the United Nations Convention on the International Sale of Goods or the Uniform Computer Information Transactions Act. This Agreement is the complete and exclusive statement of the mutual understanding of the parties and supersedes and cancels all previous written and oral agreements and communications relating to the subject matter hereof and any waivers or amendments shall be effective only if made in a writing executed by authorized representatives of both parties; however, any pre-printed or standard terms of any Licensee purchase order, confirmation, or similar form, even if signed by the parties after the effective date hereof, shall have no force or effect. The substantially prevailing party in any action to enforce this agreement will be entitled to recover its attorney’s fees and costs in connection with such action.
12 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | This repository contains the build information for the Docker Image used at https://hub.docker.com/r/aerospike/intro-notebooks.
  2 | 
  3 | # Aerospike Interactive Notebooks
  4 | 
  5 | Aerospike is a distributed database designed to serve global applications with low latency, fast throughput, and resilience to failure. 
  6 | 
  7 | The Docker Image in this repo contains a complete [Aerospike](https://www.aerospike.com) development environment and two categories of Jupyter Notebooks: 
  8 | - **Aerospike Client Tutorials:** Notebooks that provide interactive examples of Java and Python client use of Aerospike Database.
  9 | - **Spark Notebooks:** Notebooks that show how Aerospike can be used in conjunction with Spark.
 10 | 
 11 | Useful links:
 12 | - [Run the **Aerospike Java and Python Client Development Environment** locally in Docker](#run-client-notebooks-in-docker-container).
 13 | - How-to set up and run Spark on [Linux (CentOS)](#set-up-spark-notebooks-on-linux-centos) or [MacOS X](#set-up-spark-notebooks-on-macos-x) to work with the Spark notebooks in the development environment.
 14 | - [Notebook list](notebooks/README.md#notebooks)
 15 | - [Software contents list](#software-contents)
 16 | 
 17 | Documentation for Aerospike is available at [https://aerospike.com/docs](https://aerospike.com/docs),
 18 | and Docker Desktop installation at [https://docs.docker.com/desktop/](https://docs.docker.com/desktop/).
 19 | 
 20 | The download and use of this Aerospike software is governed by [Aerospike Evaluation License Agreement](https://www.aerospike.com/forms/evaluation-license-agreement/).
 21 | 
 22 | 
 23 | ## Run Client Notebooks in Docker Container
 24 | Notebooks for Java and Python clients are currently avaialble. Some Spark notebooks can currently run in the container.
 25 | 
 26 | 1. Install [Docker](https://www.docker.com).
 27 | 
 28 | 1. Get [the Intro Notebooks image](https://hub.docker.com/r/aerospike/intro-notebooks) from [Docker Hub](https://hub.docker.com/u/aerospike):
 29 |    ```
 30 |    docker pull aerospike/intro-notebooks
 31 |    ```
 32 |       [Alternatively] If building the image:
 33 |       1. Git clone image repo:
 34 |          ```
 35 |          git clone https://github.com/aerospike-examples/interactive-notebooks.docker.git
 36 |          ```
 37 |       1. cd to "interactive-notebooks.docker" and build from Dockerfile:
 38 |          ```
 39 |          docker build -t aerospike/intro-notebooks .
 40 |          ```
 41 | 1. Run the image and expose port 8888:
 42 |    ```
 43 |    docker run --name aero-nb -p 8888:8888 aerospike/intro-notebooks
 44 |    ```
 45 |    [Optional alternative] Use the LOGFILE environment variable to specify a log file path in the image:
 46 |    ```
 47 |    docker run -e "LOGFILE=/opt/aerospike/aerospike.log" --name aero-nb -p 8888:8888 aerospike/intro-notebooks
 48 |    ```
 49 | 1. Point your browser at the url with token which should be printed on the output. By default it should be:
 50 |    ```
 51 |    http://127.0.0.1:8888/?token=<token>
 52 |    ```
 53 | 
 54 | Example:
 55 | ```text
 56 | $ docker run --name aero-nb -p 8888:8888 aerospike/intro-notebooks
 57 | 
 58 | link eth0 state up
 59 | link eth0 state up in 0
 60 | Set username to: jovyan
 61 | usermod: no changes
 62 | Executing the command: jupyter notebook
 63 | [I 05:28:34.202 NotebookApp] Writing notebook server cookie secret to /home/jovyan/.local/share/jupyter/runtime/notebook_cookie_secret
 64 | [I 05:28:34.954 NotebookApp] JupyterLab extension loaded from /opt/conda/lib/python3.8/site-packages/jupyterlab
 65 | [I 05:28:34.954 NotebookApp] JupyterLab application directory is /opt/conda/share/jupyter/lab
 66 | [I 05:28:34.957 NotebookApp] Serving notebooks from local directory: /home/jovyan/notebooks
 67 | [I 05:28:34.957 NotebookApp] Jupyter Notebook 6.1.4 is running at:
 68 | [I 05:28:34.957 NotebookApp] http://6a374afd9f00:8888/?token=c45783e6631e305c97f6919905250e61f09049e750813cf6
 69 | [I 05:28:34.957 NotebookApp]  or http://127.0.0.1:8888/?token=c45783e6631e305c97f6919905250e61f09049e750813cf6
 70 | [I 05:28:34.957 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).
 71 | 
 72 | ```
 73 | 
 74 | ## Spark Notebooks
 75 | Spark notebooks can run on Linux (CentOS) and MacOS X. 
 76 | 
 77 | Some Spark notebooks can fully run in the container by following the specific setup provided in the respective notebook. To run any Spark notebook with an external Spark, Aerospike, and/or Jupyter server, follow the instructions below.
 78 | 
 79 | ### Set up Spark Notebooks on Linux (CentOS)
 80 | 
 81 | yum installer used below - use dbpkg/rpm/other if your Linux distribution does not support yum
 82 | 
 83 | ``` bash
 84 | sudo yum -y install gcc zlib-devel openssl-devel libffi-devel sqlite-devel bzip2-devel bzip2 xz-devel screen wget
 85 | ```
 86 | 
 87 | Get your own local copy of Python 3.7 (ignore if you have it already). Below we install to ~/.localpython
 88 | 
 89 | ``` bash
 90 | PYTHON_VERSION=3.7.1
 91 | wget http://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz
 92 | tar zxvf Python-${PYTHON_VERSION}.tgz
 93 | cd Python-${PYTHON_VERSION}
 94 | mkdir ~/.localpython
 95 | ./configure --prefix=$HOME/.localpython
 96 | make
 97 | make install
 98 | ```
 99 | 
100 | Set up a virtual Python environment - this is a sandbox which avoids you making system wide changes
101 | 
102 | ``` bash
103 | # Install virtualenv tool
104 | ~/.localpython/bin/pip3 install virtualenv
105 | # Create on-disk representation of virtual environment at ~/spark-venv
106 | ~/.localpython/bin/virtualenv ~/spark-venv
107 | # Activate virtual environment
108 | source ~/spark-venv/bin/activate
109 | ```
110 | 
111 | Use of a virtual environment is indicated in the command line string - the name of the virtual environment - spark-env is added to the command line prompt - e.g.,
112 | 
113 | ```
114 | (spark-venv) [ec2-user@ip-10-0-0-248 Python-3.7.1]$
115 | ```
116 | 
117 | You can return to the system enviroment by typing ```deactivate``` and reactivate using ```source ~/spark-venv/bin/activate```
118 | 
119 | Get rid of annoying messages concerning pip upgrade
120 | 
121 | ```
122 | pip install --upgrade pip
123 | ```
124 | 
125 | Note at this point, all our Python related tooling is local to our virtual environment. So ```which pip``` will give
126 | 
127 | ```
128 | ~/spark-venv/bin/pip
129 | ```
130 | 
131 | Install required Python dependencies
132 | 
133 | ```
134 | pip install jupyter PySpark findspark numpy pandas matplotlib sklearn
135 | ```
136 | 
137 | If you plan on using scala in your workbooks you need to install the spylon kernel - some care is needed with Python versioning
138 | ```
139 | pip install spylon_kernel
140 | PYTHON=$(which python)
141 | sudo $PYTHON -m spylon_kernel install
142 | ```
143 | 
144 | Install Spark and set ```$SPARK_HOME```. Note you may need to change the SPARK_VERSION if you get a 404 following the wget.
145 | 
146 | ``` bash
147 | SPARK_VERSION=2.4.7
148 | HADOOP_VERSION=2.7
149 | cd /tmp
150 | wget https://downloads.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
151 | tar xvfz spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
152 | sudo mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} /opt/
153 | export SPARK_HOME=/opt/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}
154 | export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH
155 | cd ~
156 | ```
157 | 
158 | Use of the Aerospike Spark Connector requires a valid feature key. The notebooks assume this is located at ```/etc/aerospike/features.conf```. Make sure your feature key is locally available, and if it is not located as above, modify the ```AS_FEATURE_KEY_PATH``` variable at the head of the notebook. You may need to run
159 | 
160 | ``` bash
161 | sudo mkdir /etc/aerospike
162 | sudo chmod 777 /etc/aerospike
163 | ```
164 | 
165 | Make sure you have the interactive-notebooks repository locally.
166 | 
167 | ```
168 | git clone https://github.com/aerospike-examples/interactive-notebooks
169 | ```
170 | Finally start Jupyter. Change the IP in the string below - it can be localhost, but if you want to access from a remote host, choose the IP of one of your ethernet interfaces. You could replace with $(hostname -I | awk '{print $1}')
171 | 
172 | Note I set the notebook-dir to point to the directory containing the notebooks in this repository. You also will need SPARK_HOME and PYTHONPATH set correctly (reproducing the former from the above).
173 | 
174 | ```
175 | SPARK_VERSION=2.4.7
176 | HADOOP_VERSION=2.7
177 | export SPARK_HOME=/opt/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}
178 | export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH
179 | jupyter notebook --no-browser --ip=<IP> --port=8888 --notebook-dir=~/interactive-notebooks/spark/
180 | ```
181 | 
182 | You will see output similar to
183 | 
184 | ```
185 | [I 09:36:52.202 NotebookApp] Writing notebook server cookie secret to /home/ec2-user/.local/share/jupyter/runtime/notebook_cookie_secret
186 | [I 09:36:52.370 NotebookApp] Serving notebooks from local directory: /home/ec2-user/interactive-notebooks/spark
187 | [I 09:36:52.370 NotebookApp] Jupyter Notebook 6.1.4 is running at:
188 | [I 09:36:52.370 NotebookApp] http://10.0.0.248:8888/?token=5bf2910a2527567346323e0a4735e94136e1c70d392b561f
189 | [I 09:36:52.370 NotebookApp]  or http://127.0.0.1:8888/?token=5bf2910a2527567346323e0a4735e94136e1c70d392b561f
190 | [I 09:36:52.371 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).
191 | [C 09:36:52.373 NotebookApp]
192 | ```
193 | 
194 | You will need to use the URLs in the output to access jupyter - as the security token is expected.
195 | 
196 | You can omit this step by omitting the --no-browser flag - in that case jupyter will open a browser window local to itself, and request the Notebook app URL above.
197 | 
198 | You may wish to run the jupyter startup command from a [screen](https://linuxize.com/post/how-to-use-linux-screen/) so it will stay running if your session terminates. We installed screen at the outset to allow for this.
199 | 
200 | #### pyenv / Linux
201 | 
202 | You can go down the pyenv route on Linux as per the instructions for Mac. You install pyenv differently
203 | 
204 | ```
205 | sudo yum -y install gcc git zlib-devel openssl-devel libffi-devel sqlite-devel bzip2-devel bzip2 xz-devel screen
206 | git clone http://github.com/pyenv/pyenv .pyenv
207 | export PATH=$PATH:~/.pyenv/bin
208 | ```
209 | 
210 | but once done, just pick up the MacOS instructions at ```pyenv install 3.7.3```
211 | 
212 | ### Set Up Spark Notebooks on MacOS X
213 | The main challenge is getting a sufficiently up to date version of Python installed and set as your working version. You mustn't mess with your existing version of Python (see [xkcd](https://xkcd.com/1987/)).
214 | 
215 | [pyenv](https://github.com/pyenv/pyenv) is the tool to help with this.
216 | 
217 | First you'll need **brew** the package manager for macOS. From  [instructions](https://brew.sh)
218 | ```
219 | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)"
220 | ```
221 | 
222 | Next install pyenv
223 | ```
224 | brew install pyenv
225 | ```
226 | and finally we can install our required python version. The subsequent 'global' command sets 3.7.3 as our selected version
227 | ```
228 | pyenv install 3.7.3
229 | pyenv global 3.7.3
230 | ```
231 | The command below sets up our path so the required version of Python is used. Once done, do ```python --version``` to check.
232 | ```
233 | eval "$(pyenv init -)"
234 | ```
235 | You can now set up your virtual environment - this is a sandbox which avoids you making system wide changes. Note this is the same as the steps above for Linux, except we don't have  to give explicit paths to pip, virtualenv.
236 | 
237 | ``` bash
238 | # Install virtualenv tool
239 | pip install virtualenv
240 | # Create on-disk representation of virtual environment at ~/spark-venv
241 | virtualenv ~/spark-venv
242 | # Activate virtual environment
243 | source ~/spark-venv/bin/activate
244 | ```
245 | 
246 | You can now follow the Linux instructions from
247 | ```
248 | pip install jupyter PySpark findspark numpy pandas matplotlib sklearn
249 | ```
250 | 
251 | onwards.
252 | 
253 | 
254 | ## Software Contents
255 | 
256 | - [Aerospike](https://www.aerospike.com) development software: 
257 |    - Aerospike Database
258 |    - Aerospike Java and Python client libraries
259 |    - Aerospike Tools
260 | - [Jupyter Notebook Server](https://jupyter.org/)
261 | 


--------------------------------------------------------------------------------
/aerospike:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Start/stop the aerospike daemon.
 3 | #
 4 | ### BEGIN INIT INFO
 5 | # Provides:          aerospike
 6 | # Required-Start:    $remote_fs $syslog $time
 7 | # Required-Stop:     $remote_fs $syslog $time
 8 | # Should-Start:      $network $named slapd autofs ypbind nscd nslcd winbind
 9 | # Should-Stop:       $network $named slapd autofs ypbind nscd nslcd winbind
10 | # Default-Start:     2 3 4 5
11 | # Default-Stop:
12 | # Short-Description: Aerospike
13 | # Description:       Aerospike
14 | ### END INIT INFO
15 | 
16 | PATH=/bin:/usr/bin:/sbin:/usr/sbin
17 | DESC="aerospike daemon"
18 | NAME=aerospike
19 | DAEMON=/usr/bin/asd
20 | PIDFILE=/var/run/aerospike/asd.pid
21 | SCRIPTNAME=/etc/init.d/"$NAME"
22 | 
23 | test -f $DAEMON || exit 0
24 | 
25 | . /lib/lsb/init-functions
26 | 
27 | case "$1" in
28 | start)  log_success_msg "Starting aerospike" "aerospike"
29 |         start_daemon -p $PIDFILE $DAEMON --config-file /etc/aerospike/aerospike.conf $EXTRA_OPTS
30 |         log_success_msg $?
31 |         ;;
32 | stop)   log_success_msg "Stopping aerospike" "aerospike"
33 | 		pkill asd
34 | 		sleep 1
35 | 		if [ $? -ne 0 ]; then pkill -9 asd; fi
36 |         log_success_msg $RETVAL
37 |         ;;
38 | restart) log_success_msg "Restarting aerospike" "aerospike"
39 |         $0 stop
40 |         $0 start
41 |         ;;
42 | coldstart)  log_success_msg "Starting aerospike" "aerospike"
43 |         start_daemon -p $PIDFILE $DAEMON --cold-start $EXTRA_OPTS
44 |         log_success_msg $?
45 |         ;;
46 | status)
47 |         status_of_proc -p $PIDFILE $DAEMON $NAME && exit 0 || exit $?
48 |         ;;
49 | *)      log_success_msg "Usage: /etc/init.d/aerospike {start|stop|status|restart|coldstart}"
50 |         exit 2
51 |         ;;
52 | esac
53 | exit 0
54 | 


--------------------------------------------------------------------------------
/aerospike.conf:
--------------------------------------------------------------------------------
 1 | # Aerospike database configuration file.
 2 | 
 3 | # This stanza must come first.
 4 | service {
 5 | 	user jovyan 
 6 | 	#group aerospike 
 7 | 	pidfile /var/run/aerospike/asd.pid
 8 | #	service-threads 4 # cpu x 5 in 4.7 
 9 | #	transaction-queues 4 # obsolete in 4.7 
10 | #	transaction-threads-per-queue 4 # obsolete in 4.7
11 | 	proto-fd-max 15000
12 | }
13 | 
14 | logging {
15 | 
16 | 	# Log file must be an absolute path.
17 | 	file /var/log/aerospike/aerospike.log {
18 | 		context any info
19 | 	}
20 | 
21 | 	# Send log messages to stdout
22 | 	console {
23 | 		context any info 
24 | 	}
25 | }
26 | 
27 | network {
28 | 	service {
29 | 		address any
30 | 		port 3000
31 | 
32 | 		# Uncomment the following to set the `access-address` parameter to the
33 | 		# IP address of the Docker host. This will the allow the server to correctly
34 | 		# publish the address which applications and other nodes in the cluster to
35 | 		# use when addressing this node.
36 | 		# access-address <IPADDR>
37 | 	}
38 | 
39 | 	heartbeat {
40 | 
41 |         address any
42 | 		# mesh is used for environments that do not support multicast
43 | 		mode mesh
44 | 		port 3002
45 | 
46 | 		# use asinfo -v 'tip:host=<ADDR>;port=3002' to inform cluster of
47 | 		# other mesh nodes
48 | 
49 | 		interval 150
50 | 		timeout 10
51 | 	}
52 | 
53 | 	fabric {
54 | 	    address any
55 | 		port 3001
56 | 	}
57 | 
58 | 	info {
59 | 	    address any
60 | 		port 3003
61 | 	}
62 | }
63 | 
64 | namespace test {
65 | 	replication-factor 2
66 | 	# memory-size 1G
67 | 	default-ttl 30d # 5 days, use 0 to never expire/evict.
68 | 	nsup-period 120
69 | 	
70 | 	storage-engine memory {
71 | 		file /opt/aerospike/data/test.dat
72 | 		filesize 4G
73 | 		# data-in-memory true # Store data in memory in addition to file.
74 | 	}
75 | }
76 | 
77 | 


--------------------------------------------------------------------------------
/aerospike.template.conf:
--------------------------------------------------------------------------------
 1 | # Aerospike database configuration file.
 2 | 
 3 | # This stanza must come first.
 4 | service {
 5 | 	user jovyan 
 6 | 
 7 | 	pidfile /var/run/aerospike/asd.pid
 8 | #	service-threads ${SERVICE_THREADS} # cpu x 5 in 4.7 
 9 | #	transaction-queues ${TRANSACTION_QUEUES} # obsolete in 4.7 
10 | #	transaction-threads-per-queue ${TRANSACTION_THREADS_PER_QUEUE} # obsolete in 4.7
11 | 	proto-fd-max 15000
12 | }
13 | 
14 | logging {
15 | 
16 | 	# Log file must be an absolute path.
17 | 	file ${LOGFILE} {
18 | 		context any info
19 | 	}
20 | 
21 | 	# Send log messages to stdout
22 | 	console {
23 | 		context any info 
24 | 	}
25 | }
26 | 
27 | network {
28 | 	service {
29 | 		address ${SERVICE_ADDRESS}
30 | 		port ${SERVICE_PORT}
31 | 
32 | 		# Uncomment the following to set the `access-address` parameter to the
33 | 		# IP address of the Docker host. This will the allow the server to correctly
34 | 		# publish the address which applications and other nodes in the cluster to
35 | 		# use when addressing this node.
36 | 		# access-address <IPADDR>
37 | 	}
38 | 
39 | 	heartbeat {
40 | 
41 |         address ${HB_ADDRESS}
42 | 		# mesh is used for environments that do not support multicast
43 | 		mode mesh
44 | 		port ${HB_PORT}
45 | 
46 | 		# use asinfo -v 'tip:host=<ADDR>;port=3002' to inform cluster of
47 | 		# other mesh nodes
48 | 
49 | 		interval 150
50 | 		timeout 10
51 | 	}
52 | 
53 | 	fabric {
54 | 	    address ${FABRIC_ADDRESS}
55 | 		port ${FABRIC_PORT}
56 | 	}
57 | 
58 | 	info {
59 | 	    address ${INFO_ADDRESS}
60 | 		port ${INFO_PORT}
61 | 	}
62 | }
63 | 
64 | namespace ${NAMESPACE} {
65 | 	replication-factor ${REPL_FACTOR}
66 | 	# memory-size ${MEM_GB}G
67 | 	default-ttl ${DEFAULT_TTL} # 5 days, use 0 to never expire/evict.
68 | 	nsup-period ${NSUP_PERIOD}
69 | 	
70 | 	storage-engine memory {
71 | 		file /opt/aerospike/data/${NAMESPACE}.dat
72 | 		filesize ${STORAGE_GB}G
73 | 		# data-in-memory true # Store data in memory in addition to file.
74 | 	}
75 | }
76 | 
77 | 


--------------------------------------------------------------------------------
/bin/build-docker-image.sh:
--------------------------------------------------------------------------------
1 | #rm -rf target
2 | #mkdir target
3 | #cp -r docker/* target
4 | #cp -r notebooks/java target/notebooks/
5 | #cp -r notebooks/python target/notebooks/
6 | 
7 | docker build --no-cache -t ${1:-aerospike/intro-notebooks} .
8 | 


--------------------------------------------------------------------------------
/binder-pre/binder_run_first.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# This notebook sets some prereqs if running in some environments such as https://mybinder.org\n",
 8 |     "\n",
 9 |     "*** Important: If in Binder, this notebook will auto close if unused for 10 minutes. If that occurs reload from the initial url. ***\n",
10 |     "\n"
11 |    ]
12 |   },
13 |   {
14 |    "cell_type": "markdown",
15 |    "metadata": {},
16 |    "source": [
17 |     "## In a binder environment, asd process must be started\n",
18 |     "\n",
19 |     "If running enterprise edition, feture key must be set as well"
20 |    ]
21 |   },
22 |   {
23 |    "cell_type": "code",
24 |    "execution_count": null,
25 |    "metadata": {},
26 |    "outputs": [],
27 |    "source": [
28 |     "import os\n",
29 |     "import subprocess\n",
30 |     "import pprint\n",
31 |     "import time\n",
32 |     "\n",
33 |     "features = \"\"\"\n",
34 |     "COPY FEATURE FILE CONTENTS HERE\n",
35 |     "\"\"\"\n",
36 |     "\n",
37 |     "if features.strip() == \"\" or features.strip() == \"COPY FEATURE FILE CONTENTS HERE\":\n",
38 |     "    features_file = open(\"/etc/aerospike/features.conf\", \"w\")\n",
39 |     "    n = features_file.write(features)\n",
40 |     "    features_file.close()\n",
41 |     "\n",
42 |     "\n",
43 |     "os.system(\"asd\")\n"
44 |    ]
45 |   }
46 |  ],
47 |  "metadata": {
48 |   "kernelspec": {
49 |    "display_name": "Python 3",
50 |    "language": "python",
51 |    "name": "python3"
52 |   },
53 |   "language_info": {
54 |    "codemirror_mode": {
55 |     "name": "ipython",
56 |     "version": 3
57 |    },
58 |    "file_extension": ".py",
59 |    "mimetype": "text/x-python",
60 |    "name": "python",
61 |    "nbconvert_exporter": "python",
62 |    "pygments_lexer": "ipython3",
63 |    "version": "3.8.2-final"
64 |   }
65 |  },
66 |  "nbformat": 4,
67 |  "nbformat_minor": 4
68 | }


--------------------------------------------------------------------------------
/entrypoint.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -m
 3 | 
 4 | export CORES=$(grep -c ^processor /proc/cpuinfo)
 5 | export SERVICE_THREADS=${SERVICE_THREADS:-$CORES}
 6 | export TRANSACTION_QUEUES=${TRANSACTION_QUEUES:-$CORES}
 7 | export TRANSACTION_THREADS_PER_QUEUE=${TRANSACTION_THREADS_PER_QUEUE:-4}
 8 | export LOGFILE=${LOGFILE:-/dev/null}
 9 | export SERVICE_ADDRESS=${SERVICE_ADDRESS:-any}
10 | export SERVICE_PORT=${SERVICE_PORT:-3000}
11 | export HB_ADDRESS=${HB_ADDRESS:-any}
12 | export HB_PORT=${HB_PORT:-3002}
13 | export FABRIC_ADDRESS=${FABRIC_ADDRESS:-any}
14 | export FABRIC_PORT=${FABRIC_PORT:-3001}
15 | export INFO_ADDRESS=${INFO_ADDRESS:-any}
16 | export INFO_PORT=${INFO_PORT:-3003}
17 | export NAMESPACE=${NAMESPACE:-test}
18 | export REPL_FACTOR=${REPL_FACTOR:-2}
19 | export MEM_GB=${MEM_GB:-1}
20 | export DEFAULT_TTL=${DEFAULT_TTL:-30d}
21 | export STORAGE_GB=${STORAGE_GB:-4}
22 | export NSUP_PERIOD=${NSUP_PERIOD:-120}
23 | export USER=${USER:-jovyan}
24 | export MEMORY_SIZE=${MEMORY_SIZE:-128}
25 | export INDEX_STAGE_SIZE=${INDEX_STAGE_SIZE:-128}
26 | 
27 | # Fill out conffile with above values
28 | if [ -f /etc/aerospike/aerospike.template.conf ]; then
29 |         envsubst < /etc/aerospike/aerospike.template.conf > /etc/aerospike/aerospike.conf
30 | fi
31 | 
32 | NETLINK=${NETLINK:-eth0}
33 | 
34 | # we will wait a bit for the network link to be up.
35 | NETLINK_UP=0
36 | NETLINK_COUNT=0
37 | echo "link $NETLINK state $(cat /sys/class/net/${NETLINK}/operstate)"
38 | while [ $NETLINK_UP -eq 0 ] && [ $NETLINK_COUNT -lt 20 ]; do
39 |     if grep -q "up" /sys/class/net/${NETLINK}/operstate; then
40 |                 NETLINK_UP=1
41 |         else
42 |                 sleep 0.1
43 |                 let NETLINK_COUNT=NETLINK_COUNT+1
44 |         fi
45 | done
46 | echo "link $NETLINK state $(cat /sys/class/net/${NETLINK}/operstate) in ${NETLINK_COUNT}"
47 | 
48 | service aerospike restart
49 | 
50 | #####
51 | # Jupiter stuff
52 | #####
53 | 
54 | wrapper=""
55 | if [[ "${RESTARTABLE}" == "yes" ]]; then
56 |   wrapper="run-one-constantly"
57 | fi
58 | 
59 | if [[ ! -z "${JUPYTERHUB_API_TOKEN}" ]]; then
60 |   # launched by JupyterHub, use single-user entrypoint
61 |   exec /usr/local/bin/start-singleuser.sh "$@"
62 | elif [[ ! -z "${JUPYTER_ENABLE_LAB}" ]]; then
63 |   . /usr/local/bin/start.sh $wrapper jupyter lab "$@"
64 | else
65 |   . /usr/local/bin/start.sh $wrapper jupyter notebook "$@"
66 | fi
67 | 


--------------------------------------------------------------------------------
/features.conf:
--------------------------------------------------------------------------------
 1 | # generated 2024-05-24 19:44:42
 2 | 
 3 | feature-key-version              2
 4 | serial-number                    136523944
 5 | 
 6 | account-name                     Aerospike
 7 | account-ID                       Aerospike_Eval_ver.7.2
 8 | 
 9 | valid-until-version              7.2
10 | 
11 | asdb-change-notification         true
12 | asdb-cluster-nodes-limit         1
13 | asdb-compression                 true
14 | asdb-encryption-at-rest          true
15 | asdb-flash-index                 true
16 | asdb-ldap                        true
17 | asdb-pmem                        true
18 | asdb-rack-aware                  true
19 | asdb-secrets                     true
20 | asdb-strong-consistency          true
21 | asdb-vault                       true
22 | asdb-xdr                         true
23 | database-recovery                true
24 | elasticsearch-connector          true
25 | graph-service                    true
26 | mesg-jms-connector               true
27 | mesg-kafka-connector             true
28 | presto-connector                 true
29 | pulsar-connector                 true
30 | spark-connector                  true
31 | 
32 | ----- SIGNATURE ------------------------------------------------
33 | MEYCIQDykHgLkd9N7xIzOV80QHpMfmwtu6rtFV/E/9wUcdb5PgIhALKE7QQakgsp
34 | EyjZJtoGtyO1UnXLioru9cY6uIizUozmJA==
35 | ----- END OF SIGNATURE -----------------------------------------
36 | 


--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerospike-examples/interactive-notebooks/0e582d4305974f6cadd390e2086e8550f1b3ecf7/logo.png


--------------------------------------------------------------------------------
/notebooks/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerospike-examples/interactive-notebooks/0e582d4305974f6cadd390e2086e8550f1b3ecf7/notebooks/.DS_Store


--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
 1 | # Notebooks
 2 | 
 3 | This area is for [Jupyter notebooks](https://jupyter.org/) in .ipynb format. Python and Java notebooks are currently supported by the kernel. 
 4 | 
 5 | The list of notebooks below has links to browse each notebook in the viewer and to launch it in interactive mode in Binder. This repository also provides a Docker container that you can install (see the [instructions](../README.md)) to run the notebooks locally. 
 6 | 
 7 | 
 8 | All Notebooks | [View All](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks) | [Launch in Binder](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=)
 9 | :-------- | ---- | ------
10 | &nbsp; Aerospike Notebooks Readme/Tips | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/readme_tips.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=readme_tips.ipynb)
11 |  | | | | 
12 | **Java  Notebooks** | [View All](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java) | [Launch in Binder](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java)
13 |  | | | | 
14 | &nbsp; A Simple Put-Get Example | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/SimplePutGetExample.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/SimplePutGetExample.ipynb)
15 | &nbsp; Understanding Asynchronous Operations in Aerospike | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/async_ops.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/async_ops.ipynb)
16 | &nbsp; Aerospike Document API for JSON Documents | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/doc_api.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/doc_api.ipynb)
17 | &nbsp; Understanding Expressions in Aerospike | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/expressions.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/expressions.ipynb)
18 | &nbsp; Aerospike Hello World! | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/hello_world.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/hello_world.ipynb)
19 | &nbsp; Aerospike Java Client – Advanced Collection Data Types | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/java-advanced_collection_data_types.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/java-advanced_collection_data_types.ipynb)
20 | &nbsp; Aerospike Java Client – Introduction to Data Modeling | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/java-intro_to_data_modeling.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/java-intro_to_data_modeling.ipynb)
21 | &nbsp; Introduction to Transactions with Aerospike | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/java-intro_to_transactions.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/java-intro_to_transactions.ipynb)
22 | &nbsp; Modeling Using Lists | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/java-modeling_using_lists.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/java-modeling_using_lists.ipynb)
23 | &nbsp; Modeling Using Maps | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/java-modeling_using_maps.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/java-modeling_using_maps.ipynb)
24 | &nbsp; Aerospike Java Client – Reading and Updating Lists | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/java-working_with_lists.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/java-working_with_lists.ipynb)
25 | &nbsp; Aerospike Java Client – Reading and Updating Maps | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/java-working_with_maps.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/java-working_with_maps.ipynb)
26 | &nbsp; Look-Aside Cache for MongoDB | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/look_aside_cache_mongo.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/look_aside_cache_mongo.ipynb)
27 | &nbsp; Java Object Mapper | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/object_mapper.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/object_mapper.ipynb)
28 | &nbsp; Aerospike Query and UDF | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/query_udf.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/query_udf.ipynb)
29 | &nbsp; Implementing SQL Operations: Aggregates (Part 1) | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/sql_aggregates_1.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/sql_aggregates_1.ipynb)
30 | &nbsp; Implementing SQL Operations: Aggregates (Part 2) | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/sql_aggregates_2.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/sql_aggregates_2.ipynb)
31 | &nbsp; Implementing SQL Operations: SELECT | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/sql_select.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/sql_select.ipynb)
32 | &nbsp; Implementing SQL Operations: CREATE, UPDATE, DELETE | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/sql_update.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/sql_update.ipynb)
33 | &nbsp; Tweetaspike: A Simple Application | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/java/tweetaspike.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=java/tweetaspike.ipynb)
34 |  | | | | 
35 | **Python  Notebooks** | [View All](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/python) | [Launch in Binder](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=python)
36 |  | | | | 
37 | &nbsp; Aerospike Basic Operations | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/python/basic_operations.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=python/basic_operations.ipynb)
38 | &nbsp; Aerospike Hello World! | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/python/hello_world.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=python/hello_world.ipynb)
39 | &nbsp; Local Cache | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/python/local_cache.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=python/local_cache.ipynb)
40 | &nbsp; Look-Aside Cache for MongoDB | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/python/look_aside_cache.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=python/look_aside_cache.ipynb)
41 | &nbsp; Aerospike Queries in Python | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/python/query.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=python/query.ipynb)
42 | &nbsp; Aerospike Notebooks Readme/Tips | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/python/readme_tips.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=python/readme_tips.ipynb)
43 | &nbsp; A Simple Put-Get Example | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/python/simple_put_get_example.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=python/simple_put_get_example.ipynb)
44 | &nbsp; Implementing Read-Write Transactions with R-M-W Pattern | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/python/transactions_rmw_pattern.ipynb) | [Launch](https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath=python/transactions_rmw_pattern.ipynb)
45 |  | | | | 
46 | **Spark  Notebooks** | [View All](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/spark)
47 |  | | | | 
48 | &nbsp; Aerospike Connect for Spark Tutorial for Python | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/spark/AerospikeSparkPython.ipynb)
49 | &nbsp; Aerospike Spark Connector Tutorial for Scala | [View](https://github.com/aerospike-examples/interactive-notebooks/tree/main/notebooks/spark/AerospikeSparkScala.ipynb)
50 | 
51 | 


--------------------------------------------------------------------------------
/notebooks/java/README.md:
--------------------------------------------------------------------------------
1 | This area is for Java Jupyter notebooks in .ipynb format. 
2 | 
3 | Visit [Aerospike notebooks repo](https://github.com/aerospike-examples/interactive-notebooks) to run additional Aerospike notebooks. To run a different notebook, download the notebook from the repo to your local machine, and then in the notebook interface click on File->Open, and select Upload.
4 | 


--------------------------------------------------------------------------------
/notebooks/java/SimplePutGetExample.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# A Simple Put-Get Example\n",
  8 |     "\n",
  9 |     "A simple example of `put` and `get` calls in Aerospike.\n",
 10 |     "\n",
 11 |     "This notebook requires the Aerospike Database running locally with Java kernel and Aerospike Java Client. To create a Docker container that satisfies the requirements and holds a copy of Aerospike notebooks, visit the [Aerospike Notebooks Repo](https://github.com/aerospike-examples/interactive-notebooks)."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "markdown",
 16 |    "metadata": {
 17 |     "hide_input": false
 18 |    },
 19 |    "source": [
 20 |     "# Use magics to load Aerospike Client from POM"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": null,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "%%loadFromPOM\n",
 30 |     "<dependencies>\n",
 31 |     "  <dependency>\n",
 32 |     "    <groupId>com.aerospike</groupId>\n",
 33 |     "    <artifactId>aerospike-client</artifactId>\n",
 34 |     "    <version>5.0.0</version>\n",
 35 |     "  </dependency>\n",
 36 |     "</dependencies>"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "## Use client to write a record to Aerospike DB and read it back"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "import com.aerospike.client.AerospikeClient;\n",
 53 |     "import com.aerospike.client.policy.WritePolicy;\n",
 54 |     "import com.aerospike.client.Bin;\n",
 55 |     "import com.aerospike.client.Key;\n",
 56 |     "import com.aerospike.client.Record;\n",
 57 |     "import com.aerospike.client.Value;\n",
 58 |     "\n",
 59 |     "public class Test{\n",
 60 |     "    public static void putRecordGetRecord () {\n",
 61 |     "        AerospikeClient client = new AerospikeClient(\"localhost\", 3000);\n",
 62 |     "\n",
 63 |     "        Key key = new Key(\"test\", \"demo\", \"putgetkey\");\n",
 64 |     "        Bin bin1 = new Bin(\"bin1\", \"value1\");\n",
 65 |     "        Bin bin2 = new Bin(\"bin2\", \"value2\");\n",
 66 |     "\n",
 67 |     "        // Write a record\n",
 68 |     "        client.put(null, key, bin1, bin2);\n",
 69 |     "\n",
 70 |     "        // Read a record\n",
 71 |     "        Record record = client.get(null, key);\n",
 72 |     "        client.close();   \n",
 73 |     "        System.out.println(\"Record values are:\");\n",
 74 |     "        System.out.println(record);\n",
 75 |     "    }\n",
 76 |     "}\n",
 77 |     "\n",
 78 |     "Test.putRecordGetRecord()\n",
 79 |     "\n"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "markdown",
 84 |    "metadata": {},
 85 |    "source": [
 86 |     "## You can also skip the java boilerplate"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": null,
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "import com.aerospike.client.AerospikeClient;\n",
 96 |     "import com.aerospike.client.policy.WritePolicy;\n",
 97 |     "import com.aerospike.client.Bin;\n",
 98 |     "import com.aerospike.client.Key;\n",
 99 |     "import com.aerospike.client.Record;\n",
100 |     "import com.aerospike.client.Value;\n",
101 |     "\n",
102 |     "AerospikeClient client = new AerospikeClient(\"localhost\", 3000);\n",
103 |     "\n",
104 |     "Key key = new Key(\"test\", \"demo\", \"putgetkey\");\n",
105 |     "Bin bin1 = new Bin(\"bin1\", \"value1\");\n",
106 |     "Bin bin2 = new Bin(\"bin2\", \"value2\");\n",
107 |     "\n",
108 |     "// Write a record\n",
109 |     "client.put(null, key, bin1, bin2);\n",
110 |     "\n",
111 |     "// Read a record\n",
112 |     "Record record = client.get(null, key);\n",
113 |     "client.close();   \n",
114 |     "System.out.println(\"Record values are:\");\n",
115 |     "System.out.println(record);\n"
116 |    ]
117 |   }
118 |  ],
119 |  "metadata": {
120 |   "kernelspec": {
121 |    "display_name": "Java",
122 |    "language": "java",
123 |    "name": "java"
124 |   },
125 |   "language_info": {
126 |    "codemirror_mode": "java",
127 |    "file_extension": ".jshell",
128 |    "mimetype": "text/x-java-source",
129 |    "name": "Java",
130 |    "pygments_lexer": "java",
131 |    "version": "11.0.8+10-LTS"
132 |   }
133 |  },
134 |  "nbformat": 4,
135 |  "nbformat_minor": 2
136 | }
137 | 


--------------------------------------------------------------------------------
/notebooks/java/add_namespace.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/sh
2 | cp /etc/aerospike/aerospike.conf ~/notebooks/java/aerospike.conf
3 | sed -i '/paxos-single-replica-limit/d' ~/notebooks/java/aerospike.conf
4 | echo "namespace $1 {\n memory-size 1G \n}" >> ~/notebooks/java/aerospike.conf
5 | pkill asd; asd --config-file ~/notebooks/java/aerospike.conf
6 | 


--------------------------------------------------------------------------------
/notebooks/java/doc_api_example_store.json:
--------------------------------------------------------------------------------
 1 | {
 2 |  "store": {
 3 |     "book": [
 4 |       {
 5 |         "category": "reference",
 6 |         "author": "Nigel Rees",
 7 |         "title": "Sayings of the Century",
 8 |         "price": 8.95,
 9 |         "ref": [1,2]
10 |       },
11 |       {
12 |         "category": "fiction",
13 |         "author": "Evelyn Waugh",
14 |         "title": "Sword of Honour",
15 |         "price": 12.99,
16 |         "ref": [2,4,16]
17 |       },
18 |       {
19 |         "category": "fiction",
20 |         "author": "Herman Melville",
21 |         "title": "Moby Dick",
22 |         "isbn": "0-553-21311-3",
23 |         "price": 8.99,
24 |         "ref": [1,3,5]
25 |       },
26 |       {
27 |         "category": "fiction",
28 |         "author": "J. R. R. Tolkien",
29 |         "title": "The Lord of the Rings",
30 |         "isbn": "0-395-19395-8",
31 |         "price": 22.99,
32 |         "ref": [1,2,7]
33 |       }
34 |     ],
35 |     "bicycle": {
36 |       "color": "red",
37 |       "price": 19.95
38 |     }
39 |   },
40 |   "expensive": 10
41 | }


--------------------------------------------------------------------------------
/notebooks/java/doc_api_example_tommyleejones.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "forenames": [
 3 |     "Tommy",
 4 |     "Lee"
 5 |   ],
 6 |   "surname": "Jones",
 7 |   "date_of_birth": {
 8 |     "day": 15,
 9 |     "month": 9,
10 |     "year": 1946
11 |   },
12 |   "selected_filmography":{
13 |     "2012":["Lincoln","Men In Black 3"],
14 |     "2007":["No Country For Old Men"],
15 |     "2002":["Men in Black 2"],
16 |     "1997":["Men in Black","Volcano"],
17 |     "1994":["Natural Born Killers","Cobb"],
18 |     "1991":["JFK"],
19 |     "1980":["Coal Miner's Daughter","Barn Burning"]
20 |   },
21 |   "imdb_rank":{
22 |     "source":"https://www.imdb.com/list/ls050274118/",
23 |     "rank":51
24 |   },
25 |   "best_films_ranked": [
26 |     {
27 |       "source": "http://www.rottentomatoes.com",
28 |       "films": ["The Fugitive","No Country For Old Men","Men In Black","Coal Miner's Daughter","Lincoln"]
29 |     },
30 |     {
31 |       "source":"https://medium.com/the-greatest-films-according-to-me/10-greatest-films-of-tommy-lee-jones-97426103e3d6",
32 |       "films":["The Three Burials of Melquiades Estrada","The Homesman","No Country for Old Men","In the Valley of Elah","Coal Miner's Daughter"]
33 |     }
34 |   ]
35 | }


--------------------------------------------------------------------------------
/notebooks/java/hello_world.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "toc": true
  7 |    },
  8 |    "source": [
  9 |     "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
 10 |     "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Aerospike-Hello-World!\" data-toc-modified-id=\"Aerospike-Hello-World!-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Aerospike Hello World!</a></span><ul class=\"toc-item\"><li><span><a href=\"#Ensure-database-is-running\" data-toc-modified-id=\"Ensure-database-is-running-1.1\"><span class=\"toc-item-num\">1.1&nbsp;&nbsp;</span>Ensure database is running</a></span></li><li><span><a href=\"#Download-Aerospike-client-from-POM\" data-toc-modified-id=\"Download-Aerospike-client-from-POM-1.2\"><span class=\"toc-item-num\">1.2&nbsp;&nbsp;</span>Download Aerospike client from POM</a></span></li><li><span><a href=\"#Import-the-modules\" data-toc-modified-id=\"Import-the-modules-1.3\"><span class=\"toc-item-num\">1.3&nbsp;&nbsp;</span>Import the modules</a></span></li><li><span><a href=\"#Initialize-the-client\" data-toc-modified-id=\"Initialize-the-client-1.4\"><span class=\"toc-item-num\">1.4&nbsp;&nbsp;</span>Initialize the client</a></span></li><li><span><a href=\"#Understand-records-are-addressable-via-a-tuple-of-(namespace,-set,-userkey)\" data-toc-modified-id=\"Understand-records-are-addressable-via-a-tuple-of-(namespace,-set,-userkey)-1.5\"><span class=\"toc-item-num\">1.5&nbsp;&nbsp;</span>Understand records are addressable via a tuple of (namespace, set, userkey)</a></span></li><li><span><a href=\"#Write-a-record\" data-toc-modified-id=\"Write-a-record-1.6\"><span class=\"toc-item-num\">1.6&nbsp;&nbsp;</span>Write a record</a></span></li><li><span><a href=\"#Read-a-record\" data-toc-modified-id=\"Read-a-record-1.7\"><span class=\"toc-item-num\">1.7&nbsp;&nbsp;</span>Read a record</a></span></li><li><span><a href=\"#Display-result\" data-toc-modified-id=\"Display-result-1.8\"><span class=\"toc-item-num\">1.8&nbsp;&nbsp;</span>Display result</a></span></li><li><span><a href=\"#Clean-up\" data-toc-modified-id=\"Clean-up-1.9\"><span class=\"toc-item-num\">1.9&nbsp;&nbsp;</span>Clean up</a></span></li><li><span><a href=\"#All-code-in-Java-boilerplate\" data-toc-modified-id=\"All-code-in-Java-boilerplate-1.10\"><span class=\"toc-item-num\">1.10&nbsp;&nbsp;</span>All code in Java boilerplate</a></span></li><li><span><a href=\"#Next-steps\" data-toc-modified-id=\"Next-steps-1.11\"><span class=\"toc-item-num\">1.11&nbsp;&nbsp;</span>Next steps</a></span></li></ul></li></ul></div>"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "metadata": {},
 16 |    "source": [
 17 |     "# Aerospike Hello World!\n",
 18 |     "\n",
 19 |     "Hello World! in Java with Aerospike.\n",
 20 |     "This notebook requires Aerospike datbase running locally and that Java kernel has been installed. Visit [Aerospike notebooks repo](https://github.com/aerospike-examples/interactive-notebooks) for additional details and the docker container."
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "metadata": {},
 26 |    "source": [
 27 |     "## Ensure database is running\n",
 28 |     "This notebook requires that Aerospike datbase is running."
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 1,
 34 |    "metadata": {},
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "import io.github.spencerpark.ijava.IJava;\n",
 38 |     "import io.github.spencerpark.jupyter.kernel.magic.common.Shell;\n",
 39 |     "IJava.getKernelInstance().getMagics().registerMagics(Shell.class);\n",
 40 |     "%sh asd"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "metadata": {
 46 |     "hide_input": false
 47 |    },
 48 |    "source": [
 49 |     "## Download Aerospike client from POM"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": 2,
 55 |    "metadata": {},
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "%%loadFromPOM\n",
 59 |     "<dependencies>\n",
 60 |     "  <dependency>\n",
 61 |     "    <groupId>com.aerospike</groupId>\n",
 62 |     "    <artifactId>aerospike-client</artifactId>\n",
 63 |     "    <version>5.0.0</version>\n",
 64 |     "  </dependency>\n",
 65 |     "</dependencies>"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "markdown",
 70 |    "metadata": {},
 71 |    "source": [
 72 |     "## Import the modules\n",
 73 |     "\n",
 74 |     "Import the client library and other modules."
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 3,
 80 |    "metadata": {},
 81 |    "outputs": [
 82 |     {
 83 |      "name": "stdout",
 84 |      "output_type": "stream",
 85 |      "text": [
 86 |       "Client modules imported.\n"
 87 |      ]
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "import com.aerospike.client.AerospikeClient;\n",
 92 |     "import com.aerospike.client.policy.WritePolicy;\n",
 93 |     "import com.aerospike.client.Bin;\n",
 94 |     "import com.aerospike.client.Key;\n",
 95 |     "import com.aerospike.client.Record;\n",
 96 |     "import com.aerospike.client.Value;\n",
 97 |     "System.out.println(\"Client modules imported.\");"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "metadata": {},
103 |    "source": [
104 |     "## Initialize the client\n",
105 |     "\n",
106 |     "Initialize the client and connect to the cluster. The configuration is for Aerospike database running on port 3000 of localhost which is the default. Modify config if your environment is different (Aerospike database running on a different host or different port).\n"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 4,
112 |    "metadata": {},
113 |    "outputs": [
114 |     {
115 |      "name": "stdout",
116 |      "output_type": "stream",
117 |      "text": [
118 |       "Initialized the client and connected to the cluster.\n"
119 |      ]
120 |     }
121 |    ],
122 |    "source": [
123 |     "AerospikeClient client = new AerospikeClient(\"localhost\", 3000);\n",
124 |     "System.out.println(\"Initialized the client and connected to the cluster.\");"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "markdown",
129 |    "metadata": {},
130 |    "source": [
131 |     "## Understand records are addressable via a tuple of (namespace, set, userkey) \n",
132 |     "\n",
133 |     "The three components namespace, set, and userkey (with set being optional) form the Primary Key (PK) or simply key, of the record. The key serves as a handle to the record, and using it, a record can be read or written. By default userkey is not stored on server, only a hash (a byte array, the fourth component in the output below) which is the internal representation of the key is stored. For a detailed description of the data model see the [Data Model overview](https://www.aerospike.com/docs/architecture/data-model.html)"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": 5,
139 |    "metadata": {},
140 |    "outputs": [
141 |     {
142 |      "name": "stdout",
143 |      "output_type": "stream",
144 |      "text": [
145 |       "Working with record key:\n",
146 |       "test:demo:foo:f57ec18335f7100c0458f8a644bcbc766d93471e\n"
147 |      ]
148 |     }
149 |    ],
150 |    "source": [
151 |     "Key key = new Key(\"test\", \"demo\", \"foo\");\n",
152 |     "System.out.println(\"Working with record key:\");\n",
153 |     "System.out.println(key);"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "markdown",
158 |    "metadata": {},
159 |    "source": [
160 |     "## Write a record\n",
161 |     "\n",
162 |     "Aerospike is schema-less and records may be written without any other setup. Here the bins or fields: name, age and greeting, are being written to a record with the key as defined above. "
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": 6,
168 |    "metadata": {},
169 |    "outputs": [
170 |     {
171 |      "name": "stdout",
172 |      "output_type": "stream",
173 |      "text": [
174 |       "Successfully written the record.\n"
175 |      ]
176 |     }
177 |    ],
178 |    "source": [
179 |     "Bin bin1 = new Bin(\"name\", \"John Doe\");\n",
180 |     "Bin bin2 = new Bin(\"age\", 32);\n",
181 |     "Bin bin3 = new Bin(\"greeting\", \"Hello World!\");\n",
182 |     "\n",
183 |     "// Write a record\n",
184 |     "client.put(null, key, bin1, bin2, bin3);\n",
185 |     "System.out.println(\"Successfully written the record.\");"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "markdown",
190 |    "metadata": {},
191 |    "source": [
192 |     "## Read a record\n",
193 |     "\n",
194 |     "The record can be retrieved using the same key."
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 7,
200 |    "metadata": {},
201 |    "outputs": [
202 |     {
203 |      "name": "stdout",
204 |      "output_type": "stream",
205 |      "text": [
206 |       "Read back the record.\n"
207 |      ]
208 |     }
209 |    ],
210 |    "source": [
211 |     "// Read the record\n",
212 |     "Record record = client.get(null, key);\n",
213 |     "System.out.println(\"Read back the record.\");"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "markdown",
218 |    "metadata": {},
219 |    "source": [
220 |     "## Display result\n",
221 |     "\n",
222 |     "Print the record that was just retrieved. We are printing: \n",
223 |     "\n",
224 |     "1. The metadata with the record's generation (or version) and expiration time. \n",
225 |     "1. The actual value of the record's bins. "
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": 8,
231 |    "metadata": {},
232 |    "outputs": [
233 |     {
234 |      "name": "stdout",
235 |      "output_type": "stream",
236 |      "text": [
237 |       "Record values are:\n",
238 |       "(gen:3),(exp:351567215),(bins:(name:John Doe),(age:32),(gpa:4.3),(greeting:Hello World!))\n"
239 |      ]
240 |     }
241 |    ],
242 |    "source": [
243 |     "System.out.println(\"Record values are:\");\n",
244 |     "System.out.println(record);"
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "markdown",
249 |    "metadata": {},
250 |    "source": [
251 |     "## Clean up\n",
252 |     "Finally close the client connection."
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "code",
257 |    "execution_count": 9,
258 |    "metadata": {},
259 |    "outputs": [
260 |     {
261 |      "name": "stdout",
262 |      "output_type": "stream",
263 |      "text": [
264 |       "Connection closed.\n"
265 |      ]
266 |     }
267 |    ],
268 |    "source": [
269 |     "client.close();   \n",
270 |     "System.out.println(\"Connection closed.\");"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "markdown",
275 |    "metadata": {},
276 |    "source": [
277 |     "## All code in Java boilerplate\n",
278 |     "All the above code can also be written in the Java boilerplate format and run in a cell."
279 |    ]
280 |   },
281 |   {
282 |    "cell_type": "code",
283 |    "execution_count": 10,
284 |    "metadata": {},
285 |    "outputs": [
286 |     {
287 |      "name": "stdout",
288 |      "output_type": "stream",
289 |      "text": [
290 |       "Record values are:\n",
291 |       "(gen:1),(exp:351567216),(bins:(bin1:value1),(bin2:value2))\n"
292 |      ]
293 |     }
294 |    ],
295 |    "source": [
296 |     "import com.aerospike.client.AerospikeClient;\n",
297 |     "import com.aerospike.client.policy.WritePolicy;\n",
298 |     "import com.aerospike.client.Bin;\n",
299 |     "import com.aerospike.client.Key;\n",
300 |     "import com.aerospike.client.Record;\n",
301 |     "import com.aerospike.client.Value;\n",
302 |     "\n",
303 |     "public class Test{\n",
304 |     "    public static void putRecordGetRecord () {\n",
305 |     "        AerospikeClient client = new AerospikeClient(\"localhost\", 3000);\n",
306 |     "\n",
307 |     "        Key key = new Key(\"test\", \"demo\", \"putgetkey\");\n",
308 |     "        Bin bin1 = new Bin(\"bin1\", \"value1\");\n",
309 |     "        Bin bin2 = new Bin(\"bin2\", \"value2\");\n",
310 |     "\n",
311 |     "        // Write a record\n",
312 |     "        client.put(null, key, bin1, bin2);\n",
313 |     "\n",
314 |     "        // Read a record\n",
315 |     "        Record record = client.get(null, key);\n",
316 |     "        client.close();   \n",
317 |     "        System.out.println(\"Record values are:\");\n",
318 |     "        System.out.println(record);\n",
319 |     "    }\n",
320 |     "}\n",
321 |     "\n",
322 |     "Test.putRecordGetRecord()"
323 |    ]
324 |   },
325 |   {
326 |    "cell_type": "markdown",
327 |    "metadata": {},
328 |    "source": [
329 |     "## Next steps\n",
330 |     "\n",
331 |     "Visit [Aerospike notebooks repo](https://github.com/aerospike-examples/interactive-notebooks) to run additional Aerospike notebooks. To run a different notebook, download the notebook from the repo to your local machine, and then click on File->Open, and select Upload."
332 |    ]
333 |   }
334 |  ],
335 |  "metadata": {
336 |   "kernelspec": {
337 |    "display_name": "Java",
338 |    "language": "java",
339 |    "name": "java"
340 |   },
341 |   "language_info": {
342 |    "codemirror_mode": "java",
343 |    "file_extension": ".jshell",
344 |    "mimetype": "text/x-java-source",
345 |    "name": "Java",
346 |    "pygments_lexer": "java",
347 |    "version": "11.0.8+10-LTS"
348 |   },
349 |   "toc": {
350 |    "base_numbering": 1,
351 |    "nav_menu": {},
352 |    "number_sections": true,
353 |    "sideBar": true,
354 |    "skip_h1_title": false,
355 |    "title_cell": "Table of Contents",
356 |    "title_sidebar": "Contents",
357 |    "toc_cell": true,
358 |    "toc_position": {},
359 |    "toc_section_display": true,
360 |    "toc_window_display": false
361 |   }
362 |  },
363 |  "nbformat": 4,
364 |  "nbformat_minor": 2
365 | }
366 | 


--------------------------------------------------------------------------------
/notebooks/java/look_aside_cache_mongo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Look-Aside Cache for MongoDB\n",
  8 |     "### This is a sample notebook for using Aerospike as a read/look-aside cache\n",
  9 |     "\n",
 10 |     "- This notebook demonstrates the use of Aerospike as a cache using Mongo as another primary datastore\n",
 11 |     "- It is required to run Mongo as a separate container using `docker run --name some-mongo -d mongo:latest`\n",
 12 |     "\n",
 13 |     "To test: Run the `cache.getData(\"id\", \"data\");` method once - to fetch from Mongo and populate Aerospike\n",
 14 |     "\n",
 15 |     "Another run will fetch the data from Aerospike cache\n"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "markdown",
 20 |    "metadata": {},
 21 |    "source": [
 22 |     "#### Ensure that Aerospike Database is running"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 2,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "import io.github.spencerpark.ijava.IJava;\n",
 32 |     "import io.github.spencerpark.jupyter.kernel.magic.common.Shell;\n",
 33 |     "IJava.getKernelInstance().getMagics().registerMagics(Shell.class);\n",
 34 |     "%sh asd"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "metadata": {},
 40 |    "source": [
 41 |     "#### Load Aerospike and Mongo dependencies from POM"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 6,
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "%%loadFromPOM\n",
 51 |     "<dependencies>\n",
 52 |     "  <dependency>\n",
 53 |     "    <groupId>com.aerospike</groupId>\n",
 54 |     "    <artifactId>aerospike-client</artifactId>\n",
 55 |     "    <version>5.0.0</version>\n",
 56 |     "  </dependency>\n",
 57 |     "  <dependency>\n",
 58 |     "    <groupId>org.mongodb</groupId>\n",
 59 |     "    <artifactId>mongo-java-driver</artifactId>\n",
 60 |     "    <version>3.12.7</version>\n",
 61 |     "  </dependency>\n",
 62 |     "</dependencies>"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 7,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "import com.aerospike.client.AerospikeClient;\n",
 72 |     "import com.aerospike.client.policy.WritePolicy;\n",
 73 |     "import com.aerospike.client.Bin;\n",
 74 |     "import com.aerospike.client.Key;\n",
 75 |     "import com.aerospike.client.Record;\n",
 76 |     "import com.aerospike.client.Value;\n",
 77 |     "\n",
 78 |     "import com.mongodb.client.MongoDatabase;\n",
 79 |     "import com.mongodb.client.MongoCollection;\n",
 80 |     "import com.mongodb.MongoClient; \n",
 81 |     "import com.mongodb.MongoCredential; \n",
 82 |     "import org.bson.Document;\n",
 83 |     "import com.mongodb.client.model.Filters;\n",
 84 |     "import java.util.Set;"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "markdown",
 89 |    "metadata": {},
 90 |    "source": [
 91 |     "## Configure the clients\n",
 92 |     "\n",
 93 |     "The configuration is for \n",
 94 |     " - Aerospike database running on port 3000 of localhost (IP 127.0.0.1) which is the default. \n",
 95 |     " - Mongo running in a separate container whose IP can be found by `docker inspect <containerid> | grep -i ipaddress`\n",
 96 |     "\n",
 97 |     "\n",
 98 |     "Modify config if your environment is different (Aerospike database running on a different host or different port)."
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": 8,
104 |    "metadata": {},
105 |    "outputs": [],
106 |    "source": [
107 |     "public class Cache{\n",
108 |     "        //Database Constants\n",
109 |     "        public static final String AEROSPIKE_HOST = \"0.0.0.0\";\n",
110 |     "        public static final String MONGO_HOST = \"172.17.0.3\";\n",
111 |     "        public static final int AEROSPIKE_PORT = 3000;\n",
112 |     "        public static final int MONGO_PORT = 27017;\n",
113 |     "        \n",
114 |     "        public static final String AEROSPIKE_NAMESPACE = \"test\";\n",
115 |     "        public static final String AEROSPIKE_SET = \"demo\";\n",
116 |     "        public static final String MONGO_USER = \"sampleUser\";\n",
117 |     "        public static final String MONGO_PASSWORD = \"password\";\n",
118 |     "        public static final String MONGO_DB = \"myDb\";\n",
119 |     "        public static final String MONGO_COLLECTION = \"sampleCollection\";\n",
120 |     "        \n",
121 |     "        private AerospikeClient client;\n",
122 |     "        private MongoClient mongo;\n",
123 |     "        private MongoCredential credential;\n",
124 |     "        private MongoDatabase database;\n",
125 |     "        \n",
126 |     "        public Cache() {\n",
127 |     "            client = new AerospikeClient(AEROSPIKE_HOST, AEROSPIKE_PORT);\n",
128 |     "            mongo = new MongoClient(MONGO_HOST , MONGO_PORT);\n",
129 |     "            credential = MongoCredential.createCredential(MONGO_USER, MONGO_DB, \n",
130 |     "                                 MONGO_PASSWORD.toCharArray());\n",
131 |     "            database = mongo.getDatabase(MONGO_DB);\n",
132 |     "        }\n",
133 |     "        \n",
134 |     "        private boolean collectionExists(final String collectionName) {\n",
135 |     "            // Check and return if the collection exists in Mongo\n",
136 |     "            return database.listCollectionNames()\n",
137 |     "                .into(new ArrayList<String>()).contains(collectionName);\n",
138 |     "        }\n",
139 |     "\n",
140 |     "        public void populateMongoData(String id, String data) {\n",
141 |     "            // Populate Mongodb first\n",
142 |     "            Document document = new Document(id, data);\n",
143 |     "            if (! collectionExists(MONGO_COLLECTION)) {\n",
144 |     "                database.createCollection(MONGO_COLLECTION);\n",
145 |     "            } else {\n",
146 |     "                MongoCollection<Document> collection = database.getCollection(MONGO_COLLECTION);\n",
147 |     "                collection.insertOne(document);\n",
148 |     "            }\n",
149 |     "            Key key = new Key(AEROSPIKE_NAMESPACE, AEROSPIKE_SET, id);\n",
150 |     "            client.delete(null, key);\n",
151 |     "        }\n",
152 |     "        \n",
153 |     "        public String getData(String id, String data) {\n",
154 |     "            // This is just an example code that exhibits a cache fetch for a String id with String data\n",
155 |     "            \n",
156 |     "            Key key = new Key(AEROSPIKE_NAMESPACE, AEROSPIKE_SET, id);\n",
157 |     "            String BIN_NAME = \"value\";\n",
158 |     "            Record record = client.get(null,key);\n",
159 |     "            if ( record == null ) {\n",
160 |     "                System.out.println(\"First Fetch Record does not exist in Aerospike cache\");\n",
161 |     "                MongoCollection<Document> collection = database.getCollection(MONGO_COLLECTION);\n",
162 |     "                Document document = collection.find(Filters.eq(id, data)).first();\n",
163 |     "                //System.out.println(\"Document \" + document.get(id));\n",
164 |     "                String json = document.get(id).toString();\n",
165 |     "                client.put(null, key, new Bin(BIN_NAME,json));\n",
166 |     "                return client.get(null, key).toString();  \n",
167 |     "            \n",
168 |     "            } else {\n",
169 |     "                System.out.println(\"Data retrieved from Aerospike cache\");\n",
170 |     "                return record.toString();\n",
171 |     "                \n",
172 |     "            }\n",
173 |     "        }\n",
174 |     "}"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": 9,
180 |    "metadata": {},
181 |    "outputs": [],
182 |    "source": [
183 |     "Cache cache = new Cache();\n",
184 |     "cache.populateMongoData(\"id\", \"data\");"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 10,
190 |    "metadata": {},
191 |    "outputs": [
192 |     {
193 |      "name": "stdout",
194 |      "output_type": "stream",
195 |      "text": [
196 |       "First Fetch Record does not exist in Aerospike cache\n"
197 |      ]
198 |     },
199 |     {
200 |      "data": {
201 |       "text/plain": [
202 |        "(gen:1),(exp:350708590),(bins:(value:data))"
203 |       ]
204 |      },
205 |      "execution_count": 10,
206 |      "metadata": {},
207 |      "output_type": "execute_result"
208 |     }
209 |    ],
210 |    "source": [
211 |     "cache.getData(\"id\", \"data\");\n"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "code",
216 |    "execution_count": null,
217 |    "metadata": {},
218 |    "outputs": [],
219 |    "source": []
220 |   }
221 |  ],
222 |  "metadata": {
223 |   "kernelspec": {
224 |    "display_name": "Java",
225 |    "language": "java",
226 |    "name": "java"
227 |   },
228 |   "language_info": {
229 |    "codemirror_mode": "java",
230 |    "file_extension": ".jshell",
231 |    "mimetype": "text/x-java-source",
232 |    "name": "Java",
233 |    "pygments_lexer": "java",
234 |    "version": "11.0.8+10-LTS"
235 |   },
236 |   "toc": {
237 |    "base_numbering": 1,
238 |    "nav_menu": {},
239 |    "number_sections": true,
240 |    "sideBar": true,
241 |    "skip_h1_title": false,
242 |    "title_cell": "Table of Contents",
243 |    "title_sidebar": "Contents",
244 |    "toc_cell": false,
245 |    "toc_position": {},
246 |    "toc_section_display": true,
247 |    "toc_window_display": false
248 |   }
249 |  },
250 |  "nbformat": 4,
251 |  "nbformat_minor": 4
252 | }
253 | 


--------------------------------------------------------------------------------
/notebooks/presto/AerospikePrestoDemo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## Aerospike Connect for Presto Tutorial for Python\n",
  8 |     "## Tested with Python 3.7, Java 11, Presto 343, Presto Connector (Beta), and PyHive 0.6.3"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": 1,
 14 |    "metadata": {},
 15 |    "outputs": [],
 16 |    "source": [
 17 |     "from pyhive import presto\n",
 18 |     "presto_conn = presto.connect(\n",
 19 |     "    host='localhost',\n",
 20 |     "    port=8080,\n",
 21 |     "    catalog='aerospike',\n",
 22 |     "    schema='test'\n",
 23 |     ")\n",
 24 |     "presto_cursor=presto_conn.cursor()"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 2,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "presto_cursor.execute('SELECT * FROM test.write_set LIMIT 3')"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 3,
 39 |    "metadata": {},
 40 |    "outputs": [
 41 |     {
 42 |      "name": "stdout",
 43 |      "output_type": "stream",
 44 |      "text": [
 45 |       "(234, 'Individual: 234', 33.568431516802406, 66363)\n",
 46 |       "(13, 'Individual: 013', 25.752921531369164, 48610)\n",
 47 |       "(79, 'Individual: 079', 25.16109674428971, 60357)\n"
 48 |      ]
 49 |     }
 50 |    ],
 51 |    "source": [
 52 |     "records = presto_cursor.fetchall()\n",
 53 |     "for row in records:\n",
 54 |     "    print(row)"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": null,
 60 |    "metadata": {},
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "presto_cursor.execute('select name from test.write_set where age>40 and age<45')"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": null,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "records1 = presto_cursor.fetchall()\n",
 73 |     "for row in records1:\n",
 74 |     "    print(row)"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "metadata": {},
 80 |    "source": [
 81 |     "### Refer to https://www.aerospike.com/docs/connect/access/presto/examples.html for more examples."
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": null,
 87 |    "metadata": {},
 88 |    "outputs": [],
 89 |    "source": []
 90 |   }
 91 |  ],
 92 |  "metadata": {
 93 |   "kernelspec": {
 94 |    "display_name": "Python 3",
 95 |    "language": "python",
 96 |    "name": "python3"
 97 |   },
 98 |   "language_info": {
 99 |    "codemirror_mode": {
100 |     "name": "ipython",
101 |     "version": 3
102 |    },
103 |    "file_extension": ".py",
104 |    "mimetype": "text/x-python",
105 |    "name": "python",
106 |    "nbconvert_exporter": "python",
107 |    "pygments_lexer": "ipython3",
108 |    "version": "3.7.5"
109 |   }
110 |  },
111 |  "nbformat": 4,
112 |  "nbformat_minor": 4
113 | }
114 | 


--------------------------------------------------------------------------------
/notebooks/python/README.md:
--------------------------------------------------------------------------------
1 | This area is for Python Jupyter notebooks in .ipynb format. 
2 | 
3 | Visit [Aerospike notebooks repo](https://github.com/aerospike-examples/interactive-notebooks) to run additional Aerospike notebooks. To run a different notebook, download the notebook from the repo to your local machine, and then in the notebook interface click on File->Open, and select Upload.
4 | 


--------------------------------------------------------------------------------
/notebooks/python/hello_world.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "toc": true
  7 |    },
  8 |    "source": [
  9 |     "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
 10 |     "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Aerospike-Hello-World!\" data-toc-modified-id=\"Aerospike-Hello-World!-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Aerospike Hello World!</a></span><ul class=\"toc-item\"><li><span><a href=\"#Ensure-database-is-running\" data-toc-modified-id=\"Ensure-database-is-running-1.1\"><span class=\"toc-item-num\">1.1&nbsp;&nbsp;</span>Ensure database is running</a></span></li><li><span><a href=\"#Import-the-module\" data-toc-modified-id=\"Import-the-module-1.2\"><span class=\"toc-item-num\">1.2&nbsp;&nbsp;</span>Import the module</a></span></li><li><span><a href=\"#Configure-the-client\" data-toc-modified-id=\"Configure-the-client-1.3\"><span class=\"toc-item-num\">1.3&nbsp;&nbsp;</span>Configure the client</a></span></li><li><span><a href=\"#Create-client-object-and-connect-to-the-cluster\" data-toc-modified-id=\"Create-client-object-and-connect-to-the-cluster-1.4\"><span class=\"toc-item-num\">1.4&nbsp;&nbsp;</span>Create client object and connect to the cluster</a></span></li><li><span><a href=\"#Understand-records-are-addressable-via-a-tuple-of-(namespace,-set,-userkey)\" data-toc-modified-id=\"Understand-records-are-addressable-via-a-tuple-of-(namespace,-set,-userkey)-1.5\"><span class=\"toc-item-num\">1.5&nbsp;&nbsp;</span>Understand records are addressable via a tuple of (namespace, set, userkey)</a></span></li><li><span><a href=\"#Write-a-record\" data-toc-modified-id=\"Write-a-record-1.6\"><span class=\"toc-item-num\">1.6&nbsp;&nbsp;</span>Write a record</a></span></li><li><span><a href=\"#Read-a-record\" data-toc-modified-id=\"Read-a-record-1.7\"><span class=\"toc-item-num\">1.7&nbsp;&nbsp;</span>Read a record</a></span></li><li><span><a href=\"#Display-result\" data-toc-modified-id=\"Display-result-1.8\"><span class=\"toc-item-num\">1.8&nbsp;&nbsp;</span>Display result</a></span></li><li><span><a href=\"#Clean-up\" data-toc-modified-id=\"Clean-up-1.9\"><span class=\"toc-item-num\">1.9&nbsp;&nbsp;</span>Clean up</a></span></li><li><span><a href=\"#Next-steps\" data-toc-modified-id=\"Next-steps-1.10\"><span class=\"toc-item-num\">1.10&nbsp;&nbsp;</span>Next steps</a></span></li></ul></li></ul></div>"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "metadata": {},
 16 |    "source": [
 17 |     "# Aerospike Hello World!\n",
 18 |     "\n",
 19 |     "Hello, World! in Python with Aerospike.\n",
 20 |     "<br>\n",
 21 |     "This notebook requires Aerospike datbase running on localhost and that python and the Aerospike python client have been installed (`pip install aerospike`). Visit [Aerospike notebooks repo](https://github.com/aerospike-examples/interactive-notebooks) for additional details and the docker container."
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "## Ensure database is running\n",
 29 |     "This notebook requires that Aerospike datbase is running."
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": 1,
 35 |    "metadata": {},
 36 |    "outputs": [
 37 |     {
 38 |      "name": "stdout",
 39 |      "output_type": "stream",
 40 |      "text": [
 41 |       "Aerospike database is running!\r\n"
 42 |      ]
 43 |     }
 44 |    ],
 45 |    "source": [
 46 |     "!asd >& /dev/null\n",
 47 |     "!pgrep -x asd >/dev/null && echo \"Aerospike database is running!\" || echo \"**Aerospike database is not running!**\""
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "markdown",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "## Import the module\n",
 55 |     "\n",
 56 |     "Import the client library."
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 2,
 62 |    "metadata": {},
 63 |    "outputs": [
 64 |     {
 65 |      "name": "stdout",
 66 |      "output_type": "stream",
 67 |      "text": [
 68 |       "Client module imported\n"
 69 |      ]
 70 |     }
 71 |    ],
 72 |    "source": [
 73 |     "import aerospike\n",
 74 |     "print(\"Client module imported\")"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "metadata": {},
 80 |    "source": [
 81 |     "## Configure the client\n",
 82 |     "\n",
 83 |     "The configuration is for Aerospike database running on port 3000 of localhost (IP 127.0.0.1) which is the default. Modify config if your environment is different (Aerospike database running on a different host or different port)."
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 3,
 89 |    "metadata": {},
 90 |    "outputs": [
 91 |     {
 92 |      "name": "stdout",
 93 |      "output_type": "stream",
 94 |      "text": [
 95 |       "Configuring with seed host: [('127.0.0.1', 3000)]\n"
 96 |      ]
 97 |     }
 98 |    ],
 99 |    "source": [
100 |     "config = {\n",
101 |     "  'hosts': [ ('127.0.0.1', 3000) ]\n",
102 |     "}\n",
103 |     "print(\"Configuring with seed host:\", config['hosts'])"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "metadata": {},
109 |    "source": [
110 |     "## Create client object and connect to the cluster"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 4,
116 |    "metadata": {},
117 |    "outputs": [
118 |     {
119 |      "name": "stdout",
120 |      "output_type": "stream",
121 |      "text": [
122 |       "Connected to the cluster\n"
123 |      ]
124 |     }
125 |    ],
126 |    "source": [
127 |     "try:\n",
128 |     "  client = aerospike.client(config).connect()\n",
129 |     "except:\n",
130 |     "  import sys\n",
131 |     "  print(\"Failed to connect to the cluster with\", config['hosts'])\n",
132 |     "  sys.exit(1)\n",
133 |     "print(\"Connected to the cluster\")"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "markdown",
138 |    "metadata": {},
139 |    "source": [
140 |     "## Understand records are addressable via a tuple of (namespace, set, userkey) \n",
141 |     "\n",
142 |     "The three components namespace, set, and userkey (with set being optional) form the Primary Key (PK) or simply key, of the record. The key serves as a handle to the record, and using it, a record can be read or written. For a detailed description of the data model see the [Data Model overview](https://www.aerospike.com/docs/architecture/data-model.html)"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 5,
148 |    "metadata": {},
149 |    "outputs": [
150 |     {
151 |      "name": "stdout",
152 |      "output_type": "stream",
153 |      "text": [
154 |       "Working with record key  ('test', 'demo', 'foo')\n"
155 |      ]
156 |     }
157 |    ],
158 |    "source": [
159 |     "key = ('test', 'demo', 'foo')\n",
160 |     "print('Working with record key ', key)"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "markdown",
165 |    "metadata": {},
166 |    "source": [
167 |     "## Write a record\n",
168 |     "\n",
169 |     "Aerospike is schema-less and records may be written without any other setup. Here the bins or fields: name, age and greeting, are being written to a record with the key as defined above. "
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": 6,
175 |    "metadata": {},
176 |    "outputs": [
177 |     {
178 |      "name": "stdout",
179 |      "output_type": "stream",
180 |      "text": [
181 |       "Successfully written the record\n"
182 |      ]
183 |     }
184 |    ],
185 |    "source": [
186 |     "try:\n",
187 |     "  # Write a record\n",
188 |     "  client.put(key, {\n",
189 |     "    'name': 'John Doe',\n",
190 |     "    'age': 32,\n",
191 |     "    'greeting': 'Hello, World!'\n",
192 |     "  })\n",
193 |     "except Exception as e:\n",
194 |     "  import sys\n",
195 |     "  print(\"error: {0}\".format(e), file=sys.stderr)\n",
196 |     "  sys.exit(1)\n",
197 |     "print('Successfully written the record')"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "markdown",
202 |    "metadata": {},
203 |    "source": [
204 |     "## Read a record\n",
205 |     "\n",
206 |     "The record may be retrieved using the same key."
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": 7,
212 |    "metadata": {},
213 |    "outputs": [
214 |     {
215 |      "name": "stdout",
216 |      "output_type": "stream",
217 |      "text": [
218 |       "Read back the record\n"
219 |      ]
220 |     }
221 |    ],
222 |    "source": [
223 |     "(key, metadata, record) = client.get(key)\n",
224 |     "print('Read back the record')"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "markdown",
229 |    "metadata": {},
230 |    "source": [
231 |     "## Display result\n",
232 |     "\n",
233 |     "Print the record that was just retrieved. We are also printing: \n",
234 |     "\n",
235 |     "1. The components of the key which are: namespace, set, and userkey. By default userkey is not stored on server, only a hash (appearing as bytearray in the output below) which is the internal representation of the key is stored.\n",
236 |     "1. The metadata with the time-to-live and the record's generation or version. \n",
237 |     "1. The actual value of the record's bins. "
238 |    ]
239 |   },
240 |   {
241 |    "cell_type": "code",
242 |    "execution_count": 8,
243 |    "metadata": {},
244 |    "outputs": [
245 |     {
246 |      "name": "stdout",
247 |      "output_type": "stream",
248 |      "text": [
249 |       "Record contents are {'name': 'John Doe', 'age': 32, 'gpa': 4.3, 'greeting': 'Hello, World!'}\n",
250 |       "Key's components are ('test', 'demo', None, bytearray(b'\\xf5~\\xc1\\x835\\xf7\\x10\\x0c\\x04X\\xf8\\xa6D\\xbc\\xbcvm\\x93G\\x1e'))\n",
251 |       "Metadata is {'ttl': 2592000, 'gen': 2}\n"
252 |      ]
253 |     }
254 |    ],
255 |    "source": [
256 |     "print(\"Record contents are\", record)\n",
257 |     "print(\"Key's components are\", key)\n",
258 |     "print(\"Metadata is\", metadata)"
259 |    ]
260 |   },
261 |   {
262 |    "cell_type": "markdown",
263 |    "metadata": {},
264 |    "source": [
265 |     "## Clean up\n",
266 |     "Finally close the client we created at the beginning."
267 |    ]
268 |   },
269 |   {
270 |    "cell_type": "code",
271 |    "execution_count": 9,
272 |    "metadata": {},
273 |    "outputs": [
274 |     {
275 |      "name": "stdout",
276 |      "output_type": "stream",
277 |      "text": [
278 |       "Connection closed.\n"
279 |      ]
280 |     }
281 |    ],
282 |    "source": [
283 |     "# Close the connection to the Aerospike cluster\n",
284 |     "client.close()\n",
285 |     "print('Connection closed.')"
286 |    ]
287 |   },
288 |   {
289 |    "cell_type": "markdown",
290 |    "metadata": {},
291 |    "source": [
292 |     "## Next steps\n",
293 |     "\n",
294 |     "Visit [Aerospike notebooks repo](https://github.com/aerospike-examples/interactive-notebooks) to run additional Aerospike notebooks. To run a different notebook, download the notebook from the repo to your local machine, and then click on File->Open, and select Upload.\n"
295 |    ]
296 |   }
297 |  ],
298 |  "metadata": {
299 |   "file_extension": ".py",
300 |   "kernelspec": {
301 |    "display_name": "Python 3",
302 |    "language": "python",
303 |    "name": "python3"
304 |   },
305 |   "language_info": {
306 |    "codemirror_mode": {
307 |     "name": "ipython",
308 |     "version": 3
309 |    },
310 |    "file_extension": ".py",
311 |    "mimetype": "text/x-python",
312 |    "name": "python",
313 |    "nbconvert_exporter": "python",
314 |    "pygments_lexer": "ipython3",
315 |    "version": "3.8.6"
316 |   },
317 |   "mimetype": "text/x-python",
318 |   "name": "python",
319 |   "npconvert_exporter": "python",
320 |   "pygments_lexer": "ipython3",
321 |   "toc": {
322 |    "base_numbering": 1,
323 |    "nav_menu": {},
324 |    "number_sections": true,
325 |    "sideBar": true,
326 |    "skip_h1_title": false,
327 |    "title_cell": "Table of Contents",
328 |    "title_sidebar": "Contents",
329 |    "toc_cell": true,
330 |    "toc_position": {},
331 |    "toc_section_display": true,
332 |    "toc_window_display": false
333 |   },
334 |   "version": 3
335 |  },
336 |  "nbformat": 4,
337 |  "nbformat_minor": 2
338 | }
339 | 


--------------------------------------------------------------------------------
/notebooks/python/look_aside_cache.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Look-Aside Cache for MongoDB\n",
  8 |     "### This is a sample notebook for using Aerospike as a read/look-aside cache\n",
  9 |     "\n",
 10 |     "- This notebook demonstrates the use of Aerospike as a cache using Mongo as another primary datastore\n",
 11 |     "- It is required to run Mongo as a separte container using `docker run --name some-mongo -d mongo:latest`\n",
 12 |     "\n",
 13 |     "To test: Run the `get_data(key, value)` method once - to fetch from Mongo and populate Aerospike\n",
 14 |     "\n",
 15 |     "Another run will fetch the data from Aerospike cache\n"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "markdown",
 20 |    "metadata": {},
 21 |    "source": [
 22 |     "#### Ensure that the Aerospike Database is running"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 1,
 28 |    "metadata": {
 29 |     "scrolled": true
 30 |    },
 31 |    "outputs": [
 32 |     {
 33 |      "name": "stdout",
 34 |      "output_type": "stream",
 35 |      "text": [
 36 |       "Aerospike database is running!\r\n"
 37 |      ]
 38 |     }
 39 |    ],
 40 |    "source": [
 41 |     "!asd >& /dev/null\n",
 42 |     "!pgrep -x asd >/dev/null && echo \"Aerospike database is running!\" || echo \"**Aerospike database is not running!**\""
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "markdown",
 47 |    "metadata": {},
 48 |    "source": [
 49 |     "#### Import all dependencies"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": 4,
 55 |    "metadata": {},
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "import aerospike\n",
 59 |     "import pymongo\n",
 60 |     "from pymongo import MongoClient\n",
 61 |     "import sys"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "markdown",
 66 |    "metadata": {},
 67 |    "source": [
 68 |     "## Configure the clients\n",
 69 |     "\n",
 70 |     "The configuration is for \n",
 71 |     " - Aerospike database running on port 3000 of localhost (IP 127.0.0.1) which is the default. \n",
 72 |     " - Mongo running in a separate container whose IP can be found by `docker inspect <containerid> | grep -i ipaddress`\n",
 73 |     "\n",
 74 |     "\n",
 75 |     "Modify config if your environment is different (Aerospike database running on a different host or different port)."
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "code",
 80 |    "execution_count": 17,
 81 |    "metadata": {},
 82 |    "outputs": [],
 83 |    "source": [
 84 |     "# Define a few constants\n",
 85 |     "\n",
 86 |     "AEROSPIKE_HOST = \"0.0.0.0\"\n",
 87 |     "AEROSPIKE_PORT = 3000\n",
 88 |     "AEROSPIKE_NAMESPACE = \"test\"\n",
 89 |     "AEROSPIKE_SET = \"demo\"\n",
 90 |     "MONGO_HOST = \"172.17.0.3\"\n",
 91 |     "MONGO_PORT = 27017\n",
 92 |     "MONGO_DB = \"test-database\"\n",
 93 |     "MONGO_COLLECTION = \"test-collection\""
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 18,
 99 |    "metadata": {},
100 |    "outputs": [
101 |     {
102 |      "name": "stdout",
103 |      "output_type": "stream",
104 |      "text": [
105 |       "Connected to Aerospike\n",
106 |       "Connected to Mongo\n"
107 |      ]
108 |     }
109 |    ],
110 |    "source": [
111 |     "#Aerospike configuration\n",
112 |     "aero_config = {\n",
113 |     "  'hosts': [ (AEROSPIKE_HOST, AEROSPIKE_PORT) ]\n",
114 |     "}\n",
115 |     "try:\n",
116 |     "  aero_client = aerospike.client(aero_config).connect()\n",
117 |     "except:\n",
118 |     "  print(\"Failed to connect to the cluster with\", aero_config['hosts'])\n",
119 |     "  sys.exit(1)\n",
120 |     "print(\"Connected to Aerospike\")\n",
121 |     "\n",
122 |     "#Mongo configuration\n",
123 |     "try:\n",
124 |     "    mongo_client = MongoClient(MONGO_HOST, MONGO_PORT)\n",
125 |     "    print(\"Connected to Mongo\")\n",
126 |     "except:\n",
127 |     "    print(\"Failed to connect to Mongo\")\n",
128 |     "    sys.exit(1)"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "markdown",
133 |    "metadata": {},
134 |    "source": [
135 |     "#### Store data in Mongo and clear the keys in Aerospike if any"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": 20,
141 |    "metadata": {},
142 |    "outputs": [],
143 |    "source": [
144 |     "db = mongo_client[MONGO_DB]\n",
145 |     "collection = db[MONGO_COLLECTION]"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": 21,
151 |    "metadata": {},
152 |    "outputs": [],
153 |    "source": [
154 |     "def store_data(data_id, data):\n",
155 |     "    m_data = {data_id: data}\n",
156 |     "    collection.drop()\n",
157 |     "    aero_key = ('test', 'demo', data_id)\n",
158 |     "    #aero_client.remove(aero_key)\n",
159 |     "    post_id = collection.insert_one(m_data)\n",
160 |     "store_data(\"key\", \"value\")"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "markdown",
165 |    "metadata": {},
166 |    "source": [
167 |     "#### Fetch the data. In this instance we are using a simple key value pair.\n",
168 |     "If the data exists in the cache it is returned, if not data is read from Mongo, put in the cache and then returned"
169 |    ]
170 |   },
171 |   {
172 |    "cell_type": "code",
173 |    "execution_count": 23,
174 |    "metadata": {},
175 |    "outputs": [
176 |     {
177 |      "name": "stdout",
178 |      "output_type": "stream",
179 |      "text": [
180 |       "Data retrieved from Aerospike cache\n",
181 |       "Record::: key value\n"
182 |      ]
183 |     }
184 |    ],
185 |    "source": [
186 |     "def get_data(data_id, data):\n",
187 |     "    aero_key = (AEROSPIKE_NAMESPACE, AEROSPIKE_SET, data_id)\n",
188 |     "    #aero_client.remove(aero_key)\n",
189 |     "    data_check = aero_client.exists(aero_key)\n",
190 |     "    if data_check[1]:\n",
191 |     "        (key, metadata, record) = aero_client.get(aero_key)\n",
192 |     "        print(\"Data retrieved from Aerospike cache\")\n",
193 |     "        print(\"Record::: {} {}\".format(data_id, record['value']))\n",
194 |     "    else:\n",
195 |     "        mongo_data = collection.find_one({data_id: data})\n",
196 |     "        print(\"Data not present in Aerospike cache, retrieved from mongo {}\".format(mongo_data))\n",
197 |     "        aero_client.put(aero_key, {'value': mongo_data[data_id]})\n",
198 |     "get_data(\"key\", \"value\")"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": null,
204 |    "metadata": {},
205 |    "outputs": [],
206 |    "source": []
207 |   }
208 |  ],
209 |  "metadata": {
210 |   "kernelspec": {
211 |    "display_name": "Python 3",
212 |    "language": "python",
213 |    "name": "python3"
214 |   },
215 |   "language_info": {
216 |    "codemirror_mode": {
217 |     "name": "ipython",
218 |     "version": 3
219 |    },
220 |    "file_extension": ".py",
221 |    "mimetype": "text/x-python",
222 |    "name": "python",
223 |    "nbconvert_exporter": "python",
224 |    "pygments_lexer": "ipython3",
225 |    "version": "3.8.6"
226 |   }
227 |  },
228 |  "nbformat": 4,
229 |  "nbformat_minor": 4
230 | }
231 | 


--------------------------------------------------------------------------------
/notebooks/python/query.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "toc": true
  7 |    },
  8 |    "source": [
  9 |     "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
 10 |     "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Aerospike-Queries-in-Python¶\" data-toc-modified-id=\"Aerospike-Queries-in-Python¶-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Aerospike Queries in Python¶</a></span><ul class=\"toc-item\"><li><span><a href=\"#Ensure-database-is-running\" data-toc-modified-id=\"Ensure-database-is-running-1.1\"><span class=\"toc-item-num\">1.1&nbsp;&nbsp;</span>Ensure database is running</a></span></li><li><span><a href=\"#Connect-to-database-and-populate-test-data\" data-toc-modified-id=\"Connect-to-database-and-populate-test-data-1.2\"><span class=\"toc-item-num\">1.2&nbsp;&nbsp;</span>Connect to database and populate test data</a></span></li><li><span><a href=\"#Create-secondary-index\" data-toc-modified-id=\"Create-secondary-index-1.3\"><span class=\"toc-item-num\">1.3&nbsp;&nbsp;</span>Create secondary index</a></span></li></ul></li><li><span><a href=\"#Querying-with-secondary-indexes\" data-toc-modified-id=\"Querying-with-secondary-indexes-2\"><span class=\"toc-item-num\">2&nbsp;&nbsp;</span>Querying with secondary indexes</a></span><ul class=\"toc-item\"><li><span><a href=\"#Create-a-query\" data-toc-modified-id=\"Create-a-query-2.1\"><span class=\"toc-item-num\">2.1&nbsp;&nbsp;</span>Create a query</a></span></li><li><span><a href=\"#Project-bins\" data-toc-modified-id=\"Project-bins-2.2\"><span class=\"toc-item-num\">2.2&nbsp;&nbsp;</span>Project bins</a></span></li><li><span><a href=\"#Add-query-predicate\" data-toc-modified-id=\"Add-query-predicate-2.3\"><span class=\"toc-item-num\">2.3&nbsp;&nbsp;</span>Add query predicate</a></span></li><li><span><a href=\"#Define-foreach-function\" data-toc-modified-id=\"Define-foreach-function-2.4\"><span class=\"toc-item-num\">2.4&nbsp;&nbsp;</span>Define foreach function</a></span></li><li><span><a href=\"#Execute-query-and-foreach\" data-toc-modified-id=\"Execute-query-and-foreach-2.5\"><span class=\"toc-item-num\">2.5&nbsp;&nbsp;</span>Execute query and foreach</a></span></li><li><span><a href=\"#Explore-other-query-capabilities\" data-toc-modified-id=\"Explore-other-query-capabilities-2.6\"><span class=\"toc-item-num\">2.6&nbsp;&nbsp;</span>Explore other query capabilities</a></span></li><li><span><a href=\"#Clean-up\" data-toc-modified-id=\"Clean-up-2.7\"><span class=\"toc-item-num\">2.7&nbsp;&nbsp;</span>Clean up</a></span></li><li><span><a href=\"#Next-steps\" data-toc-modified-id=\"Next-steps-2.8\"><span class=\"toc-item-num\">2.8&nbsp;&nbsp;</span>Next steps</a></span></li></ul></li></ul></div>"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "metadata": {},
 16 |    "source": [
 17 |     "# Aerospike Queries in Python\n",
 18 |     "Intoduction to Aerospike queries in Python.\n",
 19 |     "<br>\n",
 20 |     "This notebook requires Aerospike datbase running on localhost and that python and the Aerospike python client have been installed (`pip install aerospike`). Visit [Aerospike notebooks repo](https://github.com/aerospike-examples/interactive-notebooks) for additional details and the docker container."
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "metadata": {},
 26 |    "source": [
 27 |     "## Ensure database is running\n",
 28 |     "This notebook requires that Aerospike datbase is running."
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 1,
 34 |    "metadata": {},
 35 |    "outputs": [
 36 |     {
 37 |      "name": "stdout",
 38 |      "output_type": "stream",
 39 |      "text": [
 40 |       "Aerospike database is running!\r\n"
 41 |      ]
 42 |     }
 43 |    ],
 44 |    "source": [
 45 |     "!asd >& /dev/null\n",
 46 |     "!pgrep -x asd >/dev/null && echo \"Aerospike database is running!\" || echo \"**Aerospike database is not running!**\""
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "markdown",
 51 |    "metadata": {},
 52 |    "source": [
 53 |     "## Connect to database and populate test data\n",
 54 |     "The test data has ten records with user-key \"id1-10\", two bins (fields) \"name\" and \"age\", in the namespace \"test\" and set \"demo\". "
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 2,
 60 |    "metadata": {},
 61 |    "outputs": [
 62 |     {
 63 |      "name": "stdout",
 64 |      "output_type": "stream",
 65 |      "text": [
 66 |       "Test data populated.\n"
 67 |      ]
 68 |     }
 69 |    ],
 70 |    "source": [
 71 |     "# import the module\n",
 72 |     "from __future__ import print_function\n",
 73 |     "import aerospike\n",
 74 |     "\n",
 75 |     "# Configure the client\n",
 76 |     "config = {\n",
 77 |     "  'hosts': [ ('127.0.0.1', 3000) ],\n",
 78 |     "  'policy' : {'key': aerospike.POLICY_KEY_SEND}\n",
 79 |     "}\n",
 80 |     "\n",
 81 |     "# Create a client and connect it to the cluster\n",
 82 |     "try:\n",
 83 |     "  client = aerospike.client(config).connect()\n",
 84 |     "except:\n",
 85 |     "  import sys\n",
 86 |     "  print(\"failed to connect to the cluster with\", config['hosts'])\n",
 87 |     "  sys.exit(1)\n",
 88 |     "\n",
 89 |     "# Records are addressable via a tuple of (namespace, set, key)\n",
 90 |     "people = [ {'id':1, 'name':'John Doe', 'age': 53},\n",
 91 |     "           {'id':2, 'name':'Brian Yu', 'age': 21},\n",
 92 |     "           {'id':3, 'name':'Will Kim', 'age': 34},\n",
 93 |     "           {'id':4, 'name':'Dorothy Smith', 'age': 48},\n",
 94 |     "           {'id':5, 'name':'Sara Poe', 'age': 29},\n",
 95 |     "           {'id':6, 'name':'Kim Knott', 'age': 56},\n",
 96 |     "           {'id':7, 'name':'Joe Miller', 'age': 30},\n",
 97 |     "           {'id':8, 'name':'Jeff Nye', 'age': 32},\n",
 98 |     "           {'id':9, 'name':'Jane Doe', 'age': 44},\n",
 99 |     "           {'id':10, 'name':'Emily Tuck', 'age': 22} ]\n",
100 |     "try:\n",
101 |     "    for i in range(10):\n",
102 |     "      # Write the records\n",
103 |     "      client.put(('test', 'demo', 'id'+str(people[i]['id'])), people[i])\n",
104 |     "except Exception as e:\n",
105 |     "  import sys\n",
106 |     "  print(\"error: {0}\".format(e), file=sys.stderr)\n",
107 |     "\n",
108 |     "print('Test data populated.')"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "markdown",
113 |    "metadata": {},
114 |    "source": [
115 |     "## Create secondary index\n",
116 |     "To use the query API, a secondary index must exist on the query field. We will create an integer secondary index on the \"age\" bin."
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 3,
122 |    "metadata": {},
123 |    "outputs": [
124 |     {
125 |      "name": "stdout",
126 |      "output_type": "stream",
127 |      "text": [
128 |       "Secondary index created.\n"
129 |      ]
130 |     }
131 |    ],
132 |    "source": [
133 |     "# Must create an index to query on a bin\n",
134 |     "from aerospike import exception as ex\n",
135 |     "try:\n",
136 |     "    client.index_integer_create(\"test\", \"demo\", \"age\", \"test_demo_number_idx\")\n",
137 |     "except ex.IndexFoundError:\n",
138 |     "    pass\n",
139 |     "\n",
140 |     "print('Secondary index created.')"
141 |    ]
142 |   },
143 |   {
144 |    "cell_type": "markdown",
145 |    "metadata": {},
146 |    "source": [
147 |     "# Querying with secondary indexes\n",
148 |     "\n",
149 |     "In addition to retrieving records with the primary index using the key-value store APIs, the Aerospike Python client provides an API to query records using secondary indexes. To use the query API, a secondary index must exist on the query field.\n",
150 |     "\n",
151 |     "Use the Query APIs to query the database using secondary indexes."
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "markdown",
156 |    "metadata": {},
157 |    "source": [
158 |     "## Create a query\n",
159 |     "The API client.query() takes the namespace (required) and set (optional) arguments. The parameter set can be omitted or None, in which case records in the namespace that are outside any set are returned. The return value is a new aerospike.Query class instance.\n",
160 |     "\n",
161 |     "This example creates a query on the test namespace, demo set."
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": 4,
167 |    "metadata": {},
168 |    "outputs": [
169 |     {
170 |      "name": "stdout",
171 |      "output_type": "stream",
172 |      "text": [
173 |       "Query object created.\n"
174 |      ]
175 |     }
176 |    ],
177 |    "source": [
178 |     "query = client.query('test', 'demo')\n",
179 |     "print('Query object created.')"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "markdown",
184 |    "metadata": {},
185 |    "source": [
186 |     "## Project bins\n",
187 |     "Project (or select) bins using select() on the Query class instance. The select() API accepts one or many bin names (strings).\n",
188 |     "\n",
189 |     "This example selects \"name\" and \"age\" bins from the specified records."
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": 5,
195 |    "metadata": {},
196 |    "outputs": [
197 |     {
198 |      "name": "stdout",
199 |      "output_type": "stream",
200 |      "text": [
201 |       "Bins name and age selected.\n"
202 |      ]
203 |     }
204 |    ],
205 |    "source": [
206 |     "query.select('name', 'age')\n",
207 |     "print('Bins name and age selected.')"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "markdown",
212 |    "metadata": {},
213 |    "source": [
214 |     "## Add query predicate\n",
215 |     "Define predicates using the where() API on the Query class instance. The where() API accepts a predicate created using one of the functions in aerospike.predicates including:\n",
216 |     "\n",
217 |     "- equals(bin, value) — Find records containing the bin with the specified value (integer or string).\n",
218 |     "- between(bin, min, max) — Find records containing the bin with a value in the min and max range (integer only).\n",
219 |     "\n",
220 |     "This example adds the between() predicate to a query."
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": 6,
226 |    "metadata": {},
227 |    "outputs": [
228 |     {
229 |      "name": "stdout",
230 |      "output_type": "stream",
231 |      "text": [
232 |       "Predicate defined.\n"
233 |      ]
234 |     }
235 |    ],
236 |    "source": [
237 |     "from aerospike import predicates as p\n",
238 |     "query.where( p.between('age', 14, 25) )\n",
239 |     "print('Predicate defined.')"
240 |    ]
241 |   },
242 |   {
243 |    "cell_type": "markdown",
244 |    "metadata": {},
245 |    "source": [
246 |     "## Define foreach function\n",
247 |     "In order to executer the query and read the results, we need to use the foreach() API in the Query class instance. The foreach() API accepts a callback function for each record read from the query. The callback function must accept a single argument as a tuple:\n",
248 |     "\n",
249 |     "- key tuple — The tuple to identify the record.\n",
250 |     "- metadata — The dict containing the record metadata (TTL and generation).\n",
251 |     "- record — The dict containing the record bins.\n",
252 |     "\n",
253 |     "If the callback returns False, the client stops reading results.\n",
254 |     "\n",
255 |     "This examples executes the query and prints results as they are read.\n",
256 |     "\n",
257 |     "To print the records as they are read, we define a print_result function."
258 |    ]
259 |   },
260 |   {
261 |    "cell_type": "code",
262 |    "execution_count": 7,
263 |    "metadata": {},
264 |    "outputs": [
265 |     {
266 |      "name": "stdout",
267 |      "output_type": "stream",
268 |      "text": [
269 |       "Foreach function defined.\n"
270 |      ]
271 |     }
272 |    ],
273 |    "source": [
274 |     "def print_result(result_tuple):\n",
275 |     "    print(result_tuple)\n",
276 |     "    \n",
277 |     "print('Foreach function defined.')"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "markdown",
282 |    "metadata": {},
283 |    "source": [
284 |     "## Execute query and foreach\n",
285 |     "Now we are ready to execute the query by passing in the print_result that will be called for each record. Based on the data we populated earlier, we expect 2 results between ages 14 and 25."
286 |    ]
287 |   },
288 |   {
289 |    "cell_type": "code",
290 |    "execution_count": 8,
291 |    "metadata": {},
292 |    "outputs": [
293 |     {
294 |      "name": "stdout",
295 |      "output_type": "stream",
296 |      "text": [
297 |       "Executing query and printing results:\n",
298 |       "(('test', 'demo', None, bytearray(b'\\xb2\\x13X\\x1dI\\xd8\\xba`\\xab\\x96\\xa2\\xf0\\xd9\\x8b\\x19\\xf9DZug')), {'ttl': 2591998, 'gen': 1}, {'name': 'Brian Yu', 'age': 21})\n",
299 |       "(('test', 'demo', None, bytearray(b'\\x0bR\\xbc\\xa1\\x02`SF?\\x01\\xe7\\xd3`\\x8d[F\\xcb\\xd71V')), {'ttl': 2591998, 'gen': 1}, {'name': 'Emily Tuck', 'age': 22})\n"
300 |      ]
301 |     }
302 |    ],
303 |    "source": [
304 |     "print(\"Executing query and printing results:\")\n",
305 |     "query.foreach(print_result)"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "markdown",
310 |    "metadata": {},
311 |    "source": [
312 |     "## Explore other query capabilities\n",
313 |     "Please feel free to play with the \"equals\" predicate, adding secondary indexes on other fields, populating more test data to the \"null\" set and querying those records, and so on."
314 |    ]
315 |   },
316 |   {
317 |    "cell_type": "markdown",
318 |    "metadata": {},
319 |    "source": [
320 |     "## Clean up"
321 |    ]
322 |   },
323 |   {
324 |    "cell_type": "code",
325 |    "execution_count": 9,
326 |    "metadata": {},
327 |    "outputs": [
328 |     {
329 |      "name": "stdout",
330 |      "output_type": "stream",
331 |      "text": [
332 |       "Connection closed.\n"
333 |      ]
334 |     }
335 |    ],
336 |    "source": [
337 |     "# Close the connection to the Aerospike cluster\n",
338 |     "client.close()\n",
339 |     "print('Connection closed.')"
340 |    ]
341 |   },
342 |   {
343 |    "cell_type": "markdown",
344 |    "metadata": {},
345 |    "source": [
346 |     "## Next steps\n",
347 |     "\n",
348 |     "Visit [Aerospike notebooks repo](https://github.com/aerospike-examples/interactive-notebooks) to run additional Aerospike notebooks. To run a different notebook, download the notebook from the repo to your local machine, and then click on File->Open, and select Upload."
349 |    ]
350 |   }
351 |  ],
352 |  "metadata": {
353 |   "kernelspec": {
354 |    "display_name": "Python 3",
355 |    "language": "python",
356 |    "name": "python3"
357 |   },
358 |   "language_info": {
359 |    "codemirror_mode": {
360 |     "name": "ipython",
361 |     "version": 3
362 |    },
363 |    "file_extension": ".py",
364 |    "mimetype": "text/x-python",
365 |    "name": "python",
366 |    "nbconvert_exporter": "python",
367 |    "pygments_lexer": "ipython3",
368 |    "version": "3.8.6"
369 |   },
370 |   "toc": {
371 |    "base_numbering": 1,
372 |    "nav_menu": {},
373 |    "number_sections": true,
374 |    "sideBar": true,
375 |    "skip_h1_title": false,
376 |    "title_cell": "Table of Contents",
377 |    "title_sidebar": "Contents",
378 |    "toc_cell": true,
379 |    "toc_position": {},
380 |    "toc_section_display": true,
381 |    "toc_window_display": false
382 |   }
383 |  },
384 |  "nbformat": 4,
385 |  "nbformat_minor": 4
386 | }
387 | 


--------------------------------------------------------------------------------
/notebooks/python/simple_put_get_example.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# A Simple Put-Get Example\n",
  8 |     "\n",
  9 |     "A simple example of `put` and `get` calls in Aerospike.\n",
 10 |     "\n",
 11 |     "This notebook requires Aerospike datbase running locally and that python and the Aerospike python client have been installed (`pip install aerospike`). Visit [Aerospike notebooks repo](https://github.com/aerospike-examples/interactive-notebooks) for additional details and the docker container."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "markdown",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "## Import the module\n",
 19 |     "\n",
 20 |     "The Aerospike client must be imported."
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": null,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "import aerospike"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "markdown",
 34 |    "metadata": {},
 35 |    "source": [
 36 |     "## Configure the client\n",
 37 |     "\n",
 38 |     "This configuration is for aerospike running on port 3000 of localhost which is the default. If your environment is different (Aerospike server running on a different host or different port, etc)"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": null,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "config = {\n",
 48 |     "  'hosts': [ ('127.0.0.1', 3000) ]\n",
 49 |     "}"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "markdown",
 54 |    "metadata": {},
 55 |    "source": [
 56 |     "## Create a client and connect it to the cluster"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": null,
 62 |    "metadata": {},
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "try:\n",
 66 |     "  client = aerospike.client(config).connect()\n",
 67 |     "except:\n",
 68 |     "  import sys\n",
 69 |     "  print(\"failed to connect to the cluster with\", config['hosts'])\n",
 70 |     "  sys.exit(1)\n"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "metadata": {},
 76 |    "source": [
 77 |     "## Records are addressable via a tuple of (namespace, set, key) \n",
 78 |     "\n",
 79 |     "These three components (with set being optionsl) form the key. Using this key records may be read or written. For a detailed description of the data model see the [Data Model overview](https://www.aerospike.com/docs/architecture/data-model.html)"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": null,
 85 |    "metadata": {},
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "key = ('test', 'demo', 'foo')"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "## Writing a record\n",
 96 |     "\n",
 97 |     "Aerospike is schema-less and records may be written without any other setup. Here a record with two bins (name and age) is being written to a record with they key defined above. "
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "metadata": {},
104 |    "outputs": [],
105 |    "source": [
106 |     "\n",
107 |     "try:\n",
108 |     "  # Write a record\n",
109 |     "  client.put(key, {\n",
110 |     "    'name': 'John Doe',\n",
111 |     "    'age': 32\n",
112 |     "  })\n",
113 |     "except Exception as e:\n",
114 |     "  import sys\n",
115 |     "  print(\"error: {0}\".format(e), file=sys.stderr)"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "markdown",
120 |    "metadata": {},
121 |    "source": [
122 |     "## Reading a record\n",
123 |     "\n",
124 |     "This same record may be retrieved using the same key."
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": null,
130 |    "metadata": {},
131 |    "outputs": [],
132 |    "source": [
133 |     "(key, metadata, record) = client.get(key)\n"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "markdown",
138 |    "metadata": {},
139 |    "source": [
140 |     "## Display result\n",
141 |     "\n",
142 |     "Print the record that was just retrieved. We are also printing: \n",
143 |     "\n",
144 |     "1. The components of the key which are: namespace, the set, a userkey (by default there is no user key), and a hash which is the internal representation of the key.\n",
145 |     "1. The metadata with the time to live and the record's generation. \n",
146 |     "1. The actual value of the record with two bins. \n",
147 |     "\n",
148 |     "Lastly it is important to clean up the client we created at the beginning."
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": null,
154 |    "metadata": {},
155 |    "outputs": [],
156 |    "source": [
157 |     "print(\"record contents are\", record)\n",
158 |     "print(\"key components are\", key)\n",
159 |     "print(\"metadata is\", metadata)\n",
160 |     "# Close the connection to the Aerospike cluster\n",
161 |     "client.close()"
162 |    ]
163 |   }
164 |  ],
165 |  "metadata": {
166 |   "file_extension": ".py",
167 |   "kernelspec": {
168 |    "display_name": "Python 3.7.9 64-bit",
169 |    "language": "python",
170 |    "name": "python37964bit728b6d8a91f74e8c9f0db525f58accf3"
171 |   },
172 |   "language_info": {
173 |    "codemirror_mode": {
174 |     "name": "ipython",
175 |     "version": 3
176 |    },
177 |    "file_extension": ".py",
178 |    "mimetype": "text/x-python",
179 |    "name": "python",
180 |    "nbconvert_exporter": "python",
181 |    "pygments_lexer": "ipython3",
182 |    "version": "3.7.9"
183 |   },
184 |   "mimetype": "text/x-python",
185 |   "name": "python",
186 |   "npconvert_exporter": "python",
187 |   "pygments_lexer": "ipython3",
188 |   "version": 3
189 |  },
190 |  "nbformat": 4,
191 |  "nbformat_minor": 2
192 | }
193 | 


--------------------------------------------------------------------------------
/notebooks/python/transactions_rmw_pattern.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "toc": true
  7 |    },
  8 |    "source": [
  9 |     "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
 10 |     "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Implementing-Read-Write-Transactions-with-R-M-W-Pattern\" data-toc-modified-id=\"Implementing-Read-Write-Transactions-with-R-M-W-Pattern-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Implementing Read-Write Transactions with R-M-W Pattern</a></span><ul class=\"toc-item\"><li><span><a href=\"#Introduction\" data-toc-modified-id=\"Introduction-1.1\"><span class=\"toc-item-num\">1.1&nbsp;&nbsp;</span>Introduction</a></span></li><li><span><a href=\"#Prerequisites\" data-toc-modified-id=\"Prerequisites-1.2\"><span class=\"toc-item-num\">1.2&nbsp;&nbsp;</span>Prerequisites</a></span></li><li><span><a href=\"#Initialization\" data-toc-modified-id=\"Initialization-1.3\"><span class=\"toc-item-num\">1.3&nbsp;&nbsp;</span>Initialization</a></span><ul class=\"toc-item\"><li><span><a href=\"#Ensure-database-is-running\" data-toc-modified-id=\"Ensure-database-is-running-1.3.1\"><span class=\"toc-item-num\">1.3.1&nbsp;&nbsp;</span>Ensure database is running</a></span></li><li><span><a href=\"#Connect-to-database.\" data-toc-modified-id=\"Connect-to-database.-1.3.2\"><span class=\"toc-item-num\">1.3.2&nbsp;&nbsp;</span>Connect to database.</a></span></li><li><span><a href=\"#Populate-database-with-test-data.\" data-toc-modified-id=\"Populate-database-with-test-data.-1.3.3\"><span class=\"toc-item-num\">1.3.3&nbsp;&nbsp;</span>Populate database with test data.</a></span></li></ul></li></ul></li><li><span><a href=\"#The-Problem-of-Lost-Writes\" data-toc-modified-id=\"The-Problem-of-Lost-Writes-2\"><span class=\"toc-item-num\">2&nbsp;&nbsp;</span>The Problem of Lost Writes</a></span><ul class=\"toc-item\"><li><span><a href=\"#Test-Framework\" data-toc-modified-id=\"Test-Framework-2.1\"><span class=\"toc-item-num\">2.1&nbsp;&nbsp;</span>Test Framework</a></span></li><li><span><a href=\"#Simple-RMW-Function\" data-toc-modified-id=\"Simple-RMW-Function-2.2\"><span class=\"toc-item-num\">2.2&nbsp;&nbsp;</span>Simple RMW Function</a></span></li><li><span><a href=\"#Test-Results\" data-toc-modified-id=\"Test-Results-2.3\"><span class=\"toc-item-num\">2.3&nbsp;&nbsp;</span>Test Results</a></span></li></ul></li><li><span><a href=\"#Using-Generation-Check\" data-toc-modified-id=\"Using-Generation-Check-3\"><span class=\"toc-item-num\">3&nbsp;&nbsp;</span>Using Generation Check</a></span><ul class=\"toc-item\"><li><span><a href=\"#RMW-Function-with-Version-Check-and-Retries\" data-toc-modified-id=\"RMW-Function-with-Version-Check-and-Retries-3.1\"><span class=\"toc-item-num\">3.1&nbsp;&nbsp;</span>RMW Function with Version Check and Retries</a></span></li><li><span><a href=\"#Test-Results\" data-toc-modified-id=\"Test-Results-3.2\"><span class=\"toc-item-num\">3.2&nbsp;&nbsp;</span>Test Results</a></span></li></ul></li><li><span><a href=\"#Takeaways\" data-toc-modified-id=\"Takeaways-4\"><span class=\"toc-item-num\">4&nbsp;&nbsp;</span>Takeaways</a></span></li><li><span><a href=\"#Clean-up\" data-toc-modified-id=\"Clean-up-5\"><span class=\"toc-item-num\">5&nbsp;&nbsp;</span>Clean up</a></span></li><li><span><a href=\"#Further-Exploration-and-Resources\" data-toc-modified-id=\"Further-Exploration-and-Resources-6\"><span class=\"toc-item-num\">6&nbsp;&nbsp;</span>Further Exploration and Resources</a></span><ul class=\"toc-item\"><li><span><a href=\"#Next-steps\" data-toc-modified-id=\"Next-steps-6.1\"><span class=\"toc-item-num\">6.1&nbsp;&nbsp;</span>Next steps</a></span></li></ul></li></ul></div>"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "metadata": {},
 16 |    "source": [
 17 |     "# Implementing Read-Write Transactions with R-M-W Pattern \n",
 18 |     "This tutorial explains how to use the Read-Modify-Write pattern in order to ensure atomicity and isolation for read-write single-record transactions. \n",
 19 |     "\n",
 20 |     "This notebook requires Aerospike datbase running on localhost and that python and the Aerospike python client have been installed (`pip install aerospike`). Visit [Aerospike notebooks repo](https://github.com/aerospike-examples/interactive-notebooks) for additional details and the docker container."
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "metadata": {},
 26 |    "source": [
 27 |     "## Introduction\n",
 28 |     "In Aerospike, the transactional boundaries are \"single request, single record\". While multiple operations may be specified in a single request on a single record, each such operation can involve a single bin and only certain write operations are allowed. Therefore, neither updates involving multiple bins (e.g, \"a=a+b\") nor general logic (e.g., \"concatenate alternate letters and append\") are possible as server-side operations. Of course, UDFs allow complex logic in a transactional update of a single record, however they are not suitable for all situations for various reasons such as performance and ease. Therefore most updates entail the R-M-W pattern or Reading the record, Modifying bins on the client side, and then Writing the record updates back to the server. \n",
 29 |     "\n",
 30 |     "The tutorial first demonstrates how read-write operations can result in lost writes in a concurrent multi-client environment. \n",
 31 |     "\n",
 32 |     "Then we show how to specify conditional writes with version check to address the problem by disallowing intereaved read-write and thus protecting against lost writes."
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "metadata": {},
 38 |    "source": [
 39 |     "## Prerequisites\n",
 40 |     "This tutorial assumes familiarity with the following topics:\n",
 41 |     "\n",
 42 |     "[Provide topics and links. For example:]\n",
 43 |     "- [Hello World](hello_world.ipynb)\n",
 44 |     "- [Aerospike Basic Operations](basic_operations.ipynb)"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "metadata": {},
 50 |    "source": [
 51 |     "## Initialization"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "metadata": {},
 57 |    "source": [
 58 |     "### Ensure database is running\n",
 59 |     "This notebook requires that Aerospike datbase is running. \n",
 60 |     "[Include the right code cell for Java or Python from the two cells below.] "
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 1,
 66 |    "metadata": {
 67 |     "ExecuteTime": {
 68 |      "end_time": "2020-12-29T20:48:49.695739Z",
 69 |      "start_time": "2020-12-29T20:48:49.447020Z"
 70 |     }
 71 |    },
 72 |    "outputs": [
 73 |     {
 74 |      "name": "stdout",
 75 |      "output_type": "stream",
 76 |      "text": [
 77 |       "Aerospike database is running!\r\n"
 78 |      ]
 79 |     }
 80 |    ],
 81 |    "source": [
 82 |     "!asd >& /dev/null\n",
 83 |     "!pgrep -x asd >/dev/null && echo \"Aerospike database is running!\" || echo \"**Aerospike database is not running!**\""
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {},
 89 |    "source": [
 90 |     "### Connect to database."
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 3,
 96 |    "metadata": {
 97 |     "ExecuteTime": {
 98 |      "end_time": "2020-12-29T20:48:51.190060Z",
 99 |      "start_time": "2020-12-29T20:48:51.110597Z"
100 |     }
101 |    },
102 |    "outputs": [
103 |     {
104 |      "name": "stdout",
105 |      "output_type": "stream",
106 |      "text": [
107 |       "Client successfully connected to the database.\n"
108 |      ]
109 |     }
110 |    ],
111 |    "source": [
112 |     "# import the modules\n",
113 |     "import sys\n",
114 |     "import aerospike\n",
115 |     "\n",
116 |     "# Configure the client\n",
117 |     "config = {\n",
118 |     "  'hosts': [ ('127.0.0.1', 3000) ],\n",
119 |     "  'policy' : {'key': aerospike.POLICY_KEY_SEND}\n",
120 |     "}\n",
121 |     "\n",
122 |     "# Create a client and connect it to the cluster\n",
123 |     "try:\n",
124 |     "  client = aerospike.client(config).connect()\n",
125 |     "except:\n",
126 |     "  print(\"failed to connect to the cluster with\", config['hosts'])\n",
127 |     "  sys.exit(1)\n",
128 |     "print('Client successfully connected to the database.')"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "markdown",
133 |    "metadata": {},
134 |    "source": [
135 |     "### Populate database with test data.\n",
136 |     "We create one record with an integer bin \"gen-times-2\" (the names will become clear below), initialized to 1."
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": 4,
142 |    "metadata": {
143 |     "ExecuteTime": {
144 |      "end_time": "2020-12-29T20:48:52.195181Z",
145 |      "start_time": "2020-12-29T20:48:52.189787Z"
146 |     }
147 |    },
148 |    "outputs": [
149 |     {
150 |      "name": "stdout",
151 |      "output_type": "stream",
152 |      "text": [
153 |       "Test data populated.\n"
154 |      ]
155 |     }
156 |    ],
157 |    "source": [
158 |     "namespace = 'test'\n",
159 |     "tutorial_set = 'rmw-tutorial-set'\n",
160 |     "user_key = 'id-1'\n",
161 |     "# Records are addressable via a tuple of (namespace, set, user_key)\n",
162 |     "rec_key = (namespace, tutorial_set, user_key)\n",
163 |     "rmw_bin = 'gen-times-2'\n",
164 |     "try:\n",
165 |     "  # Create the record\n",
166 |     "  client.put(rec_key, {rmw_bin: 1})\n",
167 |     "except Exception as e:\n",
168 |     "  print(\"error: {0}\".format(e), file=sys.stderr)\n",
169 |     "\n",
170 |     "print('Test data populated.')"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "markdown",
175 |    "metadata": {},
176 |    "source": [
177 |     "# The Problem of Lost Writes\n",
178 |     "In a concurrent setting, multiple clients may be performaing Read-Modify-Write on the same record in a way that get in each other's way. Since various R-M-W transactions can interleave, a transaction can be lost, if another client updates the record without reading the transaction's update.\n",
179 |     "\n",
180 |     "To demonstrate this, we make use of a record's \"generation\" or version, that is available as the record metadata, and is automatically incremented on each successful update of the record.\n",
181 |     "\n",
182 |     "The integer bin “gen-times-2” holds the value that is 2 times the value of the current generation of the record. A client first reads the current generation of the record, and then updates the bin value 2 times that value.\n",
183 |     "\n",
184 |     "In the case of a single client, there are no issues in maintaining the semantics of the bin.  However when there are multiple clients, the interleaving of reads and writes of different transactions can violate the semantics. By updating the bin using an older generation value, it may not be 2 times the current generation, which is the constraint that we want to preserve.\n",
185 |     "\n",
186 |     "First, we will show how transaction writes are lost in a simple concurrent case by observing whether the relationship between record's current generation and the bin value is maintained. Then we will show how the problem is solved using a conditional write with version check.\n"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "markdown",
191 |    "metadata": {},
192 |    "source": [
193 |     "## Test Framework\n",
194 |     "We spawn multiple (num_threads) threads to simulate concurrent access. Each thread repeatedly (num_txns) does the following:\n",
195 |     "- waits for a random duration (with average of txn_wait_ms) \n",
196 |     "- executes a passed-in R-M-W function that returns the failure type (string, null if success).\n",
197 |     "\n",
198 |     "At the end the thread prints out the aggregate counts for each error type. In aggregate, they signify the likelihood of a read-write transaction failing."
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": 9,
204 |    "metadata": {},
205 |    "outputs": [],
206 |    "source": [
207 |     "import threading\n",
208 |     "import time\n",
209 |     "import random\n",
210 |     "\n",
211 |     "num_txns = 10\n",
212 |     "txn_wait_ms = 500\n",
213 |     "\n",
214 |     "def thread_fn(thread_id, rmw_fn):\n",
215 |     "    random.seed(thread_id)\n",
216 |     "    lost_writes_count = 0\n",
217 |     "    failures = {}\n",
218 |     "    for i in range(num_txns):\n",
219 |     "        failure = rmw_fn()\n",
220 |     "        if failure:\n",
221 |     "            if not failure in failures:\n",
222 |     "                failures[failure] = 1\n",
223 |     "            else: \n",
224 |     "                failures[failure] += 1  \n",
225 |     "    print('\\tThead {0} failures: {1}'.format(thread_id, failures))\n",
226 |     "    return\n",
227 |     "       \n",
228 |     "    \n",
229 |     "def run_test(num_threads, rmw_fn):\n",
230 |     "    threads = list()\n",
231 |     "    print('{0} threads, {1} transcations per thread:'.format(num_threads, num_txns))\n",
232 |     "    for thread_index in range(num_threads):\n",
233 |     "        thread = threading.Thread(target=thread_fn, args=(thread_index, rmw_fn))\n",
234 |     "        threads.append(thread)\n",
235 |     "        thread.start()\n",
236 |     "    for thread in threads:\n",
237 |     "        thread.join()\n",
238 |     "    return"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "markdown",
243 |    "metadata": {},
244 |    "source": [
245 |     "## Simple RMW Function\n",
246 |     "Next we implement a simple RMW function simple_rmw_fn to pass into the above framework. The function: \n",
247 |     "- Reads the record.\n",
248 |     "- Computes new value of gen_times_2 (= 2 * read generation). Then waits for a random duration, with average of write_wait_ms average to simulate the application computation time between read and write.\n",
249 |     "- Writes the new bin value. In the same (multi-op) request, reads back the record for the record's new generation value.\n",
250 |     "- Returns \"lost writes\" if the updated value of gen_times_2/2 is smaller than the new gen. If they are the same, it returns None."
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "code",
255 |    "execution_count": 10,
256 |    "metadata": {},
257 |    "outputs": [],
258 |    "source": [
259 |     "import aerospike_helpers.operations.operations as op_helpers\n",
260 |     "\n",
261 |     "write_wait_ms = 50\n",
262 |     "\n",
263 |     "def rmw_simple():\n",
264 |     "    #read\n",
265 |     "    _, meta, bins = client.get(rec_key)\n",
266 |     "    # wait before write to simulate computation time\n",
267 |     "    time.sleep(random.uniform(0,2*write_wait_ms/1000.0))\n",
268 |     "    # modify \n",
269 |     "    read_gen = meta['gen']\n",
270 |     "    new_rmw_bin_value = 2*(read_gen+1)\n",
271 |     "    # write and read back bin_inc to compare\n",
272 |     "    ops = [op_helpers.write(rmw_bin, new_rmw_bin_value),\n",
273 |     "          op_helpers.read(rmw_bin)]\n",
274 |     "    try:\n",
275 |     "        _, meta, bins = client.operate(rec_key, ops)\n",
276 |     "    except Exception as e:\n",
277 |     "        print(\"error: {0}\".format(e), file=sys.stderr)\n",
278 |     "        exit(-1)\n",
279 |     "    # compare new_rmw_bin_value//2 and new gen; if different return 'lost writes'\n",
280 |     "    new_gen = meta['gen']\n",
281 |     "    if new_rmw_bin_value//2 != new_gen:  \n",
282 |     "        #print('gen: {0}, bin: {1}, lost: {2}'.format(new_gen, new_rmw_bin_value//2, new_gen-new_rmw_bin_value//2))\n",
283 |     "        return 'lost writes'\n",
284 |     "    return None"
285 |    ]
286 |   },
287 |   {
288 |    "cell_type": "markdown",
289 |    "metadata": {},
290 |    "source": [
291 |     "## Test Results\n",
292 |     "For various values of concurrency (num_threads), we can see that with greater concurrent updates, a larger percentage of read-write transactions are lost, meaning greater likelihood of the semantics of the gen_times_2 bin not being preserved."
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "code",
297 |    "execution_count": 11,
298 |    "metadata": {},
299 |    "outputs": [
300 |     {
301 |      "name": "stdout",
302 |      "output_type": "stream",
303 |      "text": [
304 |       "1 threads, 10 transcations per thread:\n",
305 |       "\tThead 0 failures: {}\n",
306 |       "2 threads, 10 transcations per thread:\n",
307 |       "\tThead 0 failures: {'lost writes': 5}\n",
308 |       "\tThead 1 failures: {'lost writes': 6}\n",
309 |       "3 threads, 10 transcations per thread:\n",
310 |       "\tThead 0 failures: {'lost writes': 4}\n",
311 |       "\tThead 1 failures: {'lost writes': 8}\n",
312 |       "\tThead 2 failures: {'lost writes': 7}\n",
313 |       "4 threads, 10 transcations per thread:\n",
314 |       "\tThead 0 failures: {'lost writes': 9}\n",
315 |       "\tThead 3 failures: {'lost writes': 8}\n",
316 |       "\tThead 1 failures: {'lost writes': 8}\n",
317 |       "\tThead 2 failures: {'lost writes': 8}\n"
318 |      ]
319 |     }
320 |    ],
321 |    "source": [
322 |     "run_test(num_threads=1, rmw_fn=rmw_simple)\n",
323 |     "run_test(num_threads=2, rmw_fn=rmw_simple)\n",
324 |     "run_test(num_threads=3, rmw_fn=rmw_simple)\n",
325 |     "run_test(num_threads=4, rmw_fn=rmw_simple)"
326 |    ]
327 |   },
328 |   {
329 |    "cell_type": "markdown",
330 |    "metadata": {},
331 |    "source": [
332 |     "# Using Generation Check\n",
333 |     "To solve the problem of lost writes, the simple R-M-W is modified with how the Write is done: by making it conditional on the record not having been modified since the Read. It is a \"check-and-set (CAS)\" like operation that succeeds if the record generation (version) is still the same as at the time of Read. Otherwise it fails, and the client must retry the whole R-M-W pattern. The syntax and usage is shown in the code below."
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "markdown",
338 |    "metadata": {},
339 |    "source": [
340 |     "## RMW Function with Version Check and Retries\n",
341 |     "In the rmw_with_gen_check function below, a failed read-write due to generation mismatch is retried for max_retries attempts or until the write is successful. Each retry is attempted after a exponential backoff wait of (retry_number ** 2) * retry_wait_ms.\n",
342 |     "\n",
343 |     "A write can still fail after max_retries attempts, and the client can suitably handle it. However no writes are overwritten or lost, and the intended semantics of the gen-times-2 bin is always preserved.\n",
344 |     "\n",
345 |     "We perform the same concurrent test with the version check at Write. We expect no interleaved_writes reported in any thread."
346 |    ]
347 |   },
348 |   {
349 |    "cell_type": "code",
350 |    "execution_count": 12,
351 |    "metadata": {},
352 |    "outputs": [],
353 |    "source": [
354 |     "from aerospike_helpers.operations import operations as op_helpers\n",
355 |     "from aerospike import exception as ex\n",
356 |     "\n",
357 |     "max_retries = 3\n",
358 |     "retry_wait_ms = 20\n",
359 |     "\n",
360 |     "def rmw_with_gen_check():\n",
361 |     "    retryRMWCount = 0\n",
362 |     "    done = False\n",
363 |     "    while (not done):\n",
364 |     "        #read\n",
365 |     "        _, meta, bins = client.get(rec_key)\n",
366 |     "        # wait before write to simulate computation time\n",
367 |     "        time.sleep(random.uniform(0,2*write_wait_ms/1000.0))\n",
368 |     "        # modify \n",
369 |     "        read_gen = meta['gen']\n",
370 |     "        new_rmw_bin_value = 2*(read_gen+1)\n",
371 |     "        # write and read back bin_inc to compare\n",
372 |     "        ops = [op_helpers.write(rmw_bin, new_rmw_bin_value),\n",
373 |     "              op_helpers.read(rmw_bin)]\n",
374 |     "        write_policy = { 'gen': aerospike.POLICY_GEN_EQ }\n",
375 |     "        try:\n",
376 |     "            _, meta, bins = client.operate(rec_key, ops, meta={'gen': read_gen}, policy=write_policy)\n",
377 |     "        except ex.RecordGenerationError as e:\n",
378 |     "            if retryRMWCount < max_retries:\n",
379 |     "                retryRMWCount += 1\n",
380 |     "                time.sleep((2**retryRMWCount)*retry_wait_ms/1000.0)       \n",
381 |     "            else:\n",
382 |     "                return 'max retries exceeded' \n",
383 |     "        except Exception as e:\n",
384 |     "            print(\"error: {0}\".format(e), file=sys.stderr)\n",
385 |     "            exit(-1)\n",
386 |     "        else:\n",
387 |     "            done = True        \n",
388 |     "    # compare new_rmw_bin_value//2 and new gen; if different \n",
389 |     "    new_gen = meta['gen']\n",
390 |     "    if new_rmw_bin_value//2 != new_gen:  \n",
391 |     "        return 'lost writes'\n",
392 |     "    return None"
393 |    ]
394 |   },
395 |   {
396 |    "cell_type": "markdown",
397 |    "metadata": {},
398 |    "source": [
399 |     "## Test Results\n",
400 |     "Let's execute for various levels of concurrency and see the results. We expect to see no lost writes. Even when max-retries are exceeded, transaction and database integrity is preserved."
401 |    ]
402 |   },
403 |   {
404 |    "cell_type": "code",
405 |    "execution_count": 13,
406 |    "metadata": {},
407 |    "outputs": [
408 |     {
409 |      "name": "stdout",
410 |      "output_type": "stream",
411 |      "text": [
412 |       "2 threads, 10 transcations per thread:\n",
413 |       "\tThead 1 failures: {}\n",
414 |       "\tThead 0 failures: {}\n",
415 |       "3 threads, 10 transcations per thread:\n",
416 |       "\tThead 1 failures: {}\n",
417 |       "\tThead 0 failures: {}\n",
418 |       "\tThead 2 failures: {}\n",
419 |       "4 threads, 10 transcations per thread:\n",
420 |       "\tThead 0 failures: {}\n",
421 |       "\tThead 3 failures: {'max retries exceeded': 1}\n",
422 |       "\tThead 2 failures: {'max retries exceeded': 1}\n",
423 |       "\tThead 1 failures: {'max retries exceeded': 2}\n"
424 |      ]
425 |     }
426 |    ],
427 |    "source": [
428 |     "run_test(num_threads=2, rmw_fn=rmw_with_gen_check)\n",
429 |     "run_test(num_threads=3, rmw_fn=rmw_with_gen_check)\n",
430 |     "run_test(num_threads=4, rmw_fn=rmw_with_gen_check)"
431 |    ]
432 |   },
433 |   {
434 |    "cell_type": "markdown",
435 |    "metadata": {},
436 |    "source": [
437 |     "# Takeaways\n",
438 |     "In the tutorial we showed:\n",
439 |     "- the need for read-write transactions in Aerospike to use the R-M-W pattern \n",
440 |     "- how writes can be overwritten and lost in a concurrent environment if performed simply\n",
441 |     "- how the developer can ensure atomicity and isolation of a read-write transaction by using version check logic and syntax."
442 |    ]
443 |   },
444 |   {
445 |    "cell_type": "markdown",
446 |    "metadata": {},
447 |    "source": [
448 |     "# Clean up\n",
449 |     "Remove data and close connection."
450 |    ]
451 |   },
452 |   {
453 |    "cell_type": "code",
454 |    "execution_count": 14,
455 |    "metadata": {
456 |     "ExecuteTime": {
457 |      "end_time": "2020-12-29T20:49:21.100931Z",
458 |      "start_time": "2020-12-29T20:49:21.095318Z"
459 |     }
460 |    },
461 |    "outputs": [
462 |     {
463 |      "name": "stdout",
464 |      "output_type": "stream",
465 |      "text": [
466 |       "Removed tutorial data. Connection closed.\n"
467 |      ]
468 |     }
469 |    ],
470 |    "source": [
471 |     "client.truncate(namespace, tutorial_set, 0)\n",
472 |     "# Close the connection to the Aerospike cluster\n",
473 |     "client.close()\n",
474 |     "print('Removed tutorial data. Connection closed.')"
475 |    ]
476 |   },
477 |   {
478 |    "cell_type": "markdown",
479 |    "metadata": {},
480 |    "source": [
481 |     "# Further Exploration and Resources\n",
482 |     "For further exploration of transactions support in Aerospike, check out the following resources:\n",
483 |     "\n",
484 |     "- Blog posts\n",
485 |     "    - [Developers: Understanding Aerospike Transactions](https://www.aerospike.com/blog/developers-understanding-aerospike-transactions/)\n",
486 |     "    - [Twelve Do's of Consistency in Aerospike](https://www.aerospike.com/blog/twelve-dos-of-consistency-in-aerospike/)\n",
487 |     "- Video\n",
488 |     "    - [Strong Consistency in Databases. What does it actually guarantee?](https://www.aerospike.com/resources/videos/strong-consistency-in-databases-what-does-it-actually-guarantee/)"
489 |    ]
490 |   },
491 |   {
492 |    "cell_type": "markdown",
493 |    "metadata": {},
494 |    "source": [
495 |     "## Next steps\n",
496 |     "\n",
497 |     "Visit [Aerospike notebooks repo](https://github.com/aerospike-examples/interactive-notebooks) to run additional Aerospike notebooks. To run a different notebook, download the notebook from the repo to your local machine, and then click on File->Open, and select Upload."
498 |    ]
499 |   }
500 |  ],
501 |  "metadata": {
502 |   "kernelspec": {
503 |    "display_name": "Python 3",
504 |    "language": "python",
505 |    "name": "python3"
506 |   },
507 |   "language_info": {
508 |    "codemirror_mode": {
509 |     "name": "ipython",
510 |     "version": 3
511 |    },
512 |    "file_extension": ".py",
513 |    "mimetype": "text/x-python",
514 |    "name": "python",
515 |    "nbconvert_exporter": "python",
516 |    "pygments_lexer": "ipython3",
517 |    "version": "3.8.6"
518 |   },
519 |   "toc": {
520 |    "base_numbering": 1,
521 |    "nav_menu": {},
522 |    "number_sections": true,
523 |    "sideBar": true,
524 |    "skip_h1_title": false,
525 |    "title_cell": "Table of Contents",
526 |    "title_sidebar": "Contents",
527 |    "toc_cell": true,
528 |    "toc_position": {},
529 |    "toc_section_display": true,
530 |    "toc_window_display": false
531 |   }
532 |  },
533 |  "nbformat": 4,
534 |  "nbformat_minor": 4
535 | }
536 | 


--------------------------------------------------------------------------------
/notebooks/spark/.gitignore:
--------------------------------------------------------------------------------
1 | aerospike-spark-assembly*.jar


--------------------------------------------------------------------------------
/notebooks/spark/other_notebooks/AerospikeSparkH2ODemo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Aerospike Connect for Spark - H2O Tutorial for Python\n",
  8 |     "## Tested with Java 8, Spark 2.4.0, H2O 3.30.1.2, h2o_pysparkling_2.4, Python 3.7, and Aerospike Spark Connector 2.5"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "metadata": {},
 14 |    "source": [
 15 |     "### Setup\n",
 16 |     "\n",
 17 |     "Below, a seed address for your Aerospike database cluster is required\n",
 18 |     "\n",
 19 |     "Check the given namespace is available, and your feature key is located as per AS_FEATURE_KEY_PATH\n",
 20 |     "\n",
 21 |     "Finally, review https://www.aerospike.com/enterprise/download/connectors/ to ensure AEROSPIKE_SPARK_JAR_VERSION is correct"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 4,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "# IP Address or DNS name for one host in your Aerospike cluster\n",
 31 |     "AS_HOST =\"127.0.0.1\"\n",
 32 |     "# Name of one of your namespaces. Type 'show namespaces' at the aql prompt if you are not sure\n",
 33 |     "AS_NAMESPACE = \"test\" \n",
 34 |     "AS_FEATURE_KEY_PATH = \"/etc/aerospike/features.conf\"\n",
 35 |     "AEROSPIKE_SPARK_JAR_VERSION=\"2.5.0\"\n",
 36 |     "\n",
 37 |     "AS_PORT = 3000 # Usually 3000, but change here if not\n",
 38 |     "AS_CONNECTION_STRING = AS_HOST + \":\"+ str(AS_PORT)"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 5,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "# Next we locate the Spark installation - this will be found using the SPARK_HOME environment variable that you will have set \n",
 48 |     "# if you followed the repository README\n",
 49 |     "\n",
 50 |     "import findspark\n",
 51 |     "findspark.init()"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 1,
 57 |    "metadata": {},
 58 |    "outputs": [
 59 |     {
 60 |      "name": "stdout",
 61 |      "output_type": "stream",
 62 |      "text": [
 63 |       "Checking whether there is an H2O instance running at http://localhost:54321 . connected.\n"
 64 |      ]
 65 |     },
 66 |     {
 67 |      "data": {
 68 |       "text/html": [
 69 |        "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>H2O_cluster_uptime:</td>\n",
 70 |        "<td>24 days 15 hours 18 mins</td></tr>\n",
 71 |        "<tr><td>H2O_cluster_timezone:</td>\n",
 72 |        "<td>America/Los_Angeles</td></tr>\n",
 73 |        "<tr><td>H2O_data_parsing_timezone:</td>\n",
 74 |        "<td>UTC</td></tr>\n",
 75 |        "<tr><td>H2O_cluster_version:</td>\n",
 76 |        "<td>3.30.1.2</td></tr>\n",
 77 |        "<tr><td>H2O_cluster_version_age:</td>\n",
 78 |        "<td>1 month and 11 days </td></tr>\n",
 79 |        "<tr><td>H2O_cluster_name:</td>\n",
 80 |        "<td>H2O_from_python_kmatty_mnldpz</td></tr>\n",
 81 |        "<tr><td>H2O_cluster_total_nodes:</td>\n",
 82 |        "<td>1</td></tr>\n",
 83 |        "<tr><td>H2O_cluster_free_memory:</td>\n",
 84 |        "<td>3.057 Gb</td></tr>\n",
 85 |        "<tr><td>H2O_cluster_total_cores:</td>\n",
 86 |        "<td>16</td></tr>\n",
 87 |        "<tr><td>H2O_cluster_allowed_cores:</td>\n",
 88 |        "<td>16</td></tr>\n",
 89 |        "<tr><td>H2O_cluster_status:</td>\n",
 90 |        "<td>locked, healthy</td></tr>\n",
 91 |        "<tr><td>H2O_connection_url:</td>\n",
 92 |        "<td>http://localhost:54321</td></tr>\n",
 93 |        "<tr><td>H2O_connection_proxy:</td>\n",
 94 |        "<td>{\"http\": null, \"https\": null}</td></tr>\n",
 95 |        "<tr><td>H2O_internal_security:</td>\n",
 96 |        "<td>False</td></tr>\n",
 97 |        "<tr><td>H2O_API_Extensions:</td>\n",
 98 |        "<td>Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4</td></tr>\n",
 99 |        "<tr><td>Python_version:</td>\n",
100 |        "<td>3.7.5 final</td></tr></table></div>"
101 |       ],
102 |       "text/plain": [
103 |        "--------------------------  ------------------------------------------------------------------\n",
104 |        "H2O_cluster_uptime:         24 days 15 hours 18 mins\n",
105 |        "H2O_cluster_timezone:       America/Los_Angeles\n",
106 |        "H2O_data_parsing_timezone:  UTC\n",
107 |        "H2O_cluster_version:        3.30.1.2\n",
108 |        "H2O_cluster_version_age:    1 month and 11 days\n",
109 |        "H2O_cluster_name:           H2O_from_python_kmatty_mnldpz\n",
110 |        "H2O_cluster_total_nodes:    1\n",
111 |        "H2O_cluster_free_memory:    3.057 Gb\n",
112 |        "H2O_cluster_total_cores:    16\n",
113 |        "H2O_cluster_allowed_cores:  16\n",
114 |        "H2O_cluster_status:         locked, healthy\n",
115 |        "H2O_connection_url:         http://localhost:54321\n",
116 |        "H2O_connection_proxy:       {\"http\": null, \"https\": null}\n",
117 |        "H2O_internal_security:      False\n",
118 |        "H2O_API_Extensions:         Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4\n",
119 |        "Python_version:             3.7.5 final\n",
120 |        "--------------------------  ------------------------------------------------------------------"
121 |       ]
122 |      },
123 |      "metadata": {},
124 |      "output_type": "display_data"
125 |     }
126 |    ],
127 |    "source": [
128 |     "import h2o\n",
129 |     "h2o.init()"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 6,
135 |    "metadata": {},
136 |    "outputs": [
137 |     {
138 |      "name": "stdout",
139 |      "output_type": "stream",
140 |      "text": [
141 |       "aerospike-spark-assembly-2.5.0.jar already downloaded\n"
142 |      ]
143 |     }
144 |    ],
145 |    "source": [
146 |     "# Here we download the Aerospike Spark jar\n",
147 |     "import urllib\n",
148 |     "import os\n",
149 |     "\n",
150 |     "def aerospike_spark_jar_download_url(version=AEROSPIKE_SPARK_JAR_VERSION):\n",
151 |     "    DOWNLOAD_PREFIX=\"https://www.aerospike.com/enterprise/download/connectors/aerospike-spark/\"\n",
152 |     "    DOWNLOAD_SUFFIX=\"/artifact/jar\"\n",
153 |     "    AEROSPIKE_SPARK_JAR_DOWNLOAD_URL = DOWNLOAD_PREFIX+AEROSPIKE_SPARK_JAR_VERSION+DOWNLOAD_SUFFIX\n",
154 |     "    return AEROSPIKE_SPARK_JAR_DOWNLOAD_URL\n",
155 |     "\n",
156 |     "def download_aerospike_spark_jar(version=AEROSPIKE_SPARK_JAR_VERSION):\n",
157 |     "    JAR_NAME=\"aerospike-spark-assembly-\"+AEROSPIKE_SPARK_JAR_VERSION+\".jar\"\n",
158 |     "    if(not(os.path.exists(JAR_NAME))) :\n",
159 |     "        urllib.request.urlretrieve(aerospike_spark_jar_download_url(),JAR_NAME)\n",
160 |     "    else :\n",
161 |     "        print(JAR_NAME+\" already downloaded\")\n",
162 |     "    return os.path.join(os.getcwd(),JAR_NAME)\n",
163 |     "\n",
164 |     "AEROSPIKE_JAR_PATH=download_aerospike_spark_jar()\n",
165 |     "os.environ[\"PYSPARK_SUBMIT_ARGS\"] = '--jars ' + AEROSPIKE_JAR_PATH + ' pyspark-shell'"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": 10,
171 |    "metadata": {},
172 |    "outputs": [],
173 |    "source": [
174 |     "import pyspark\n",
175 |     "from pyspark.context import SparkContext\n",
176 |     "from pyspark.sql.context import SQLContext\n",
177 |     "from pyspark.sql.session import SparkSession\n",
178 |     "from pyspark.sql.types import StringType, StructField, StructType, ArrayType, IntegerType, MapType, LongType, DoubleType\n",
179 |     "from pysparkling import *"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "markdown",
184 |    "metadata": {},
185 |    "source": [
186 |     "Get a spark session object and set required Aerospike configuration properties"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "markdown",
191 |    "metadata": {},
192 |    "source": [
193 |     "Set up spark and point aerospike db to AS_HOST"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "code",
198 |    "execution_count": 11,
199 |    "metadata": {},
200 |    "outputs": [],
201 |    "source": [
202 |     "sc = SparkContext.getOrCreate()\n",
203 |     "spark = SparkSession(sc)\n",
204 |     "sqlContext = SQLContext(sc)\n",
205 |     "spark.conf.set(\"aerospike.namespace\",AS_NAMESPACE)\n",
206 |     "spark.conf.set(\"aerospike.seedhost\",AS_CONNECTION_STRING)\n",
207 |     "spark.conf.set(\"aerospike.keyPath\",AS_FEATURE_KEY_PATH )"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "code",
212 |    "execution_count": 12,
213 |    "metadata": {},
214 |    "outputs": [
215 |     {
216 |      "name": "stdout",
217 |      "output_type": "stream",
218 |      "text": [
219 |       "Connecting to H2O server at http://192.168.1.6:54321 ... successful.\n"
220 |      ]
221 |     },
222 |     {
223 |      "data": {
224 |       "text/html": [
225 |        "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>H2O_cluster_uptime:</td>\n",
226 |        "<td>22 secs</td></tr>\n",
227 |        "<tr><td>H2O_cluster_timezone:</td>\n",
228 |        "<td>America/Los_Angeles</td></tr>\n",
229 |        "<tr><td>H2O_data_parsing_timezone:</td>\n",
230 |        "<td>UTC</td></tr>\n",
231 |        "<tr><td>H2O_cluster_version:</td>\n",
232 |        "<td>3.30.1.2</td></tr>\n",
233 |        "<tr><td>H2O_cluster_version_age:</td>\n",
234 |        "<td>1 month and 11 days </td></tr>\n",
235 |        "<tr><td>H2O_cluster_name:</td>\n",
236 |        "<td>sparkling-water-kmatty_local-1602784872166</td></tr>\n",
237 |        "<tr><td>H2O_cluster_total_nodes:</td>\n",
238 |        "<td>1</td></tr>\n",
239 |        "<tr><td>H2O_cluster_free_memory:</td>\n",
240 |        "<td>794 Mb</td></tr>\n",
241 |        "<tr><td>H2O_cluster_total_cores:</td>\n",
242 |        "<td>16</td></tr>\n",
243 |        "<tr><td>H2O_cluster_allowed_cores:</td>\n",
244 |        "<td>16</td></tr>\n",
245 |        "<tr><td>H2O_cluster_status:</td>\n",
246 |        "<td>locked, healthy</td></tr>\n",
247 |        "<tr><td>H2O_connection_url:</td>\n",
248 |        "<td>http://192.168.1.6:54321</td></tr>\n",
249 |        "<tr><td>H2O_connection_proxy:</td>\n",
250 |        "<td>null</td></tr>\n",
251 |        "<tr><td>H2O_internal_security:</td>\n",
252 |        "<td>False</td></tr>\n",
253 |        "<tr><td>H2O_API_Extensions:</td>\n",
254 |        "<td>XGBoost, Algos, Amazon S3, Sparkling Water REST API Extensions, AutoML, Core V3, TargetEncoder, Core V4</td></tr>\n",
255 |        "<tr><td>Python_version:</td>\n",
256 |        "<td>3.7.5 final</td></tr></table></div>"
257 |       ],
258 |       "text/plain": [
259 |        "--------------------------  -------------------------------------------------------------------------------------------------------\n",
260 |        "H2O_cluster_uptime:         22 secs\n",
261 |        "H2O_cluster_timezone:       America/Los_Angeles\n",
262 |        "H2O_data_parsing_timezone:  UTC\n",
263 |        "H2O_cluster_version:        3.30.1.2\n",
264 |        "H2O_cluster_version_age:    1 month and 11 days\n",
265 |        "H2O_cluster_name:           sparkling-water-kmatty_local-1602784872166\n",
266 |        "H2O_cluster_total_nodes:    1\n",
267 |        "H2O_cluster_free_memory:    794 Mb\n",
268 |        "H2O_cluster_total_cores:    16\n",
269 |        "H2O_cluster_allowed_cores:  16\n",
270 |        "H2O_cluster_status:         locked, healthy\n",
271 |        "H2O_connection_url:         http://192.168.1.6:54321\n",
272 |        "H2O_connection_proxy:       null\n",
273 |        "H2O_internal_security:      False\n",
274 |        "H2O_API_Extensions:         XGBoost, Algos, Amazon S3, Sparkling Water REST API Extensions, AutoML, Core V3, TargetEncoder, Core V4\n",
275 |        "Python_version:             3.7.5 final\n",
276 |        "--------------------------  -------------------------------------------------------------------------------------------------------"
277 |       ]
278 |      },
279 |      "metadata": {},
280 |      "output_type": "display_data"
281 |     },
282 |     {
283 |      "name": "stdout",
284 |      "output_type": "stream",
285 |      "text": [
286 |       "\n",
287 |       "Sparkling Water Context:\n",
288 |       " * Sparkling Water Version: 3.30.1.2-1-2.4\n",
289 |       " * H2O name: sparkling-water-kmatty_local-1602784872166\n",
290 |       " * cluster size: 1\n",
291 |       " * list of used nodes:\n",
292 |       "  (executorId, host, port)\n",
293 |       "  ------------------------\n",
294 |       "  (0,192.168.1.6,54321)\n",
295 |       "  ------------------------\n",
296 |       "\n",
297 |       "  Open H2O Flow in browser: http://192.168.1.6:54323 (CMD + click in Mac OSX)\n",
298 |       "\n",
299 |       "    \n"
300 |      ]
301 |     }
302 |    ],
303 |    "source": [
304 |     "h2oContext = H2OContext.getOrCreate()"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "markdown",
309 |    "metadata": {},
310 |    "source": [
311 |     "## Create Sample Data and load it into Aerospike"
312 |    ]
313 |   },
314 |   {
315 |    "cell_type": "code",
316 |    "execution_count": 13,
317 |    "metadata": {},
318 |    "outputs": [
319 |     {
320 |      "name": "stdout",
321 |      "output_type": "stream",
322 |      "text": [
323 |       "Data created\n"
324 |      ]
325 |     }
326 |    ],
327 |    "source": [
328 |     "# We create age vs salary data, using three different Gaussian distributions\n",
329 |     "import numpy as np\n",
330 |     "import matplotlib.pyplot as plt\n",
331 |     "import pandas as pd\n",
332 |     "import math\n",
333 |     "\n",
334 |     "# Create covariance matrix from std devs + correlation\n",
335 |     "def covariance_matrix(std_dev_1,std_dev_2,correlation):\n",
336 |     "    return [[std_dev_1 ** 2, correlation * std_dev_1 * std_dev_2], \n",
337 |     "           [correlation * std_dev_1 * std_dev_2, std_dev_2 ** 2]]\n",
338 |     "\n",
339 |     "# Return a bivariate sample given means/std dev/correlation\n",
340 |     "def age_salary_sample(distribution_params,sample_size):\n",
341 |     "    mean = [distribution_params[\"age_mean\"], distribution_params[\"salary_mean\"]]\n",
342 |     "    cov = covariance_matrix(distribution_params[\"age_std_dev\"],distribution_params[\"salary_std_dev\"],\n",
343 |     "                            distribution_params[\"age_salary_correlation\"])\n",
344 |     "    return np.random.multivariate_normal(mean, cov, sample_size).T\n",
345 |     "\n",
346 |     "# Define the characteristics of our age/salary distribution\n",
347 |     "age_salary_distribution_1 = {\"age_mean\":25,\"salary_mean\":50000,\n",
348 |     "                             \"age_std_dev\":1,\"salary_std_dev\":5000,\"age_salary_correlation\":0.3}\n",
349 |     "\n",
350 |     "age_salary_distribution_2 = {\"age_mean\":45,\"salary_mean\":80000,\n",
351 |     "                             \"age_std_dev\":4,\"salary_std_dev\":10000,\"age_salary_correlation\":0.7}\n",
352 |     "\n",
353 |     "age_salary_distribution_3 = {\"age_mean\":35,\"salary_mean\":70000,\n",
354 |     "                             \"age_std_dev\":2,\"salary_std_dev\":9000,\"age_salary_correlation\":0.1}\n",
355 |     "\n",
356 |     "distribution_data = [age_salary_distribution_1,age_salary_distribution_2,age_salary_distribution_3]\n",
357 |     "\n",
358 |     "# Sample age/salary data for each distributions\n",
359 |     "group_1_ages,group_1_salaries = age_salary_sample(age_salary_distribution_1,sample_size=100)\n",
360 |     "group_2_ages,group_2_salaries = age_salary_sample(age_salary_distribution_2,sample_size=120)\n",
361 |     "group_3_ages,group_3_salaries = age_salary_sample(age_salary_distribution_3,sample_size=80)\n",
362 |     "\n",
363 |     "ages=np.concatenate([group_1_ages,group_2_ages,group_3_ages])\n",
364 |     "salaries=np.concatenate([group_1_salaries,group_2_salaries,group_3_salaries])\n",
365 |     "\n",
366 |     "print(\"Data created\")"
367 |    ]
368 |   },
369 |   {
370 |    "cell_type": "code",
371 |    "execution_count": 14,
372 |    "metadata": {},
373 |    "outputs": [],
374 |    "source": [
375 |     "# Turn the above records into a Data Frame\n",
376 |     "# First of all, create an array of arrays\n",
377 |     "inputBuf = []\n",
378 |     "\n",
379 |     "for  i in range(0, len(ages)) :\n",
380 |     "     id = i + 1 # Avoid counting from zero\n",
381 |     "     name = \"Individual: {:03d}\".format(id)\n",
382 |     "     # Note we need to make sure values are typed correctly\n",
383 |     "     # salary will have type numpy.float64 - if it is not cast as below, an error will be thrown\n",
384 |     "     age = float(ages[i])\n",
385 |     "     salary = int(salaries[i])\n",
386 |     "     inputBuf.append((id, name,age,salary))\n",
387 |     "\n",
388 |     "# Convert to an RDD \n",
389 |     "inputRDD = spark.sparkContext.parallelize(inputBuf)\n",
390 |     "       \n",
391 |     "# Convert to a data frame using a schema\n",
392 |     "schema = StructType([\n",
393 |     "    StructField(\"id\", IntegerType(), True),\n",
394 |     "    StructField(\"name\", StringType(), True),\n",
395 |     "    StructField(\"age\", DoubleType(), True),\n",
396 |     "    StructField(\"salary\",IntegerType(), True)\n",
397 |     "])\n",
398 |     "\n",
399 |     "inputDF=spark.createDataFrame(inputRDD,schema)\n",
400 |     "\n",
401 |     "#Write the data frame to Aerospike, the id field is used as the primary key\n",
402 |     "inputDF \\\n",
403 |     ".write \\\n",
404 |     ".mode('overwrite') \\\n",
405 |     ".format(\"com.aerospike.spark.sql\")  \\\n",
406 |     ".option(\"aerospike.set\", \"salary_data\")\\\n",
407 |     ".option(\"aerospike.updateByKey\", \"id\") \\\n",
408 |     ".save()"
409 |    ]
410 |   },
411 |   {
412 |    "cell_type": "markdown",
413 |    "metadata": {},
414 |    "source": [
415 |     "## Step 1: Load data into a DataFrame using user specified schema "
416 |    ]
417 |   },
418 |   {
419 |    "cell_type": "code",
420 |    "execution_count": 15,
421 |    "metadata": {},
422 |    "outputs": [
423 |     {
424 |      "name": "stdout",
425 |      "output_type": "stream",
426 |      "text": [
427 |       "+---+---------------+-----------------+------+\n",
428 |       "| id|           name|              age|salary|\n",
429 |       "+---+---------------+-----------------+------+\n",
430 |       "|239|Individual: 239|31.83300818606226| 74975|\n",
431 |       "|101|Individual: 101|43.01299505505053| 73747|\n",
432 |       "|194|Individual: 194|40.82834439786344| 63853|\n",
433 |       "| 31|Individual: 031|25.38038331484876| 52375|\n",
434 |       "|139|Individual: 139|47.62537494799876| 80100|\n",
435 |       "+---+---------------+-----------------+------+\n",
436 |       "only showing top 5 rows\n",
437 |       "\n"
438 |      ]
439 |     }
440 |    ],
441 |    "source": [
442 |     "# If we explicitly set the schema, using the previously created schema object\n",
443 |     "# we effectively type the rows in the Data Frame\n",
444 |     "\n",
445 |     "loadedDFWithSchema=spark \\\n",
446 |     ".read \\\n",
447 |     ".format(\"com.aerospike.spark.sql\") \\\n",
448 |     ".schema(schema) \\\n",
449 |     ".option(\"aerospike.set\", \"salary_data\").load()\n",
450 |     "\n",
451 |     "loadedDFWithSchema.show(5)"
452 |    ]
453 |   },
454 |   {
455 |    "cell_type": "markdown",
456 |    "metadata": {},
457 |    "source": [
458 |     "## Step 2: Load Data from Spark DataFrame into H2OFrame"
459 |    ]
460 |   },
461 |   {
462 |    "cell_type": "code",
463 |    "execution_count": null,
464 |    "metadata": {},
465 |    "outputs": [],
466 |    "source": [
467 |     "#Save into an H2OFrame using a Key. A key is an entry in the H2O Key value store that maps to an object in H2O.\n",
468 |     "loadedDFWithSchema.write.format(\"h2o\").option(\"key\", \"key_one\").save()"
469 |    ]
470 |   },
471 |   {
472 |    "cell_type": "code",
473 |    "execution_count": 20,
474 |    "metadata": {},
475 |    "outputs": [],
476 |    "source": [
477 |     "#List the current contents of the H2O cluster, you can use the h2o.ls.\n",
478 |     "h2o.ls()\n",
479 |     "\n",
480 |     "h2oframe = h2o.get_frame(\"key_one\")"
481 |    ]
482 |   },
483 |   {
484 |    "cell_type": "markdown",
485 |    "metadata": {},
486 |    "source": [
487 |     "## Step 3: Create a model using H2O libraries"
488 |    ]
489 |   },
490 |   {
491 |    "cell_type": "code",
492 |    "execution_count": 21,
493 |    "metadata": {},
494 |    "outputs": [
495 |     {
496 |      "data": {
497 |       "text/html": [
498 |        "<table>\n",
499 |        "<thead>\n",
500 |        "<tr><th>       </th><th>id               </th><th>name           </th><th>age               </th><th>salary            </th></tr>\n",
501 |        "</thead>\n",
502 |        "<tbody>\n",
503 |        "<tr><td>type   </td><td>int              </td><td>string         </td><td>real              </td><td>int               </td></tr>\n",
504 |        "<tr><td>mins   </td><td>1.0              </td><td>NaN            </td><td>22.405590847347618</td><td>37748.0           </td></tr>\n",
505 |        "<tr><td>mean   </td><td>150.5            </td><td>NaN            </td><td>35.593540086982685</td><td>67127.00666666667 </td></tr>\n",
506 |        "<tr><td>maxs   </td><td>300.0            </td><td>NaN            </td><td>60.312589253321136</td><td>107261.0          </td></tr>\n",
507 |        "<tr><td>sigma  </td><td>86.74675786448738</td><td>NaN            </td><td>8.788476744518679 </td><td>15177.875046143428</td></tr>\n",
508 |        "<tr><td>zeros  </td><td>0                </td><td>0              </td><td>0                 </td><td>0                 </td></tr>\n",
509 |        "<tr><td>missing</td><td>0                </td><td>0              </td><td>0                 </td><td>0                 </td></tr>\n",
510 |        "<tr><td>0      </td><td>239.0            </td><td>Individual: 239</td><td>31.83300818606226 </td><td>74975.0           </td></tr>\n",
511 |        "<tr><td>1      </td><td>101.0            </td><td>Individual: 101</td><td>43.01299505505053 </td><td>73747.0           </td></tr>\n",
512 |        "<tr><td>2      </td><td>194.0            </td><td>Individual: 194</td><td>40.82834439786344 </td><td>63853.0           </td></tr>\n",
513 |        "<tr><td>3      </td><td>31.0             </td><td>Individual: 031</td><td>25.38038331484876 </td><td>52375.0           </td></tr>\n",
514 |        "<tr><td>4      </td><td>139.0            </td><td>Individual: 139</td><td>47.62537494799876 </td><td>80100.0           </td></tr>\n",
515 |        "<tr><td>5      </td><td>14.0             </td><td>Individual: 014</td><td>25.41226437694945 </td><td>50203.0           </td></tr>\n",
516 |        "<tr><td>6      </td><td>142.0            </td><td>Individual: 142</td><td>35.49930947093095 </td><td>66239.0           </td></tr>\n",
517 |        "<tr><td>7      </td><td>272.0            </td><td>Individual: 272</td><td>32.59037083790934 </td><td>51935.0           </td></tr>\n",
518 |        "<tr><td>8      </td><td>76.0             </td><td>Individual: 076</td><td>25.066279193638437</td><td>50236.0           </td></tr>\n",
519 |        "<tr><td>9      </td><td>147.0            </td><td>Individual: 147</td><td>44.565530108647465</td><td>77111.0           </td></tr>\n",
520 |        "</tbody>\n",
521 |        "</table>"
522 |       ]
523 |      },
524 |      "metadata": {},
525 |      "output_type": "display_data"
526 |     }
527 |    ],
528 |    "source": [
529 |     "h2oframe.summary()"
530 |    ]
531 |   },
532 |   {
533 |    "cell_type": "code",
534 |    "execution_count": null,
535 |    "metadata": {},
536 |    "outputs": [],
537 |    "source": []
538 |   }
539 |  ],
540 |  "metadata": {
541 |   "kernelspec": {
542 |    "display_name": "Python 3",
543 |    "language": "python",
544 |    "name": "python3"
545 |   },
546 |   "language_info": {
547 |    "codemirror_mode": {
548 |     "name": "ipython",
549 |     "version": 3
550 |    },
551 |    "file_extension": ".py",
552 |    "mimetype": "text/x-python",
553 |    "name": "python",
554 |    "nbconvert_exporter": "python",
555 |    "pygments_lexer": "ipython3",
556 |    "version": "3.7.5"
557 |   }
558 |  },
559 |  "nbformat": 4,
560 |  "nbformat_minor": 2
561 | }
562 | 


--------------------------------------------------------------------------------
/notebooks/spark/other_notebooks/AerospikeSparkPythonParquet.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Convert Aerospike data into a Parquet file using Spark\n",
  8 |     "## Tested with Spark connector 3.1.0, Java 8, Apache Spark 3.0.2, Python 3.7"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "metadata": {},
 14 |    "source": [
 15 |     "### <font color=blue>The purpose of this notebook is to walk you through how to convert Aerospike data into a Parquet file using [Spark APIs](https://spark.apache.org/docs/latest/sql-data-sources-parquet.html). [Apache Parquet](https://parquet.apache.org/) is a columnar storage format that is extensively used as a format of choice for analysis in the big data ecosystem. </font>"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 1,
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "# IP Address or DNS name for one host in your Aerospike cluster\n",
 25 |     "AS_HOST =\"127.0.0.1\"\n",
 26 |     "# Name of one of your namespaces. Type 'show namespaces' at the aql prompt if you are not sure\n",
 27 |     "AS_NAMESPACE = \"testNameSpace\" \n",
 28 |     "AEROSPIKE_SPARK_JAR_VERSION=\"3.1.0\"\n",
 29 |     "AS_PORT = 3000 # Usually 3000, but change here if not\n",
 30 |     "AS_CONNECTION_STRING = AS_HOST + \":\"+ str(AS_PORT)"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 2,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "# Next we locate the Spark installation - this will be found using the SPARK_HOME \n",
 40 |     "# environment variable that you will have set \n",
 41 |     "\n",
 42 |     "import findspark\n",
 43 |     "findspark.init()"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "markdown",
 48 |    "metadata": {},
 49 |    "source": [
 50 |     "#### Please download the Aeropsike Connect for Spark from the [download page](https://enterprise.aerospike.com/enterprise/download/connectors/aerospike-spark/notes.html) and make sure you check the [interoperability page]( https://docs.aerospike.com/docs/connect/processing/spark/installation.html#prerequisites-for-using-the-spark-connector ).\n",
 51 |     "Set `AEROSPIKE_JAR_PATH` with path to the downloaded binary"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 3,
 57 |    "metadata": {},
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "import os \n",
 61 |     "AEROSPIKE_JAR_PATH= \"aerospike-spark-assembly-\"+AEROSPIKE_SPARK_JAR_VERSION+\".jar\"\n",
 62 |     "os.environ[\"PYSPARK_SUBMIT_ARGS\"] = '--jars ' + AEROSPIKE_JAR_PATH + ' pyspark-shell'"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 4,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "import pyspark\n",
 72 |     "from pyspark.context import SparkContext\n",
 73 |     "from pyspark.sql.context import SQLContext\n",
 74 |     "from pyspark.sql.session import SparkSession\n",
 75 |     "from pyspark.sql.types import StringType, StructField, StructType, ArrayType, IntegerType, MapType, LongType, DoubleType"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "markdown",
 80 |    "metadata": {},
 81 |    "source": [
 82 |     "### Configure Aerospike properties in the Spark Session object. Please visit [Configuring Aerospike Connect for Spark](https://docs.aerospike.com/docs/connect/processing/spark/configuration.html) for more information about the properties used on this page."
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": 5,
 88 |    "metadata": {},
 89 |    "outputs": [],
 90 |    "source": [
 91 |     "sc = SparkContext.getOrCreate()\n",
 92 |     "conf=sc._conf.setAll([(\"aerospike.namespace\",AS_NAMESPACE),(\"aerospike.seedhost\",AS_CONNECTION_STRING)])\n",
 93 |     "sc.stop()\n",
 94 |     "sc = pyspark.SparkContext(conf=conf)\n",
 95 |     "spark = SparkSession(sc)\n",
 96 |     "sqlContext = SQLContext(sc)"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "metadata": {},
102 |    "source": [
103 |     "### Load data from Aerospike into a Spark DataFrame"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 15,
109 |    "metadata": {},
110 |    "outputs": [
111 |     {
112 |      "name": "stdout",
113 |      "output_type": "stream",
114 |      "text": [
115 |       "+--------------+--------------------+---------+------------+-------+------------+--------------+-----------+-----------+--------------+------------+-----------+-----------+-------------+------------+--------------+-----------+----------+\n",
116 |       "|         __key|            __digest| __expiry|__generation|  __ttl|drate_100Kl7|conf_rate_100K|probable_dd|d_rate_100K|     state_ter|total_deaths|total_cases|d_in_last_7|confirm_cases|crate_100Kl7|case_last_week|pbble_cases|confirm_dd|\n",
117 |       "+--------------+--------------------+---------+------------+-------+------------+--------------+-----------+-----------+--------------+------------+-----------+-----------+-------------+------------+--------------+-----------+----------+\n",
118 |       "|Virgin Islands|[2D 40 5A 16 9B 9...|377621369|           2|2591982|         0.3|        1342.0|          0|       21.0|Virgin Islands|          23|       1405|          2|            0|         3.7|            27|          0|         0|\n",
119 |       "|North Carolina|[83 70 D3 0C A3 2...|377621369|           2|2591982|         0.3|        2825.0|         94|       44.0|North Carolina|        4607|     293339|        224|       280213|        22.9|         16647|      13126|      4513|\n",
120 |       "|       Indiana|[91 60 2C F4 F4 4...|377621369|           2|2591982|         0.6|        3144.0|        246|       69.0|       Indiana|        4629|     210374|        265|            0|        60.3|         28266|          0|      4383|\n",
121 |       "|      Oklahoma|[EF 70 A8 4C 85 0...|377621369|           2|2591982|         0.4|        3720.0|         43|       36.0|      Oklahoma|        1450|     146692|         98|       124671|        58.5|         16151|      22021|      1407|\n",
122 |       "|      Missouri|[0A 91 83 C6 45 D...|377621369|           2|2591982|         0.3|        3415.0|          0|       51.0|      Missouri|        3153|     209197|        127|            0|        55.2|         23662|          0|         0|\n",
123 |       "+--------------+--------------------+---------+------------+-------+------------+--------------+-----------+-----------+--------------+------------+-----------+-----------+-------------+------------+--------------+-----------+----------+\n",
124 |       "only showing top 5 rows\n",
125 |       "\n"
126 |      ]
127 |     }
128 |    ],
129 |    "source": [
130 |     "#We will not specify the schema here, but rather use the schema inference capability of the Spark connector. \n",
131 |     "as_df=spark \\\n",
132 |     ".read \\\n",
133 |     ".format(\"aerospike\") \\\n",
134 |     ".option(\"aerospike.set\", \"covid_stats\") \\\n",
135 |     ".option(\"aerospike.sendKey\", \"true\") \\\n",
136 |     ".load() \n",
137 |     "as_df.show(5)"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": 21,
143 |    "metadata": {},
144 |    "outputs": [
145 |     {
146 |      "name": "stdout",
147 |      "output_type": "stream",
148 |      "text": [
149 |       "root\n",
150 |       " |-- __key: string (nullable = true)\n",
151 |       " |-- __digest: binary (nullable = true)\n",
152 |       " |-- __expiry: integer (nullable = false)\n",
153 |       " |-- __generation: integer (nullable = false)\n",
154 |       " |-- __ttl: integer (nullable = false)\n",
155 |       " |-- drate_100Kl7: double (nullable = true)\n",
156 |       " |-- conf_rate_100K: double (nullable = true)\n",
157 |       " |-- probable_dd: long (nullable = true)\n",
158 |       " |-- d_rate_100K: double (nullable = true)\n",
159 |       " |-- state_ter: string (nullable = true)\n",
160 |       " |-- total_deaths: long (nullable = true)\n",
161 |       " |-- total_cases: long (nullable = true)\n",
162 |       " |-- d_in_last_7: long (nullable = true)\n",
163 |       " |-- confirm_cases: long (nullable = true)\n",
164 |       " |-- crate_100Kl7: double (nullable = true)\n",
165 |       " |-- case_last_week: long (nullable = true)\n",
166 |       " |-- pbble_cases: long (nullable = true)\n",
167 |       " |-- confirm_dd: long (nullable = true)\n",
168 |       "\n"
169 |      ]
170 |     }
171 |    ],
172 |    "source": [
173 |     "as_df.printSchema()"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "markdown",
178 |    "metadata": {},
179 |    "source": [
180 |     "### Dump the DataFrame into a parquet file in your local FS, HDFS, or S3"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": 17,
186 |    "metadata": {},
187 |    "outputs": [],
188 |    "source": [
189 |     "as_df.write.parquet(\"proto.parquet\")"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "markdown",
194 |    "metadata": {},
195 |    "source": [
196 |     "#### Notice that a directory \"proto.parquet\" is created in your current directory with a bunch of files"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "markdown",
201 |    "metadata": {},
202 |    "source": [
203 |     "### Read the parquet file from your data store for further analysis"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "code",
208 |    "execution_count": 22,
209 |    "metadata": {},
210 |    "outputs": [],
211 |    "source": [
212 |     "#Read in the parquet file created above\n",
213 |     "#Parquet files are self-describing so the schema is preserved\n",
214 |     "#The result of loading a Parquet file is also a DataFrame\n",
215 |     "parquetFileDF = spark.read.parquet(\"proto.parquet\")\n",
216 |     "\n",
217 |     "#Parquet files can also be used to create a temporary view and then used in SQL statements\n",
218 |     "parquetFileDF.createOrReplaceTempView(\"parquetFile\")"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "markdown",
223 |    "metadata": {},
224 |    "source": [
225 |     "### Analyze data"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": 23,
231 |    "metadata": {},
232 |    "outputs": [
233 |     {
234 |      "name": "stdout",
235 |      "output_type": "stream",
236 |      "text": [
237 |       "+--------------------+------------+\n",
238 |       "|              states|covid_deaths|\n",
239 |       "+--------------------+------------+\n",
240 |       "|Federated States ...|           0|\n",
241 |       "|Republic of Marsh...|           0|\n",
242 |       "|Northern Mariana ...|           2|\n",
243 |       "|District of Columbia|         654|\n",
244 |       "|      North Carolina|        4607|\n",
245 |       "|      Virgin Islands|          23|\n",
246 |       "|      American Samoa|           0|\n",
247 |       "|      South Carolina|        4036|\n",
248 |       "|       New Hampshire|         489|\n",
249 |       "|       West Virginia|         502|\n",
250 |       "|       Massachusetts|       10131|\n",
251 |       "|       New York City|       24086|\n",
252 |       "|          New Jersey|       16429|\n",
253 |       "|                Guam|          88|\n",
254 |       "|        Pennsylvania|        9020|\n",
255 |       "|        Rhode Island|        1224|\n",
256 |       "|        North Dakota|         639|\n",
257 |       "|             Arizona|        6164|\n",
258 |       "|          California|       17963|\n",
259 |       "|               Idaho|         686|\n",
260 |       "+--------------------+------------+\n",
261 |       "only showing top 20 rows\n",
262 |       "\n"
263 |      ]
264 |     }
265 |    ],
266 |    "source": [
267 |     "namesDF = spark.sql(\"SELECT state_ter as states, total_deaths as covid_deaths FROM parquetFile\").show()"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "code",
272 |    "execution_count": 26,
273 |    "metadata": {},
274 |    "outputs": [
275 |     {
276 |      "name": "stdout",
277 |      "output_type": "stream",
278 |      "text": [
279 |       "+--------------+\n",
280 |       "|     hot_zones|\n",
281 |       "+--------------+\n",
282 |       "|North Carolina|\n",
283 |       "| Massachusetts|\n",
284 |       "|    New Jersey|\n",
285 |       "|  Pennsylvania|\n",
286 |       "|       Arizona|\n",
287 |       "|    California|\n",
288 |       "|       Georgia|\n",
289 |       "|     Tennessee|\n",
290 |       "|     Wisconsin|\n",
291 |       "|     Minnesota|\n",
292 |       "|      Colorado|\n",
293 |       "|      Kentucky|\n",
294 |       "|      Illinois|\n",
295 |       "|      Virginia|\n",
296 |       "|      Missouri|\n",
297 |       "|      New York|\n",
298 |       "|      Nebraska|\n",
299 |       "|      Oklahoma|\n",
300 |       "|      Michigan|\n",
301 |       "|       Florida|\n",
302 |       "+--------------+\n",
303 |       "only showing top 20 rows\n",
304 |       "\n"
305 |      ]
306 |     }
307 |    ],
308 |    "source": [
309 |     "namesDF = spark.sql(\"SELECT state_ter as hot_zones FROM parquetFile where case_last_week > 10000\").show()"
310 |    ]
311 |   },
312 |   {
313 |    "cell_type": "code",
314 |    "execution_count": null,
315 |    "metadata": {},
316 |    "outputs": [],
317 |    "source": []
318 |   }
319 |  ],
320 |  "metadata": {
321 |   "kernelspec": {
322 |    "display_name": "Python 3",
323 |    "language": "python",
324 |    "name": "python3"
325 |   },
326 |   "language_info": {
327 |    "codemirror_mode": {
328 |     "name": "ipython",
329 |     "version": 3
330 |    },
331 |    "file_extension": ".py",
332 |    "mimetype": "text/x-python",
333 |    "name": "python",
334 |    "nbconvert_exporter": "python",
335 |    "pygments_lexer": "ipython3",
336 |    "version": "3.7.5"
337 |   }
338 |  },
339 |  "nbformat": 4,
340 |  "nbformat_minor": 2
341 | }
342 | 


--------------------------------------------------------------------------------
/notebooks/spark/other_notebooks/AerospikeSparkSQLSyntaxDemo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Aerospike Connect for Spark - SQL Syntax Tutorial for Python\n",
  8 |     "## Tested with Java 8, Spark 2.4.0, Python 3.7, and Aerospike Spark Connector 2.5"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "metadata": {},
 14 |    "source": [
 15 |     "### Setup\n",
 16 |     "\n",
 17 |     "Below, a seed address for your Aerospike database cluster is required\n",
 18 |     "\n",
 19 |     "Check the given namespace is available, and your feature key is located as per AS_FEATURE_KEY_PATH\n",
 20 |     "\n",
 21 |     "Finally, review https://www.aerospike.com/enterprise/download/connectors/ to ensure AEROSPIKE_SPARK_JAR_VERSION is correct"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 1,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "# IP Address or DNS name for one host in your Aerospike cluster\n",
 31 |     "AS_HOST =\"127.0.0.1\"\n",
 32 |     "# Name of one of your namespaces. Type 'show namespaces' at the aql prompt if you are not sure\n",
 33 |     "AS_NAMESPACE = \"test\" \n",
 34 |     "AS_FEATURE_KEY_PATH = \"/etc/aerospike/features.conf\"\n",
 35 |     "AEROSPIKE_SPARK_JAR_VERSION=\"2.5.0\"\n",
 36 |     "\n",
 37 |     "AS_PORT = 3000 # Usually 3000, but change here if not\n",
 38 |     "AS_CONNECTION_STRING = AS_HOST + \":\"+ str(AS_PORT)"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 2,
 44 |    "metadata": {},
 45 |    "outputs": [],
 46 |    "source": [
 47 |     "# Next we locate the Spark installation - this will be found using the SPARK_HOME environment variable that you will have set \n",
 48 |     "# if you followed the repository README\n",
 49 |     "\n",
 50 |     "import findspark\n",
 51 |     "findspark.init()"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 3,
 57 |    "metadata": {},
 58 |    "outputs": [
 59 |     {
 60 |      "name": "stdout",
 61 |      "output_type": "stream",
 62 |      "text": [
 63 |       "aerospike-spark-assembly-2.5.0.jar already downloaded\n"
 64 |      ]
 65 |     }
 66 |    ],
 67 |    "source": [
 68 |     "# Here we download the Aerospike Spark jar\n",
 69 |     "import urllib\n",
 70 |     "import os\n",
 71 |     "\n",
 72 |     "def aerospike_spark_jar_download_url(version=AEROSPIKE_SPARK_JAR_VERSION):\n",
 73 |     "    DOWNLOAD_PREFIX=\"https://www.aerospike.com/enterprise/download/connectors/aerospike-spark/\"\n",
 74 |     "    DOWNLOAD_SUFFIX=\"/artifact/jar\"\n",
 75 |     "    AEROSPIKE_SPARK_JAR_DOWNLOAD_URL = DOWNLOAD_PREFIX+AEROSPIKE_SPARK_JAR_VERSION+DOWNLOAD_SUFFIX\n",
 76 |     "    return AEROSPIKE_SPARK_JAR_DOWNLOAD_URL\n",
 77 |     "\n",
 78 |     "def download_aerospike_spark_jar(version=AEROSPIKE_SPARK_JAR_VERSION):\n",
 79 |     "    JAR_NAME=\"aerospike-spark-assembly-\"+AEROSPIKE_SPARK_JAR_VERSION+\".jar\"\n",
 80 |     "    if(not(os.path.exists(JAR_NAME))) :\n",
 81 |     "        urllib.request.urlretrieve(aerospike_spark_jar_download_url(),JAR_NAME)\n",
 82 |     "    else :\n",
 83 |     "        print(JAR_NAME+\" already downloaded\")\n",
 84 |     "    return os.path.join(os.getcwd(),JAR_NAME)\n",
 85 |     "\n",
 86 |     "AEROSPIKE_JAR_PATH=download_aerospike_spark_jar()\n",
 87 |     "os.environ[\"PYSPARK_SUBMIT_ARGS\"] = '--jars ' + AEROSPIKE_JAR_PATH + ' pyspark-shell'"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": 4,
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "import pyspark\n",
 97 |     "from pyspark.context import SparkContext\n",
 98 |     "from pyspark.sql.context import SQLContext\n",
 99 |     "from pyspark.sql.session import SparkSession\n",
100 |     "from pyspark.sql.types import StringType, StructField, StructType, ArrayType, IntegerType, MapType, LongType, DoubleType"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "markdown",
105 |    "metadata": {},
106 |    "source": [
107 |     "Get a spark session object and set required Aerospike configuration properties"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {},
113 |    "source": [
114 |     "Set up spark and point aerospike db to AS_HOST"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "code",
119 |    "execution_count": 5,
120 |    "metadata": {},
121 |    "outputs": [],
122 |    "source": [
123 |     "sc = SparkContext.getOrCreate()\n",
124 |     "spark = SparkSession(sc)\n",
125 |     "sqlContext = SQLContext(sc)\n",
126 |     "spark.conf.set(\"aerospike.namespace\",AS_NAMESPACE)\n",
127 |     "spark.conf.set(\"aerospike.seedhost\",AS_CONNECTION_STRING)\n",
128 |     "spark.conf.set(\"aerospike.keyPath\",AS_FEATURE_KEY_PATH )"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "markdown",
133 |    "metadata": {},
134 |    "source": [
135 |     "## Create Sample Data and load it into Aerospike"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": 6,
141 |    "metadata": {},
142 |    "outputs": [
143 |     {
144 |      "name": "stdout",
145 |      "output_type": "stream",
146 |      "text": [
147 |       "Data created\n"
148 |      ]
149 |     }
150 |    ],
151 |    "source": [
152 |     "# We create age vs salary data, using three different Gaussian distributions\n",
153 |     "import numpy as np\n",
154 |     "import matplotlib.pyplot as plt\n",
155 |     "import pandas as pd\n",
156 |     "import math\n",
157 |     "\n",
158 |     "# Create covariance matrix from std devs + correlation\n",
159 |     "def covariance_matrix(std_dev_1,std_dev_2,correlation):\n",
160 |     "    return [[std_dev_1 ** 2, correlation * std_dev_1 * std_dev_2], \n",
161 |     "           [correlation * std_dev_1 * std_dev_2, std_dev_2 ** 2]]\n",
162 |     "\n",
163 |     "# Return a bivariate sample given means/std dev/correlation\n",
164 |     "def age_salary_sample(distribution_params,sample_size):\n",
165 |     "    mean = [distribution_params[\"age_mean\"], distribution_params[\"salary_mean\"]]\n",
166 |     "    cov = covariance_matrix(distribution_params[\"age_std_dev\"],distribution_params[\"salary_std_dev\"],\n",
167 |     "                            distribution_params[\"age_salary_correlation\"])\n",
168 |     "    return np.random.multivariate_normal(mean, cov, sample_size).T\n",
169 |     "\n",
170 |     "# Define the characteristics of our age/salary distribution\n",
171 |     "age_salary_distribution_1 = {\"age_mean\":25,\"salary_mean\":50000,\n",
172 |     "                             \"age_std_dev\":1,\"salary_std_dev\":5000,\"age_salary_correlation\":0.3}\n",
173 |     "\n",
174 |     "age_salary_distribution_2 = {\"age_mean\":45,\"salary_mean\":80000,\n",
175 |     "                             \"age_std_dev\":4,\"salary_std_dev\":10000,\"age_salary_correlation\":0.7}\n",
176 |     "\n",
177 |     "age_salary_distribution_3 = {\"age_mean\":35,\"salary_mean\":70000,\n",
178 |     "                             \"age_std_dev\":2,\"salary_std_dev\":9000,\"age_salary_correlation\":0.1}\n",
179 |     "\n",
180 |     "distribution_data = [age_salary_distribution_1,age_salary_distribution_2,age_salary_distribution_3]\n",
181 |     "\n",
182 |     "# Sample age/salary data for each distributions\n",
183 |     "group_1_ages,group_1_salaries = age_salary_sample(age_salary_distribution_1,sample_size=100)\n",
184 |     "group_2_ages,group_2_salaries = age_salary_sample(age_salary_distribution_2,sample_size=120)\n",
185 |     "group_3_ages,group_3_salaries = age_salary_sample(age_salary_distribution_3,sample_size=80)\n",
186 |     "\n",
187 |     "ages=np.concatenate([group_1_ages,group_2_ages,group_3_ages])\n",
188 |     "salaries=np.concatenate([group_1_salaries,group_2_salaries,group_3_salaries])\n",
189 |     "\n",
190 |     "print(\"Data created\")"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "code",
195 |    "execution_count": 7,
196 |    "metadata": {},
197 |    "outputs": [],
198 |    "source": [
199 |     "# Turn the above records into a Data Frame\n",
200 |     "# First of all, create an array of arrays\n",
201 |     "inputBuf = []\n",
202 |     "\n",
203 |     "for  i in range(0, len(ages)) :\n",
204 |     "     id = i + 1 # Avoid counting from zero\n",
205 |     "     name = \"Individual: {:03d}\".format(id)\n",
206 |     "     # Note we need to make sure values are typed correctly\n",
207 |     "     # salary will have type numpy.float64 - if it is not cast as below, an error will be thrown\n",
208 |     "     age = float(ages[i])\n",
209 |     "     salary = int(salaries[i])\n",
210 |     "     inputBuf.append((id, name,age,salary))\n",
211 |     "\n",
212 |     "# Convert to an RDD \n",
213 |     "inputRDD = spark.sparkContext.parallelize(inputBuf)\n",
214 |     "       \n",
215 |     "# Convert to a data frame using a schema\n",
216 |     "schema = StructType([\n",
217 |     "    StructField(\"id\", IntegerType(), True),\n",
218 |     "    StructField(\"name\", StringType(), True),\n",
219 |     "    StructField(\"age\", DoubleType(), True),\n",
220 |     "    StructField(\"salary\",IntegerType(), True)\n",
221 |     "])\n",
222 |     "\n",
223 |     "inputDF=spark.createDataFrame(inputRDD,schema)\n",
224 |     "\n",
225 |     "#Write the data frame to Aerospike, the id field is used as the primary key\n",
226 |     "inputDF \\\n",
227 |     ".write \\\n",
228 |     ".mode('overwrite') \\\n",
229 |     ".format(\"com.aerospike.spark.sql\")  \\\n",
230 |     ".option(\"aerospike.set\", \"salary_data\")\\\n",
231 |     ".option(\"aerospike.updateByKey\", \"id\") \\\n",
232 |     ".save()"
233 |    ]
234 |   },
235 |   {
236 |    "cell_type": "markdown",
237 |    "metadata": {},
238 |    "source": [
239 |     "## Step 1: Load data into a DataFrame using user specified schema "
240 |    ]
241 |   },
242 |   {
243 |    "cell_type": "code",
244 |    "execution_count": 8,
245 |    "metadata": {},
246 |    "outputs": [
247 |     {
248 |      "name": "stdout",
249 |      "output_type": "stream",
250 |      "text": [
251 |       "+---+---------------+------------------+------+\n",
252 |       "| id|           name|               age|salary|\n",
253 |       "+---+---------------+------------------+------+\n",
254 |       "|239|Individual: 239|35.045714151242784| 64851|\n",
255 |       "|101|Individual: 101| 48.94863100225242| 92233|\n",
256 |       "|194|Individual: 194| 43.87904465057981| 76336|\n",
257 |       "| 31|Individual: 031|25.419955216543517| 51542|\n",
258 |       "|139|Individual: 139|39.658710069583876| 80585|\n",
259 |       "+---+---------------+------------------+------+\n",
260 |       "only showing top 5 rows\n",
261 |       "\n"
262 |      ]
263 |     },
264 |     {
265 |      "data": {
266 |       "text/plain": [
267 |        "300"
268 |       ]
269 |      },
270 |      "execution_count": 8,
271 |      "metadata": {},
272 |      "output_type": "execute_result"
273 |     }
274 |    ],
275 |    "source": [
276 |     "# If we explicitly set the schema, using the previously created schema object\n",
277 |     "# we effectively type the rows in the Data Frame\n",
278 |     "\n",
279 |     "loadedDFWithSchema=spark \\\n",
280 |     ".read \\\n",
281 |     ".format(\"com.aerospike.spark.sql\") \\\n",
282 |     ".schema(schema) \\\n",
283 |     ".option(\"aerospike.set\", \"salary_data\").load()\n",
284 |     "\n",
285 |     "loadedDFWithSchema.show(5)\n",
286 |     "loadedDFWithSchema.count()"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "markdown",
291 |    "metadata": {},
292 |    "source": [
293 |     "## Step 2: Register a Temp Table"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "code",
298 |    "execution_count": 9,
299 |    "metadata": {
300 |     "scrolled": true
301 |    },
302 |    "outputs": [],
303 |    "source": [
304 |     "loadedDFWithSchema.registerTempTable(\"myview\")"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "markdown",
309 |    "metadata": {},
310 |    "source": [
311 |     "## Step 3a: Read Data"
312 |    ]
313 |   },
314 |   {
315 |    "cell_type": "code",
316 |    "execution_count": 18,
317 |    "metadata": {},
318 |    "outputs": [
319 |     {
320 |      "data": {
321 |       "text/plain": [
322 |        "[Row(id=239, name='Individual: 239', age=36.31763988049552, salary=73121),\n",
323 |        " Row(id=101, name='Individual: 101', age=42.131372446959624, salary=88392),\n",
324 |        " Row(id=194, name='Individual: 194', age=45.67209291776493, salary=68430),\n",
325 |        " Row(id=31, name='Individual: 031', age=25.369666877630568, salary=48846),\n",
326 |        " Row(id=139, name='Individual: 139', age=43.51114009073862, salary=82116),\n",
327 |        " Row(id=14, name='Individual: 014', age=26.58855120481238, salary=61593),\n",
328 |        " Row(id=142, name='Individual: 142', age=43.170929881406686, salary=86203),\n",
329 |        " Row(id=272, name='Individual: 272', age=38.43340146883269, salary=72691),\n",
330 |        " Row(id=76, name='Individual: 076', age=24.93997559158264, salary=64180),\n",
331 |        " Row(id=147, name='Individual: 147', age=52.175425376631246, salary=88246),\n",
332 |        " Row(id=79, name='Individual: 079', age=24.65820831985479, salary=54088),\n",
333 |        " Row(id=96, name='Individual: 096', age=25.518457474526, salary=49251),\n",
334 |        " Row(id=132, name='Individual: 132', age=41.798677512668064, salary=84438),\n",
335 |        " Row(id=10, name='Individual: 010', age=25.509944072858175, salary=45908),\n",
336 |        " Row(id=141, name='Individual: 141', age=49.80648644002289, salary=87623),\n",
337 |        " Row(id=140, name='Individual: 140', age=41.11269768838019, salary=78535),\n",
338 |        " Row(id=160, name='Individual: 160', age=36.35698689416882, salary=61116),\n",
339 |        " Row(id=112, name='Individual: 112', age=47.632639902424046, salary=78404),\n",
340 |        " Row(id=120, name='Individual: 120', age=49.876620096920284, salary=94501),\n",
341 |        " Row(id=34, name='Individual: 034', age=26.77243285030579, salary=46245)]"
342 |       ]
343 |      },
344 |      "execution_count": 18,
345 |      "metadata": {},
346 |      "output_type": "execute_result"
347 |     }
348 |    ],
349 |    "source": [
350 |     "spark.sql(\"select * from myview\").take(20)"
351 |    ]
352 |   },
353 |   {
354 |    "cell_type": "markdown",
355 |    "metadata": {},
356 |    "source": [
357 |     "## Step 3b: Write data (Coming Soon!) \n",
358 |     "#### Please note that Spark does on support DELETE statement, so just INSERT INTO and INSERT statements will be supported"
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "code",
363 |    "execution_count": null,
364 |    "metadata": {},
365 |    "outputs": [],
366 |    "source": []
367 |   }
368 |  ],
369 |  "metadata": {
370 |   "kernelspec": {
371 |    "display_name": "Python 3",
372 |    "language": "python",
373 |    "name": "python3"
374 |   },
375 |   "language_info": {
376 |    "codemirror_mode": {
377 |     "name": "ipython",
378 |     "version": 3
379 |    },
380 |    "file_extension": ".py",
381 |    "mimetype": "text/x-python",
382 |    "name": "python",
383 |    "nbconvert_exporter": "python",
384 |    "pygments_lexer": "ipython3",
385 |    "version": "3.7.5"
386 |   }
387 |  },
388 |  "nbformat": 4,
389 |  "nbformat_minor": 2
390 | }
391 | 


--------------------------------------------------------------------------------
/notebooks/spark/resources/fs-arch.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerospike-examples/interactive-notebooks/0e582d4305974f6cadd390e2086e8550f1b3ecf7/notebooks/spark/resources/fs-arch.jpg


--------------------------------------------------------------------------------
/notebooks/spark/resources/fs-model-ws.py:
--------------------------------------------------------------------------------
  1 | # fs-model-ws.py
  2 | # This file implements the web service for a simple fraud prediction model from
  3 | # the Jupyter notebook feature-store-model-serving.ipynb.
  4 | 
  5 | from flask import Flask, jsonify
  6 | from flask_restful import Resource, Api, reqparse
  7 | 
  8 | app = Flask(__name__)
  9 | api = Api(app)
 10 | 
 11 | # globals
 12 | client = None
 13 | spark = None
 14 | rf_model = None
 15 | namespace = 'test'
 16 | entity_set = 'cctxn-features'
 17 | features = None
 18 | schema = None
 19 | 
 20 | class CCTxnModel(Resource): 
 21 | 
 22 |     # predict() processes requests and returns predictions
 23 |     @app.route('/', methods=['GET', 'POST'])
 24 |     def predict(): 
 25 |         global client, spark, rf_model, namespace, entity_set, features, schema
 26 |         
 27 |         # use parser to find txnid
 28 |         parser = reqparse.RequestParser()
 29 |         parser.add_argument('txnid')        
 30 |         args = parser.parse_args()
 31 |         txnid = args['txnid'] 
 32 |         
 33 |         # Retrieving Features
 34 |         record_key = (namespace, entity_set, txnid)
 35 |         try:
 36 |             (key, meta, bins) = client.select(record_key, features)
 37 |         except:
 38 |           import sys
 39 |           print('failed to get record')
 40 |           sys.exit(1)
 41 | 
 42 |         # create a input dataframe for the model
 43 |         featureBuf = [tuple([bins[f] for f in features])]
 44 |         featureRDD = spark.sparkContext.parallelize(featureBuf)            
 45 |         featureDF = spark.createDataFrame(featureRDD, schema)
 46 |         
 47 |         # Construct Feature Vector
 48 |         from pyspark.ml.feature import VectorAssembler
 49 | 
 50 |         # create a feature vector from features
 51 |         assembler = VectorAssembler(inputCols=features, outputCol="fvector")
 52 |         featureVectorDF = assembler.transform(featureDF)
 53 | 
 54 |         # Predict
 55 |         from pyspark.ml.classification import RandomForestClassificationModel
 56 |         rf_prediction = rf_model.transform(featureVectorDF['fvector', ])
 57 |         result = rf_prediction['probability', 'prediction'].collect()[0]
 58 |         
 59 |         return jsonify({'normal_prob': result[0][0], 
 60 |                 'fraud_prob': result[0][1],
 61 |                 'prediction':'no fraud' if result[1] < 0.5  else 'fraud'})
 62 | 
 63 | # add resource for processing requests
 64 | api.add_resource(CCTxnModel, '/')
 65 | 
 66 | # initialization of client, spark, model
 67 | def initialize():
 68 |     global client, spark, rf_model, features, schema
 69 |     
 70 |     # Initialize Client
 71 |     # connect to the database
 72 |     import aerospike
 73 |     import sys
 74 |     config = {
 75 |       'hosts': [ ('127.0.0.1', 3000) ]
 76 |     }
 77 |     try:
 78 |       client = aerospike.client(config).connect()
 79 |     except:
 80 |       print("failed to connect to the cluster with", config['hosts'])
 81 |       sys.exit(1)
 82 |     print('Client initialized and connected to database')
 83 |     
 84 |     # Initialize Spark
 85 |     # directory where spark notebook requisites are installed
 86 |     #SPARK_NB_DIR = '/home/jovyan/notebooks/spark'
 87 |     SPARK_NB_DIR = '/opt/spark-nb'
 88 |     SPARK_HOME = SPARK_NB_DIR + '/spark-3.0.3-bin-hadoop3.2'
 89 |     # IP Address or DNS name for one host in your Aerospike cluster
 90 |     AS_HOST ="localhost"
 91 |     # Name of one of your namespaces. Type 'show namespaces' at the aql prompt if you are not sure
 92 |     AS_NAMESPACE = "test" 
 93 |     AEROSPIKE_SPARK_JAR_VERSION="3.2.0"
 94 |     AS_PORT = 3000 # Usually 3000, but change here if not
 95 |     AS_CONNECTION_STRING = AS_HOST + ":"+ str(AS_PORT)
 96 |     # Next we locate the Spark installation - this will be found using the SPARK_HOME environment 
 97 |     # variable that you will have set 
 98 |     import findspark
 99 |     findspark.init(SPARK_HOME)
100 |     # Aerospike Spark Connector related settings
101 |     import os 
102 |     AEROSPIKE_JAR_PATH= "aerospike-spark-assembly-"+AEROSPIKE_SPARK_JAR_VERSION+".jar"
103 |     os.environ["PYSPARK_SUBMIT_ARGS"] = '--jars ' + SPARK_NB_DIR + '/' + AEROSPIKE_JAR_PATH + ' pyspark-shell'
104 |     # imports
105 |     import pyspark
106 |     from pyspark.context import SparkContext
107 |     from pyspark.sql.session import SparkSession
108 |     from pyspark.sql.types import StructField, StructType, DoubleType
109 |     
110 |     sc = SparkContext.getOrCreate()
111 |     conf=sc._conf.setAll([("aerospike.namespace",AS_NAMESPACE),("aerospike.seedhost",AS_CONNECTION_STRING)])
112 |     sc.stop()
113 |     sc = pyspark.SparkContext(conf=conf)
114 |     spark = SparkSession(sc)
115 | 
116 |     # Load Model
117 |     from pyspark.ml.classification import RandomForestClassificationModel
118 | 
119 |     rf_model = RandomForestClassificationModel.read().load(
120 |         "/home/jovyan/notebooks/spark/resources/fs_model_rf")
121 |     print("Loaded Random Forest model.")
122 |     
123 |     # Initialize model features and schema
124 |     features = ["CC1_V"+str(i) for i in range(1,29)] # need features CC1_V1-CC1_V28
125 |     schema = StructType()
126 |     for i in range(1,29): # all features are of type float or Double
127 |         schema.add("CC1_V"+str(i), DoubleType(), True)   
128 |     return
129 | 
130 | 
131 | if __name__ == '__main__':
132 |     initialize()
133 |     app.run(debug=True)
134 | 


--------------------------------------------------------------------------------
/notebooks/spark/resources/fs_model_rf.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aerospike-examples/interactive-notebooks/0e582d4305974f6cadd390e2086e8550f1b3ecf7/notebooks/spark/resources/fs_model_rf.tar.gz


--------------------------------------------------------------------------------
/notebooks/spark/resources/install.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | #Jupyter notebook installation for Mac
 3 | 
 4 | brew install pyenv
 5 | pyenv install 3.7.0
 6 | 
 7 | pyenv global 3.7.0  //setting 3.7.0 as our python version
 8 | echo -e 'if command -v pyenv 1>/dev/null 2>&1; then\n  eval "$(pyenv init -)"\nfi' >> ~/.bash_profile
 9 | source ~/.bash_profile
10 | 
11 | pip install --upgrade pip
12 | pip install jupyter
13 | pip install spylon-kernel
14 | python -m spylon_kernel install
15 | jupyter kernelspec list // should display installed kernels => python3 and spylon-kernel


--------------------------------------------------------------------------------
/notebooks/udf/aggregate_fns.lua:
--------------------------------------------------------------------------------
  1 | -- aggregate_fns.lua - stream UDF functions to implement aggregates
  2 | 
  3 | -- count and sum reducer
  4 | local function add_values(val1, val2)
  5 |     return (val1 or 0) + (val2 or 0)
  6 | end
  7 | 
  8 | -- count mapper
  9 | -- note closures are used to access aggregate parameters such as bin
 10 | local function rec_to_count_closure(bin)
 11 |     local function rec_to_count(rec) 
 12 |     -- if bin is specified: if bin exists in record return 1 else 0; if no bin is specified, return 1
 13 |         return (not bin and 1) or ((rec[bin] and 1) or 0)
 14 |     end
 15 |     return rec_to_count
 16 | end
 17 | 
 18 | -- count
 19 | function count(stream)
 20 |     return stream : map(rec_to_count_closure()) : reduce(add_values)
 21 | end
 22 | 
 23 | -- mapper for various single bin aggregates
 24 | local function rec_to_bin_value_closure(bin)
 25 |     local function rec_to_bin_value(rec)
 26 |     -- if a numeric bin exists in record return its value; otherwise return nil
 27 |         local val = rec[bin]
 28 |         if (type(val) ~= "number") then val = nil end
 29 |         return val
 30 |     end
 31 |     return rec_to_bin_value 
 32 | end
 33 | 
 34 | -- sum
 35 | function sum(stream, bin)
 36 |     return stream : map(rec_to_bin_value_closure(bin)) : reduce(add_values)
 37 | end
 38 | 
 39 | 
 40 | -- range filter
 41 | local function range_filter_closure(range_bin, range_low, range_high)
 42 |     local function range_filter(rec)
 43 |         -- if bin value is in [low,high] return true, false otherwise
 44 |         local val = rec[range_bin]
 45 |         if (not val or type(val) ~= "number") then val = nil end
 46 |         return (val and (val >= range_low and val <= range_high)) or false
 47 |     end
 48 |     return range_filter
 49 | end
 50 |     
 51 | -- sum of range: sum(sum_bin) where range_bin in [range_low, range_high]
 52 | function sum_range(stream, sum_bin, range_bin, range_low, range_high)
 53 |     return stream : filter(range_filter_closure(range_bin, range_low, range_high)) 
 54 |                     : map(rec_to_bin_value_closure(sum_bin)) : reduce(add_values)
 55 | end
 56 | 
 57 | -- min reducer
 58 | local function get_min(val1, val2)
 59 |     local min = nil
 60 |     if val1 then
 61 |         if val2 then
 62 |             if val1 < val2 then min = val1 else min = val2 end
 63 |         else min = val1 
 64 |         end
 65 |     else 
 66 |         if val2 then min = val2 end
 67 |     end
 68 |     return min
 69 | end
 70 | 
 71 | -- min
 72 | function min(stream, bin)
 73 |     return stream : map(rec_to_bin_value_closure(bin)) : reduce(get_min)
 74 | end
 75 |  
 76 | -- max reducer
 77 | local function get_max(val1, val2)
 78 |     local max = nil
 79 |     if val1 then
 80 |         if val2 then
 81 |             if val1 > val2 then max = val1 else max = val2 end
 82 |         else max = val1 
 83 |         end
 84 |     else 
 85 |         if val2 then max = val2 end
 86 |     end
 87 |     return max
 88 | end
 89 | 
 90 | -- max
 91 | function max(stream, bin)
 92 |     return stream : map(rec_to_bin_value_closure(bin)) : reduce(get_max)
 93 | end
 94 |    
 95 | -- map function to comoute average and range
 96 | local function compute_final_stats(stats)
 97 |     local ret = map();
 98 |     ret['AVERAGE'] = stats["sum"] / stats["count"]
 99 |     ret['RANGE'] = stats["max"] - stats["min"]
100 |     return ret
101 | end
102 | 
103 | -- merge partial stream maps into one
104 | local function merge_stats(a, b)
105 |     local ret = map()
106 |     ret["sum"] = add_values(a["sum"], b["sum"])
107 |     ret["count"] = add_values(a["count"], b["count"])
108 |     ret["min"] = get_min(a["min"], b["min"])
109 |     ret["max"] = get_max(a["max"], b["max"])
110 |     return ret
111 | end
112 | 
113 | -- aggregate operator to compute stream state for average_range
114 | local function aggregate_stats(agg, val)
115 |     agg["count"] = (agg["count"] or 0) + ((val["bin_avg"] and 1) or 0)
116 |     agg["sum"] = (agg["sum"] or 0) + (val["bin_avg"] or 0)
117 |     agg["min"] = get_min(agg["min"], val["bin_range"])
118 |     agg["max"] = get_max(agg["max"], val["bin_range"])
119 |     return agg
120 | end
121 | 
122 | -- average_range
123 | function average_range(stream, bin_avg, bin_range)
124 |     local function rec_to_bins(rec)
125 |         -- extract the values of the two bins in ret 
126 |         local ret = map()
127 |         ret["bin_avg"] = rec[bin_avg]
128 |         ret["bin_range"] = rec[bin_range]
129 |         return ret
130 |     end
131 |     return stream : map(rec_to_bins) : aggregate(map(), aggregate_stats) : reduce(merge_stats) : map(compute_final_stats)
132 | end
133 | 
134 | -- nested map merge for group-by sum/count; explicit map merge at each nested level
135 | local function merge_group_sum(a, b)
136 |     local function merge_group(x, y)
137 |     -- inner map merge
138 |         return map.merge(x, y, add_values)
139 |     end
140 |     -- outer map merge
141 |     return map.merge(a, b, merge_group)
142 | end
143 | 
144 | -- aggregate for group-by sum
145 | --    creates a map for each distinct group value and adds the value tagged for a group to the group's sum
146 | local function group_sum(agg, groupval)
147 |     if not agg[groupval["group"]] then agg[groupval["group"]] = map() end
148 |     agg[groupval["group"]]["sum"] = (agg[groupval["group"]]["sum"] or 0) + (groupval["value"] or 0)
149 |     return agg
150 | end
151 | 
152 | -- group-by with sum
153 | function groupby_with_sum(stream, bin_grpby, bin_sum)
154 |     local function rec_to_group_and_bin(rec)
155 |     -- tag the group by bin_grpby value, return a map containing group and bin_sum value 
156 |         local ret = map()
157 |         ret["group"] = rec[bin_grpby]
158 |         local val = rec[bin_sum]
159 |         if (not val or type(val) ~= "number") then val = 0 end
160 |         ret["value"] = val
161 |         return ret
162 |     end
163 |     return stream : map(rec_to_group_and_bin) : aggregate(map(), group_sum) : reduce(merge_group_sum) 
164 | end
165 | 
166 | -- aggregate for group-by count
167 | --   creates a map for each distinct group value and increments the tagged group's count
168 | local function group_count(agg, group)
169 |     if not agg[group] then agg[group] = map() end
170 |     agg[group]["count"] = (agg[group]["count"] or 0) + ((group and 1) or 0)
171 |     return agg
172 | end
173 | 
174 | -- map function for group-by processing
175 | local function rec_to_group_closure(bin_grpby)
176 |     local function rec_to_group(rec)
177 |         -- returns group-by bin value in a record
178 |         return rec[bin_grpby]
179 |     end
180 |     return rec_to_group
181 | end
182 | 
183 | -- group-by having example: count(*) having low <= count <= high
184 | function groupby_with_count_having(stream, bin_grpby, having_range_low, having_range_high)
185 |     local function process_having(stats)
186 |         -- filters groups with count in the range
187 |         local ret = map()
188 |         for key, value in map.pairs(stats) do 
189 |             if (key >= having_range_low and key <= having_range_high) then 
190 |                 ret[key] = value
191 |             end
192 |         end
193 |         return ret
194 |     end
195 |     return stream : map(rec_to_group_closure(bin_grpby)) : aggregate(map(), group_count) 
196 |                     : reduce(merge_group_sum) : map(process_having)
197 | end
198 | 
199 | -- group-by count(*) order-by count
200 | function groupby_with_count_orderby(stream, bin_grpby, bin_orderby)
201 |     local function orderby(t, order)
202 |         -- collect the keys
203 |         local keys = {}
204 |         for k in pairs(t) do keys[#keys+1] = k end
205 |         -- sort by the order by passing the table and keys a, b,
206 |         table.sort(keys, function(a,b) return order(t, a, b) end)
207 |         -- return the iterator function
208 |         local i = 0
209 |         return function()
210 |             i = i + 1
211 |             if keys[i] then
212 |                 return keys[i], t[keys[i] ]
213 |             end
214 |         end
215 |     end
216 |     local function process_orderby(stats)
217 |         -- uses lua table sort to sort aggregate map into a list 
218 |         -- list has k and v separately added for sorted entries 
219 |         local ret = list()
220 |         local t = {}
221 |         for k,v in map.pairs(stats) do t[k] = v end
222 |         for k,v in orderby(t, function(t, a, b) return t[a][bin_orderby] < t[b][bin_orderby] end) do
223 |             list.append(ret, k)
224 |             list.append(ret, v)
225 |         end        
226 |         return ret
227 |     end
228 |     return stream : map(rec_to_group_closure(bin_grpby)) : aggregate(map(), group_count) 
229 |                     : reduce(merge_group_sum) : map(process_orderby)
230 | end
231 | 
232 | -- return map keys in a list
233 | local function map_to_list(values)
234 |     local ret = list()
235 |     for k in map.keys(values) do list.append(ret, k) end
236 |     return ret
237 | end
238 | 
239 | -- merge partial aggregate maps
240 | local function merge_values(a, b)
241 |     return map.merge(a, b, function(v1, v2) return ((v1 or v2) and 1) or nil end)
242 | end
243 | 
244 | -- map for distinct; using map unique keys
245 | local function distinct_values(agg, value)
246 |     if value then agg[value] = 1 end
247 |     return agg
248 | end
249 | 
250 | -- distinct 
251 | function distinct(stream, bin)
252 |     local function rec_to_bin_value(rec)
253 |         -- simply return bin value in rec
254 |         return rec[bin]
255 |     end
256 |     return stream : map(rec_to_bin_value) : aggregate(map(), distinct_values) 
257 |                     : reduce(merge_values) : map(map_to_list)
258 | end 
259 |     
260 | -- limit 
261 | function limit(stream, bin, max)
262 |    local function list_limit(agg, rec)
263 |         -- add to list if the list size is below the limit
264 |         if list.size(agg) < max then
265 |             local ret = map()
266 |             ret[bin] = rec[bin]
267 |             list.append(agg, ret)
268 |         end
269 |         return agg
270 |     end
271 |     local function list_merge_limit(a, b)
272 |         local ret = list()
273 |         list.concat(ret, list.take(a, max))
274 |         list.concat(ret, list.take(b, (max > list.size(ret) and max-list.size(ret)) or 0))
275 |         return ret
276 |     end
277 |     return stream : aggregate(list(), list_limit) : reduce(list_merge_limit) 
278 | end 
279 |     
280 | -- top n
281 | function top_n(stream, bin, n)
282 |     local function get_top_n(values)
283 |         -- return top n values in a map as an ordered list
284 |         -- uses lua table sort
285 |         local t = {}
286 |         local i = 1
287 |         for k in map.keys(values) do 
288 |             t[i] = k 
289 |             i = i + 1
290 |         end
291 |         table.sort(t, function(a,b) return a > b end)
292 |         local ret = list()
293 |         local i = 0
294 |         for k, v in pairs(t) do 
295 |             list.append(ret, v) 
296 |             i = i + 1 
297 |             if i == n then break end
298 |         end
299 |         return ret
300 |     end
301 |     local function top_n_values(agg, value)
302 |         if value then agg[value] = 1 end
303 |         -- if map size exceeds n*10, trim to top n
304 |         if map.size(agg) > n*10 then 
305 |             local new_agg = map()
306 |             local trimmed = trim_to_top_n(agg) 
307 |             for value in list.iterator(trimmed) do
308 |                 new_agg[value] = 1
309 |             end
310 |             agg = new_agg
311 |         end
312 |         return agg
313 |     end
314 |     return stream : map(rec_to_bin_value_closure(bin)) : aggregate(map(), top_n_values) 
315 |                     : reduce(merge_values) : map(get_top_n)
316 | end 
317 | 


--------------------------------------------------------------------------------
/notebooks/udf/update_example.lua:
--------------------------------------------------------------------------------
 1 | -- update_example.lua
 2 | 
 3 | function multiplyBy(rec, binName, factor)
 4 |     rec[binName] = rec[binName] * factor
 5 |     aerospike:update(rec)
 6 | end
 7 | 
 8 | function increment(rec, binName, value)
 9 |     rec[binName] = rec[binName] + value
10 |     aerospike:update(rec)
11 | end
12 | 
13 | function increment_and_get(rec, binName, value)
14 |     local ret = map()                     -- Initialize the return value (a map)
15 |     rec[binName] = rec[binName] + value
16 |     ret[binName] = rec[binName]
17 |     aerospike:update(rec)
18 |     return ret
19 | end
20 | 
21 | -- update the specified bins by adding and appending the values provided
22 | function add_append(rec, binName1, addVal, binName2, appendVal)
23 |     rec[binName1] = rec[binName1] + addVal
24 |     rec[binName2] = rec[binName2] .. appendVal
25 |     aerospike:update(rec)
26 | end


--------------------------------------------------------------------------------
/update.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | 
 4 | if [ -z "$1" ]; then
 5 | 
 6 |    serverVer="$(curl -sSL 'https://download.aerospike.com/artifacts/aerospike-server-enterprise/' | grep -E '<a href="[0-9.-]+[-]*.*/"' | sed -r 's!.*<a href="([0-9.-]+[-]*.*)/".*!\1!' | sort -V | tail -1)"
 7 | 
 8 | else 
 9 |    serverVer=$1
10 | fi
11 | 
12 | toolsVer="$(curl -sSL "https://download.aerospike.com/artifacts/aerospike-server-enterprise/$serverVer/" | grep  'ubuntu20.04_x86_64.tgz' | sed 's/.*\(aerospike-server-enterprise_.*_tools-.*_ubuntu20.04_x86_64.tgz\).*/\1/' | tail -1 | sed 's/aerospike-server-enterprise_.*_tools-\(.*\)_ubuntu20.04_x86_64.tgz/\1/')"
13 | 
14 | sha256="$(curl -sSL "https://download.aerospike.com/artifacts/aerospike-server-enterprise/${serverVer}/aerospike-server-enterprise_${serverVer}_tools-${toolsVer}_ubuntu20.04_x86_64.tgz.sha256" | cut -d' ' -f1)"
15 | 
16 | set -x
17 | sed -ir 's/^\(ENV AEROSPIKE_VERSION\) .*/\1 '"$serverVer"'/; s/^\(ENV AEROSPIKE_SHA256\) .*/\1 '"$sha256"'/; s/^\(ARG AEROSPIKE_TOOLS_VERSION\).*/\1='"$toolsVer"'/'  Dockerfile
18 | 


--------------------------------------------------------------------------------
/update_readme.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | OUTPUT_FILE="./README.md"
  4 | 
  5 | #### Constant header text ####
  6 | HEADER_TEXT=$( cat <<"END-HEADER-TEXT"
  7 | # Notebooks
  8 | 
  9 | This area is for [Jupyter notebooks](https://jupyter.org/) in .ipynb format. Python and Java notebooks are currently supported by the kernel. 
 10 | 
 11 | The list of notebooks below has links to browse each notebook in the viewer and to launch it in interactive mode in Binder. This repository also provides a Docker container that you can install (see the [instructions](../README.md)) to run the notebooks locally. 
 12 | END-HEADER-TEXT
 13 | )
 14 | 
 15 | #### Constant footer text ####
 16 | FOOTER_TEXT=$( cat <<"END-FOOTER-TEXT"
 17 | 
 18 | _Copyright Aerospike Inc 2021_
 19 | END-FOOTER-TEXT
 20 | )
 21 | 
 22 | REPO="aerospike-examples/interactive-notebooks"
 23 | REPO_URL="https://github.com/${REPO}"
 24 | VIEWER_URL="${REPO_URL}/tree/main/notebooks"
 25 | BINDER_URL="https://mybinder.org/v2/gh/aerospike-examples/interactive-notebooks/main?filepath="
 26 | 
 27 | function get_title () {
 28 |   fname=$1
 29 | 
 30 |   nice_name=$( grep -m1 '^ *"# ' $fname | sed -e 's/^ *"# //' -e 's/\\n",$//' -e 's/\\"/"/g' )
 31 |   if [[ $nice_name ]] ; then
 32 |     echo $nice_name
 33 |   else
 34 |     echo $fname
 35 |   fi
 36 | }
 37 | 
 38 | function get_notebooks () {
 39 |   dir=$1
 40 | 
 41 |   for ii in ${dir}/*.ipynb ; do
 42 |     echo ${ii} | sed -e 's/^\.\///'
 43 |   done
 44 | }
 45 | 
 46 | function print_notebooks_table () {
 47 |   title=$1
 48 |   notebooks=$2
 49 | 
 50 |   echo ""
 51 |   echo "${title} | View | Launch"
 52 |   echo "-------- | ---- | ------"
 53 | 
 54 |   for ii in $notebooks ; do
 55 |     view_url="${VIEWER_URL}/${ii}"
 56 |     launch_url="${BINDER_URL}${ii}"
 57 |     nice_name=$( get_title $ii )
 58 | 
 59 |     echo "${nice_name} | [View](${view_url}) | [Launch](${launch_url})"
 60 |   done
 61 | 
 62 |   echo ""
 63 | }
 64 | 
 65 | function print_notebook_rows () {
 66 |   notebooks=$1
 67 | 
 68 |   for ii in $notebooks ; do
 69 |     view_url="${VIEWER_URL}/${ii}"
 70 |     launch_url="${BINDER_URL}${ii}"
 71 |     nice_name=$( get_title $ii )
 72 | 
 73 |     echo "&nbsp; ${nice_name} | [View](${view_url}) | [Launch](${launch_url})"
 74 |   done
 75 | }
 76 | 
 77 | function print_notebook_rows_nobinder () {
 78 |   notebooks=$1
 79 | 
 80 |   for ii in $notebooks ; do
 81 |     view_url="${VIEWER_URL}/${ii}"
 82 |     nice_name=$( get_title $ii )
 83 | 
 84 |     echo "&nbsp; ${nice_name} | [View](${view_url})"
 85 |   done
 86 | }
 87 | 
 88 | #### Main
 89 | 
 90 | # check whether we're in the right repo's notebooks directory
 91 | if [[ ! $PWD =~ interactive-notebooks/notebooks$ ]] ; then
 92 |   echo 'Must cd to the "notebooks" directory of a local copy of the' >&2
 93 |   echo '"aerospike-examples/interactive-notebooks" repo before running this script.' >&2
 94 |   hints=$( find $HOME -type d -name notebooks 2>/dev/null | grep 'interactive-notebooks/notebooks' )
 95 |   if [[ hints ]] ; then
 96 |     echo '' >&2
 97 |     echo 'Maybe try one of these:' >&2
 98 |     echo $hints >&2
 99 |   fi
100 |   exit 1
101 | fi
102 | 
103 | # Record md5sum so we can check whether we changed the file
104 | old_sum=$( md5sum $OUTPUT_FILE 2>/dev/null)
105 | 
106 | {
107 |   echo "${HEADER_TEXT}"
108 |   echo ""
109 | 
110 |   echo ""
111 |   view_url="${VIEWER_URL}"
112 |   launch_url="${BINDER_URL}"
113 |   echo "All Notebooks | [View All](${view_url}) | [Launch in Binder](${launch_url})"
114 |   echo ":-------- | ---- | ------"
115 |   nbs=$( get_notebooks . )
116 |   print_notebook_rows "$nbs"
117 | 
118 |   view_url="${VIEWER_URL}/java"
119 |   launch_url="${BINDER_URL}java"
120 |   echo " | | | | "
121 |   echo "**Java  Notebooks** | [View All](${view_url}) | [Launch in Binder](${launch_url})"
122 |   echo " | | | | "
123 |   nbs=$( get_notebooks ./java )
124 |   print_notebook_rows "$nbs"
125 | 
126 |   view_url="${VIEWER_URL}/python"
127 |   launch_url="${BINDER_URL}python"
128 |   echo " | | | | "
129 |   echo "**Python  Notebooks** | [View All](${view_url}) | [Launch in Binder](${launch_url})"
130 |   echo " | | | | "
131 |   nbs=$( get_notebooks ./python )
132 |   print_notebook_rows "$nbs"
133 | 
134 |   view_url="${VIEWER_URL}/spark"
135 |   echo " | | | | "
136 |   echo "**Spark  Notebooks** | [View All](${view_url})"
137 |   echo " | | | | "
138 |   nbs=$( get_notebooks ./spark )
139 |   print_notebook_rows_nobinder "$nbs"
140 | 
141 |   echo ""
142 | } > ${OUTPUT_FILE}
143 | 
144 | # Compute new md5sum and check whether it's different
145 | new_sum=$( md5sum $OUTPUT_FILE )
146 | 
147 | if [[ "${old_sum}" == "${new_sum}" ]] ; then
148 |   echo "${OUTPUT_FILE} did not change" >&2
149 | else
150 |   echo "${OUTPUT_FILE} changed" >&2
151 |   git add ${OUTPUT_FILE}
152 | fi
153 | 


--------------------------------------------------------------------------------