├── plugin_tests ├── __init__.py ├── .env.sample ├── astra_db_tests.py └── cassandra_tests.py ├── src └── feast_cassandra_online_store │ ├── __init__.py │ └── cassandra_online_store.py ├── setup.cfg ├── .gitmodules ├── requirements-dev.txt ├── CHANGES.txt ├── README.md ├── .gitignore ├── TODO.md ├── setup.py ├── CONTRIBUTING.md ├── archived_README.md └── LICENSE.txt /plugin_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/feast_cassandra_online_store/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_files = LICENSE.txt 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "feast"] 2 | path = feast 3 | url = https://github.com/feast-dev/feast.git 4 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | flake8>=4.0.1 2 | pytest>=6.2.4 3 | python-dotenv>=0.20.0 4 | twine==4.0.0 5 | -------------------------------------------------------------------------------- /CHANGES.txt: -------------------------------------------------------------------------------- 1 | CHANGES 2 | ======= 3 | 4 | 0.1.4 5 | ----- 6 | 7 | * Deprecation notice (development moved to contrib code in Feast codebase), no other changes in code 8 | 9 | 0.1.3 10 | ----- 11 | 12 | * Repo transferred to "datastaxdevs", change in setup.py metadata. 13 | 14 | 0.1.2 15 | ----- 16 | 17 | * Fixed link to "Awesome Astra" page. 
18 | 19 | 0.1.1 20 | ----- 21 | 22 | * Credits in README. 23 | * Link to specific "Awesome Astra" page. 24 | 25 | 0.1.0 26 | ----- 27 | 28 | * This is the first release. 29 | -------------------------------------------------------------------------------- /plugin_tests/.env.sample: -------------------------------------------------------------------------------- 1 | # For testing with Astra DB 2 | ASTRA_DB_SECURE_CONNECT_BUNDLE="/path/to/secure/bundle.zip" 3 | ASTRA_DB_USERNAME="Astra_Token_Client_ID" 4 | ASTRA_DB_PASSWORD="Astra_Token_Client_Secret" 5 | # 6 | ASTRA_DB_KEYSPACE="Astra_DB_Keyspace" 7 | 8 | 9 | # For testing with a regular Cassandra 10 | CASSANDRA_HOSTS="192.168.1.1,192.168.1.2" 11 | # CASSANDRA_PORT="9042" # optional 12 | # CASSANDRA_USERNAME="tester" # optional 13 | # CASSANDRA_PASSWORD="123456" # optional 14 | # 15 | CASSANDRA_KEYSPACE="Cassandra_Keyspace" 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Feast Cassandra / Astra DB online store plugin 2 | 3 | # Deprecation notice 4 | 5 | This plugin has been incorporated into the main Feast codebase, 6 | and has been improved since. 7 | 8 | This stand-alone distribution is DEPRECATED. Please migrate your Feast 9 | project to using the online store bundled with Feast. 10 | This plugin will not receive any more 11 | updates of any kind. 12 | 13 | New issues, and pull requests, to this repo will be _ignored_. 14 | 15 | Resources: 16 | 17 | - [Feast Cassandra/Astra DB online store](https://docs.feast.dev/reference/online-stores/cassandra) 18 | - [Awesome Astra page](https://awesome-astra.github.io/docs/pages/tools/integration/feast/) 19 | 20 | _(if you want to see the previous README for historical reasons, [here it is](archived_README.md))_. 
21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # custom 2 | temp_notes.md 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | 7 | # python 8 | *.py[cod] 9 | *$py.class 10 | *.prof 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | classes/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # Unit test / coverage reports 34 | htmlcov/ 35 | .tox/ 36 | .nox/ 37 | .coverage 38 | .coverage.* 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | *.cover 43 | .hypothesis/ 44 | .pytest_cache/ 45 | 46 | # Environments 47 | .env 48 | .venv 49 | env/ 50 | venv/ 51 | ENV/ 52 | env.bak/ 53 | venv.bak/ 54 | 55 | # mypy 56 | .mypy_cache/ 57 | .dmypy.json 58 | dmypy.json 59 | -------------------------------------------------------------------------------- /plugin_tests/astra_db_tests.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | from dotenv import dotenv_values 3 | 4 | from tests.integration.feature_repos.integration_test_repo_config import ( 5 | IntegrationTestRepoConfig, 6 | ) 7 | 8 | 9 | HERE = pathlib.Path(__file__).parent.absolute() 10 | config = dotenv_values(HERE / ".env") 11 | ASTRA_DB_SECURE_CONNECT_BUNDLE = config["ASTRA_DB_SECURE_CONNECT_BUNDLE"] 12 | ASTRA_DB_USERNAME = config["ASTRA_DB_USERNAME"] 13 | ASTRA_DB_PASSWORD = config["ASTRA_DB_PASSWORD"] 14 | ASTRA_DB_KEYSPACE = config["ASTRA_DB_KEYSPACE"] 15 | 16 | 17 | ASTRA_DB_CONFIG = { 18 | "type": ("feast_cassandra_online_store.cassandra_online_store" 19 | ".CassandraOnlineStore"), 20 | "secure_bundle_path": ASTRA_DB_SECURE_CONNECT_BUNDLE, 21 | "username": ASTRA_DB_USERNAME, 22 | "password": 
ASTRA_DB_PASSWORD, 23 | "keyspace": ASTRA_DB_KEYSPACE, 24 | "protocol_version": 4, 25 | } 26 | 27 | FULL_REPO_CONFIGS = [ 28 | IntegrationTestRepoConfig(online_store=ASTRA_DB_CONFIG), 29 | ] 30 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # TODO 2 | 3 | ## Improvements 4 | 5 | #### Issues 6 | 7 | Issue template(s) on GitHub (not needed if planning to enter Feast's repo itself). 8 | 9 | 10 | 11 | ## Open questions/issues 12 | 13 | #### Schema changes? 14 | 15 | "create table if not exists": what happens when a table changes its schema? 16 | 17 | #### No-insertion cells 18 | 19 | We currently avoid inserting nulls altogether for `created_ts` when missing: 20 | check that this does not create problems (potentially, a wrong ts from a preexisting row?) 21 | 22 | #### Ignored parameters in store methods 23 | 24 | There are ignored params to `update`: `[entities_to_delete, entities_to_keep, partial]`. 25 | Not sure what they should control, the docs say little to nothing. 26 | 27 | Same for param `entities` to `teardown`: this is also ignored by major stores. 28 | 29 | Make sure this is OK. 30 | 31 | #### Provider? 32 | 33 | Is it necessary to create a provider? (it seems it does not add much in this case) 34 | 35 | #### Tracing span 36 | 37 | Usage of `with tracing_span(...)` here: 38 | individually for each call to the CQL driver ops. 39 | 40 | Is that the right way? 
(comparing with other datastores one is not so sure) 41 | -------------------------------------------------------------------------------- /plugin_tests/cassandra_tests.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | from dotenv import dotenv_values 3 | 4 | from tests.integration.feature_repos.integration_test_repo_config import ( 5 | IntegrationTestRepoConfig, 6 | ) 7 | 8 | 9 | HERE = pathlib.Path(__file__).parent.absolute() 10 | config = dotenv_values(HERE / ".env") 11 | CASSANDRA_HOSTS = config["CASSANDRA_HOSTS"] 12 | CASSANDRA_KEYSPACE = config["CASSANDRA_KEYSPACE"] 13 | CASSANDRA_PORT = config.get("CASSANDRA_PORT") 14 | CASSANDRA_USERNAME = config.get("CASSANDRA_USERNAME") 15 | CASSANDRA_PASSWORD = config.get("CASSANDRA_PASSWORD") 16 | 17 | if CASSANDRA_HOSTS: 18 | hosts = CASSANDRA_HOSTS.split(",") 19 | else: 20 | hosts = None 21 | if CASSANDRA_PORT: 22 | port = int(CASSANDRA_PORT) 23 | else: 24 | port = None 25 | 26 | 27 | CASSANDRA_CONFIG = { 28 | k: v 29 | for k, v in { 30 | "type": ("feast_cassandra_online_store.cassandra_online_store" 31 | ".CassandraOnlineStore"), 32 | "hosts": hosts, 33 | "port": port, 34 | "keyspace": CASSANDRA_KEYSPACE, 35 | "username": CASSANDRA_USERNAME, 36 | "password": CASSANDRA_PASSWORD, 37 | "protocol_version": 5, 38 | }.items() 39 | if v is not None 40 | } 41 | 42 | 43 | FULL_REPO_CONFIGS = [ 44 | IntegrationTestRepoConfig(online_store=CASSANDRA_CONFIG), 45 | ] 46 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import pathlib 3 | 4 | here = pathlib.Path(__file__).parent.resolve() 5 | 6 | setup( 7 | name="feast-cassandra", 8 | version="0.1.4", 9 | author="Stefano Lottini", 10 | author_email="stefano.lottini@datastax.com", 11 | package_dir={"": "src"}, 12 | packages=find_packages(where="src"), 
13 | # entry_points={ 14 | # "console_scripts": [ 15 | # "command=importable:function", 16 | # ], 17 | # }, 18 | url="https://github.com/datastaxdevs/feast-cassandra-online-store", 19 | license="LICENSE.txt", 20 | description="Cassandra/Astra DB support for Feast online store", 21 | long_description=(here / "README.md").read_text(encoding="utf-8"), 22 | long_description_content_type="text/markdown", 23 | python_requires=">=3.7.0", 24 | install_requires=[ 25 | "cassandra-driver>=3.24.0,<4", 26 | ], 27 | classifiers=[ 28 | "Development Status :: 4 - Beta", 29 | "Environment :: Console", 30 | "Intended Audience :: Developers", 31 | "License :: OSI Approved :: Apache Software License", 32 | # 33 | "Programming Language :: Python :: 3", 34 | "Programming Language :: Python :: 3.7", 35 | "Programming Language :: Python :: 3.8", 36 | "Programming Language :: Python :: 3.9", 37 | "Programming Language :: Python :: 3.10", 38 | "Programming Language :: Python :: 3.11", 39 | "Programming Language :: Python :: 3 :: Only", 40 | ], 41 | keywords="feast, cassandra, mlops", 42 | ) 43 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Development quick guide 2 | 3 | ### Dev environment 4 | 5 | _Instructions to develop this package:_ 6 | 7 | Once cloned, make sure the `feast` Git [submodule](https://git-scm.com/book/en/v2/Git-Tools-Submodules) is cloned as well: 8 | in root of repo, `git submodule init` and `git submodule update`. 9 | (To advance the commit of the submodule if necessary, `git submodule update --remote` .) 10 | (If advancing `feast` submodule, one may have to rebuild the protobuf assets: 11 | `pip install --upgrade setuptools` and `make protos` in the `feast` subdir. 12 | There may be an error related to generating protos doc, it can be ignored.) 13 | 14 | Create a fresh development virtualenv, e.g. 
`feast-cassandra-dev-39`, and 15 | `pip install --upgrade pip` in it. 16 | 17 | Go to the repo's root. Add this path and its `src` to the python path, e.g. creating a text 18 | file such as `~/.virtualenvs/feast-cassandra-dev-39/lib/python3.9/site-packages/custom-path.pth` 19 | with two lines in it such as: 20 | 21 | ``` 22 | [path to repos]/feast-cassandra-online-store/src 23 | [path to repos]/feast-cassandra-online-store 24 | ``` 25 | 26 | Then deactivate and activate the virtualenv again. 27 | _This step is needed only to make the universal unit test able to import the test 28 | config definitions later._ 29 | 30 | Install the development requirements: `pip install -r requirements-dev.txt` . 31 | 32 | Install this plugin in develop mode: `python setup.py develop` . 33 | 34 | Also install `feast` itself in the same way: `cd feast; pip install -e ".[dev]"; cd ..` . 35 | 36 | Now unit tests can be run: `cd feast; make test-python; cd ..` . 37 | 38 | But, most importantly, integration tests can be run: 39 | 40 | - copy `plugin_tests/.env.sample` to `plugin_tests/.env` and adjust its settings; 41 | - run either `export FULL_REPO_CONFIGS_MODULE='plugin_tests.cassandra_tests'` or `export FULL_REPO_CONFIGS_MODULE='plugin_tests.astra_db_tests'` ; 42 | - finally run `cd feast; make test-python-universal; cd ..` (and expect at most one single failure about GCP credentials not found). 43 | 44 | ### Installed (from local) package 45 | 46 | _An environment with "actual" installations, needed for example to run the 47 | "Quick usage" setups given in `archived_README.md`, is obtained as follows:_ 48 | 49 | - create an empty virtualenv, such as `feast-cassandra-39` ; 50 | - run `pip install --upgrade pip` ; 51 | - run `pip install feast` ; 52 | - run `python setup.py install` . 53 | 54 | ### Publishing to PyPI 55 | 56 | Make sure: 57 | 58 | - version has been incremented in `setup.py`; 59 | - `CHANGES.txt` has been updated; 60 | - you have a PyPI account with access to `feast-cassandra`. 
61 | 62 | To build: 63 | 64 | ``` 65 | rm dist/* 66 | python setup.py sdist bdist_wheel 67 | ls dist/ # should show version-named wheel and tarball files 68 | ``` 69 | 70 | To publish (keep your PyPI credentials ready): 71 | 72 | ``` 73 | twine upload dist/* 74 | ``` 75 | -------------------------------------------------------------------------------- /archived_README.md: -------------------------------------------------------------------------------- 1 | # Feast Cassandra / Astra DB online store plugin 2 | 3 | **Notice**: this plugin is DEPRECATED. Refer to the [README](README.md) for more info. 4 | 5 | A [Feast](https://feast.dev/) 6 | plugin to use 7 | [Apache Cassandra™](https://cassandra.apache.org) / 8 | [Astra DB](https://astra.datastax.com/) as online store. 9 | 10 | ## Installation 11 | 12 | Install the plugin alongside Feast with: 13 | 14 | ``` 15 | pip install feast-cassandra 16 | ``` 17 | 18 | ## Quick usage 19 | 20 | Once the package is installed, switching online store to Cassandra / Astra DB 21 | is a matter of altering the `online_store` key in `feature_store.yaml`. 22 | 23 | With reference to the [Feast quickstart](https://docs.feast.dev/getting-started/quickstart), 24 | the minimal steps are: 25 | 26 | 1. (assuming both `feast` and this plugin are installed) 27 | 2. creating a feature repository, `feast init feature_repo`; 28 | 3. `cd feature_repo`; 29 | 4. editing the `feature_store.yaml` as detailed below; 30 | 5. all subsequent steps proceed as usual. 31 | 32 | ### Cassandra setup 33 | 34 | The only required settings are `hosts` and `type`. The port number 35 | is to be provided only if different than the default (9042), 36 | and username/password only if the database requires authentication. 37 | 38 | ```yaml 39 | [...] 
40 | online_store: 41 | type: feast_cassandra_online_store.cassandra_online_store.CassandraOnlineStore 42 | hosts: 43 | - 192.168.1.1 44 | - 192.168.1.2 45 | - 192.168.1.3 46 | keyspace: KeyspaceName 47 | port: 9042 # optional 48 | username: user # optional 49 | password: secret # optional 50 | ``` 51 | 52 | ### Astra DB setup: 53 | 54 | To point Feast to using an Astra DB instance as online store, an 55 | [Astra DB token](https://awesome-astra.github.io/docs/pages/astra/create-token/#c-procedure) 56 | with "Database Administrator" role is required: provide the Client ID and 57 | Client Secret in the token as username and password. 58 | 59 | The 60 | ["secure connect bundle"](https://awesome-astra.github.io/docs/pages/astra/download-scb/#c-procedure) 61 | for connecting to the database is also needed: 62 | its full path must be given in the configuration below: 63 | 64 | ```yaml 65 | [...] 66 | online_store: 67 | type: feast_cassandra_online_store.cassandra_online_store.CassandraOnlineStore 68 | secure_bundle_path: /path/to/secure/bundle.zip 69 | keyspace: KeyspaceName 70 | username: Client_ID 71 | password: Client_Secret 72 | ``` 73 | 74 | ### More info 75 | 76 | For a more detailed walkthrough, please see the 77 | [Awesome Astra](https://awesome-astra.github.io/docs/pages/tools/integration/feast/) 78 | page on the Feast integration. 79 | 80 | ## Features 81 | 82 | The plugin leverages the architecture of Cassandra for optimal performance: 83 | 84 | - table partitioning tailored to data access pattern; 85 | - prepared statements. 86 | 87 | #### Credits 88 | 89 | The author of this plugin acknowledges prior exploratory work by 90 | [`hamzakpt`](https://github.com/hamzakpt) and Brian Mortimore, 91 | on which this implementation is loosely based. 
92 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | -------------------------------------------------------------------------------- /src/feast_cassandra_online_store/cassandra_online_store.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2022 Stefano Lottini 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | Cassandra/Astra DB online store for Feast. 17 | """ 18 | 19 | import logging 20 | from datetime import datetime 21 | from typing import (Sequence, List, Optional, Tuple, Dict, Callable, 22 | Any, Iterable) 23 | 24 | from feast import RepoConfig, FeatureView, Entity 25 | from feast.infra.key_encoding_utils import serialize_entity_key 26 | from feast.infra.online_stores.online_store import OnlineStore 27 | from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto 28 | from feast.protos.feast.types.Value_pb2 import Value as ValueProto 29 | from feast.repo_config import FeastConfigBaseModel 30 | from feast.usage import log_exceptions_and_usage, tracing_span 31 | 32 | from pydantic import StrictStr, StrictInt 33 | from pydantic.typing import Literal 34 | 35 | from cassandra.cluster import Cluster, Session, ResultSet 36 | from cassandra.auth import PlainTextAuthProvider 37 | # 38 | from cassandra.policies import DCAwareRoundRobinPolicy, TokenAwarePolicy 39 | from cassandra.cluster import ExecutionProfile 40 | from cassandra.cluster import EXEC_PROFILE_DEFAULT 41 | 42 | # Error messages 43 | E_CASSANDRA_UNEXPECTED_CONFIGURATION_CLASS = ( 44 | "Unexpected configuration object (not a " 45 | "CassandraOnlineStoreConfig instance)" 46 | ) 47 | E_CASSANDRA_NOT_CONFIGURED = ( 48 | "Inconsistent Cassandra configuration: provide exactly one between " 49 | "'hosts' and 'secure_bundle_path' and a 'keyspace'" 50 | ) 51 | E_CASSANDRA_MISCONFIGURED = ( 52 | "Inconsistent Cassandra configuration: provide either 'hosts' or " 53 | 
"'secure_bundle_path', not both" 54 | ) 55 | E_CASSANDRA_INCONSISTENT_AUTH = ( 56 | "Username and password for Cassandra must be provided either both or none" 57 | ) 58 | E_CASSANDRA_UNKNOWN_LB_POLICY = ( 59 | "Unknown/unsupported Load Balancing Policy name in Cassandra configuration" 60 | ) 61 | 62 | # CQL command templates (that is, before replacing schema names) 63 | INSERT_CQL_4_TEMPLATE = ("INSERT INTO {fqtable} (feature_name," 64 | " value, entity_key, event_ts) VALUES" 65 | " (?, ?, ?, ?);") 66 | 67 | INSERT_CQL_5_TEMPLATE = ("INSERT INTO {fqtable} (feature_name, " 68 | "value, entity_key, event_ts, created_ts)" 69 | " VALUES (?, ?, ?, ?, ?);") 70 | 71 | SELECT_CQL_TEMPLATE = ("SELECT {columns} FROM {fqtable}" 72 | " WHERE entity_key = ?;") 73 | 74 | CREATE_TABLE_CQL_TEMPLATE = """ 75 | CREATE TABLE IF NOT EXISTS {fqtable} ( 76 | entity_key TEXT, 77 | feature_name TEXT, 78 | value BLOB, 79 | event_ts TIMESTAMP, 80 | created_ts TIMESTAMP, 81 | PRIMARY KEY ((entity_key), feature_name) 82 | ) WITH CLUSTERING ORDER BY (feature_name ASC); 83 | """ 84 | 85 | DROP_TABLE_CQL_TEMPLATE = "DROP TABLE IF EXISTS {fqtable};" 86 | 87 | # op_name -> (cql template string, prepare boolean) 88 | CQL_TEMPLATE_MAP = { 89 | # Queries/DML, statements to be prepared 90 | 'insert4': (INSERT_CQL_4_TEMPLATE, True), 91 | 'insert5': (INSERT_CQL_5_TEMPLATE, True), 92 | 'select': (SELECT_CQL_TEMPLATE, True), 93 | # DDL, do not prepare these 94 | 'drop': (DROP_TABLE_CQL_TEMPLATE, False), 95 | 'create': (CREATE_TABLE_CQL_TEMPLATE, False), 96 | } 97 | 98 | # Logger 99 | logger = logging.getLogger(__name__) 100 | 101 | 102 | class CassandraInvalidConfig(Exception): 103 | def __init__(self, msg: str): 104 | super().__init__(msg) 105 | 106 | 107 | class CassandraOnlineStoreConfig(FeastConfigBaseModel): 108 | """ 109 | Configuration for the Cassandra/Astra DB online store. 
110 | 111 | Exactly one of `hosts` and `secure_bundle_path` must be provided; 112 | depending on which one, the connection will be to a regular Cassandra 113 | or an Astra DB instance (respectively). 114 | 115 | If connecting to Astra DB, authentication must be provided with username 116 | and password being the Client ID and Client Secret of the database token. 117 | """ 118 | _full_class_name = ("feast_cassandra_online_store.cassandra_online_store" 119 | ".CassandraOnlineStore") 120 | # 121 | type: Literal["cassandra", _full_class_name] = _full_class_name 122 | """Online store type selector.""" 123 | 124 | # settings for connection to Cassandra / Astra DB 125 | 126 | hosts: Optional[List[StrictStr]] = None 127 | """List of host addresses to reach the cluster.""" 128 | 129 | secure_bundle_path: Optional[StrictStr] = None 130 | """Path to the secure connect bundle (for Astra DB; replaces hosts).""" 131 | 132 | port: Optional[StrictInt] = None 133 | """Port number for connecting to the cluster (optional).""" 134 | 135 | keyspace: StrictStr = None 136 | """Target Cassandra keyspace where all tables will be.""" 137 | 138 | username: Optional[StrictStr] = None 139 | """Username for DB auth, possibly Astra DB token Client ID.""" 140 | 141 | password: Optional[StrictStr] = None 142 | """Password for DB auth, possibly Astra DB token Client Secret.""" 143 | 144 | protocol_version: Optional[StrictInt] = None 145 | """Explicit specification of the CQL protocol version used.""" 146 | 147 | class CassandraLoadBalancingPolicy(FeastConfigBaseModel): 148 | """ 149 | Configuration block related to the Cluster's load-balancing policy. 150 | """ 151 | 152 | load_balancing_policy: StrictStr 153 | """ 154 | A stringy description of the load balancing policy to instantiate 155 | the cluster with. 
Supported values: 156 | "DCAwareRoundRobinPolicy" 157 | "TokenAwarePolicy(DCAwareRoundRobinPolicy)" 158 | """ 159 | 160 | local_dc: StrictStr = None 161 | """The local datacenter, usually necessary to create the policy.""" 162 | 163 | load_balancing: Optional[CassandraLoadBalancingPolicy] = None 164 | """ 165 | Details on the load-balancing policy: it will be 166 | wrapped into an execution profile if present. 167 | """ 168 | 169 | class CassandraOnlineStore(OnlineStore): 170 | """ 171 | Cassandra/Astra DB online store implementation for Feast. 172 | 173 | Attributes: 174 | _cluster: Cassandra cluster to connect to. 175 | _session: (DataStax Cassandra drivers) session object 176 | to issue commands. 177 | _keyspace: Cassandra keyspace all tables live in. 178 | _prepared_statements: cache of statements prepared by the driver. 179 | """ 180 | 181 | _cluster: Cluster = None 182 | _session: Session = None 183 | _keyspace: str = None 184 | _prepared_statements = {} 185 | 186 | def _get_session(self, config: RepoConfig): 187 | """ 188 | Establish the database connection, if not yet created, 189 | and return it. 190 | 191 | Also perform basic config validation checks. 
192 | """ 193 | 194 | online_store_config = config.online_store 195 | if not isinstance(online_store_config, CassandraOnlineStoreConfig): 196 | raise CassandraInvalidConfig( 197 | E_CASSANDRA_UNEXPECTED_CONFIGURATION_CLASS 198 | ) 199 | 200 | if self._session: 201 | return self._session 202 | if not self._session: 203 | # configuration consistency checks 204 | hosts = online_store_config.hosts 205 | secure_bundle_path = online_store_config.secure_bundle_path 206 | port = online_store_config.port or 9042 207 | keyspace = online_store_config.keyspace 208 | username = online_store_config.username 209 | password = online_store_config.password 210 | protocol_version = online_store_config.protocol_version 211 | 212 | db_directions = hosts or secure_bundle_path 213 | if not db_directions or not keyspace: 214 | raise CassandraInvalidConfig(E_CASSANDRA_NOT_CONFIGURED) 215 | if hosts and secure_bundle_path: 216 | raise CassandraInvalidConfig(E_CASSANDRA_MISCONFIGURED) 217 | if (username is None) ^ (password is None): 218 | raise CassandraInvalidConfig(E_CASSANDRA_INCONSISTENT_AUTH) 219 | 220 | if username is not None: 221 | auth_provider = PlainTextAuthProvider( 222 | username=username, 223 | password=password, 224 | ) 225 | else: 226 | auth_provider = None 227 | 228 | # handling of load-balancing policy (optional) 229 | if online_store_config.load_balancing: 230 | # construct a proper execution profile embedding 231 | # the configured LB policy 232 | _lbp_name = online_store_config.load_balancing.load_balancing_policy 233 | if _lbp_name == 'DCAwareRoundRobinPolicy': 234 | lb_policy = DCAwareRoundRobinPolicy( 235 | local_dc=online_store_config.load_balancing.local_dc, 236 | ) 237 | elif _lbp_name == 'TokenAwarePolicy(DCAwareRoundRobinPolicy)': 238 | lb_policy = TokenAwarePolicy(DCAwareRoundRobinPolicy( 239 | local_dc=online_store_config.load_balancing.local_dc, 240 | )) 241 | else: 242 | raise CassandraInvalidConfig(E_CASSANDRA_UNKNOWN_LB_POLICY) 243 | 244 | # wrap it up in 
a map of ex.profiles with a default 245 | exe_profile = ExecutionProfile( 246 | load_balancing_policy = lb_policy, 247 | ) 248 | execution_profiles = {EXEC_PROFILE_DEFAULT: exe_profile} 249 | else: 250 | execution_profiles = None 251 | 252 | # additional optional keyword args to Cluster 253 | cluster_kwargs = { 254 | k: v 255 | for k, v in { 256 | 'protocol_version': protocol_version, 257 | 'execution_profiles': execution_profiles, 258 | }.items() 259 | if v is not None 260 | } 261 | 262 | # creation of Cluster (Cassandra vs. Astra) 263 | if hosts: 264 | self._cluster = Cluster( 265 | hosts, 266 | port=port, 267 | auth_provider=auth_provider, 268 | **cluster_kwargs, 269 | ) 270 | else: 271 | # we use 'secure_bundle_path' 272 | self._cluster = Cluster( 273 | cloud={ 274 | "secure_connect_bundle": secure_bundle_path, 275 | }, 276 | auth_provider=auth_provider, 277 | **cluster_kwargs, 278 | ) 279 | 280 | # creation of Session 281 | self._keyspace = keyspace 282 | self._session = self._cluster.connect(self._keyspace) 283 | 284 | return self._session 285 | 286 | def __del__(self): 287 | if self._session: 288 | self._session.shutdown() 289 | 290 | @log_exceptions_and_usage(online_store="cassandra") 291 | def online_write_batch( 292 | self, 293 | config: RepoConfig, 294 | table: FeatureView, 295 | data: List[ 296 | Tuple[EntityKeyProto, 297 | Dict[str, ValueProto], datetime, Optional[datetime]] 298 | ], 299 | progress: Optional[Callable[[int], Any]], 300 | ) -> None: 301 | """ 302 | Write a batch of features of several entities to the database. 303 | 304 | Args: 305 | config: The RepoConfig for the current FeatureStore. 306 | table: Feast FeatureView. 307 | data: a list of quadruplets containing Feature data. Each 308 | quadruplet contains an Entity Key, a dict containing feature 309 | values, an event timestamp for the row, and 310 | the created timestamp for the row if it exists. 
311 | progress: Optional function to be called once every mini-batch of 312 | rows is written to the online store. Can be used to 313 | display progress. 314 | """ 315 | project = config.project 316 | # 317 | for entity_key, values, timestamp, created_ts in data: 318 | entity_key_bin = serialize_entity_key(entity_key).hex() 319 | with tracing_span(name="remote_call"): 320 | self._write_rows(config, project, table, entity_key_bin, 321 | values.items(), timestamp, created_ts) 322 | if progress: 323 | progress(1) 324 | 325 | @log_exceptions_and_usage(online_store="cassandra") 326 | def online_read( 327 | self, 328 | config: RepoConfig, 329 | table: FeatureView, 330 | entity_keys: List[EntityKeyProto], 331 | requested_features: Optional[List[str]] = None, 332 | ) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]: 333 | """ 334 | Read feature values pertaining to the requested entities from 335 | the online store. 336 | 337 | Args: 338 | config: The RepoConfig for the current FeatureStore. 339 | table: Feast FeatureView. 340 | entity_keys: a list of entity keys that should be read 341 | from the FeatureStore. 
342 | """ 343 | project = config.project 344 | 345 | result: List[Tuple[Optional[datetime], 346 | Optional[Dict[str, ValueProto]]]] = [] 347 | 348 | for entity_key in entity_keys: 349 | entity_key_bin = serialize_entity_key(entity_key).hex() 350 | 351 | with tracing_span(name="remote_call"): 352 | feature_rows = self._read_rows_by_entity_key( 353 | config, project, table, entity_key_bin, 354 | proj=["feature_name", "value", "event_ts"], 355 | ) 356 | 357 | res = {} 358 | res_ts = None 359 | for feature_row in feature_rows: 360 | if (requested_features is None 361 | or feature_row.feature_name in requested_features): 362 | val = ValueProto() 363 | val.ParseFromString(feature_row.value) 364 | res[feature_row.feature_name] = val 365 | res_ts = feature_row.event_ts 366 | # 367 | if not res: 368 | result.append((None, None)) 369 | else: 370 | result.append((res_ts, res)) 371 | return result 372 | 373 | @log_exceptions_and_usage(online_store="cassandra") 374 | def update( 375 | self, 376 | config: RepoConfig, 377 | tables_to_delete: Sequence[FeatureView], 378 | tables_to_keep: Sequence[FeatureView], 379 | entities_to_delete: Sequence[Entity], 380 | entities_to_keep: Sequence[Entity], 381 | partial: bool, 382 | ): 383 | """ 384 | Update schema on DB, by creating and destroying tables accordingly. 385 | 386 | Args: 387 | config: The RepoConfig for the current FeatureStore. 388 | tables_to_delete: Tables to delete from the Online Store. 389 | tables_to_keep: Tables to keep in the Online Store. 
390 | """ 391 | project = config.project 392 | 393 | for table in tables_to_keep: 394 | with tracing_span(name="remote_call"): 395 | self._create_table(config, project, table) 396 | for table in tables_to_delete: 397 | with tracing_span(name="remote_call"): 398 | self._drop_table(config, project, table) 399 | 400 | @log_exceptions_and_usage(online_store="cassandra") 401 | def teardown( 402 | self, 403 | config: RepoConfig, 404 | tables: Sequence[FeatureView], 405 | entities: Sequence[Entity], 406 | ): 407 | """ 408 | Delete tables from the database. 409 | 410 | Args: 411 | config: The RepoConfig for the current FeatureStore. 412 | tables: Tables to delete from the feature repo. 413 | """ 414 | project = config.project 415 | 416 | for table in tables: 417 | with tracing_span(name="remote_call"): 418 | self._drop_table(config, project, table) 419 | 420 | @staticmethod 421 | def _fq_table_name( 422 | keyspace: str, 423 | project: str, 424 | table: FeatureView, 425 | ) -> str: 426 | """ 427 | Generate a fully-qualified table name, 428 | including quotes and keyspace. 429 | """ 430 | return f"\"{keyspace}\".\"{project}_{table.name}\"" 431 | 432 | def _write_rows( 433 | self, 434 | config: RepoConfig, 435 | project: str, 436 | table: FeatureView, 437 | entity_key_bin: bytes, 438 | features_vals: Iterable[Tuple[str, ValueProto]], 439 | timestamp: datetime, 440 | created_ts: Optional[datetime], 441 | ): 442 | """ 443 | Handle the CQL (low-level) insertion of feature values to a table. 444 | 445 | Note: `created_ts` can be None: in that case we avoid explicitly 446 | inserting it to prevent unnecessary tombstone creation on Cassandra. 
447 | """ 448 | session: Session = self._get_session(config) 449 | keyspace: str = self._keyspace 450 | # 451 | fqtable = CassandraOnlineStore._fq_table_name(keyspace, project, table) 452 | if created_ts is None: 453 | insert_cql = self._get_cql_statement( 454 | config, 455 | 'insert4', 456 | fqtable=fqtable, 457 | ) 458 | fixed_vals = [entity_key_bin, timestamp] 459 | else: 460 | insert_cql = self._get_cql_statement( 461 | config, 462 | 'insert5', 463 | fqtable=fqtable, 464 | ) 465 | fixed_vals = [entity_key_bin, timestamp, created_ts] 466 | # 467 | for feature_name, val in features_vals: 468 | session.execute( 469 | insert_cql, 470 | [feature_name, val.SerializeToString()] + fixed_vals, 471 | ) 472 | 473 | def _read_rows_by_entity_key( 474 | self, 475 | config: RepoConfig, 476 | project: str, 477 | table: FeatureView, 478 | entity_key_bin: bytes, 479 | proj: Optional[List[str]] = None, 480 | ) -> ResultSet: 481 | """ 482 | Handle the CQL (low-level) reading of feature values from a table. 
483 | """ 484 | session: Session = self._get_session(config) 485 | keyspace: str = self._keyspace 486 | # 487 | fqtable = CassandraOnlineStore._fq_table_name(keyspace, project, table) 488 | columns="*" if proj is None else ", ".join(proj) 489 | select_cql = self._get_cql_statement( 490 | config, 491 | 'select', 492 | fqtable=fqtable, 493 | columns=columns, 494 | ) 495 | return session.execute(select_cql, [entity_key_bin]) 496 | 497 | def _drop_table( 498 | self, 499 | config: RepoConfig, 500 | project: str, 501 | table: FeatureView, 502 | ): 503 | """Handle the CQL (low-level) deletion of a table.""" 504 | session: Session = self._get_session(config) 505 | keyspace: str = self._keyspace 506 | # 507 | fqtable = CassandraOnlineStore._fq_table_name(keyspace, project, table) 508 | drop_cql = self._get_cql_statement(config, 'drop', fqtable) 509 | logger.info(f"Deleting table {fqtable}.") 510 | session.execute(drop_cql) 511 | 512 | def _create_table( 513 | self, 514 | config: RepoConfig, 515 | project: str, 516 | table: FeatureView, 517 | ): 518 | """Handle the CQL (low-level) creation of a table.""" 519 | session: Session = self._get_session(config) 520 | keyspace: str = self._keyspace 521 | # 522 | fqtable = CassandraOnlineStore._fq_table_name(keyspace, project, table) 523 | create_cql = self._get_cql_statement(config, 'create', fqtable) 524 | logger.info(f"Creating table {fqtable}.") 525 | session.execute(create_cql) 526 | 527 | def _get_cql_statement( 528 | self, 529 | config: RepoConfig, 530 | op_name: str, 531 | fqtable: str, 532 | **kwargs, 533 | ): 534 | """ 535 | Resolve an 'op_name' (create, insert4, etc) into a CQL statement 536 | ready to be bound to parameters when executing. 537 | 538 | If the statement is defined to be 'prepared', use an instance-specific 539 | cache of prepared statements. 540 | 541 | This additional layer makes it easy to control whether to use prepared 542 | statements and, if so, on which database operations. 
543 | """ 544 | session: Session = self._get_session(config) 545 | # 546 | template, prepare = CQL_TEMPLATE_MAP[op_name] 547 | statement = template.format( 548 | fqtable=fqtable, 549 | **kwargs, 550 | ) 551 | if prepare: 552 | # using the statement itself as key (no problem with that) 553 | cache_key = statement 554 | if cache_key not in self._prepared_statements: 555 | logger.info(f"Preparing a {op_name} statement on {fqtable}.") 556 | self._prepared_statements[cache_key] = \ 557 | session.prepare(statement) 558 | return self._prepared_statements[cache_key] 559 | else: 560 | return statement 561 | --------------------------------------------------------------------------------