├── .gitignore ├── LICENSE ├── README.md ├── env-example.txt ├── requirements.txt └── src ├── 01_crud_operations.py ├── 02_aggregation.py └── csfle ├── README.md ├── client_schema_create_key.py ├── client_schema_main.py ├── server_schema_create_key.py └── server_schema_main.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.toptal.com/developers/gitignore/api/python 3 | # Edit at https://www.toptal.com/developers/gitignore?templates=python 4 | 5 | ### Python ### 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | pip-wheel-metadata/ 29 | share/python-wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .nox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | *.py,cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | pytestdebug.log 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | doc/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | # pytype static type analyzer 139 | .pytype/ 140 | 141 | # End of https://www.toptal.com/developers/gitignore/api/python 142 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MongoDB Python Quick Start Code 2 | 3 | This repository contains sample code from the MongoDB Python Quick Start series. 4 | 5 | There are currently two posts in the series: 6 | 7 | * [Basic MongoDB Operations in Python](https://developer.mongodb.com/quickstart/python-quickstart-crud) 8 | * [Getting Started with Aggregation Pipelines in Python](https://developer.mongodb.com/quickstart/python-quickstart-aggregation) 9 | 10 | # How To Run It 11 | 12 | The source code is all in the [src](./src) directory. 13 | It is written for Python 3.6 and later. 14 | 15 | Full instructions on how to set up your Python environment for this code can be found in the first blog post in the series, [Basic MongoDB Operations in Python](https://developer.mongodb.com/quickstart/python-quickstart-crud). 
16 | 17 | To install the dependencies, create a virtualenv using your favourite tool, such as [venv](https://docs.python.org/3/tutorial/venv.html) or [virtualenv](https://virtualenv.pypa.io/en/stable/), activate it, and then run: 18 | 19 | ```bash 20 | python3 -m pip install -r requirements.txt 21 | ``` 22 | 23 | You will also want to set the environment variable `MONGODB_URI` to your MongoDB Atlas cluster, either on the command-line, or in a `.env` file. 24 | 25 | Once you've installed the dependencies and set `MONGODB_URI` you can run the python scripts directly with something like: 26 | 27 | ```bash 28 | python3 src/01_crud_operations.py 29 | ``` 30 | 31 | If you have questions or feedback, 32 | please let us know at the [MongoDB Community Forums](https://community.mongodb.com/)! 33 | -------------------------------------------------------------------------------- /env-example.txt: -------------------------------------------------------------------------------- 1 | # Replace the dummy URL below with your MongoDB cluster URL, and 2 | # rename this file to .env 3 | # (make sure it doesn't end up being called .env.txt by mistake!) 4 | # And DON'T COMMIT IT TO YOUR GIT REPOSITORY!!! 5 | # 6 | # Then when you run the scripts in the src directory, they pick up your 7 | # cluster configuration automatically. 
8 | declare -x MONGODB_URI="mongodb+srv://username:password@cluster0-abcde.platform.mongodb.net/test?retryWrites=true&w=majority" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pymongo[srv,encryption]~=3.11 2 | python-dotenv~=0.14.0 -------------------------------------------------------------------------------- /src/01_crud_operations.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import os 3 | # Import the `pprint` function to print nested data: 4 | from pprint import pprint 5 | 6 | from dotenv import load_dotenv 7 | 8 | import bson 9 | from pymongo import MongoClient 10 | 11 | 12 | def print_title(title): 13 | """ 14 | Utility function to print a title with an underline. 15 | """ 16 | print() # Print a blank line 17 | print(title) 18 | print('=' * len(title)) 19 | 20 | 21 | # Load config from a .env file: 22 | load_dotenv(verbose=True) 23 | MONGODB_URI = os.environ['MONGODB_URI'] 24 | 25 | # Connect to your MongoDB cluster: 26 | client = MongoClient(MONGODB_URI) 27 | 28 | print_title("Database names") 29 | # List all the databases in the cluster: 30 | for db_info in client.list_database_names(): 31 | print(db_info) 32 | 33 | 34 | # Get a reference to the 'sample_mflix' database: 35 | db = client['sample_mflix'] 36 | 37 | print_title("Collections in 'sample_mflix'") 38 | # List all the collections in 'sample_mflix': 39 | collections = db.list_collection_names() 40 | for collection in collections: 41 | print(collection) 42 | 43 | # Get a reference to the 'movies' collection: 44 | movies = db['movies'] 45 | 46 | # Get the document with the title 'Blacksmith Scene': 47 | print_title("'Blacksmith Scene' document") 48 | pprint(movies.find_one({'title': 'Blacksmith Scene'})) 49 | 50 | print_title("Insert a document for 'Parasite'") 51 | 52 | # Insert a 
# document for the movie 'Parasite':
# (The year is deliberately inserted as 2020 here; it is corrected to 2019 by
# the update steps further down.)
insert_result = movies.insert_one({
    "title": "Parasite",
    "year": 2020,
    "plot": "A poor family, the Kims, con their way into becoming the servants of a rich family, the Parks. "
    "But their easy life gets complicated when their deception is threatened with exposure.",
    "released": datetime(2020, 2, 7, 0, 0, 0),
})

# Save the inserted_id of the document you just created:
parasite_id = insert_result.inserted_id
print(f"_id of inserted document: {parasite_id}")

print_title("Look up the inserted document")

# Look up the document you just created in the collection:
print(movies.find_one({'_id': bson.ObjectId(parasite_id)}))

print_title("Look up all documents with the title 'Parasite'")

# Look up the documents you've created in the collection:
for doc in movies.find({"title": "Parasite"}):
    pprint(doc)

print_title("Update 'Parasite' year to 2019")

# Update the document with the correct year:
update_result = movies.update_one({'_id': parasite_id}, {
    '$set': {"year": 2019}
})

# Print out the updated record to make sure it's correct:
pprint(movies.find_one({'_id': bson.ObjectId(parasite_id)}))

print_title("Update *all* documents with the title 'Parasite' to 2019")

# Update *all* the Parasite movie docs to the correct year:
update_result = movies.update_many({"title": "Parasite"}, {"$set": {"year": 2019}})
print(f"Documents updated: {update_result.modified_count}")

print_title("Delete all documents with the title 'Parasite'")

# Remove every 'Parasite' document, so the collection no longer contains the
# documents this script inserted:
delete_result = movies.delete_many(
    {"title": "Parasite"}
)
# Report the number of *deleted* documents (this message previously said
# "updated", which mislabelled the delete count):
print(f"Documents deleted: {delete_result.deleted_count}")

# --------------------------------------------------------------------------------
# /src/02_aggregation.py:
import os
# Import the `pprint` function to print nested data:
from pprint import pprint

from dotenv import load_dotenv

import bson
import pymongo


def print_title(title, underline_char="="):
    """
    Utility function to print a title with an underline.

    `underline_char` is repeated once per character of `title` to build the
    underline.
    """
    print()  # Print a blank line
    print(title)
    print(underline_char * len(title))  # Print an underline made of `underline_char`


# ----------------------------------------------------------------------------
# The purpose of the next few lines is to load a MONGODB_URI environment
# variable and use it to configure a MongoClient, where it will then set up
# the global variable `movie_collection`, representing the `movies` collection
# in the `sample_mflix` database.

# Load config from a .env file:
load_dotenv(verbose=True)
MONGODB_URI = os.environ["MONGODB_URI"]

# Connect to your MongoDB cluster:
client = pymongo.MongoClient(MONGODB_URI)

# Get a reference to the "sample_mflix" database:
db = client["sample_mflix"]

# Get a reference to the "movies" collection:
movie_collection = db["movies"]


def _print_movie_summary(movie):
    """
    Print a one-line bullet for `movie`: title, first cast member, and year.

    A document with a missing or empty `cast` list prints a placeholder
    instead of raising.
    """
    cast = movie.get("cast") or []
    first_castmember = cast[0] if cast else "(no cast listed)"
    print(" * {title}, {first_castmember}, {year}".format(
        title=movie["title"],
        first_castmember=first_castmember,
        year=movie["year"],
    ))


def a_sample_movie_document():
    """
    Obtain a single movie document, and pretty-print it.
    """
    print_title("A Sample Movie")

    pipeline = [
        {
            "$match": {
                "title": "A Star Is Born"
            }
        },
        {"$limit": 1},
    ]
    results = movie_collection.aggregate(pipeline)
    for movie in results:
        pprint(movie)


def a_sample_comment_document():
    """
    Obtain a single comment document, and pretty-print it.
    """
    print_title("A Sample Comment")

    pipeline = [
        {"$limit": 1},
    ]
    results = db["comments"].aggregate(pipeline)
    for comment in results:
        pprint(comment)


def a_star_is_born_all():
    """
    Print a summary of all documents for "A Star Is Born" in the collection.
    """
    print_title("A Star Is Born - All Documents")

    # A pipeline with the following stages:
    #  * Match title = "A Star Is Born"
    #  * Sort by year, ascending
    pipeline = [
        {
            "$match": {
                "title": "A Star Is Born"
            }
        },
        {"$sort": {"year": pymongo.ASCENDING}},
    ]
    results = movie_collection.aggregate(pipeline)
    for movie in results:
        _print_movie_summary(movie)


def a_star_is_born_most_recent():
    """
    Print a summary for the most recent production of "A Star Is Born" in the collection.
    """
    print_title("A Star Is Born - Most Recent")

    # Match title = "A Star Is Born":
    stage_match_title = {
        "$match": {
            "title": "A Star Is Born"
        }
    }

    # Sort by year, descending:
    stage_sort_year_descending = {
        "$sort": {"year": pymongo.DESCENDING}
    }

    # Limit to 1 document:
    stage_limit_1 = {"$limit": 1}

    pipeline = [
        stage_match_title,
        stage_sort_year_descending,
        stage_limit_1,
    ]
    results = movie_collection.aggregate(pipeline)
    for movie in results:
        _print_movie_summary(movie)


def movies_with_comments():
    """
    Print up to 5 comments for each of the first 5 movies that have more than 2 comments.

    This query can be a little slow - see the comments in the function body for tips.
    """
    print_title("Movies With Comments")

    # Look up related documents in the 'comments' collection:
    stage_lookup_comments = {
        "$lookup": {
            "from": "comments",
            "localField": "_id",
            "foreignField": "movie_id",
            "as": "related_comments"
        }
    }

    # Calculate the number of comments for each movie:
    stage_add_comment_count = {
        "$addFields": {
            "comment_count": {
                "$size": "$related_comments"
            }
        }
    }

    # Match movie documents with more than 2 comments:
    stage_match_with_comments = {
        "$match": {
            "comment_count": {
                "$gt": 2
            }
        }
    }
    # Limit to the first 5 documents:
    limit_5 = {"$limit": 5}

    # Optional limit to 1000 documents.
    # Run at the start of the pipeline, to speed things up during development:
    limit_1000 = {"$limit": 1000}

    pipeline = [
        # Uncomment the line below to run on 1000 documents instead of
        # the full collection, for speed:
        #
        # limit_1000,
        stage_lookup_comments,
        stage_add_comment_count,
        stage_match_with_comments,
        limit_5,
    ]
    results = movie_collection.aggregate(pipeline)
    for movie in results:
        print_title(movie["title"], "-")
        print("Comment count:", movie["comment_count"])

        # Loop through the first 5 comments and print the name and text:
        for comment in movie["related_comments"][:5]:
            print(" * {name}: {text}".format(
                name=comment["name"],
                text=comment["text"]))


def movies_each_year():
    """
    Print the number of movies produced in each year before 1920, along with the movie titles.
    """
    print_title("Movies Grouped By Year")

    # Group movies by year, producing 'year-summary' documents that look like:
    # {
    #     '_id': 1917,
    #     'movie_count': 3,
    #     'movie_titles': [
    #         'The Poor Little Rich Girl',
    #         'Wild and Woolly',
    #         'The Immigrant'
    #     ]
    # }
    stage_group_year = {
        "$group": {
            "_id": "$year",
            "movie_count": {"$sum": 1},
            "movie_titles": {"$push": "$title"},
        }
    }

    # Match *movie* documents whose `year` field is both:
    #  * numeric
    #  * less than 1920
    # This stage is placed before the $group stage in `pipeline` below, so it
    # filters on each movie's own `year` field (not on a year-summary `_id`).
    stage_match_years = {
        "$match": {
            "year": {
                "$type": "number",
                "$lt": 1920,
            }
        }
    }

    # Sort year-summary documents by '_id'
    # (which is the year the document summarizes):
    stage_sort_year_ascending = {
        "$sort": {"_id": pymongo.ASCENDING}
    }

    pipeline = [
        stage_match_years,
        stage_group_year,
        stage_sort_year_ascending,
    ]
    results = movie_collection.aggregate(pipeline)

    # Loop through the 'year-summary' documents:
    for year_summary in results:
        # Print an underlined heading for each year:
        heading = "{year}: {count} movies".format(
            year=year_summary["_id"],
            count=year_summary["movie_count"])
        print_title(heading, "-")
        # Loop through the document titles for each year and print them
        # as bullets:
        for movie_title in year_summary["movie_titles"]:
            print(" *", movie_title)


# The following lines are commented out, as the functions they call aren't
# described in the accompanying blog post.
# I left the function definitions here anyway, in case the reader might find
# them useful.
264 | # 265 | # a_sample_movie_document() 266 | # a_sample_comment_document() 267 | 268 | a_star_is_born_all() 269 | a_star_is_born_most_recent() 270 | movies_with_comments() 271 | movies_each_year() -------------------------------------------------------------------------------- /src/csfle/README.md: -------------------------------------------------------------------------------- 1 | # Client-Side Field-Level Encryption 2 | 3 | This directory contains sample code to accompany a [CSFLE post](https://developer.mongodb.com/quickstart/python-quickstart-fle). 4 | It contains 4 scripts, in pairs: 5 | 6 | ## The Sample Code 7 | 8 | | File | Description | 9 | |---|---| 10 | |[client_schema_create_key.py](client_schema_create_key.py)| A script demonstrating how to create a key for local storage, and a data key within MongoDB. It outputs two files: The random bytes used to encrypt the data key, and a JSON schema file containing a schema to be used to configure MongoClient | 11 | |[client_schema_main.py](client_schema_main.py)| A Python script which executes various commands against a MongoDB cluster to demonstrate various aspects of Client-Side Field Level Encryption in MongoDB. It's designed to be run after `client_schema_create_key.py`, which creates some files this script depends on. | 12 | |[server_schema_create_key.py](server_schema_create_key.py)| A script demonstrating how to create a key for local storage, and a data key within MongoDB. It outputs one file containing the random bytes used to encrypt the data key. The JSON schema is set directly on the `people` collection as a validator | 13 | |[server_schema_main.py](server_schema_main.py)| A Python script which executes various commands against a MongoDB cluster to demonstrate various aspects of Client-Side Field Level Encryption in MongoDB (with a CSFLE schema applied to a collection). It's designed to be run after `server_schema_create_key.py`, which creates some files this script depends on. 
| 14 | 15 | ## Executing The Sample Code 16 | 17 | The best way to understand and run the code is to follow the [blog post](https://developer.mongodb.com/quickstart/python-quickstart-fle) it was written for, but the summary is: 18 | 19 | * You need Python 3.6+ 20 | * Pip install the dependencies with `python3 -m pip install "pymongo[srv,encryption]~=3.11"` 21 | * Set the ``MDB_URL`` environment variable to your Atlas cluster's URL. 22 | * Run each script to see what it does! (The code is heavily commented.) 23 | -------------------------------------------------------------------------------- /src/csfle/client_schema_create_key.py: -------------------------------------------------------------------------------- 1 | """ 2 | client_schema_create_key.py - A Python script to create a random key. 3 | 4 | This script: 5 | * Generates a random 96-byte key, and writes it to "key_bytes.bin" 6 | * Connects to the MongoDB server at "MDB_URL" and adds a key to "fle_demo.__keystore", with the alt name of "example". 7 | * Writes a schema for the fle_demo.people collection to "json_schema.json". 
8 | """ 9 | 10 | import os 11 | from pathlib import Path 12 | from secrets import token_bytes 13 | 14 | from bson import json_util 15 | from bson.binary import STANDARD 16 | from bson.codec_options import CodecOptions 17 | from pymongo import MongoClient 18 | from pymongo.encryption import ClientEncryption 19 | from pymongo.encryption_options import AutoEncryptionOpts 20 | 21 | 22 | # Generate a secure 96-byte secret key: 23 | key_bytes = token_bytes(96) 24 | 25 | # Configure a single, local KMS provider, with the saved key: 26 | kms_providers = {"local": {"key": key_bytes}} 27 | fle_opts = AutoEncryptionOpts( 28 | kms_providers=kms_providers, key_vault_namespace="fle_demo.__keystore" 29 | ) 30 | 31 | # Connect to MongoDB with the key information generated above: 32 | with MongoClient(os.environ["MDB_URL"], auto_encryption_opts=fle_opts) as client: 33 | print("Resetting demo database & keystore ...") 34 | client.drop_database("fle_demo") 35 | 36 | # Create a ClientEncryption object to create the data key below: 37 | client_encryption = ClientEncryption( 38 | kms_providers, 39 | "fle_demo.__keystore", 40 | client, 41 | CodecOptions(uuid_representation=STANDARD), 42 | ) 43 | 44 | print("Creating key in MongoDB ...") 45 | key_id = client_encryption.create_data_key("local", key_alt_names=["example"]) 46 | 47 | # This is the schema which will be saved out to "json_schema.json": 48 | schema = { 49 | "bsonType": "object", 50 | "properties": { 51 | "ssn": { 52 | "encrypt": { 53 | "bsonType": "string", 54 | # Change to "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic" in order to filter by ssn value: 55 | "algorithm": "AEAD_AES_256_CBC_HMAC_SHA_512-Random", 56 | "keyId": [key_id], # Reference the key 57 | } 58 | }, 59 | }, 60 | } 61 | 62 | print("Writing secret key to 'key_bytes.bin' ...") 63 | Path("key_bytes.bin").write_bytes(key_bytes) 64 | 65 | print("Writing schema to 'json_schema.json' ...") 66 | json_schema = json_util.dumps( 67 | schema, 
json_options=json_util.CANONICAL_JSON_OPTIONS, indent=2 68 | ) 69 | Path("json_schema.json").write_text(json_schema) 70 | 71 | print("Done.") -------------------------------------------------------------------------------- /src/csfle/client_schema_main.py: -------------------------------------------------------------------------------- 1 | """ 2 | client_schema_main.py - A script to execute some commands demonstrating MongoDB's client-side field-level encryption. 3 | 4 | Note: 5 | ----- 6 | Before running this script, first run "client_schema_create_key.py" to 7 | configure a key in the database and to generate "key_bytes.bin" 8 | and "json_schema.json". 9 | """ 10 | 11 | import os 12 | from pathlib import Path 13 | 14 | from pymongo import MongoClient 15 | from pymongo.encryption_options import AutoEncryptionOpts 16 | from pymongo.errors import EncryptionError 17 | from bson import json_util 18 | 19 | 20 | # Load the secret key from 'key_bytes.bin': 21 | key_bin = Path("key_bytes.bin").read_bytes() 22 | 23 | # Load the 'person' schema from "json_schema.json": 24 | collection_schema = json_util.loads(Path("json_schema.json").read_text()) 25 | 26 | # Configure a single, local KMS provider, with the saved key: 27 | kms_providers = {"local": {"key": key_bin}} 28 | 29 | # Create a configuration for PyMongo, specifying the local key, 30 | # the collection used for storing key data, and the json schema specifying 31 | # field encryption: 32 | fle_opts = AutoEncryptionOpts( 33 | kms_providers, 34 | "fle_demo.__keystore", 35 | schema_map={"fle_demo.people": collection_schema}, 36 | ) 37 | 38 | # Add a new document to the "people" collection, and then read it back out 39 | # to demonstrate that the ssn field is automatically decrypted by PyMongo: 40 | with MongoClient(os.environ["MDB_URL"], auto_encryption_opts=fle_opts) as client: 41 | client.fle_demo.people.delete_many({}) 42 | client.fle_demo.people.insert_one( 43 | { 44 | "full_name": "Sophia Duleep Singh", 45 | "ssn": 
"123-12-1234", 46 | } 47 | ) 48 | print("Decrypted find() results: ") 49 | print(client.fle_demo.people.find_one()) 50 | 51 | # Connect to MongoDB, but this time without FLE configuration. 52 | # This will print the document with ssn *still encrypted*: 53 | with MongoClient(os.environ["MDB_URL"]) as client: 54 | print("Encrypted find() results: ") 55 | print(client.fle_demo.people.find_one()) 56 | 57 | # The following demonstrates that if the ssn field is encrypted as 58 | # "Random" it cannot be filtered: 59 | try: 60 | with MongoClient(os.environ["MDB_URL"], auto_encryption_opts=fle_opts) as client: 61 | # This will fail if ssn is specified as "Random". 62 | # Change the algorithm to "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic" 63 | # in client_schema_create_key.py (and run it again) for this to succeed: 64 | print("Find by ssn: ") 65 | print(client.fle_demo.people.find_one({"ssn": "123-12-1234"})) 66 | except EncryptionError as e: 67 | # This is expected if the field is "Random" but not if it's "Deterministic" 68 | print(e) 69 | 70 | # Configure encryption options with the same key, but *without* a schema: 71 | fle_opts_no_schema = AutoEncryptionOpts( 72 | kms_providers, 73 | "fle_demo.__keystore", 74 | ) 75 | with MongoClient( 76 | os.environ["MDB_URL"], auto_encryption_opts=fle_opts_no_schema 77 | ) as client: 78 | print("Inserting Dora Thewlis, without configured schema.") 79 | # This will insert a document *without* encrypted ssn, because 80 | # no schema is specified in the client or server: 81 | client.fle_demo.people.insert_one( 82 | { 83 | "full_name": "Dora Thewlis", 84 | "ssn": "234-23-2345", 85 | } 86 | ) 87 | 88 | # Connect without FLE configuration to show that Sophia Duleep Singh is 89 | # encrypted, but Dora Thewlis has her ssn saved as plaintext. 
90 | with MongoClient(os.environ["MDB_URL"]) as client: 91 | print("Encrypted find() results: ") 92 | for doc in client.fle_demo.people.find(): 93 | print(" *", doc) 94 | -------------------------------------------------------------------------------- /src/csfle/server_schema_create_key.py: -------------------------------------------------------------------------------- 1 | """ 2 | server_schema_create_key.py - A Python script to create a random key. 3 | 4 | This script: 5 | * Generates a random 96-byte key, and writes it to "key_bytes.bin" 6 | * Connects to the MongoDB server at "MDB_URL" and adds a key to "fle_demo.__keystore", with the alt name of "example". 7 | * Creates a "people" collection, with a JSON schema provided as validator. 8 | """ 9 | 10 | import os 11 | from pathlib import Path 12 | from secrets import token_bytes 13 | 14 | from bson.binary import STANDARD 15 | from bson.codec_options import CodecOptions 16 | from pymongo import MongoClient 17 | from pymongo.encryption import ClientEncryption 18 | from pymongo.encryption_options import AutoEncryptionOpts 19 | 20 | 21 | # Generate a secure 96-byte secret key: 22 | key_bytes = token_bytes(96) 23 | 24 | # Configure a single, local KMS provider, with the saved key: 25 | kms_providers = {"local": {"key": key_bytes}} 26 | fle_opts = AutoEncryptionOpts( 27 | kms_providers=kms_providers, key_vault_namespace="fle_demo.__keystore" 28 | ) 29 | 30 | # Connect to MongoDB with the key information generated above: 31 | with MongoClient(os.environ["MDB_URL"], auto_encryption_opts=fle_opts) as client: 32 | print("Resetting demo database & keystore...") 33 | client.drop_database("fle_demo") 34 | 35 | # Create a ClientEncryption object to create the data key below: 36 | client_encryption = ClientEncryption( 37 | kms_providers, 38 | "fle_demo.__keystore", 39 | client, 40 | CodecOptions(uuid_representation=STANDARD), 41 | ) 42 | 43 | print("Creating key in MongoDB ...") 44 | key_id = 
client_encryption.create_data_key("local", key_alt_names=["example"]) 45 | 46 | # This is the schema which will be set as the validator of the "people" collection: 47 | schema = { 48 | "bsonType": "object", 49 | "properties": { 50 | "ssn": { 51 | "encrypt": { 52 | "bsonType": "string", 53 | # Change to "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic" in order to filter by ssn value: 54 | "algorithm": "AEAD_AES_256_CBC_HMAC_SHA_512-Random", 55 | "keyId": [key_id], 56 | } 57 | }, 58 | }, 59 | } 60 | 61 | print("Creating 'people' collection in 'fle_demo' database (with schema) ...") 62 | client.fle_demo.create_collection( 63 | "people", 64 | codec_options=CodecOptions(uuid_representation=STANDARD), 65 | validator={"$jsonSchema": schema}, 66 | ) 67 | 68 | print("Writing secret key to 'key_bytes.bin' ...") 69 | Path("key_bytes.bin").write_bytes(key_bytes) 70 | 71 | print("Done.") -------------------------------------------------------------------------------- /src/csfle/server_schema_main.py: -------------------------------------------------------------------------------- 1 | """ 2 | server_schema_main.py - A script to execute some commands demonstrating MongoDB's client-side field-level encryption with FLE configured with a JSON schema validator. 3 | 4 | Note: 5 | ----- 6 | Before running this script, first run "server_schema_create_key.py" to configure a key in the database, 7 | to generate "key_bytes.bin", 8 | and to create a "people" collection with a suitable schema. 
9 | """ 10 | 11 | import os 12 | from pathlib import Path 13 | 14 | from pymongo import MongoClient 15 | from pymongo.encryption_options import AutoEncryptionOpts 16 | from pymongo.errors import EncryptionError 17 | 18 | 19 | # Load the secret key from 'key_bytes.bin': 20 | key_bin = Path("key_bytes.bin").read_bytes() 21 | 22 | # Configure a single, local KMS provider, with the saved key: 23 | kms_providers = {"local": {"key": key_bin}} 24 | 25 | # Create a configuration for PyMongo, specifying the local key and the 26 | # collection used for storing key data: 27 | fle_opts = AutoEncryptionOpts( 28 | kms_providers, 29 | "fle_demo.__keystore", 30 | ) 31 | 32 | # Add a new document to the "people" collection, and then read it back out 33 | # to demonstrate that the ssn field is automatically decrypted by PyMongo: 34 | with MongoClient(os.environ["MDB_URL"], auto_encryption_opts=fle_opts) as client: 35 | client.fle_demo.people.delete_many({}) 36 | client.fle_demo.people.insert_one( 37 | { 38 | "full_name": "Sophia Duleep Singh", 39 | "ssn": "123-12-1234", 40 | } 41 | ) 42 | print("Decrypted find() results: ") 43 | print(client.fle_demo.people.find_one()) 44 | 45 | # Connect to MongoDB, but this time without FLE configuration. 46 | # This will print the document with ssn *still encrypted*: 47 | with MongoClient(os.environ["MDB_URL"]) as client: 48 | print("Encrypted find() results: ") 49 | print(client.fle_demo.people.find_one()) 50 | 51 | # The following demonstrates that if the ssn field is encrypted as 52 | # "Random" it cannot be filtered: 53 | try: 54 | with MongoClient(os.environ["MDB_URL"], auto_encryption_opts=fle_opts) as client: 55 | # This will fail if ssn is specified as "Random". 
56 | # Change the algorithm to "AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic" 57 | # in server_schema_create_key.py for this to succeed: 58 | print("Find by ssn: ") 59 | print(client.fle_demo.people.find_one({"ssn": "123-12-1234"})) 60 | except EncryptionError as e: 61 | # This is expected if the field is "Random" but not if it's "Deterministic" 62 | print(e) 63 | --------------------------------------------------------------------------------