├── .gitignore ├── .ini ├── README.rst ├── data └── mflix │ ├── comments.bson.gz │ ├── comments.metadata.json.gz │ ├── movies.bson.gz │ ├── movies.metadata.json.gz │ ├── sessions.bson.gz │ ├── sessions.metadata.json.gz │ ├── theaters.bson.gz │ ├── theaters.metadata.json.gz │ ├── users.bson.gz │ ├── users.metadata.json.gz │ ├── watching_pings.bson.gz │ └── watching_pings.metadata.json.gz ├── dotini_win ├── mflix ├── __init__.py ├── api │ ├── __init__.py │ ├── movies.py │ ├── user.py │ └── utils.py ├── build │ ├── asset-manifest.json │ ├── favicon.ico │ ├── index.html │ ├── manifest.json │ ├── service-worker.js │ └── static │ │ ├── css │ │ ├── main.61f16bdd.css │ │ └── main.61f16bdd.css.map │ │ ├── js │ │ ├── main.9cd550cb.js │ │ ├── main.9cd550cb.js.map │ │ ├── main.a4491d28.js │ │ ├── main.a4491d28.js.map │ │ ├── main.eb78d1bb.js │ │ └── main.eb78d1bb.js.map │ │ └── media │ │ ├── mongoleaf.0ebc1843.png │ │ └── pixelatedLeaf.6c93bd20.svg ├── db.py └── factory.py ├── migrations └── movie_last_updated_migration.py ├── notebooks ├── MongoClient.ipynb ├── basic_aggregation.ipynb ├── bulk_writes.ipynb ├── change_streams.ipynb ├── connection_pooling.ipynb ├── cursor_methods_agg_equivalents.ipynb ├── deletes.ipynb ├── error_handling.ipynb ├── m220p_app_arch.ipynb ├── polp.ipynb ├── read_concerns.ipynb ├── robust_applications.ipynb ├── updates.ipynb ├── updates_every_one_second.ipynb ├── write_concerns.ipynb ├── your_first_aggregation.ipynb ├── your_first_join.ipynb ├── your_first_read.ipynb └── your_first_write.ipynb ├── requirements.txt ├── run.py └── tests ├── __init__.py ├── conftest.py ├── test_connection_pooling.py ├── test_create_update_comments.py ├── test_db_connection.py ├── test_delete_comments.py ├── test_error_handling.py ├── test_facets.py ├── test_get_comments.py ├── test_migration.py ├── test_paging.py ├── test_projection.py ├── test_text_and_subfield_search.py ├── test_timeouts.py ├── test_user_management.py ├── test_user_preferences.py └── test_user_report.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .nox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *.cover
48 | .hypothesis/
49 | .pytest_cache/
50 |
51 | # Translations
52 | *.mo
53 | *.pot
54 |
55 | # Django stuff:
56 | *.log
57 | local_settings.py
58 | db.sqlite3
59 |
60 | # Flask stuff:
61 | instance/
62 | .webassets-cache
63 |
64 | # Scrapy stuff:
65 | .scrapy
66 |
67 | # Sphinx documentation
68 | docs/_build/
69 |
70 | # PyBuilder
71 | target/
72 |
73 | # Jupyter Notebook
74 | .ipynb_checkpoints
75 |
76 | # IPython
77 | profile_default/
78 | ipython_config.py
79 |
80 | # pyenv
81 | .python-version
82 |
83 | # celery beat schedule file
84 | celerybeat-schedule
85 |
86 | # SageMath parsed files
87 | *.sage.py
88 |
89 | # Environments
90 | .env
91 | .venv
92 | env/
93 | venv/
94 | ENV/
95 | env.bak/
96 | venv.bak/
97 |
98 | # Spyder project settings
99 | .spyderproject
100 | .spyproject
101 |
102 | # Rope project settings
103 | .ropeproject
104 |
105 | # mkdocs documentation
106 | /site
107 |
108 | # mypy
109 | .mypy_cache/
110 | .dmypy.json
111 | dmypy.json
112 |
--------------------------------------------------------------------------------
/.ini:
--------------------------------------------------------------------------------
1 | # Ticket: Connection
2 | # Rename this file to .ini after filling in your MFLIX_DB_URI and your SECRET_KEY
3 | # Do not surround the URI with quotes
4 |
5 | [PROD]
6 | SECRET_KEY = super_secret_key_you_should_change
7 | MFLIX_DB_URI = mongodb+srv://m220student:m220password@mflix-37ay0.mongodb.net/test
8 |
9 | [TEST]
10 | SECRET_KEY = super_secret_testing_key
11 | MFLIX_DB_URI = mongodb+srv://m220student:m220password@mflix-37ay0.mongodb.net/test
12 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | =====
2 | mflix
3 | =====
4 |
5 | This is a short guide on setting up the system and environment dependencies
6 | required for the MFlix application to run.
7 |
8 |
9 | Project Structure
10 | -----------------
11 |
12 | Everything you will implement is located in the ``mflix/db.py`` file, which
13 | contains all database interfacing methods. The API will make calls to ``db.py``
14 | to interact with MongoDB.
15 |
16 | The unit tests in ``tests`` will test these database access methods directly,
17 | without going through the API. The UI will run these methods in integration
18 | tests, and therefore requires the full application to be running.
19 |
20 | The API layer is fully implemented, as is the UI. If you need to run on a port
21 | other than 5000, you can edit the ``index.html`` file in the build directory to
22 | modify the value of **window.host**.
23 |
24 | Please do not modify the API layer (``movies.py`` and ``user.py`` under the
25 | **mflix/api** directory) in any way. Doing so will most likely result in the
26 | frontend application failing to validate some of the labs.
27 |
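For a sense of what the methods in ``db.py`` look like, here is a minimal
sketch of one data-access method in the same style. It assumes the ``pymongo``
driver used throughout this course; the helper name and query are illustrative,
not the graded implementation:

.. code-block:: python

    from pymongo import MongoClient, DESCENDING

    # Illustrative only -- the real db.py reads MFLIX_DB_URI from the .ini file.
    db = MongoClient("<your-atlas-srv-uri>")["mflix"]

    def get_recent_comments(movie_id, limit=10):
        """Return the newest comments for the given movie, newest first."""
        cursor = (db.comments.find({"movie_id": movie_id})
                             .sort("date", DESCENDING)
                             .limit(limit))
        return list(cursor)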
28 |
29 | Local Development Environment Configuration
30 | -------------------------------------------
31 |
32 | Anaconda
33 | ~~~~~~~~
34 |
35 | We're going to use `Anaconda <https://www.anaconda.com/download/>`_ to install Python 3 and
36 | to manage our Python 3 environment.
37 |
38 | **Installing Anaconda for Mac**
39 |
40 | You can download Anaconda from their `MacOS download site
41 | <https://www.anaconda.com/download/#macos>`_. The installer will give you
42 | the option to "Change Install Location", so you can choose the path where the
43 | ``anaconda3`` folder will be placed. Remember this location, because you will
44 | need it to activate the environment.
45 |
46 | Once installed, you will have to create and activate a ``conda`` environment:
47 |
48 | .. code-block:: sh
49 |
50 |     # navigate to the mflix-python directory
51 |     cd mflix-python
52 |
53 |     # enable the "conda" command in Terminal
54 |     echo ". /anaconda3/etc/profile.d/conda.sh" >> ~/.bash_profile
55 |     source ~/.bash_profile
56 |
57 |     # create a new environment for MFlix
58 |     conda create --name mflix
59 |
60 |     # activate the environment
61 |     conda activate mflix
62 |
63 | You can deactivate the environment with the following command:
64 |
65 | .. code-block:: sh
66 |
67 |     conda deactivate
68 |
69 | **Installing Anaconda for Windows**
70 |
71 | You can download Anaconda from their `Windows download site
72 | <https://www.anaconda.com/download/#windows>`_.
73 |
74 | The Anaconda installer will prompt you to *Add Anaconda to your PATH*. Select
75 | this option to use ``conda`` commands from the Command Prompt.
76 |
77 | If you forget to select this option before installing, no worries. The installer
78 | will let you choose an "Install Location" for Anaconda, which is the directory
79 | where the ``Anaconda3`` folder will be placed.
80 |
81 | Using your machine's location of ``Anaconda3`` as ``<path_to_anaconda3>``, run
82 | the following commands to activate ``conda`` commands from the Command Prompt::
83 |
84 |     set PATH=%PATH%;<path_to_anaconda3>;<path_to_anaconda3>\Scripts\
85 |
86 | Once installed, you will have to create and enable a ``conda`` environment.
87 |
88 | .. code-block:: sh
89 |
90 |     # enter mflix-python folder
91 |     cd mflix-python
92 |
93 |     # create a new environment for MFlix
94 |     conda create --name mflix
95 |
96 |     # activate the environment
97 |     activate mflix
98 |
99 | You can deactivate the environment with the following command:
100 |
101 | .. code-block:: sh
102 |
103 |     deactivate
104 |
105 |
106 | Virtualenv
107 | ~~~~~~~~~~
108 |
109 | *Note: If you installed Anaconda instead, skip this step.*
110 |
111 | As an alternative to Anaconda, you can also use ``virtualenv`` to define your
112 | Python 3 environment. You are required to have Python 3 installed on your
113 | workstation.
114 |
115 | You can find the `virtualenv installation procedure`_ on the PyPA website.
116 |
117 | Once you've installed Python 3 and ``virtualenv``, you will have to set up a
118 | ``virtualenv`` environment:
119 |
120 | .. code-block:: sh
121 |
122 |     # navigate to the mflix-python directory
123 |     cd mflix-python
124 |
125 |     # create the virtual environment for MFlix
126 |     virtualenv -p YOUR_LOCAL_PYTHON3_PATH mflix_venv
127 |
128 |     # activate the virtual environment
129 |     source mflix_venv/bin/activate
130 |
131 | You can deactivate the virtual environment with the following command:
132 |
133 | .. code-block:: sh
134 |
135 |     deactivate
136 |
137 | .. _`virtualenv installation procedure`: https://virtualenv.pypa.io/en/stable/installation/
138 |
139 | Please remember that you may have to reactivate the virtual environment if you
140 | open a new Terminal or Command Prompt window, or restart your system.
141 |
142 |
143 | Python Library Dependencies
144 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
145 |
146 | Once the Python 3 environment is activated, we need to install our Python
147 | dependencies. These dependencies are defined in the ``requirements.txt`` file,
148 | and can be installed with the following command:
149 |
150 | .. code-block:: sh
151 |
152 |     pip install -r requirements.txt
153 |
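Once the install finishes, a quick sanity check that the driver is importable
from the active environment (this assumes ``pymongo`` is among the pinned
requirements, as the rest of the course expects):

.. code-block:: sh

    python -c "import pymongo; print(pymongo.version)"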
154 |
155 | MongoDB Installation
156 | ~~~~~~~~~~~~~~~~~~~~
157 |
158 | It is recommended to connect MFlix with MongoDB Atlas, so you do not need to
159 | have a MongoDB server running on your host machine. The lectures and labs in
160 | this course will assume that you are using an Atlas cluster instead of a local
161 | instance.
162 |
163 | That said, you are still required to have the MongoDB server installed, in order
164 | to be able to use two server tool dependencies:
165 |
166 | - ``mongorestore``
167 |
168 |   - A utility for importing binary data into MongoDB.
169 |
170 | - ``mongo``
171 |
172 |   - The MongoDB shell
173 |
174 | To download these command line tools, please visit the
175 | `MongoDB download center <https://www.mongodb.com/download-center>`_
176 | and choose the appropriate platform.
177 |
178 |
179 | MongoDB Atlas Cluster
180 | ---------------------
181 |
182 | MFlix uses MongoDB to persist all of its data.
183 |
184 | One of the easiest ways to get up and running with MongoDB is to use MongoDB Atlas,
185 | a hosted and fully-managed database solution.
186 |
187 | If you have taken other MongoDB University courses like M001 or M121, you may
188 | already have an account - feel free to reuse that cluster for this course.
189 |
190 | *Note: Be advised that some of the UI aspects of Atlas may have changed since
191 | the inception of this README; therefore, some of the screenshots in this file may
192 | be different from the actual Atlas UI interface.*
193 |
194 |
195 | Using an existing MongoDB Atlas Account:
196 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
197 |
198 | If you already have a previous Atlas account created, perhaps because you've
199 | taken one of our other MongoDB University courses, you can repurpose it for
200 | M220P.
201 |
202 | Log in to your Atlas account and create a new project named **M220** by clicking
203 | on the *Context* dropdown menu:
204 |
205 | .. image:: https://s3.amazonaws.com/university-courses/m220/cluster_create_project.png
206 |
207 | After creating a new project, you need to create a **mflix** free tier cluster.
208 |
209 |
210 | Creating a new MongoDB Atlas Account:
211 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
212 |
213 | If you do not have an existing Atlas account, go ahead and `create an Atlas
214 | Account <https://www.mongodb.com/cloud/atlas>`_ by filling in the
215 | required fields:
216 |
217 | .. image:: https://s3.amazonaws.com/university-courses/m220/atlas_registration.png
218 |
219 |
220 | Creating an **mflix** free tier cluster:
221 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
222 |
223 | *Note: You will need to do this step even if you are reusing an Atlas account.*
224 |
225 | 1. After creating a new project, you will be prompted to create the first
226 |    cluster in that project:
227 |
228 | .. image:: https://s3.amazonaws.com/university-courses/m220/cluster_create.png
229 |
230 |
231 | 2. Choose AWS as the cloud provider, in a Region that has the label
232 |    ``Free Tier Available``:
233 |
234 | .. image:: https://s3.amazonaws.com/university-courses/m220/cluster_provider.png
235 |
236 |
237 | 3. Select *Cluster Tier* M0:
238 |
239 | .. image:: https://s3.amazonaws.com/university-courses/m220/cluster_tier.png
240 |
241 |
242 | 4. Set *Cluster Name* to **mflix** and click "Create Cluster":
243 |
244 | .. image:: https://s3.amazonaws.com/university-courses/m220/cluster_name.png
245 |
246 |
247 | 5. Once you press *Create Cluster*, you will be redirected to the account
248 |    dashboard. In this dashboard, make sure you set your project name to **M220**.
249 |    Go to the ``Settings`` menu item and change the project name from the default
250 |    **Project 0** to **M220**:
251 |
252 | .. image:: https://s3.amazonaws.com/university-courses/m220/cluster_project.png
253 |
254 |
255 | 6. Next, configure the security settings of this cluster by enabling the *IP
256 |    Whitelist* and *MongoDB Users*:
257 |
258 | .. image:: https://s3.amazonaws.com/university-courses/m220/cluster_ipwhitelisting.png
259 |
260 | Update your IP Whitelist so that your app can talk to the cluster. Click the
261 | "Security" tab from the "Clusters" page. Then click "IP Whitelist" followed by
262 | "Add IP Address". Finally, click "Allow Access from Anywhere" and click
263 | "Confirm".
264 |
265 | .. image:: https://s3.amazonaws.com/university-courses/m220/cluster_allowall.png
266 |
267 |
268 | 7. Then create the application MongoDB database user required for this course:
269 |
270 | - username: **m220student**
271 | - password: **m220password**
272 |
273 | You can create new users through *Security* -> *Add New User*.
274 |
275 | Allow this user the privilege to ``Read and write to any database``:
276 |
277 | .. image:: https://s3.amazonaws.com/university-courses/m220/cluster_application_user.png
278 |
279 |
280 | 8. When the user is created, and the cluster deployed, you can test the setup by
281 |    connecting via ``mongo`` shell. You can find instructions to connect in the
282 |    "Connect" section of the cluster dashboard:
283 |
284 | .. image:: https://s3.amazonaws.com/university-courses/m220/cluster_connect_shell.png
285 |
286 | Go to your cluster *Overview* -> *Connect* -> *Connect with the Mongo Shell*.
287 | Select the option corresponding to your local MongoDB version and copy the
288 | ``mongo`` connection command.
289 |
290 | You can execute it from your command line:
291 |
292 | .. code-block:: sh
293 |
294 |     mongo "mongodb+srv://<your-atlas-srv-uri>"
295 |
296 | By connecting to the server from your host machine, you have validated that the
297 | cluster is configured and reachable from your local workstation.
298 |
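The same reachability test can be run from Python with the driver. A minimal
sketch, where ``<your-atlas-srv-uri>`` is again a placeholder for your own SRV
connection string:

.. code-block:: python

    from pymongo import MongoClient

    # <your-atlas-srv-uri> is a placeholder, not a real connection string
    client = MongoClient("<your-atlas-srv-uri>")
    print(client.admin.command("ping"))  # {'ok': 1.0} means the cluster answered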
299 |
300 | Importing Data
301 | --------------
302 |
303 | The ``mongorestore`` command necessary to import the data is located below. Copy
304 | and paste the command, and replace ``<your-atlas-srv-uri>`` with your Atlas SRV
305 | string:
306 |
307 | .. code-block:: sh
308 |
309 |     # navigate to mflix-python directory
310 |     cd mflix-python
311 |
312 |     # import data into Atlas
313 |     mongorestore --drop --gzip --uri "<your-atlas-srv-uri>" data
314 |
315 |
316 | Running the Application
317 | -----------------------
318 |
319 | In the ``mflix-python`` directory you can find a file called ``dotini``
320 | (``dotini_win`` on Windows). Rename this file to ``.ini`` with the following command:
321 |
322 | .. code-block:: sh
323 |
324 |     mv dotini .ini          # on Unix
325 |     ren dotini_win .ini     # on Windows
326 |
327 | Once the file has been renamed, open it, and enter your Atlas SRV connection
328 | string as directed in the comment. This is the information the driver will use
329 | to connect!
330 |
331 | To start MFlix, run the following command:
332 |
333 | .. code-block:: sh
334 |
335 |     python run.py
336 |
337 |
338 | And then point your browser to http://localhost:5000/.
339 |
340 |
341 | Running the Unit Tests
342 | ----------------------
343 |
344 | To run the unit tests for this course, you will use ``pytest``. Each course lab
345 | contains a module of unit tests that you can call individually with a command
346 | like the following:
347 |
348 | .. code-block:: sh
349 |
350 |     pytest -m LAB_UNIT_TEST_NAME
351 |
352 | Each ticket will contain the command to run that ticket's specific unit tests.
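For example, a Connection ticket would typically be verified with a marker along
these lines (the exact marker name comes from the ticket itself, so treat
``connection`` here as illustrative):

.. code-block:: sh

    pytest -m connection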
--------------------------------------------------------------------------------
/data/mflix/comments.bson.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/data/mflix/comments.bson.gz
--------------------------------------------------------------------------------
/data/mflix/comments.metadata.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/data/mflix/comments.metadata.json.gz
--------------------------------------------------------------------------------
/data/mflix/movies.bson.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/data/mflix/movies.bson.gz
--------------------------------------------------------------------------------
/data/mflix/movies.metadata.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/data/mflix/movies.metadata.json.gz
--------------------------------------------------------------------------------
/data/mflix/sessions.bson.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/data/mflix/sessions.bson.gz
--------------------------------------------------------------------------------
/data/mflix/sessions.metadata.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/data/mflix/sessions.metadata.json.gz
--------------------------------------------------------------------------------
/data/mflix/theaters.bson.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/data/mflix/theaters.bson.gz
--------------------------------------------------------------------------------
/data/mflix/theaters.metadata.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/data/mflix/theaters.metadata.json.gz
--------------------------------------------------------------------------------
/data/mflix/users.bson.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/data/mflix/users.bson.gz
--------------------------------------------------------------------------------
/data/mflix/users.metadata.json.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/data/mflix/users.metadata.json.gz -------------------------------------------------------------------------------- /data/mflix/watching_pings.bson.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/data/mflix/watching_pings.bson.gz -------------------------------------------------------------------------------- /data/mflix/watching_pings.metadata.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/data/mflix/watching_pings.metadata.json.gz -------------------------------------------------------------------------------- /dotini_win: -------------------------------------------------------------------------------- 1 | # Ticket: Connection 2 | # Rename this file to .ini after filling in your MFLIX_DB_URI and your SECRET_KEY 3 | # Do not surround the URI with quotes 4 | 5 | [PROD] 6 | SECRET_KEY = super_secret_key_you_should_change 7 | MFLIX_DB_URI = your_atlas_3.6_srv_connection_uri 8 | 9 | [TEST] 10 | SECRET_KEY = super_secret_testing_key 11 | MFLIX_DB_URI = your_testing_db_uri(can be the same as atlas) 12 | -------------------------------------------------------------------------------- /mflix/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/mflix/__init__.py -------------------------------------------------------------------------------- /mflix/api/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/mflix/api/__init__.py -------------------------------------------------------------------------------- /mflix/api/movies.py: -------------------------------------------------------------------------------- 1 | from flask import Blueprint, request, jsonify 2 | from mflix.db import get_movie, get_movies, get_movies_by_country, \ 3 | get_movies_faceted, add_comment, update_comment, delete_comment, \ 4 | get_configuration 5 | 6 | from flask_cors import CORS 7 | from flask_jwt_extended import ( 8 | jwt_required, get_jwt_claims 9 | ) 10 | from mflix.api.user import jwt, User 11 | from mflix.api.utils import expect 12 | from datetime import datetime 13 | 14 | 15 | movies_api_v1 = Blueprint( 16 | 'movies_api_v1', 'movies_api_v1', url_prefix='/api/v1/movies') 17 | 18 | CORS(movies_api_v1) 19 | 20 | 21 | @movies_api_v1.route('/', methods=['GET']) 22 | def api_get_movies(): 23 | MOVIES_PER_PAGE = 20 24 | 25 | (movies, total_num_entries) = get_movies( 26 | None, page=0, movies_per_page=MOVIES_PER_PAGE) 27 | 28 | response = { 29 | "movies": movies, 30 | "page": 0, 31 | "filters": {}, 32 | "entries_per_page": MOVIES_PER_PAGE, 33 | "total_results": total_num_entries, 34 | } 35 | 36 | return jsonify(response) 37 | 38 | 39 | @movies_api_v1.route('/search', methods=['GET']) 40 | def api_search_movies(): 41 | MOVIES_PER_PAGE = 20 42 | 43 | # first determine the page of the movies to collect 44 | try: 45 | page = request.args.get('page') 46 | # if no page, get 
the first page (0 indexed)
47 |         if page is None:
48 |             page = 0
49 |         # otherwise pass the page along the client asked for
50 |         else:
51 |             page = int(page)
52 |     except (TypeError, ValueError) as e:
53 |         print('Got bad value for page:\t', e)
54 |         page = 0
55 |
56 |     # determine the filters
57 |     filters = {}
58 |     return_filters = {}
59 |     cast = request.args.getlist('cast')
60 |     genre = request.args.getlist('genre')
61 |     if cast:
62 |         filters["cast"] = cast
63 |         return_filters["cast"] = cast
64 |     if genre:
65 |         filters["genres"] = genre
66 |         return_filters["genre"] = genre
67 |     search = request.args.get('text')
68 |     if search:
69 |         filters["text"] = search
70 |         return_filters["search"] = search
71 |
72 |     # finally use the database and get what is necessary
73 |     (movies, total_num_entries) = get_movies(
74 |         filters, page, MOVIES_PER_PAGE)
75 |
76 |     response = {
77 |         "status": "success",
78 |         "movies": movies,
79 |         "page": page,
80 |         "filters": return_filters,
81 |         "entries_per_page": MOVIES_PER_PAGE,
82 |         "total_results": total_num_entries,
83 |     }
84 |
85 |     return jsonify(response), 200
86 |
87 |
88 | @movies_api_v1.route('/id/<id>', methods=['GET'])
89 | def api_get_movie_by_id(id):
90 |     movie = get_movie(id)
91 |     if movie is None or movie == {}:
92 |         return jsonify({
93 |             "status": "fail"
94 |         }), 400
95 |     else:
96 |         updated_type = str(type(movie.get('lastupdated')))
97 |         return jsonify(
98 |             {
99 |                 "status": "success",
100 |                 "movie": movie,
101 |                 "updated_type": updated_type
102 |             }
103 |         ), 200
104 |
105 |
106 | @movies_api_v1.route('/countries', methods=['GET'])
107 | def api_get_movies_by_country():
108 |     try:
109 |         countries = request.args.getlist('countries')
110 |         results = get_movies_by_country(countries)
111 |         response_object = {
112 |             "status": "success",
113 |             "titles": results
114 |         }
115 |         return jsonify(response_object), 200
116 |     except Exception as e:
117 |         response_object = {
118 |             "status": "fail",
119 |             "error": str(e)
120 |         }
121 |         return jsonify(response_object), 400
122 |
123 |
124 | @movies_api_v1.route('/facet-search', methods=['GET'])
125 | def api_search_movies_faceted():
126 |     MOVIES_PER_PAGE = 20
127 |
128 |     # first determine the page of the movies to collect
129 |     try:
130 |         page = request.args.get('page')
131 |         # if no page, get the first page (0 indexed)
132 |         if page is None:
133 |             page = 0
134 |         # otherwise pass the page along the client asked for
135 |         else:
136 |             page = int(page)
137 |     except (TypeError, ValueError) as e:
138 |         print('Got bad value for page, defaulting to 0:\t', e)
139 |         page = 0
140 |
141 |     # determine the filters
142 |     filters = {}
143 |     return_filters = {}
144 |     cast = request.args.getlist('cast')
145 |     if cast:
146 |         filters["cast"] = cast
147 |         return_filters["cast"] = cast
148 |     if not filters:
149 |         return api_search_movies()
150 |
151 |     # finally use the database and get what is necessary
152 |     try:
153 |         (movies, total_num_entries) = get_movies_faceted(
154 |             filters, page, MOVIES_PER_PAGE)
155 |
156 |         response = {
157 |             "status": "success",
158 |             "movies": movies.get('movies'),
159 |             "facets": {
160 |                 "runtime": movies.get('runtime'),
161 |                 "rating": movies.get('rating')
162 |             },
163 |             "page": page,
164 |             "filters": return_filters,
165 |             "entries_per_page": MOVIES_PER_PAGE,
166 |             "total_results": total_num_entries,
167 |         }
168 |
169 |         return jsonify(response), 200
170 |     except Exception as e:
171 |         return jsonify({'status': 'fail', 'error': str(e)})
172 |
173 |
174 | @movies_api_v1.route('/comment', methods=["POST"])
175 | @jwt_required
176 | def api_post_comment(): 177 | """ 178 | Posts a comment about a specific movie. Validates the user is logged in by 179 | ensuring a valid JWT is provided 180 | """ 181 | claims = get_jwt_claims() 182 | user = User.from_claims(claims) 183 | post_data = request.get_json() 184 | try: 185 | movie_id = expect(post_data.get('movie_id'), str, 'movie_id') 186 | comment = expect(post_data.get('comment'), str, 'comment') 187 | add_comment(movie_id, user, comment, datetime.now()) 188 | updated_comments = get_movie(movie_id).get('comments') 189 | return jsonify({"status": "success", "comments": updated_comments}) 190 | except Exception as e: 191 | return jsonify({'status': 'fail', 'error': str(e)}) 192 | 193 | 194 | @movies_api_v1.route('/comment', methods=["PUT"]) 195 | @jwt_required 196 | def api_update_comment(): 197 | """ 198 | Updates a user comment. Validates the user is logged in by ensuring a 199 | valid JWT is provided 200 | """ 201 | claims = get_jwt_claims() 202 | user_email = User.from_claims(claims).email 203 | post_data = request.get_json() 204 | try: 205 | comment_id = expect(post_data.get('comment_id'), str, 'comment_id') 206 | updated_comment = expect(post_data.get( 207 | 'updated_comment'), str, 'updated_comment') 208 | movie_id = expect(post_data.get('movie_id'), str, 'movie_id') 209 | update_comment(comment_id, user_email, updated_comment, datetime.now()) 210 | updated_comments = get_movie(movie_id).get('comments') 211 | return jsonify({"status": "success", "comments": updated_comments}) 212 | except Exception as e: 213 | return jsonify({'status': 'fail', 'error': str(e)}) 214 | 215 | 216 | @movies_api_v1.route('/comment', methods=["DELETE"]) 217 | @jwt_required 218 | def api_delete_comment(): 219 | """ 220 | Delete a comment. Requires a valid JWT 221 | """ 222 | claims = get_jwt_claims() 223 | user_email = User.from_claims(claims).email 224 | post_data = request.get_json() 225 | try: 226 | comment_id = expect(post_data.get('comment_id'), str, 'comment_id') 227 | movie_id = expect(post_data.get('movie_id'), str, 'movie_id') 228 | delete_comment(comment_id, user_email) 229 | updated_comments = get_movie(movie_id).get('comments') 230 | return jsonify({'status': 'success', 'comments': updated_comments}) 231 | except Exception as e: 232 | return jsonify({'status': 'fail', 'error': str(e)}), 200 233 | 234 | 235 | @movies_api_v1.route('/config-options', methods=["GET"]) 236 | def get_conn_pool_size(): 237 | try: 238 | (pool_size, w_concern, user_info) = get_configuration() 239 | return jsonify({ 240 | 'status': 'success', 241 | 'pool_size': pool_size, 242 | 'wtimeout': w_concern._WriteConcern__document.get('wtimeout', '0'), 243 | **user_info 244 | }) 245 | except Exception as e: 246 | return jsonify({'status': 'fail', 'error': str(e)}) 247 | -------------------------------------------------------------------------------- /mflix/api/user.py: -------------------------------------------------------------------------------- 1 | from flask import jsonify, Blueprint, make_response, request 2 | from mflix.db import get_user, add_user, login_user, make_admin, \ 3 | logout_user, delete_user, update_prefs, most_active_commenters 4 | from mflix.api.utils import expect 5 | from bson.json_util import dumps, loads 6 | 7 | from flask_jwt_extended import ( 8 | jwt_required, create_access_token, 9 | get_jwt_claims 10 | ) 11 | 12 | from flask_cors import CORS 13 | 14 | from flask import current_app, g 15 | from werkzeug.local import LocalProxy 16 | 17 | 18 | def get_bcrypt(): 19 | bcrypt = 
getattr(g, '_bcrypt', None) 20 | if bcrypt is None: 21 | bcrypt = g._bcrypt = current_app.config['BCRYPT'] 22 | return bcrypt 23 | 24 | 25 | def get_jwt(): 26 | jwt = getattr(g, '_jwt', None) 27 | if jwt is None: 28 | jwt = g._jwt = current_app.config['JWT'] 29 | 30 | return jwt 31 | 32 | 33 | def init_claims_loader(): 34 | add_claims = getattr(g, '_add_claims', None) 35 | if add_claims is None: 36 | add_claims = g._add_claims = current_app.config['ADD_CLAIMS'] 37 | return add_claims 38 | 39 | 40 | jwt = LocalProxy(get_jwt) 41 | bcrypt = LocalProxy(get_bcrypt) 42 | add_claims_to_access_token = LocalProxy(init_claims_loader) 43 | 44 | 45 | user_api_v1 = Blueprint('user_api_v1', 'user_api_v1', 46 | url_prefix='/api/v1/user') 47 | 48 | CORS(user_api_v1) 49 | 50 | 51 | class User(object): 52 | 53 | def __init__(self, userdata): 54 | self.email = userdata.get('email') 55 | self.name = userdata.get('name') 56 | self.password = userdata.get('password') 57 | self.preferences = userdata.get('preferences') 58 | self.is_admin = userdata.get('isAdmin', False) 59 | 60 | def to_json(self): 61 | return loads(dumps(self, default=lambda o: o.__dict__, sort_keys=True)) 62 | 63 | @staticmethod 64 | def from_claims(claims): 65 | return User(claims.get('user')) 66 | 67 | 68 | @user_api_v1.route('/register', methods=['POST']) 69 | def register(): 70 | try: 71 | post_data = request.get_json() 72 | email = expect(post_data['email'], str, 'email') 73 | name = expect(post_data['name'], str, 'name') 74 | password = expect(post_data['password'], str, 'password') 75 | except Exception as e: 76 | return jsonify({'error': str(e)}), 400 77 | 78 | errors = {} 79 | if len(password) < 8: 80 | errors['password'] = "Your password must be at least 8 characters." 81 | 82 | if len(name) <= 3: 83 | errors['name'] = "You must specify a name of at least 3 characters." 84 | 85 | if len(errors.keys()) != 0: 86 | response_object = { 87 | 'status': 'fail', 88 | 'error': errors 89 | } 90 | return jsonify(response_object), 411 91 | 92 | insertionresult = add_user(name, email, bcrypt.generate_password_hash( 93 | password=password.encode('utf8')).decode("utf-8")) 94 | if 'error' in insertionresult: 95 | errors['email'] = insertionresult["error"] 96 | 97 | userdata = get_user(email) 98 | 99 | if not userdata: 100 | errors['general'] = "Internal error, please try again later." 
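    # Flow note: any validation errors collected above are returned together
    # below; on success a JWT is minted with create_access_token and the
    # session is recorded via login_user.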
101 | 102 | if len(errors.keys()) != 0: 103 | response_object = { 104 | 'status': 'fail', 105 | 'error': errors 106 | } 107 | return make_response(jsonify(response_object)), 400 108 | else: 109 | 110 | userdata = { 111 | "email": userdata['email'], 112 | "name": userdata['name'], 113 | "preferences": userdata.get('preferences') 114 | } 115 | 116 | user = User(userdata) 117 | jwt = create_access_token(user.to_json()) 118 | 119 | try: 120 | login_user(user.email, jwt) 121 | response_object = { 122 | 'status': 'success', 123 | 'auth_token': jwt, 124 | 'info': userdata 125 | } 126 | return make_response(jsonify(response_object)), 201 127 | except Exception as e: 128 | response_object = { 129 | 'status': 'fail', 130 | 'error': {'internal': e} 131 | } 132 | return make_response(jsonify(response_object)), 500 133 | 134 | 135 | @user_api_v1.route('/login', methods=['POST']) 136 | def login(): 137 | email = "" 138 | password = "" 139 | try: 140 | post_data = request.get_json() 141 | email = expect(post_data['email'], str, 'email') 142 | password = expect(post_data['password'], str, 'email') 143 | except Exception as e: 144 | jsonify({'error': str(e)}), 400 145 | 146 | userdata = get_user(email) 147 | if not userdata: 148 | response_object = { 149 | 'status': 'fail', 150 | 'error': {'email': 'Make sure your email is correct.'} 151 | } 152 | return make_response(jsonify(response_object)), 401 153 | if not bcrypt.check_password_hash(userdata['password'], password): 154 | response_object = { 155 | 'status': 'fail', 156 | 'error': {'password': 'Make sure your password is correct.'} 157 | } 158 | return make_response(jsonify(response_object)), 401 159 | 160 | userdata = { 161 | "email": userdata['email'], 162 | "name": userdata['name'], 163 | "preferences": userdata.get('preferences'), 164 | "isAdmin": userdata.get('isAdmin', False) 165 | } 166 | 167 | user = User(userdata) 168 | jwt = create_access_token(user.to_json()) 169 | 170 | try: 171 | login_user(user.email, jwt) 172 | response_object = { 173 | 'status': 'success', 174 | 'auth_token': jwt, 175 | 'info': userdata, 176 | } 177 | return make_response(jsonify(response_object)), 201 178 | except Exception as e: 179 | response_object = { 180 | 'status': 'fail', 181 | 'error': {'internal': e} 182 | } 183 | return make_response(jsonify(response_object)), 500 184 | 185 | 186 | @user_api_v1.route('/update-preferences', methods=['PUT']) 187 | @jwt_required 188 | def save(): 189 | claims = get_jwt_claims() 190 | user = User.from_claims(claims) 191 | body = request.get_json() 192 | prefs = expect(body.get('preferences'), dict, 'preferences') 193 | try: 194 | # get an updated user, remove the password 195 | update_prefs(user.email, prefs) 196 | updated_user = User(get_user(user.email)) 197 | del updated_user.password 198 | updated_jwt = create_access_token(updated_user.to_json()) 199 | # lastly, update the user's session 200 | response_object = { 201 | 'status': 'success', 202 | 'auth_token': updated_jwt, 203 | 'info': updated_user.to_json(), 204 | } 205 | return make_response(jsonify(response_object)), 201 206 | except Exception as e: 207 | response_object = { 208 | 'status': 'fail', 209 | 'error': {'internal': str(e)} 210 | } 211 | return make_response(jsonify(response_object)), 500 212 | 213 | 214 | @user_api_v1.route('/logout', methods=['POST']) 215 | @jwt_required 216 | def logout(): 217 | claims = get_jwt_claims() 218 | user = User.from_claims(claims) 219 | try: 220 | logout_user(user.email) 221 | response_object = { 222 | 'status': 'success' 223 | } 224 
| return make_response(jsonify(response_object)), 201 225 | except Exception as e: 226 | response_object = { 227 | 'status': 'fail', 228 | 'error': {'internal': e} 229 | } 230 | return make_response(jsonify(response_object)), 401 231 | 232 | 233 | @user_api_v1.route('/delete', methods=['DELETE']) 234 | @jwt_required 235 | def delete(): 236 | claims = get_jwt_claims() 237 | user = User.from_claims(claims) 238 | try: 239 | password = expect(request.get_json().get('password'), str, 'password') 240 | userdata = get_user(user.email) 241 | if (not user.email == userdata['email'] and not 242 | bcrypt.check_password_hash(userdata['password'], password)): 243 | response_object = { 244 | 'status': 'fail', 245 | 'error': {'password': 'Make sure your password is correct.'} 246 | } 247 | return make_response(jsonify(response_object)), 401 248 | else: 249 | delete_user(user.email) 250 | response_object = { 251 | 'status': 'success' 252 | } 253 | return make_response(jsonify(response_object)), 201 254 | except Exception as e: 255 | response_object = { 256 | 'status': 'fail', 257 | 'error': {'internal': e} 258 | } 259 | return make_response(jsonify(response_object)), 500 260 | 261 | 262 | @user_api_v1.route('/admin', methods=['GET']) 263 | @jwt_required 264 | def is_admin(): 265 | claims = get_jwt_claims() 266 | user = User.from_claims(claims) 267 | try: 268 | if check_admin(user): 269 | return jsonify({'status': 'success'}), 202 270 | else: 271 | return jsonify({'status': 'fail'}), 401 272 | 273 | except Exception as e: 274 | return jsonify({'status': 'fail', 'error': str(e)}), 500 275 | 276 | 277 | @user_api_v1.route('/comment-report', methods=['GET']) 278 | @jwt_required 279 | def comment_report(): 280 | claims = get_jwt_claims() 281 | user = User.from_claims(claims) 282 | try: 283 | if check_admin(user): 284 | result = most_active_commenters() 285 | return jsonify({'status': 'success', 'report': result}) 286 | else: 287 | return jsonify({'status': 'fail'}), 401 288 | except Exception as e: 289 | return jsonify({'status': 'fail', 'error': str(e)}), 500 290 | 291 | # the following api call is strictly for the UI 292 | 293 | 294 | @user_api_v1.route('/make-admin', methods=['POST']) 295 | def make_admin_user_for_ui_test(): 296 | try: 297 | post_data = request.get_json() 298 | email = expect(post_data['email'], str, 'email') 299 | name = expect(post_data['name'], str, 'name') 300 | password = expect(post_data['password'], str, 'password') 301 | except Exception as e: 302 | jsonify({'error': str(e)}), 400 303 | 304 | errors = {} 305 | if len(password) < 8: 306 | errors['password'] = "Your password must be at least 8 characters." 307 | 308 | if len(name) <= 3: 309 | errors['name'] = "You must specify a name of at least 3 characters." 310 | 311 | if len(errors.keys()) != 0: 312 | response_object = { 313 | 'status': 'fail', 314 | 'error': errors 315 | } 316 | return jsonify(response_object), 411 317 | 318 | insertionresult = add_user(name, email, bcrypt.generate_password_hash( 319 | password=password.encode('utf8')).decode("utf-8")) 320 | if 'error' in insertionresult: 321 | errors['email'] = insertionresult["error"] 322 | 323 | make_admin(email) 324 | userdata = get_user(email) 325 | 326 | if not userdata: 327 | errors['general'] = "Internal error, please try again later." 
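    # Flow note: this UI-only endpoint repeats the /register flow above, with
    # the extra make_admin(email) promotion before the user is fetched back.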
328 | 329 | if len(errors.keys()) != 0: 330 | response_object = { 331 | 'status': 'fail', 332 | 'error': errors 333 | } 334 | return make_response(jsonify(response_object)), 400 335 | else: 336 | 337 | userdata = { 338 | "email": userdata['email'], 339 | "name": userdata['name'], 340 | "preferences": userdata.get('preferences'), 341 | "isAdmin": True 342 | } 343 | 344 | user = User(userdata) 345 | jwt = create_access_token(user.to_json()) 346 | 347 | try: 348 | login_user(user.email, jwt) 349 | response_object = { 350 | 'status': 'success', 351 | 'auth_token': jwt, 352 | 'info': userdata 353 | } 354 | return make_response(jsonify(response_object)), 201 355 | except Exception as e: 356 | response_object = { 357 | 'status': 'fail', 358 | 'error': {'internal': e} 359 | } 360 | return make_response(jsonify(response_object)), 500 361 | 362 | 363 | def check_admin(user): 364 | updated_user = get_user(user.email) 365 | return updated_user.get('isAdmin', False) 366 | -------------------------------------------------------------------------------- /mflix/api/utils.py: -------------------------------------------------------------------------------- 1 | def expect(input, expectedType, field): 2 | if isinstance(input, expectedType): 3 | return input 4 | raise AssertionError("Invalid input for type", field) 5 | -------------------------------------------------------------------------------- /mflix/build/asset-manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "main.css": "static/css/main.61f16bdd.css", 3 | "main.css.map": "static/css/main.61f16bdd.css.map", 4 | "main.js": "static/js/main.9cd550cb.js", 5 | "main.js.map": "static/js/main.9cd550cb.js.map", 6 | "static/media/mongoleaf.png": "static/media/mongoleaf.0ebc1843.png", 7 | "static/media/pixelatedLeaf.svg": "static/media/pixelatedLeaf.6c93bd20.svg" 8 | } -------------------------------------------------------------------------------- /mflix/build/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/mflix/build/favicon.ico -------------------------------------------------------------------------------- /mflix/build/index.html: -------------------------------------------------------------------------------- 1 | mflix
-------------------------------------------------------------------------------- /mflix/build/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "Mflix", 3 | "name": "Mflix", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | } 10 | ], 11 | "start_url": "./index.html", 12 | "display": "standalone", 13 | "theme_color": "#000000", 14 | "background_color": "#ffffff" 15 | } 16 | -------------------------------------------------------------------------------- /mflix/build/service-worker.js: -------------------------------------------------------------------------------- 1 | "use strict";var precacheConfig=[["/index.html","b6c0d3c6d441174d450cc776fe6866a7"],["/static/css/main.61f16bdd.css","ce70416f89e27171f8bc501d54242290"],["/static/js/main.9cd550cb.js","aa8f2e7b06c398ea14db6d9837eceb7f"],["/static/media/mongoleaf.0ebc1843.png","0ebc18432c0a71cfd876a27933f91a4a"],["/static/media/pixelatedLeaf.6c93bd20.svg","6c93bd2010905059e08c8b522adfc219"]],cacheName="sw-precache-v3-sw-precache-webpack-plugin-"+(self.registration?self.registration.scope:""),ignoreUrlParametersMatching=[/^utm_/],addDirectoryIndex=function(e,t){var n=new URL(e);return"/"===n.pathname.slice(-1)&&(n.pathname+=t),n.toString()},cleanResponse=function(t){return t.redirected?("body"in t?Promise.resolve(t.body):t.blob()).then(function(e){return new Response(e,{headers:t.headers,status:t.status,statusText:t.statusText})}):Promise.resolve(t)},createCacheKey=function(e,t,n,r){var a=new URL(e);return r&&a.pathname.match(r)||(a.search+=(a.search?"&":"")+encodeURIComponent(t)+"="+encodeURIComponent(n)),a.toString()},isPathWhitelisted=function(e,t){if(0===e.length)return!0;var n=new URL(t).pathname;return e.some(function(e){return n.match(e)})},stripIgnoredUrlParameters=function(e,n){var t=new URL(e);return t.hash="",t.search=t.search.slice(1).split("&").map(function(e){return e.split("=")}).filter(function(t){return n.every(function(e){return!e.test(t[0])})}).map(function(e){return e.join("=")}).join("&"),t.toString()},hashParamName="_sw-precache",urlsToCacheKeys=new Map(precacheConfig.map(function(e){var t=e[0],n=e[1],r=new URL(t,self.location),a=createCacheKey(r,hashParamName,n,/\.\w{8}\./);return[r.toString(),a]}));function setOfCachedUrls(e){return e.keys().then(function(e){return e.map(function(e){return e.url})}).then(function(e){return new Set(e)})}self.addEventListener("install",function(e){e.waitUntil(caches.open(cacheName).then(function(r){return setOfCachedUrls(r).then(function(n){return Promise.all(Array.from(urlsToCacheKeys.values()).map(function(t){if(!n.has(t)){var e=new Request(t,{credentials:"same-origin"});return fetch(e).then(function(e){if(!e.ok)throw new Error("Request for "+t+" returned a response with status "+e.status);return cleanResponse(e).then(function(e){return r.put(t,e)})})}}))})}).then(function(){return self.skipWaiting()}))}),self.addEventListener("activate",function(e){var n=new Set(urlsToCacheKeys.values());e.waitUntil(caches.open(cacheName).then(function(t){return t.keys().then(function(e){return Promise.all(e.map(function(e){if(!n.has(e.url))return t.delete(e)}))})}).then(function(){return self.clients.claim()}))}),self.addEventListener("fetch",function(t){if("GET"===t.request.method){var e,n=stripIgnoredUrlParameters(t.request.url,ignoreUrlParametersMatching),r="index.html";(e=urlsToCacheKeys.has(n))||(n=addDirectoryIndex(n,r),e=urlsToCacheKeys.has(n));var 
a="/index.html";!e&&"navigate"===t.request.mode&&isPathWhitelisted(["^(?!\\/__).*"],t.request.url)&&(n=new URL(a,self.location).toString(),e=urlsToCacheKeys.has(n)),e&&t.respondWith(caches.open(cacheName).then(function(e){return e.match(urlsToCacheKeys.get(n)).then(function(e){if(e)return e;throw Error("The cached response that was expected is missing.")})}).catch(function(e){return console.warn('Couldn\'t serve response for "%s" from cache: %O',t.request.url,e),fetch(t.request)}))}}); -------------------------------------------------------------------------------- /mflix/build/static/css/main.61f16bdd.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinningLeaf{0%{-webkit-transform:rotateY(0deg);transform:rotateY(0deg)}to{-webkit-transform:rotateY(-1turn);transform:rotateY(-1turn)}}@keyframes spinningLeaf{0%{-webkit-transform:rotateY(0deg);transform:rotateY(0deg)}to{-webkit-transform:rotateY(-1turn);transform:rotateY(-1turn)}}@-webkit-keyframes blink{0%{opacity:0}50%{opacity:1}to{opacity:0}}@keyframes blink{0%{opacity:0}50%{opacity:1}to{opacity:0}} 2 | /*! normalize.css v7.0.0 | MIT License | github.com/necolas/normalize.css */html{line-height:1.15;-ms-text-size-adjust:100%;-webkit-text-size-adjust:100%}body,html{height:100%;margin:0;padding:0}#full{background:"#black";height:100%}body,button,div,h1,h2,h3,h4,input,p,select,span,textarea{font-family:BlinkMacSystemFont,-apple-system,Segoe UI,Roboto,Oxygen,Ubuntu,Cantarell,Fira Sans,Droid Sans,Helvetica Neue,Helvetica,Arial,sans-serif}article,aside,footer,header,nav,section{display:block}h1{font-size:2em;margin:.67em 0}figcaption,figure,main{display:block}figure{margin:1em 40px}hr{-webkit-box-sizing:content-box;box-sizing:content-box;height:0;overflow:visible}pre{font-family:monospace,monospace;font-size:1em}a{background-color:transparent;-webkit-text-decoration-skip:objects}abbr[title]{border-bottom:none;text-decoration:underline;-webkit-text-decoration:underline dotted;text-decoration:underline dotted}b,strong{font-weight:inherit;font-weight:bolder}code,kbd,samp{font-family:monospace,monospace;font-size:1em}dfn{font-style:italic}mark{background-color:#ff0;color:#000}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}audio,video{display:inline-block}audio:not([controls]){display:none;height:0}img{border-style:none}.material-icons.red{color:#cd0000;margin-right:5px;vertical-align:sub}.material-icons:hover{cursor:pointer;font-size:28px;color:#b60000}button,input,optgroup,select,textarea{font-family:sans-serif;font-size:100%;line-height:1.15;margin:0}button,input{overflow:visible}button,select{text-transform:none}[type=reset],[type=submit],button,html [type=button]{-webkit-appearance:button}[type=button]::-moz-focus-inner,[type=reset]::-moz-focus-inner,[type=submit]::-moz-focus-inner,button::-moz-focus-inner{border-style:none;padding:0}[type=button]:-moz-focusring,[type=reset]:-moz-focusring,[type=submit]:-moz-focusring,button:-moz-focusring{outline:1px dotted ButtonText}fieldset{padding:.35em .75em 
.625em}legend{-webkit-box-sizing:border-box;box-sizing:border-box;color:inherit;display:table;max-width:100%;padding:0;white-space:normal}progress{display:inline-block;vertical-align:baseline}textarea{overflow:auto}[type=checkbox],[type=radio]{-webkit-box-sizing:border-box;box-sizing:border-box;padding:0}[type=number]::-webkit-inner-spin-button,[type=number]::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}[type=search]::-webkit-search-cancel-button,[type=search]::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}details,menu{display:block}summary{display:list-item}canvas{display:inline-block}[hidden],template{display:none} 3 | /*# sourceMappingURL=main.61f16bdd.css.map*/ -------------------------------------------------------------------------------- /mflix/build/static/css/main.61f16bdd.css.map: -------------------------------------------------------------------------------- 1 | {"version":3,"sources":["containers/normalize.css"],"names":[],"mappings":"AAEA,gCACE,GACE,gCACQ,uBAAyB,CAEnC,GACE,kCACQ,yBAA4B,CACrC,CAGH,wBACE,GACE,gCACQ,uBAAyB,CAEnC,GACE,kCACQ,yBAA4B,CACrC,CAGH,yBACE,GACE,SAAW,CAEb,IACE,SAAW,CAEb,GACE,SAAW,CACZ,CAGH,iBACE,GACE,SAAW,CAEb,IACE,SAAW,CAEb,GACE,SAAW,CACZ,CACF;AACD,4EAWA,KACE,iBACA,0BACA,6BAA+B,CAUjC,UAEE,YACA,SACA,SAAW,CAGb,MACE,oBACA,WAAa,CAGf,yDAYE,mJAEmC,CAOrC,wCAME,aAAe,CAQjB,GACE,cACA,cAAiB,CAWnB,uBAIE,aAAe,CAOjB,OACE,eAAiB,CAQnB,GACE,+BACQ,uBACR,SACA,gBAAkB,CAQpB,IACE,gCACA,aAAe,CAWjB,EACE,6BACA,oCAAsC,CAQxC,YACE,mBACA,0BACA,yCACQ,gCAAkC,CAO5C,SAEE,oBASA,kBAAoB,CAQtB,cAGE,gCACA,aAAe,CAOjB,IACE,iBAAmB,CAOrB,KACE,sBACA,UAAY,CAOd,MACE,aAAe,CAQjB,QAEE,cACA,cACA,kBACA,uBAAyB,CAG3B,IACE,aAAgB,CAGlB,IACE,SAAY,CAUd,YAEE,oBAAsB,CAOxB,sBACE,aACA,QAAU,CAOZ,IACE,iBAAmB,CAcrB,oBACE,cACA,iBACA,kBAAoB,CAEtB,sBACE,eACA,eACA,aAAe,CAWjB,sCAKE,uBACA,eACA,iBACA,QAAU,CAQZ,aAGE,gBAAkB,CAQpB,cAGE,mBAAqB,CASvB,qDAIE,yBAA2B,CAO7B,wHAIE,kBACA,SAAW,CAOb,4GAIE,6BAA+B,CAOjC,SACE,0BAA+B,CAUjC,OACE,8BACQ,sBACR,cACA,cACA,eACA,UACA,kBAAoB,CAQtB,SACE,qBACA,uBAAyB,CAO3B,SACE,aAAe,CAQjB,6BAEE,8BACQ,sBACR,SAAW,CAOb,kFAEE,WAAa,CAQf,cACE,6BACA,mBAAqB,CAOvB,qFAEE,uBAAyB,CAQ3B,6BACE,0BACA,YAAc,CAWhB,aAEE,aAAe,CAOjB,QACE,iBAAmB,CAUrB,OACE,oBAAsB,CAkBxB,kBACE,YAAc","file":"static/css/main.61f16bdd.css","sourcesContent":["/* Custom styling (keyframes etc...) */\n\n@-webkit-keyframes spinningLeaf {\n from {\n -webkit-transform: rotateY(0deg);\n transform: rotateY(0deg);\n }\n to {\n -webkit-transform: rotateY(-360deg);\n transform: rotateY(-360deg);\n }\n}\n\n@keyframes spinningLeaf {\n from {\n -webkit-transform: rotateY(0deg);\n transform: rotateY(0deg);\n }\n to {\n -webkit-transform: rotateY(-360deg);\n transform: rotateY(-360deg);\n }\n}\n\n@-webkit-keyframes blink {\n 0% {\n opacity: 0;\n }\n 50% {\n opacity: 1;\n }\n 100% {\n opacity: 0;\n }\n}\n\n@keyframes blink {\n 0% {\n opacity: 0;\n }\n 50% {\n opacity: 1;\n }\n 100% {\n opacity: 0;\n }\n}\n/*! normalize.css v7.0.0 | MIT License | github.com/necolas/normalize.css */\n\n/* Document\n ========================================================================== */\n\n/**\n * 1. Correct the line height in all browsers.\n * 2. 
Prevent adjustments of font size after orientation changes in\n * IE on Windows Phone and in iOS.\n */\n\nhtml {\n line-height: 1.15; /* 1 */\n -ms-text-size-adjust: 100%; /* 2 */\n -webkit-text-size-adjust: 100%; /* 2 */\n}\n\n/* Sections\n ========================================================================== */\n\n/**\n * Remove the margin in all browsers (opinionated).\n */\n\nhtml,\nbody {\n height: 100%;\n margin: 0;\n padding: 0;\n}\n\n#full {\n background: '#black';\n height: 100%;\n}\n\nbody,\nbutton,\ninput,\nselect,\ntextarea,\ndiv,\nh1,\nh2,\nh3,\nh4,\np,\nspan {\n font-family: BlinkMacSystemFont, -apple-system, 'Segoe UI', 'Roboto', 'Oxygen',\n 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',\n 'Helvetica', 'Arial', sans-serif;\n}\n\n/**\n * Add the correct display in IE 9-.\n */\n\narticle,\naside,\nfooter,\nheader,\nnav,\nsection {\n display: block;\n}\n\n/**\n * Correct the font size and margin on `h1` elements within `section` and\n * `article` contexts in Chrome, Firefox, and Safari.\n */\n\nh1 {\n font-size: 2em;\n margin: 0.67em 0;\n}\n\n/* Grouping content\n ========================================================================== */\n\n/**\n * Add the correct display in IE 9-.\n * 1. Add the correct display in IE.\n */\n\nfigcaption,\nfigure,\nmain {\n /* 1 */\n display: block;\n}\n\n/**\n * Add the correct margin in IE 8.\n */\n\nfigure {\n margin: 1em 40px;\n}\n\n/**\n * 1. Add the correct box sizing in Firefox.\n * 2. Show the overflow in Edge and IE.\n */\n\nhr {\n -webkit-box-sizing: content-box;\n box-sizing: content-box; /* 1 */\n height: 0; /* 1 */\n overflow: visible; /* 2 */\n}\n\n/**\n * 1. Correct the inheritance and scaling of font size in all browsers.\n * 2. Correct the odd `em` font sizing in all browsers.\n */\n\npre {\n font-family: monospace, monospace; /* 1 */\n font-size: 1em; /* 2 */\n}\n\n/* Text-level semantics\n ========================================================================== */\n\n/**\n * 1. Remove the gray background on active links in IE 10.\n * 2. Remove gaps in links underline in iOS 8+ and Safari 8+.\n */\n\na {\n background-color: transparent; /* 1 */\n -webkit-text-decoration-skip: objects; /* 2 */\n}\n\n/**\n * 1. Remove the bottom border in Chrome 57- and Firefox 39-.\n * 2. Add the correct text decoration in Chrome, Edge, IE, Opera, and Safari.\n */\n\nabbr[title] {\n border-bottom: none; /* 1 */\n text-decoration: underline; /* 2 */\n -webkit-text-decoration: underline dotted;\n text-decoration: underline dotted; /* 2 */\n}\n\n/**\n * Prevent the duplicate application of `bolder` by the next rule in Safari 6.\n */\n\nb,\nstrong {\n font-weight: inherit;\n}\n\n/**\n * Add the correct font weight in Chrome, Edge, and Safari.\n */\n\nb,\nstrong {\n font-weight: bolder;\n}\n\n/**\n * 1. Correct the inheritance and scaling of font size in all browsers.\n * 2. 
Correct the odd `em` font sizing in all browsers.\n */\n\ncode,\nkbd,\nsamp {\n font-family: monospace, monospace; /* 1 */\n font-size: 1em; /* 2 */\n}\n\n/**\n * Add the correct font style in Android 4.3-.\n */\n\ndfn {\n font-style: italic;\n}\n\n/**\n * Add the correct background and color in IE 9-.\n */\n\nmark {\n background-color: #ff0;\n color: #000;\n}\n\n/**\n * Add the correct font size in all browsers.\n */\n\nsmall {\n font-size: 80%;\n}\n\n/**\n * Prevent `sub` and `sup` elements from affecting the line height in\n * all browsers.\n */\n\nsub,\nsup {\n font-size: 75%;\n line-height: 0;\n position: relative;\n vertical-align: baseline;\n}\n\nsub {\n bottom: -0.25em;\n}\n\nsup {\n top: -0.5em;\n}\n\n/* Embedded content\n ========================================================================== */\n\n/**\n * Add the correct display in IE 9-.\n */\n\naudio,\nvideo {\n display: inline-block;\n}\n\n/**\n * Add the correct display in iOS 4-7.\n */\n\naudio:not([controls]) {\n display: none;\n height: 0;\n}\n\n/**\n * Remove the border on images inside links in IE 10-.\n */\n\nimg {\n border-style: none;\n}\n\n/**\n * Hide the overflow in IE.e\n\nsvg:not(:root) {\n overflow: hidden;\n}\n\n/*\n * Style the error notifications.\n */\n\n.material-icons.red {\n color: #cd0000;\n margin-right: 5px;\n vertical-align: sub;\n}\n.material-icons:hover {\n cursor: pointer;\n font-size: 28px;\n color: #b60000;\n}\n\n/* Forms\n ========================================================================== */\n\n/**\n * 1. Change the font styles in all browsers (opinionated).\n * 2. Remove the margin in Firefox and Safari.\n */\n\nbutton,\ninput,\noptgroup,\nselect,\ntextarea {\n font-family: sans-serif; /* 1 */\n font-size: 100%; /* 1 */\n line-height: 1.15; /* 1 */\n margin: 0; /* 2 */\n}\n\n/**\n * Show the overflow in IE.\n * 1. Show the overflow in Edge.\n */\n\nbutton,\ninput {\n /* 1 */\n overflow: visible;\n}\n\n/**\n * Remove the inheritance of text transform in Edge, Firefox, and IE.\n * 1. Remove the inheritance of text transform in Firefox.\n */\n\nbutton,\nselect {\n /* 1 */\n text-transform: none;\n}\n\n/**\n * 1. Prevent a WebKit bug where (2) destroys native `audio` and `video`\n * controls in Android 4.\n * 2. Correct the inability to style clickable types in iOS and Safari.\n */\n\nbutton,\nhtml [type=\"button\"], /* 1 */\n[type=\"reset\"],\n[type=\"submit\"] {\n -webkit-appearance: button; /* 2 */\n}\n\n/**\n * Remove the inner border and padding in Firefox.\n */\n\nbutton::-moz-focus-inner,\n[type='button']::-moz-focus-inner,\n[type='reset']::-moz-focus-inner,\n[type='submit']::-moz-focus-inner {\n border-style: none;\n padding: 0;\n}\n\n/**\n * Restore the focus styles unset by the previous rule.\n */\n\nbutton:-moz-focusring,\n[type='button']:-moz-focusring,\n[type='reset']:-moz-focusring,\n[type='submit']:-moz-focusring {\n outline: 1px dotted ButtonText;\n}\n\n/**\n * Correct the padding in Firefox.\n */\n\nfieldset {\n padding: 0.35em 0.75em 0.625em;\n}\n\n/**\n * 1. Correct the text wrapping in Edge and IE.\n * 2. Correct the color inheritance from `fieldset` elements in IE.\n * 3. Remove the padding so developers are not caught out when they zero out\n * `fieldset` elements in all browsers.\n */\n\nlegend {\n -webkit-box-sizing: border-box;\n box-sizing: border-box; /* 1 */\n color: inherit; /* 2 */\n display: table; /* 1 */\n max-width: 100%; /* 1 */\n padding: 0; /* 3 */\n white-space: normal; /* 1 */\n}\n\n/**\n * 1. Add the correct display in IE 9-.\n * 2. 
Add the correct vertical alignment in Chrome, Firefox, and Opera.\n */\n\nprogress {\n display: inline-block; /* 1 */\n vertical-align: baseline; /* 2 */\n}\n\n/**\n * Remove the default vertical scrollbar in IE.\n */\n\ntextarea {\n overflow: auto;\n}\n\n/**\n * 1. Add the correct box sizing in IE 10-.\n * 2. Remove the padding in IE 10-.\n */\n\n[type='checkbox'],\n[type='radio'] {\n -webkit-box-sizing: border-box;\n box-sizing: border-box; /* 1 */\n padding: 0; /* 2 */\n}\n\n/**\n * Correct the cursor style of increment and decrement buttons in Chrome.\n */\n\n[type='number']::-webkit-inner-spin-button,\n[type='number']::-webkit-outer-spin-button {\n height: auto;\n}\n\n/**\n * 1. Correct the odd appearance in Chrome and Safari.\n * 2. Correct the outline style in Safari.\n */\n\n[type='search'] {\n -webkit-appearance: textfield; /* 1 */\n outline-offset: -2px; /* 2 */\n}\n\n/**\n * Remove the inner padding and cancel buttons in Chrome and Safari on macOS.\n */\n\n[type='search']::-webkit-search-cancel-button,\n[type='search']::-webkit-search-decoration {\n -webkit-appearance: none;\n}\n\n/**\n * 1. Correct the inability to style clickable types in iOS and Safari.\n * 2. Change font properties to `inherit` in Safari.\n */\n\n::-webkit-file-upload-button {\n -webkit-appearance: button; /* 1 */\n font: inherit; /* 2 */\n}\n\n/* Interactive\n ========================================================================== */\n\n/*\n * Add the correct display in IE 9-.\n * 1. Add the correct display in Edge, IE, and Firefox.\n */\n\ndetails, /* 1 */\nmenu {\n display: block;\n}\n\n/*\n * Add the correct display in all browsers.\n */\n\nsummary {\n display: list-item;\n}\n\n/* Scripting\n ========================================================================== */\n\n/**\n * Add the correct display in IE 9-.\n */\n\ncanvas {\n display: inline-block;\n}\n\n/**\n * Add the correct display in IE.\n */\n\ntemplate {\n display: none;\n}\n\n/* Hidden\n ========================================================================== */\n\n/**\n * Add the correct display in IE 10-.\n */\n\n[hidden] {\n display: none;\n}\n\n\n\n// WEBPACK FOOTER //\n// ./src/containers/normalize.css"],"sourceRoot":""} -------------------------------------------------------------------------------- /mflix/build/static/media/mongoleaf.0ebc1843.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/mflix/build/static/media/mongoleaf.0ebc1843.png -------------------------------------------------------------------------------- /mflix/build/static/media/pixelatedLeaf.6c93bd20.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Created with Sketch. 
[lines 5-64: SVG path markup stripped during extraction] -------------------------------------------------------------------------------- /mflix/factory.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from flask import Flask, render_template 4 | from flask.json import JSONEncoder 5 | from flask_cors import CORS 6 | from flask_bcrypt import Bcrypt 7 | from flask_jwt_extended import JWTManager 8 | 9 | from bson import json_util, ObjectId 10 | from datetime import datetime, timedelta 11 | 12 | from mflix.api.movies import movies_api_v1 13 | from mflix.api.user import user_api_v1 14 | 15 | 16 | class MongoJsonEncoder(JSONEncoder): 17 | def default(self, obj): 18 | if isinstance(obj, datetime): 19 | return obj.strftime("%Y-%m-%d %H:%M:%S") 20 | if isinstance(obj, ObjectId): 21 | return str(obj) 22 | return json_util.default(obj, json_util.CANONICAL_JSON_OPTIONS) 23 | 24 | 25 | def create_app(): 26 | 27 | APP_DIR = os.path.abspath(os.path.dirname(__file__)) 28 | STATIC_FOLDER = os.path.join(APP_DIR, 'build/static') 29 | TEMPLATE_FOLDER = os.path.join(APP_DIR, 'build') 30 | 31 | app = Flask(__name__, static_folder=STATIC_FOLDER, 32 | template_folder=TEMPLATE_FOLDER, 33 | ) 34 | CORS(app) 35 | app.json_encoder = MongoJsonEncoder 36 | app.register_blueprint(movies_api_v1) 37 | app.register_blueprint(user_api_v1) 38 | jwt = JWTManager(app) 39 | 40 | @jwt.user_claims_loader 41 | def add_claims(identity): 42 | return { 43 | 'user': identity, 44 | } 45 | 46 | app.config['JWT'] = jwt 47 | app.config['BCRYPT'] = Bcrypt(app) 48 | app.config['CLAIMS_LOADER'] = add_claims 49 | app.config['JWT_ACCESS_TOKEN_EXPIRES'] = timedelta(days=30) 50 | 51 | @app.route('/', defaults={'path': ''}) 52 | @app.route('/<path:path>') 53 | def serve(path): 54 | return render_template('index.html') 55 | 56 | return app 57 | -------------------------------------------------------------------------------- /migrations/movie_last_updated_migration.py: -------------------------------------------------------------------------------- 1 | from pymongo import MongoClient, UpdateOne 2 | from pymongo.errors import InvalidOperation 3 | from bson import ObjectId 4 | import dateutil.parser as parser 5 | 6 | """ 7 | Ticket: Migration 8 | 9 | Update all the documents in the `movies` collection, such that the "lastupdated" 10 | field is stored as an ISODate() rather than a string. 11 | 12 | The parser.parse() method can transform date strings into ISODate objects for 13 | us. We just need to make sure the correct operations are sent to MongoDB! 14 | """ 15 | 16 | # ensure you update your host information below! 17 | host = "mongodb+srv://m220student:m220password@mflix-37ay0.mongodb.net/test" 18 | mflix = MongoClient(host)["mflix"] 19 | 20 | # TODO: Create the proper predicate and projection 21 | # add a predicate that checks that the "lastupdated" field exists, and then 22 | # checks that its type is a string 23 | # a projection is not required, but may help reduce the amount of data sent 24 | # over the wire!
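# Note: both conditions must live under a single "lastupdated" key, because
# Python dicts silently drop duplicate keys. A quick illustration in plain
# Python (this snippet is an added annotation, not part of the original file):
#
#   >>> {"lastupdated": {"$exists": True}, "lastupdated": {"$type": "string"}}
#   {'lastupdated': {'$type': 'string'}}    # the $exists condition is lost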
25 | predicate = {"lastupdated": {"$exists": True, "$type": "string"}} 26 | projection = {"lastupdated": 1, "_id": 1} 27 | 28 | cursor = mflix.movies.find(predicate, projection) 29 | 30 | # this will transform the "lastupdated" field to an ISODate() from a string 31 | movies_to_migrate = [] 32 | for doc in cursor: 33 | doc_id = doc.get('_id') 34 | lastupdated = doc.get('lastupdated', None) 35 | movies_to_migrate.append( 36 | { 37 | "doc_id": ObjectId(doc_id), 38 | "lastupdated": parser.parse(lastupdated) 39 | } 40 | ) 41 | 42 | print(f"{len(movies_to_migrate)} documents to migrate") 43 | 44 | try: 45 | # TODO: Complete the UpdateOne statement below 46 | # build the UpdateOne so it updates the "lastupdated" field to contain 47 | # the new ISODate() type 48 | bulk_updates = [UpdateOne( 49 | {"_id": movie.get("doc_id")}, 50 | {"$set": {"lastupdated": movie.get("lastupdated")}} 51 | ) for movie in movies_to_migrate] 52 | 53 | # here's where the bulk operation is sent to MongoDB 54 | bulk_results = mflix.movies.bulk_write(bulk_updates) 55 | print(f"{bulk_results.modified_count} documents updated") 56 | 57 | except InvalidOperation: 58 | print("no updates necessary") 59 | except Exception as e: 60 | print(str(e)) 61 | -------------------------------------------------------------------------------- /notebooks/MongoClient.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "

MongoClient

" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "slideshow": { 30 | "slide_type": "subslide" 31 | } 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "from pymongo import MongoClient\n", 36 | "uri = \"mongodb+srv://m220-user:m220-pass@m220-lessons-mcxlm.mongodb.net/test\"" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "slideshow": { 43 | "slide_type": "notes" 44 | } 45 | }, 46 | "source": [ 47 | "The MongoClient constructor accepts many different arguments to configure how the driver connects to MongoDB and how many operations will be performed. We'll look at the most basic configuration first, which is passing the SRV string of our Atlas cluster to MongoClient." 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": { 54 | "slideshow": { 55 | "slide_type": "subslide" 56 | } 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "client = MongoClient(uri)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "slideshow": { 68 | "slide_type": "fragment" 69 | } 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "client.stats" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": { 79 | "slideshow": { 80 | "slide_type": "notes" 81 | } 82 | }, 83 | "source": [ 84 | "Note that because we're using an Atlas SRV string, we got an SSL connection for free! It also defaults the **authSource** to the **admin** database.\n", 85 | "\n", 86 | "Now that we've connected to our **mongod**, we can create a database handle. Let's look at the available databases." 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "slideshow": { 94 | "slide_type": "subslide" 95 | } 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "client.list_database_names()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": { 105 | "slideshow": { 106 | "slide_type": "notes" 107 | } 108 | }, 109 | "source": [ 110 | "Let's use the **mflix** database. One useful property of a MongoClient object is we can use property accessors" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "slideshow": { 118 | "slide_type": "subslide" 119 | } 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "mflix = client.mflix\n", 124 | "mflix.list_collection_names()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": { 130 | "slideshow": { 131 | "slide_type": "notes" 132 | } 133 | }, 134 | "source": [ 135 | "or we can use dictionary accessors" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "slideshow": { 143 | "slide_type": "subslide" 144 | } 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "mflix = client['mflix']\n", 149 | "mflix.list_collection_names()" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": { 155 | "slideshow": { 156 | "slide_type": "notes" 157 | } 158 | }, 159 | "source": [ 160 | "Now that we have a database object and have listed available collections, let's create a collection object. As with the database object, we can use either property or dictionary accessors." 
161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "slideshow": { 168 | "slide_type": "subslide" 169 | } 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "movies = mflix.movies" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": { 179 | "slideshow": { 180 | "slide_type": "notes" 181 | } 182 | }, 183 | "source": [ 184 | "And let's perform a query on our movies collection. We'll just get the count of documents in the collection." 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": { 191 | "scrolled": true, 192 | "slideshow": { 193 | "slide_type": "subslide" 194 | } 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "movies.count_documents({})" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": { 204 | "slideshow": { 205 | "slide_type": "notes" 206 | } 207 | }, 208 | "source": [ 209 | "The MongoClient constructor also accepts many optional keyword parameters. We can set the maximum connection pool size, set default read and write concerns, choose whether to retry writes, and configure SSL, authentication, and much more.\n", 210 | "\n", 211 | "A full list and how to use MongoClient for more advanced use cases is available [here](http://api.mongodb.com/python/current/api/pymongo/mongo_client.html)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": { 217 | "slideshow": { 218 | "slide_type": "subslide" 219 | } 220 | }, 221 | "source": [ 222 | "Here is an example setting the **connectTimeoutMS** to 200 milliseconds, how long the driver will wait when attempting to connect before erroring, and setting **retryWrites** to True, signaling to the driver to retry a write in the event of a network error." 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "slideshow": { 230 | "slide_type": "fragment" 231 | } 232 | }, 233 | "outputs": [], 234 | "source": [ 235 | "client = MongoClient(uri, connectTimeoutMS=200, retryWrites=True)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "slideshow": { 243 | "slide_type": "fragment" 244 | } 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "client.stats" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": { 254 | "slideshow": { 255 | "slide_type": "slide" 256 | } 257 | }, 258 | "source": [ 259 | "## Summary\n", 260 | "\n", 261 | "* MongoClient accepts many optional keyword arguments to fine-tune your connection.\n", 262 | "* After instantiating the client, database handles can be created via property or dictionary accessors on the client object.\n", 263 | "* Collection handles are referenced from the database object.\n", 264 | "* Collection-specific operations like querying or updating documents are performed on the collection object."
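As a minimal sketch of a few more of the keyword options mentioned above (the values here are illustrative settings to tune, not recommendations from this lesson):

    client = MongoClient(
        uri,
        w="majority",                  # default write concern for this client
        readConcernLevel="majority",   # default read concern
        retryWrites=True,              # retry a write once after a network error
    )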
265 | ] 266 | } 267 | ], 268 | "metadata": { 269 | "celltoolbar": "Slideshow", 270 | "kernelspec": { 271 | "display_name": "Python 3", 272 | "language": "python", 273 | "name": "python3" 274 | }, 275 | "language_info": { 276 | "codemirror_mode": { 277 | "name": "ipython", 278 | "version": 3 279 | }, 280 | "file_extension": ".py", 281 | "mimetype": "text/x-python", 282 | "name": "python", 283 | "nbconvert_exporter": "python", 284 | "pygments_lexer": "ipython3", 285 | "version": "3.6.5" 286 | } 287 | }, 288 | "nbformat": 4, 289 | "nbformat_minor": 2 290 | } 291 | -------------------------------------------------------------------------------- /notebooks/basic_aggregation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pymongo\n", 10 | "from bson.json_util import dumps\n", 11 | "uri = \"mongodb+srv://m220-user:m220-pass@m220-lessons-mcxlm.mongodb.net/test\"\n", 12 | "client = pymongo.MongoClient(uri)\n", 13 | "m220 = client.m220\n", 14 | "movies = m220.movies" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "sam_raimi_cursor = movies.find( { \"directors\": \"Sam Raimi\" } )" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "sam_raimi_cursor" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "list(sam_raimi_cursor)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "match_stage = {\"$match\": { \"directors\": \"Sam Raimi\" } }\n", 51 | "pipeline = [\n", 52 | " match_stage\n", 53 | "]\n", 54 | "sam_raimi_aggregation = movies.aggregate( pipeline )" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "sam_raimi_aggregation" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "list(sam_raimi_aggregation)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "sam_raimi_cursor = movies.find(\n", 82 | " { \"directors\": \"Sam Raimi\" },\n", 83 | " { \"_id\": 0, \"title\": 1, \"cast\": 1 }\n", 84 | ")\n", 85 | "\n", 86 | "print(dumps(sam_raimi_cursor, indent=2))" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "match_stage = { \"$match\": { \"directors\": \"Sam Raimi\" } }\n", 96 | "project_stage = { \"$project\": { \"_id\": 0, \"title\": 1, \"cast\": 1 } }\n", 97 | "\n", 98 | "pipeline = [\n", 99 | " match_stage,\n", 100 | " project_stage\n", 101 | "]\n", 102 | "\n", 103 | "sam_raimi_aggregation = movies.aggregate( pipeline )\n", 104 | "\n", 105 | "print(dumps(sam_raimi_aggregation, indent=2))" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "unwind_stage = { \"$unwind\": \"$directors\" }\n", 115 | "\n", 116 | "group_stage = {\n", 117 | " \"$group\": {\n", 118 | " \"_id\": {\n", 119 | " \"director\": 
\"$directors\"\n", 120 | " },\n", 121 | " \"average_rating\": { \"$avg\": \"$imdb.rating\" }\n", 122 | " }\n", 123 | "}\n", 124 | "\n", 125 | "sort_stage = {\n", 126 | " \"$sort\": { \"average_rating\": -1 }\n", 127 | "}\n", 128 | "\n", 129 | "# create pipeline from four different stages\n", 130 | "pipeline = [\n", 131 | " unwind_stage,\n", 132 | " group_stage,\n", 133 | " sort_stage\n", 134 | "]\n", 135 | "\n", 136 | "# aggregate using pipeline\n", 137 | "director_ratings = movies.aggregate(pipeline)\n", 138 | "\n", 139 | "# iterate through the resulting cursor\n", 140 | "list(director_ratings)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [] 149 | } 150 | ], 151 | "metadata": { 152 | "kernelspec": { 153 | "display_name": "Python 3", 154 | "language": "python", 155 | "name": "python3" 156 | }, 157 | "language_info": { 158 | "codemirror_mode": { 159 | "name": "ipython", 160 | "version": 3 161 | }, 162 | "file_extension": ".py", 163 | "mimetype": "text/x-python", 164 | "name": "python", 165 | "nbconvert_exporter": "python", 166 | "pygments_lexer": "ipython3", 167 | "version": "3.6.4" 168 | } 169 | }, 170 | "nbformat": 4, 171 | "nbformat_minor": 2 172 | } 173 | -------------------------------------------------------------------------------- /notebooks/bulk_writes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "

Bulk Writes

" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "slideshow": { 29 | "slide_type": "notes" 30 | } 31 | }, 32 | "source": [ 33 | "In this lesson we're going to discuss a different type of writes called \"Bulk Writes,\" and the performance implications of these kinds of writes." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "slideshow": { 40 | "slide_type": "slide" 41 | } 42 | }, 43 | "source": [ 44 | "" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": { 50 | "slideshow": { 51 | "slide_type": "notes" 52 | } 53 | }, 54 | "source": [ 55 | "Often times, our applications will encounter situations in which they need to perform a series of writes at once, and in some cases these writes have a causal effect on one another. One failing or succeeding in a group of operations, may affect your application logic.\n", 56 | "\n", 57 | "In this case, a user is purchasing food items on our grocery store application, and we are updating the database to reflect the new quantities we have in stock. This person bought 2 (point) apples, so we decrement the quantity by 2.\n", 58 | "\n", 59 | "When our application receives these writes, one option it has is to send each (point) of these writes (point) one at a time." 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "slideshow": { 66 | "slide_type": "slide" 67 | } 68 | }, 69 | "source": [ 70 | "" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": { 76 | "slideshow": { 77 | "slide_type": "notes" 78 | } 79 | }, 80 | "source": [ 81 | "So the client sends a write (point) over to the database (point)." 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": { 87 | "slideshow": { 88 | "slide_type": "slide" 89 | } 90 | }, 91 | "source": [ 92 | "" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": { 98 | "slideshow": { 99 | "slide_type": "notes" 100 | } 101 | }, 102 | "source": [ 103 | "And at some point later, the client receives an acknowledgement that the write succeeded. Nice! Now let's send over the next write." 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": { 109 | "slideshow": { 110 | "slide_type": "slide" 111 | } 112 | }, 113 | "source": [ 114 | "" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": { 120 | "slideshow": { 121 | "slide_type": "notes" 122 | } 123 | }, 124 | "source": [ 125 | "So we send over our next write," 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": { 131 | "slideshow": { 132 | "slide_type": "notes" 133 | } 134 | }, 135 | "source": [ 136 | "" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": { 142 | "slideshow": { 143 | "slide_type": "notes" 144 | } 145 | }, 146 | "source": [ 147 | "And then eventually get our acknowledgement. Now we just performed two write operations, and it required two round trips to the database.\n", 148 | "\n", 149 | "That's a round trip to the database for each write operation - but if we already knew all the writes we wanted to perform, why is our client sending them each one at a time? You probably see where this is going." 
150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": { 155 | "slideshow": { 156 | "slide_type": "notes" 157 | } 158 | }, 159 | "source": [ 160 | "" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": { 166 | "slideshow": { 167 | "slide_type": "notes" 168 | } 169 | }, 170 | "source": [ 171 | "So what we can do instead is \"batch\" these inserts together, and then send them in bulk. The exact method of grouping documents together is implemented differently in each driver, because the data structures are different, but the general idea is the same: Package a bunch of writes into a batch, usually a list or array (but again, the implementation is different in each language), and then send that whole batch to MongoDB.\n", 172 | "\n", 173 | "This is the implementation of bulk writes in the Mongo shell, and you can copy this from the lecture notes if you want to try it out. But it will look different in your chosen programming language, so bear that in mind." 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": { 179 | "slideshow": { 180 | "slide_type": "slide" 181 | } 182 | }, 183 | "source": [ 184 | "" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": { 190 | "slideshow": { 191 | "slide_type": "notes" 192 | } 193 | }, 194 | "source": [ 195 | "When a client sends a bulk write, it gets one acknowledgement back from the database for the whole batch.\n", 196 | "\n", 197 | "This is a benefit to our application's performance, because it limits the effect of latency on the overall speed of the operation. If it takes one second for each round trip, then sending one write at a time takes four seconds. But if we can send all four writes in one round trip, then sending four writes only takes one second.\n", 198 | "\n", 199 | "Now, the default behavior of a bulk write in Mongo is an ordered execution of these (point) writes (point). And in the ordered bulk write, any failure will stop execution of the rest of the batch. This benefits us in this case, because these (point) writes might have an effect on each other, like if two different update operations want to buy 4 sticks of butter, but there's only one stick left. In that situation, the first operation in the batch should get the last stick of butter, and the second operation should error out. That's why we need these (point) executed in order.\n", 200 | "\n", 201 | "The bulk write would throw an error on this (point) update statement, and then return an acknowledgement to the client **before** trying to purchase anymore (point) items. The acknowledgement (point) we get back will then tell us if something errored out." 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": { 207 | "slideshow": { 208 | "slide_type": "slide" 209 | } 210 | }, 211 | "source": [ 212 | "### Ordered Bulk Write\n", 213 | "\n", 214 | "- The default setting for bulk writes in MongoDB\n", 215 | "- Executes writes sequentially\n", 216 | " - Will end execution after first write failure" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": { 222 | "slideshow": { 223 | "slide_type": "notes" 224 | } 225 | }, 226 | "source": [ 227 | "By default, bulk writes in Mongo will be ordered. 
This means that even though we send all the writes at the same time, the replica (point) set will apply them in the order they were sent.\n", 228 | "\n", 229 | "Sending an ordered bulk write implies that each write (point to butter or eggs) in the batch depends on all the writes (point to the ones above it) that occurred before it. So if a write operation results in an error, all subsequent writes will not be executed, because Mongo assumes that those (point to ones below it) writes were expecting this (point) write to succeed." 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": { 235 | "slideshow": { 236 | "slide_type": "slide" 237 | } 238 | }, 239 | "source": [ 240 | "" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": { 246 | "slideshow": { 247 | "slide_type": "notes" 248 | } 249 | }, 250 | "source": [ 251 | "But there's also a chance that the writes in our batch are not dependent on each other. In this case, we've just received a shipment of food to the warehouse, and we want to update the new food quantities in stock.\n", 252 | "\n", 253 | "Because all these (point) changes are additive, we don't need all of them to be executed in order, because they're not causally related. So I passed this \"ordered: false\" flag to the bulk write command, which will execute them in parallel. If some of them fail (for whatever reason), we can still continue on with the execution of other operations in the batch." 254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": { 259 | "slideshow": { 260 | "slide_type": "slide" 261 | } 262 | }, 263 | "source": [ 264 | "" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": { 270 | "slideshow": { 271 | "slide_type": "notes" 272 | } 273 | }, 274 | "source": [ 275 | "And when we receive an acknowledgement back from the database, it will let us know if any operations failed." 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": { 281 | "slideshow": { 282 | "slide_type": "slide" 283 | } 284 | }, 285 | "source": [ 286 | "### Unordered Bulk Write\n", 287 | "\n", 288 | "- Has to be specified with the flag: `{ ordered: false }`\n", 289 | "- Executes writes in parallel" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": { 295 | "slideshow": { 296 | "slide_type": "notes" 297 | } 298 | }, 299 | "source": [ 300 | "If the writes in our batch don't have any causal relationship, then the client can send them over in an \"Unordered Bulk Write\". This will execute the write operations in parallel, so the writes are non-blocking. And as a result, a single failure won't prevent any of the other writes from succeeding. Now those writes might fail on their own, but their execution is not tied to the success of any other writes." 
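A minimal pymongo sketch of the ordered-versus-unordered distinction described above (the collection, update operations, and quantities are illustrative, not from the lesson):

    from pymongo import MongoClient, UpdateOne
    from pymongo.errors import BulkWriteError

    inventory = MongoClient(uri).lessons.inventory  # assumes `uri` is defined

    requests = [
        UpdateOne({"type": "apples"}, {"$inc": {"quantity": -2}}),
        UpdateOne({"type": "butter"}, {"$inc": {"quantity": -4}}),
    ]

    try:
        # ordered=True is the default: execution stops at the first failed write
        result = inventory.bulk_write(requests, ordered=True)
        print(result.modified_count)
    except BulkWriteError as bwe:
        print(bwe.details["writeErrors"])

    # ordered=False: writes are attempted independently, so one failure
    # does not block the remaining operations in the batch
    inventory.bulk_write(requests, ordered=False)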
301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": { 306 | "slideshow": { 307 | "slide_type": "slide" 308 | } 309 | }, 310 | "source": [ 311 | "## Summary\n", 312 | "\n", 313 | "- Bulk writes allow database clients to send multiple writes\n", 314 | "- Can either be ordered or unordered" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": { 320 | "slideshow": { 321 | "slide_type": "notes" 322 | } 323 | }, 324 | "source": [ 325 | "So in conclusion, bulk writes make it more efficient to insert many documents into the database, by sending them all at the same time.\n", 326 | "\n", 327 | "These bulk writes can be ordered, which means writes are executed in the order they were sent to the database, and any errors will prevent subsequent writes from executing.\n", 328 | "\n", 329 | "They can also be unordered, which means writes are executed in parallel, and errors don't affect the execution of other writes.\n", 330 | "\n", 331 | "One small thing to note: in a sharded collection, ordered bulk writes are expected to take longer because different write operations need to be routed to their designated shards. An ordered bulk write might reach the mongos in one batch, but then it has to be serialized across the different shards. Regardless of their designated shard, the write operation needs to be evaluated to see if we should continue or exit the execution of the remainder of the batch." 332 | ] 333 | } 334 | ], 335 | "metadata": { 336 | "celltoolbar": "Slideshow", 337 | "kernelspec": { 338 | "display_name": "Python 3", 339 | "language": "python", 340 | "name": "python3" 341 | }, 342 | "language_info": { 343 | "codemirror_mode": { 344 | "name": "ipython", 345 | "version": 3 346 | }, 347 | "file_extension": ".py", 348 | "mimetype": "text/x-python", 349 | "name": "python", 350 | "nbconvert_exporter": "python", 351 | "pygments_lexer": "ipython3", 352 | "version": "3.6.5" 353 | }, 354 | "livereveal": { 355 | "transition": "none" 356 | } 357 | }, 358 | "nbformat": 4, 359 | "nbformat_minor": 2 360 | } 361 | -------------------------------------------------------------------------------- /notebooks/change_streams.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "

Change Streams

" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "slideshow": { 29 | "slide_type": "notes" 30 | } 31 | }, 32 | "source": [ 33 | "In this lesson, we're going to use change streams to track real-time changes to the data that our application's using." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "slideshow": { 40 | "slide_type": "slide" 41 | } 42 | }, 43 | "source": [ 44 | "### Change Streams\n", 45 | "\n", 46 | "- Report changes at the collection level\n", 47 | "- Accept pipelines to transform change events" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "slideshow": { 54 | "slide_type": "notes" 55 | } 56 | }, 57 | "source": [ 58 | "As of MongoDB 3.6, change streams report changes at the collection level, so we open a change stream against a specific collection.\n", 59 | "\n", 60 | "But by default it will return any change to the data in that collection regardless of what it is, so we can also pass a pipeline to transform the change events we get back from the stream." 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "slideshow": { 68 | "slide_type": "slide" 69 | } 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "from pymongo import MongoClient, errors\n", 74 | "uri = \"mongodb+srv://m220-user:m220-pass@m220-lessons-mcxlm.mongodb.net/test\"\n", 75 | "client = MongoClient(uri)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": { 81 | "slideshow": { 82 | "slide_type": "notes" 83 | } 84 | }, 85 | "source": [ 86 | "So here I'm just initializing my MongoClient object," 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": { 93 | "slideshow": { 94 | "slide_type": "slide" 95 | } 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "lessons = client.lessons\n", 100 | "inventory = lessons.inventory\n", 101 | "inventory.drop()\n", 102 | "\n", 103 | "fruits = [ \"strawberries\", \"bananas\", \"apples\" ]\n", 104 | "for fruit in fruits:\n", 105 | " inventory.insert_one( { \"type\": fruit, \"quantity\": 100 } )\n", 106 | " \n", 107 | "list(inventory.find())" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": { 113 | "slideshow": { 114 | "slide_type": "notes" 115 | } 116 | }, 117 | "source": [ 118 | "And I'm using a new collection for this lesson, `inventory`. If you imagine we have a store that sells fruits, this collection will store the total quanities of every fruit that we have in stock.\n", 119 | "\n", 120 | "In this case, we have a very small store that only sells three types of fruits, and I've just updated the inventory to reflect that we just got a shipment for 100 of each fruit.\n", 121 | "\n", 122 | "Now I'm just going to verify that our collection looks the way we expect.\n", 123 | "\n", 124 | "(run cell)\n", 125 | "\n", 126 | "And it looks like we have 100 of each fruit in the collection.\n", 127 | "\n", 128 | "But people will start buying them, cause you know, people like fruit. They'll go pretty quickly, and we want to make sure we don't run out. So I'm going to open a change stream against this collection, and track data changes to the `inventory` collection in real time." 
129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "slideshow": { 136 | "slide_type": "slide" 137 | } 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "try:\n", 142 | " with inventory.watch(full_document='updateLookup') as change_stream_cursor:\n", 143 | " for data_change in change_stream_cursor:\n", 144 | " print(data_change)\n", 145 | "except errors.PyMongoError:\n", 146 | " print('Change stream closed because of an error.')" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": { 152 | "slideshow": { 153 | "slide_type": "notes" 154 | } 155 | }, 156 | "source": [ 157 | "So here I'm opening a change stream against the `inventory` (point) collection, using the `watch()` method. `watch()` (point) returns a cursor object, so we can iterate through it in Python to return whatever document is next in the cursor.\n", 158 | "\n", 159 | "We've wrapped this in a try-except block so if something happens to the connection used for the change stream, we'll know immediately.\n", 160 | "\n", 161 | "(start the while loop)\n", 162 | "\n", 163 | "(go to `updates_every_one_second` notebook and start up process)\n", 164 | "\n", 165 | "(come back here)\n", 166 | "\n", 167 | "So the change stream cursor is just gonna spit out anything it gets, with no filter. Any change to the data in the `inventory` collection will appear in this output.\n", 168 | "\n", 169 | "But really, this is noise. We don't care when the quantity drops to 71 (point) or 60 (point), we only want to know when it's close to zero." 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "slideshow": { 177 | "slide_type": "slide" 178 | } 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "low_quantity_pipeline = [ { \"$match\": { \"fullDocument.quantity\": { \"$lt\": 20 } } } ]\n", 183 | "\n", 184 | "try:\n", 185 | " with inventory.watch(pipeline=low_quantity_pipeline, full_document='updateLookup') as change_stream_cursor:\n", 186 | " for data_change in change_stream_cursor:\n", 187 | " current_quantity = data_change[\"fullDocument\"].get(\"quantity\")\n", 188 | " fruit = data_change[\"fullDocument\"].get(\"type\")\n", 189 | " msg = \"There are only {0} units left of {1}!\".format(current_quantity, fruit)\n", 190 | " print(msg)\n", 191 | "except errors.PyMongoError:\n", 192 | " print('Change stream closed because of an error.')" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": { 198 | "slideshow": { 199 | "slide_type": "notes" 200 | } 201 | }, 202 | "source": [ 203 | "Let's say we want to know if any of our quantities (point to quantity values) dip below 20 units, so we know when to buy more.\n", 204 | "\n", 205 | "Here I've defined a pipeline for the change event documents returned by the cursor. In this case, if the cursor returns a change event to me, it's because that event caused one of our quantities to fall below 20 units.\n", 206 | "\n", 207 | "(open the change stream)\n", 208 | "\n", 209 | "(go to `updates_every_one_second` and start the third cell)\n", 210 | "\n", 211 | "(come back here)\n", 212 | "\n", 213 | "So if we just wait for the customers to go about their business...\n", 214 | "\n", 215 | "(wait for a print statement)\n", 216 | "\n", 217 | "And now we know that we need to buy more strawberries!"
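For reference, a change event document yielded by watch() looks roughly like this (values abbreviated here; the exact fields vary by operation type):

    {
        "_id": {"_data": "..."},                  # resume token
        "operationType": "update",
        "ns": {"db": "lessons", "coll": "inventory"},
        "documentKey": {"_id": "..."},
        "updateDescription": {"updatedFields": {"quantity": 19}, "removedFields": []},
        "fullDocument": {"type": "strawberries", "quantity": 19}  # via 'updateLookup'
    }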
218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": { 223 | "slideshow": { 224 | "slide_type": "slide" 225 | } 226 | }, 227 | "source": [ 228 | "## Summary\n", 229 | "\n", 230 | "- Change streams can be opened against a collection\n", 231 | " - Tracks data changes in real time\n", 232 | "- Aggregation pipelines can be used to transform change event documents" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": { 238 | "slideshow": { 239 | "slide_type": "notes" 240 | } 241 | }, 242 | "source": [ 243 | "So change streams are a great way to track changes to the data in a collection. And if you're using Mongo 4.0, you can open a change stream against a whole database, and even a whole cluster.\n", 244 | "\n", 245 | "We also have the flexibility to pass an aggregation pipeline to the change stream, to transform or filter out some of the change event documents." 246 | ] 247 | } 248 | ], 249 | "metadata": { 250 | "celltoolbar": "Slideshow", 251 | "kernelspec": { 252 | "display_name": "Python 3", 253 | "language": "python", 254 | "name": "python3" 255 | }, 256 | "language_info": { 257 | "codemirror_mode": { 258 | "name": "ipython", 259 | "version": 3 260 | }, 261 | "file_extension": ".py", 262 | "mimetype": "text/x-python", 263 | "name": "python", 264 | "nbconvert_exporter": "python", 265 | "pygments_lexer": "ipython3", 266 | "version": "3.6.5" 267 | } 268 | }, 269 | "nbformat": 4, 270 | "nbformat_minor": 2 271 | } 272 | -------------------------------------------------------------------------------- /notebooks/connection_pooling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "

Connection Pooling

" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "slideshow": { 29 | "slide_type": "notes" 30 | } 31 | }, 32 | "source": [ 33 | "In this lesson we're going to cover connection pooling in MongoDB." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "slideshow": { 40 | "slide_type": "slide" 41 | } 42 | }, 43 | "source": [ 44 | "Reusing database connections." 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": { 50 | "slideshow": { 51 | "slide_type": "notes" 52 | } 53 | }, 54 | "source": [ 55 | "So what is connection pooling? \n", 56 | "\n", 57 | "If we look on Wikipedia, it says \"In software engineering, a connection pool is a cache of database connections maintained so that the connections can be reused when future requests to the database are required.\"\n", 58 | "\n", 59 | "What does that mean?" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "slideshow": { 66 | "slide_type": "slide" 67 | } 68 | }, 69 | "source": [ 70 | "" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": { 76 | "slideshow": { 77 | "slide_type": "notes" 78 | } 79 | }, 80 | "source": [ 81 | "Imagine you run a taxi service ferrying people to and from one point to another. But rather than reuse the same taxi after you transport one customer, you scrap that vehicle and go buy another.\n", 82 | "\n", 83 | "When issuing several different requests to the database, we could take the lazy approach and just create a new connection whenever we need to make a request, and when the request's done we just destroy the connection. The issue with this approach is that establishing a database connection requires time and computing resources, to complete the handshake with the server and whatnot. We're essentially paying the cost of waiting for this connection to establish for every request.\n", 84 | "\n", 85 | "Connection pooling helps reduce the overhead of creating database connections, by creating a whole bunch right off the bat. Then as requests come in, different connections in the pool are allocated to fulfill those requests.\n", 86 | "\n", 87 | "By default, drivers will establish a connection pool with 100 connections to share. So 100 connections will get created off the bat, and then get assigned to different requests as they come in. This default of 100 connections is adequate for most applications.\n", 88 | "\n", 89 | "Additionally, if we didn't use a connection pool and we suddently got a **whole lot** of requests, we might easily reach the limit that our hardware and software could handle, leading to a lot of errors and unhappy developers." 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": { 95 | "slideshow": { 96 | "slide_type": "slide" 97 | } 98 | }, 99 | "source": [ 100 | "## Summary\n", 101 | "\n", 102 | "- Connection pools allow for reuse of connections\n", 103 | "- Subsequent requests appear faster to the client\n", 104 | "- Default size of 100" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": { 110 | "slideshow": { 111 | "slide_type": "notes" 112 | } 113 | }, 114 | "source": [ 115 | "So just to recap, connection pools allow connections to be quickly recycled for new requests for the database. To the developer, this will make database operations look faster, because the cost to create the new connection has already been paid, in a sense.\n", 116 | "\n", 117 | "And in Mongo drivers, the default connection pool is 100 connections large." 
118 | ] 119 | } 120 | ], 121 | "metadata": { 122 | "celltoolbar": "Slideshow", 123 | "kernelspec": { 124 | "display_name": "Python 3", 125 | "language": "python", 126 | "name": "python3" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | "mimetype": "text/x-python", 135 | "name": "python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.6.5" 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 2 143 | } 144 | -------------------------------------------------------------------------------- /notebooks/deletes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "

Your First Delete

" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "slideshow": { 29 | "slide_type": "notes" 30 | } 31 | }, 32 | "source": [ 33 | "As usual, we'll import MongoClient and set up our connection uri." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "slideshow": { 41 | "slide_type": "subslide" 42 | } 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "from pymongo import MongoClient\n", 47 | "uri = \"mongodb+srv://m220-user:m220-pass@m220-lessons-mcxlm.mongodb.net/test\"" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "slideshow": { 54 | "slide_type": "notes" 55 | } 56 | }, 57 | "source": [ 58 | "And then intialization our connection and get back a MongoClient object." 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "slideshow": { 66 | "slide_type": "subslide" 67 | } 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "client = MongoClient(uri)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": { 77 | "slideshow": { 78 | "slide_type": "notes" 79 | } 80 | }, 81 | "source": [ 82 | "Since we're learning about deletes in this lesson and don't want to work with any of our production data, we'll define a new database and collection name to work with." 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": { 89 | "slideshow": { 90 | "slide_type": "subslide" 91 | } 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "lessons = client.lessons\n", 96 | "deletes = lessons.deletes" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": { 102 | "slideshow": { 103 | "slide_type": "notes" 104 | } 105 | }, 106 | "source": [ 107 | "Now that we have a collection object named **deletes** with no data in it, let's insert some data.\n", 108 | "\n", 109 | "We'll insert 100 documents with an **_id** that ranges from 0 to 99, and a field called **random_bool** that will randomly be true or false. We'll run an assertion stating that we expect 100 ObjectIds to have been inserted. If this isn't true we'll see an error.\n", 110 | "\n", 111 | "We've added the drop method at the beginning of this code cell to ensure repeatability in case we want to run through this lesson again." 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "slideshow": { 119 | "slide_type": "subslide" 120 | } 121 | }, 122 | "outputs": [], 123 | "source": [ 124 | "import random\n", 125 | "random.seed(42)\n", 126 | "deletes.drop()\n", 127 | "imr = deletes.insert_many([{'_id': val, 'random_bool': random.choice([True, False])} for val in range(100)])\n", 128 | "assert len(imr.inserted_ids) == 100" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": { 134 | "slideshow": { 135 | "slide_type": "notes" 136 | } 137 | }, 138 | "source": [ 139 | "Ok, let's grab the first 3 documents to get a sense for what they look like." 
140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "slideshow": { 147 | "slide_type": "subslide" 148 | } 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "list(deletes.find().limit(3))" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": { 158 | "slideshow": { 159 | "slide_type": "notes" 160 | } 161 | }, 162 | "source": [ 163 | "Ok, we're convinced that we have a fairly random **random_bool** field and an **_id** with values between 0 and 99.\n", 164 | "\n", 165 | "We've learned how to create, read, and update documents. Now to delete.\n", 166 | "\n", 167 | "**pymongo** offers two idiomatic delete methods, **delete_one** and **delete_many**. Let's look at them both to get a sense for how they work." 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": { 173 | "slideshow": { 174 | "slide_type": "slide" 175 | } 176 | }, 177 | "source": [ 178 | "

delete_one

" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "slideshow": { 185 | "slide_type": "fragment" 186 | } 187 | }, 188 | "source": [ 189 | "`delete_one` is a lot like `find_one`. It takes a predicate to match the document you want to delete, finds the document, and deletes it. If multiple documents match the predicate, `delete_one` will only delete the first document matched.\n", 190 | "\n", 191 | "Let's use `delete_one` to delete the first document where **random_bool** is True. Based on what I said, we should be left with 99 documents in the collection." 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": { 197 | "slideshow": { 198 | "slide_type": "subslide" 199 | } 200 | }, 201 | "source": [ 202 | "We'll assign the DeleteResult object to the variable **dr** so we can print out the **deleted_count** property which tells us how many documents were deleted." 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": { 209 | "scrolled": true, 210 | "slideshow": { 211 | "slide_type": "subslide" 212 | } 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "dr = deletes.delete_one({'random_bool': True})\n", 217 | "dr.deleted_count" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": { 223 | "slideshow": { 224 | "slide_type": "slide" 225 | } 226 | }, 227 | "source": [ 228 | "`delete_one` can be thought of like a precision scalpel. If we know some value or values that uniquely identify a document, we're guaranteed to only delete that document.\n", 229 | "\n", 230 | "We know the **_id** must be unique, so let's delete the document with **'_id': 99**" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": { 236 | "slideshow": { 237 | "slide_type": "notes" 238 | } 239 | }, 240 | "source": [ 241 | "First we'll find the document to prove it exists, then delete it, then try to find it again. We should get None back for the second find." 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": { 248 | "slideshow": { 249 | "slide_type": "fragment" 250 | } 251 | }, 252 | "outputs": [], 253 | "source": [ 254 | "deletes.find_one({'_id': 99})" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": { 261 | "slideshow": { 262 | "slide_type": "fragment" 263 | } 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "deletes.delete_one({'_id': 99})" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": { 274 | "slideshow": { 275 | "slide_type": "fragment" 276 | } 277 | }, 278 | "outputs": [], 279 | "source": [ 280 | "deletes.find_one({'_id': 99})" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": { 286 | "slideshow": { 287 | "slide_type": "slide" 288 | } 289 | }, 290 | "source": [ 291 | "

delete_many

" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": { 297 | "slideshow": { 298 | "slide_type": "subslide" 299 | } 300 | }, 301 | "source": [ 302 | "Unlike `delete_one`, `delete_many` deletes all documents that match the supplied predicate. Because of this behavior, `delete_many` is a little more \"dangerous\".\n", 303 | "\n", 304 | "Let's first get a count of how many documents now have False and True for their **random_bool** value. Then, we'll use `delete_many` to delete **all** documents where **random_bool** is False." 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": { 311 | "slideshow": { 312 | "slide_type": "fragment" 313 | } 314 | }, 315 | "outputs": [], 316 | "source": [ 317 | "len(list(deletes.find({'random_bool': False})))" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": { 324 | "slideshow": { 325 | "slide_type": "fragment" 326 | } 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "len(list(deletes.find({'random_bool': True})))" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": { 336 | "slideshow": { 337 | "slide_type": "subslide" 338 | } 339 | }, 340 | "source": [ 341 | "44 documents have a **random_bool** value of False. Our deleted count should be 44, and a count on the collection should yield 54." 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": { 348 | "slideshow": { 349 | "slide_type": "fragment" 350 | } 351 | }, 352 | "outputs": [], 353 | "source": [ 354 | "dr = deletes.delete_many({'random_bool': False})\n", 355 | "dr.deleted_count" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": { 362 | "slideshow": { 363 | "slide_type": "fragment" 364 | } 365 | }, 366 | "outputs": [], 367 | "source": [ 368 | "len(list(deletes.find({'random_bool': True})))" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": { 374 | "slideshow": { 375 | "slide_type": "slide" 376 | } 377 | }, 378 | "source": [ 379 | "## Summary\n", 380 | "\n", 381 | "And that covers the basics of deleting documents with pymongo. Remember\n", 382 | "\n", 383 | "* `delete_one` will delete the first document that matches the supplied predicate.\n", 384 | "* `delete_many` will delete all documents matching the supplied predicate.\n", 385 | "* The number of documents deleted can be accessed via the **deleted_count** property on the `DeleteResult` object returned from a delete operation." 
386 | ] 387 | } 388 | ], 389 | "metadata": { 390 | "celltoolbar": "Slideshow", 391 | "kernelspec": { 392 | "display_name": "Python 3", 393 | "language": "python", 394 | "name": "python3" 395 | }, 396 | "language_info": { 397 | "codemirror_mode": { 398 | "name": "ipython", 399 | "version": 3 400 | }, 401 | "file_extension": ".py", 402 | "mimetype": "text/x-python", 403 | "name": "python", 404 | "nbconvert_exporter": "python", 405 | "pygments_lexer": "ipython3", 406 | "version": "3.6.5" 407 | } 408 | }, 409 | "nbformat": 4, 410 | "nbformat_minor": 2 411 | } 412 | -------------------------------------------------------------------------------- /notebooks/m220p_app_arch.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "slideshow": { 20 | "slide_type": "slide" 21 | } 22 | }, 23 | "source": [ 24 | "

M220P Application Architecture

" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": { 30 | "slideshow": { 31 | "slide_type": "notes" 32 | } 33 | }, 34 | "source": [ 35 | "In this lesson, we'll go over the application architecture for the MFlix Application." 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": { 41 | "slideshow": { 42 | "slide_type": "slide" 43 | } 44 | }, 45 | "source": [ 46 | "" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": { 54 | "slideshow": { 55 | "slide_type": "notes" 56 | } 57 | }, 58 | "source": [ 59 | "Here is an overview of the structure using the \"tree\" command in my terminal. \n", 60 | "\n", 61 | "The README.md file contains detailed instructions for setting up your environment as well as information about the layout of the project.\n", 62 | "\n", 63 | "There are two directories under the mflix directory, api and build. api contains the Flask route handlers for the application, and build contains the frontend application. Both of these are completed for you to allow you to focus solely on learning to use Pymongo and MongoDB.\n", 64 | "\n", 65 | "The db.py file is where most of your effort will be focused. It contains all of the methods that interact with the database. Initially most of these will just be stubs that you will have to fill out with the required functionality. You do not need to have MongoDB installed as you'll be using your own MongoDB Atlas database.\n", 66 | "\n", 67 | "factory.py contains functionality that assembles the flask application for running. You won't have to modify this file.\n", 68 | "\n", 69 | "Lastly, the tests directory contains all of the unit tests.\n", 70 | "\n", 71 | "Throughout the course, you'll be presented with labs titled \"Ticket\". These will contain a user story or some other instruction along with instructions for running the particular test suite for this ticket. Once all of the tests are passing for that particular task, you'll go to the \"Status\" page in the UI." 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": { 77 | "slideshow": { 78 | "slide_type": "slide" 79 | } 80 | }, 81 | "source": [ 82 | "# Summary\n", 83 | "\n", 84 | "- README.md file contains detailed setup instructions\n", 85 | "- API layer is implemented by `movies.py` and `user.py` in the **mflix/api** folder\n", 86 | " - Do not modify either of these files\n", 87 | "- `db.py` file contains all methods that interact with the database\n", 88 | " - Modify this file to implement required functionality\n", 89 | "- **tests** directory contains all unit tests\n", 90 | " - Run these tests as you go\n", 91 | " - We recommend you focus on making tests pass one by one rather than trying to make all tests in the suite pass at once." 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": { 97 | "slideshow": { 98 | "slide_type": "notes" 99 | } 100 | }, 101 | "source": [ 102 | "That sums up the application architecture. \n", 103 | "\n", 104 | "Remember to read the README file as it contains detailed setup instructions. \n", 105 | "\n", 106 | "The API layer that handles requests when the application is running is implemented in movies.py and user.py. Feel free to look at these files but do not modfiy them as doing so may prevent the UI from validating correctly.\n", 107 | "\n", 108 | "db.py is where all of the methods that interact with Atlas are located and where you will be doing all of your implementation.\n", 109 | "\n", 110 | "The tests directory contains all of the unit tests. 
Make sure you read them to see what is being passed into the methods. We also highly recommend that you focus on making one test pass at a time. If there are 4 or 5 unit tests within a test file, write enough functionality to make one test pass. Trying to make all the tests pass on your first try can lead to frustration!" 111 | ] 112 | } 113 | ], 114 | "metadata": { 115 | "celltoolbar": "Slideshow", 116 | "kernelspec": { 117 | "display_name": "Python 3", 118 | "language": "python", 119 | "name": "python3" 120 | }, 121 | "language_info": { 122 | "codemirror_mode": { 123 | "name": "ipython", 124 | "version": 3 125 | }, 126 | "file_extension": ".py", 127 | "mimetype": "text/x-python", 128 | "name": "python", 129 | "nbconvert_exporter": "python", 130 | "pygments_lexer": "ipython3", 131 | "version": "3.6.4" 132 | } 133 | }, 134 | "nbformat": 4, 135 | "nbformat_minor": 2 136 | } 137 | -------------------------------------------------------------------------------- /notebooks/polp.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "

Principle of Least Privilege

\n", 23 | "> Every program and every privileged user of the system should operate using the least amount of privilege necessary to complete the job.\n", 24 | ">\n", 25 | "> — Jerome Saltzer, Communications of the ACM" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "slideshow": { 32 | "slide_type": "notes" 33 | } 34 | }, 35 | "source": [ 36 | "In this lesson we're going to talk about the Principle of Least Privilege." 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "slideshow": { 43 | "slide_type": "slide" 44 | } 45 | }, 46 | "source": [ 47 | "" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "slideshow": { 54 | "slide_type": "notes" 55 | } 56 | }, 57 | "source": [ 58 | "Security is a multi-tiered effort. \n", 59 | "\n", 60 | "Atlas ensures the traffic being transmitted from the application to the database is encrypted in transit. We also make sure to hash mflix user passwords before storing them in the database.\n", 61 | "\n", 62 | "At the appication layer, we make sure that certain resources are only available to logged in users, and those users have permissions to perform an action, such as deleting only their own comments.\n", 63 | "\n", 64 | "But why stop there?" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "slideshow": { 71 | "slide_type": "slide" 72 | } 73 | }, 74 | "source": [ 75 | "![noun_lock](https://s3.amazonaws.com/edu-static.mongodb.com/lessons/M220/notebook_assets/noun_lock.png)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": { 81 | "slideshow": { 82 | "slide_type": "notes" 83 | } 84 | }, 85 | "source": [ 86 | "MongoDB offers robust user management at the database level, and we should use it. And since we are using Atlas, this is made even easier because of the graphical interface. \n", 87 | "\n", 88 | "By creating a database user specifically for the application, we can, in a more granular way, select the privileges and resources MFlix has access to.\n", 89 | "\n", 90 | "Should the application have the ability to create indexes, new collections, or drop the entire database?\n", 91 | "\n", 92 | "Questions like these aren't always fun to ask and answer, but they are absolutely necessary.\n", 93 | "\n", 94 | "That's all for now. We highly recommend that you take our MongoDB Security course to learn more to help secure and harden your own MongoDB deployments." 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": { 100 | "slideshow": { 101 | "slide_type": "slide" 102 | } 103 | }, 104 | "source": [ 105 | "## Summary\n", 106 | "\n", 107 | "- Engineer systems with the principle of least privilege in mind\n", 108 | "- Consider what kinds of users and what permission they will have.\n", 109 | " - Application users that log into the application itself\n", 110 | " - Database users\n", 111 | " - Administrative database users that can create indexes, import data, and so on\n", 112 | " - Application database users that only have priveleges they require." 
113 | ] 114 | } 115 | ], 116 | "metadata": { 117 | "celltoolbar": "Slideshow", 118 | "kernelspec": { 119 | "display_name": "Python 3", 120 | "language": "python", 121 | "name": "python3" 122 | }, 123 | "language_info": { 124 | "codemirror_mode": { 125 | "name": "ipython", 126 | "version": 3 127 | }, 128 | "file_extension": ".py", 129 | "mimetype": "text/x-python", 130 | "name": "python", 131 | "nbconvert_exporter": "python", 132 | "pygments_lexer": "ipython3", 133 | "version": "3.6.5" 134 | } 135 | }, 136 | "nbformat": 4, 137 | "nbformat_minor": 2 138 | } 139 | -------------------------------------------------------------------------------- /notebooks/read_concerns.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "

Read Concerns

" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "slideshow": { 29 | "slide_type": "notes" 30 | } 31 | }, 32 | "source": [ 33 | "In this lesson we're going to discuss read concerns." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "slideshow": { 40 | "slide_type": "slide" 41 | } 42 | }, 43 | "source": [ 44 | "#### Read Concerns\n", 45 | "\n", 46 | "- Represent different levels of \"read isolation\"\n", 47 | "- Can be used to specify a consistent view of the database" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "slideshow": { 54 | "slide_type": "notes" 55 | } 56 | }, 57 | "source": [ 58 | "Read concerns are similar to write concerns in that they both involve how many nodes have applied a database operation.\n", 59 | "\n", 60 | "While write concerns affected the acknowledgement received by the driver after a write operation, read concerns affect the data returned by a read operation. Different read concerns are referred to as different levels of \"read isolation,\" because you can essentially \"isolate\" a read from the rest of the database if the data being read has only been written to one of the nodes. If data can be read by clients before that data has been replicated to a majority of nodes, it's considered a low level of read isolation.\n", 61 | "\n", 62 | "The read concern you choose will depend on how consistent your view of the database needs to be. If you can afford to read slightly stale data, then maybe a low level of read isolation might suit your application." 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": { 68 | "slideshow": { 69 | "slide_type": "slide" 70 | } 71 | }, 72 | "source": [ 73 | "\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": { 79 | "slideshow": { 80 | "slide_type": "notes" 81 | } 82 | }, 83 | "source": [ 84 | "By default, when an application sends a read to a replica set, Mongo uses `readConcern(\"local\")` (point). From the perspective of the database client (point), the data read using `readConcern(\"local\")` has only been written to this one (point) node. In the vast majority of cases, the data will **also** be written to the other nodes (point) in the set, but the client only has proof that this (point) one node applied the write.\n", 85 | "\n", 86 | "This means that there's a chance, however slim, that the data (point) returned from this read will be rolled back. This would happen if, sometime after this (point) data is returned and before the secondaries have replicated that data, the primary goes down and a secondary gets elected to become the new primary. That would mean that one of these (point) two nodes, who haven't replicated the data yet, will be the new primary, and the old primary will be rolled back to match the state of the new primary." 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": { 92 | "slideshow": { 93 | "slide_type": "slide" 94 | } 95 | }, 96 | "source": [ 97 | "\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": { 103 | "slideshow": { 104 | "slide_type": "notes" 105 | } 106 | }, 107 | "source": [ 108 | "So the default readConcern in MongoDB is `\"local\"`, which reads whatever copy of the data exists on this (point) node, regardless of whether or not the other nodes had replicated that data. 
And for the vast majority of reads, `readConcern(\"local\")` will work just fine.\n", 109 | "\n", 110 | "But we might want a higher level of consistency on some of our reads, which we can achieve with the readConcern called `\"majority\"`. When the database client sends a read to Mongo with `readConcern(\"majority\")`, it can verifiably claim that the data it gets back (point) has been replicated to a majority of nodes in the replica set. The benefit of this readConcern level is that once data has been replicated to a majority of nodes, it's super durable in the event of a failure. Even if the current primary (point) fails, this (point) secondary can be elected primary and then the data won't get rolled back.\n", 111 | "\n", 112 | "One thing to note here: if the secondaries aren't done replicating data at the time that the primary receives this (point) write, then whatever copy of the data **has** been replicated to a majority of nodes in the set will be the data returned to the client.\n", 113 | "\n", 114 | "This means that if my age (point) on the primary (point) is 66, but both of the secondaries still think that my age is 65, then the age 65 will be returned to the client. That's because in a replica set with three members, two nodes are required to constitute a majority.\n", 115 | "\n", 116 | "So clearly, `readConcern(\"majority\")` might return slightly stale data, but it provides a higher level of read isolation, so you can be more confident that the data you're reading won't get rolled back. For this reason it's most useful when reading mission-critical data, because lower levels of read isolation have a slightly higher chance of being rolled back in the event of an emergency. If your application's core functionality depends on one read, like checking a user's current account balance, then you probably want that read to have a higher durability." 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": { 122 | "slideshow": { 123 | "slide_type": "slide" 124 | } 125 | }, 126 | "source": [ 127 | "## Summary\n", 128 | "\n", 129 | "- The default read concern in MongoDB is `\"local\"`\n", 130 | " - This does not check that data has been replicated\n", 131 | "- The read concern `\"majority\"` allows for more durable reads\n", 132 | " - This only returns data that has been replicated to a majority of nodes" 133 | ] 134 | } 135 | ], 136 | "metadata": { 137 | "celltoolbar": "Slideshow", 138 | "kernelspec": { 139 | "display_name": "Python 3", 140 | "language": "python", 141 | "name": "python3" 142 | }, 143 | "language_info": { 144 | "codemirror_mode": { 145 | "name": "ipython", 146 | "version": 3 147 | }, 148 | "file_extension": ".py", 149 | "mimetype": "text/x-python", 150 | "name": "python", 151 | "nbconvert_exporter": "python", 152 | "pygments_lexer": "ipython3", 153 | "version": "3.6.5" 154 | }, 155 | "livereveal": { 156 | "transition": "none" 157 | } 158 | }, 159 | "nbformat": 4, 160 | "nbformat_minor": 2 161 | } 162 | -------------------------------------------------------------------------------- /notebooks/robust_applications.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "
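Before moving on, here is a minimal PyMongo sketch of requesting the two read isolation levels discussed in the read concerns lesson above; the URI, collection, and query are illustrative placeholders:

```python
from pymongo import MongoClient
from pymongo.read_concern import ReadConcern

client = MongoClient("your-atlas-uri-here")  # placeholder URI
accounts = client.bank.accounts              # illustrative collection

# Default behavior: readConcern "local". Fast, but the data returned may
# not have been replicated to a majority of nodes yet, so it could be
# rolled back after a failover.
doc = accounts.find_one({"owner": "matt"})

# For mission-critical reads, ask for majority-committed data instead.
# with_options returns a new Collection object configured with the given
# read concern; the original collection object is unchanged.
majority_accounts = accounts.with_options(
    read_concern=ReadConcern("majority")
)
balance = majority_accounts.find_one({"owner": "matt"})
```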

Robust Client Configuration

" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "slideshow": { 29 | "slide_type": "notes" 30 | } 31 | }, 32 | "source": [ 33 | "In this lesson, we're going to discuss ways in which you can make your application's configuration more robust, with respect to how it talks to the database." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "slideshow": { 40 | "slide_type": "slide" 41 | } 42 | }, 43 | "source": [ 44 | "## Always use Connection Pooling" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": { 50 | "slideshow": { 51 | "slide_type": "notes" 52 | } 53 | }, 54 | "source": [ 55 | "You've learned about connection pooling already but it's important so we'll briefly cover it again." 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": { 61 | "slideshow": { 62 | "slide_type": "slide" 63 | } 64 | }, 65 | "source": [ 66 | "" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": { 72 | "slideshow": { 73 | "slide_type": "notes" 74 | } 75 | }, 76 | "source": [ 77 | "Creating a new MongoClient for every request to the database might service your application in the short term, but it will eventually result in the application consuming and depleting available resources that become more and more scarce over time.\n", 78 | "\n", 79 | "Connection pooling reduces the total overhead associated with creating a new connection, by allowing the application to recycle and reuse database connections for new requests.\n", 80 | "\n", 81 | "The M220 API that you've been given correctly reuses the same class or object for all client communication if you'd like to look at an example of how we did it." 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": { 87 | "slideshow": { 88 | "slide_type": "slide" 89 | } 90 | }, 91 | "source": [ 92 | "# Always specify a `wtimeout` with majority writes." 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": { 98 | "slideshow": { 99 | "slide_type": "notes" 100 | } 101 | }, 102 | "source": [ 103 | "Another way to make a more robust database client is a write timeout, or `wtimeout`.\n", 104 | "\n", 105 | "No matter how well we engineer a system, we should always expect application external resources like queues, networks, and databases to take more time than expected. For application or consumer critical operations, a developer may choose to write with `w: majority` to ensure that acknowledged writes are written to a majority of replica set nodes." 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": { 111 | "slideshow": { 112 | "slide_type": "slide" 113 | } 114 | }, 115 | "source": [ 116 | "" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": { 122 | "slideshow": { 123 | "slide_type": "notes" 124 | } 125 | }, 126 | "source": [ 127 | "But if there's a problem on the secondary nodes, we might not get acknowledgements back from the server for a while. If more writes than reads are coming into the system and operations aren't being acknowledged, this will eventually lead to system gridlock.\n", 128 | "\n", 129 | "To avoid this, follow a simple rule. For any write operation written with majority, always specify a write timeout. The specific length of the timeout will need to be determined based on your network and hardware, but you should always be setting timeouts on these (point) writes." 
130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": { 135 | "slideshow": { 136 | "slide_type": "slide" 137 | } 138 | }, 139 | "source": [ 140 | "

{ w: \"majority\", wtimeout: 5000 }

" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": { 146 | "slideshow": { 147 | "slide_type": "slide" 148 | } 149 | }, 150 | "source": [ 151 | "# Always configure for and handle `serverSelectionTimeout` errors." 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": { 157 | "slideshow": { 158 | "slide_type": "slide" 159 | } 160 | }, 161 | "source": [ 162 | "" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": { 168 | "slideshow": { 169 | "slide_type": "notes" 170 | } 171 | }, 172 | "source": [ 173 | "And lastly, you should always handle a server selection timeout error.\n", 174 | "\n", 175 | "This error will be thrown in the event a MongoDB server is unavailable for a write or for a read with a preference that the replica set can't currently fulfill. At the end of the day, MongoDB is a distributed database. So you should expect the system to be running on remote servers, along with all the benefits and constraints that it brings to your application logic.\n", 176 | "\n", 177 | "By default, the time before a driver will raise this error is 30 seconds, but you should change this to suit your application's needs. By handling this error you also passively monitor the health of your application stack and can become very quickly aware of any hardware and software problems that haven't recovered in an adequate amount of time.\n", 178 | "\n", 179 | "Each driver and programming language has a specific way to deal with errors, and we handle this error in particular in Mflix." 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": { 185 | "slideshow": { 186 | "slide_type": "slide" 187 | } 188 | }, 189 | "source": [ 190 | "## Summary\n", 191 | "\n", 192 | "- Always use connection pooling\n", 193 | "- Always specify a `wtimeout` with majority writes\n", 194 | "- Always handle `serverSelectionTimeout` errors." 195 | ] 196 | } 197 | ], 198 | "metadata": { 199 | "celltoolbar": "Slideshow", 200 | "kernelspec": { 201 | "display_name": "Python 3", 202 | "language": "python", 203 | "name": "python3" 204 | }, 205 | "language_info": { 206 | "codemirror_mode": { 207 | "name": "ipython", 208 | "version": 3 209 | }, 210 | "file_extension": ".py", 211 | "mimetype": "text/x-python", 212 | "name": "python", 213 | "nbconvert_exporter": "python", 214 | "pygments_lexer": "ipython3", 215 | "version": "3.6.5" 216 | } 217 | }, 218 | "nbformat": 4, 219 | "nbformat_minor": 2 220 | } 221 | -------------------------------------------------------------------------------- /notebooks/updates.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pymongo\n", 10 | "from bson.json_util import dumps\n", 11 | "from faker import Faker\n", 12 | "import random\n", 13 | "fake = Faker()\n", 14 | "fake.seed(42)\n", 15 | "random.seed(42)\n", 16 | "# IMPORTANT!! 
Use the connection string to your own Atlas cluster below!\n", 17 | "uri = \"\"\n", 18 | "client = pymongo.MongoClient(uri)\n", 19 | "mflix = client.mflix" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "fake_users = mflix.fake_users\n", 29 | "fake_users.drop()" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "def make_user(iter_count):\n", 39 | " account_type = \"premium\" if iter_count % 2 == 0 else \"standard\"\n", 40 | " return {\n", 41 | " \"name\": fake.name(),\n", 42 | " \"address\": fake.address(),\n", 43 | " \"email\": fake.email(),\n", 44 | " \"age\": random.randrange(18, 65),\n", 45 | " \"favorite_colors\": [fake.color_name(), fake.color_name(), fake.color_name()],\n", 46 | " \"account_type\": account_type\n", 47 | " }" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "to_insert = [make_user(i) for i in range(10)]" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "fake_users.insert_many(to_insert)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "print(dumps(fake_users.find_one(), indent=2))" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "allison = {\"name\": \"Allison Hill\"}\n", 84 | "fake_users.update_one(allison, { \"$inc\": { \"age\": 1 }})" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "print(dumps(fake_users.find_one(allison), indent=2))" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "fake_users.update_one(allison, {\"$push\": { \"favorite_colors\": \"Black\"}})" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "print(dumps(fake_users.find_one(allison), indent=2))" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "print(fake_users.count({\"account_type\": \"standard\"}))" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "print(dumps(fake_users.find({\"account_type\": \"standard\"}, { \"_id\": 0, \"name\": 1, \"account_type\": 1}), indent=2))" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "u_r = fake_users.update_many({\"account_type\": \"standard\"}, {\"$set\": { \"account_type\": \"premium\", \"free_trial\": True}})" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "print(fake_users.count({\"account_type\": \"standard\"}))" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "print(dumps(fake_users.find({\"free_trial\": True}, { \"_id\": 
0, \"name\": 1, \"account_type\": 1}), indent=2))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "print(dir(u_r))" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "print(u_r.acknowledged, u_r.matched_count, u_r.modified_count, u_r.upserted_id)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "new_or_updated_user = make_user(0)\n", 184 | "u_r = fake_users.update_one({\"email\": new_or_updated_user[\"email\"]}, {\"$set\": new_or_updated_user}, upsert=True)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "print(dumps(fake_users.find_one({\"email\": new_or_updated_user[\"email\"]}), indent=2))" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "print(u_r.acknowledged, u_r.matched_count, u_r.modified_count, u_r.upserted_id)" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "fake_users.drop()" 212 | ] 213 | } 214 | ], 215 | "metadata": { 216 | "kernelspec": { 217 | "display_name": "Python 3", 218 | "language": "python", 219 | "name": "python3" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.6.5" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 2 236 | } 237 | -------------------------------------------------------------------------------- /notebooks/updates_every_one_second.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "slideshow": { 8 | "slide_type": "slide" 9 | } 10 | }, 11 | "outputs": [], 12 | "source": [ 13 | "from pymongo import MongoClient\n", 14 | "uri = \"mongodb+srv://m220-user:m220-pass@m220-lessons-mcxlm.mongodb.net/test\"\n", 15 | "client = MongoClient(uri)\n", 16 | "lessons = client.lessons\n", 17 | "inventory = lessons.inventory" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "slideshow": { 24 | "slide_type": "notes" 25 | } 26 | }, 27 | "source": [ 28 | "So this is a different notebook, called `updates_every_one_second` and its job is to perform 10 insert statements, each separated by 1 second. You can find this notebook attached to the lecture if you want to try it out.\n", 29 | "\n", 30 | "But here I'm just initializing my client, database and collection objects so I can run updates against the `inventory` collection." 
31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "slideshow": { 38 | "slide_type": "slide" 39 | } 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "import time\n", 44 | "import random\n", 45 | "fruits = [ \"strawberries\", \"bananas\", \"apples\" ]\n", 46 | "quantities = [ -1, -2, -4, -8 ]\n", 47 | "\n", 48 | "while True:\n", 49 | "    random_fruit = random.choice(fruits)\n", 50 | "    random_quantity = random.choice(quantities)\n", 51 | "    inventory.update_one({ \"type\": random_fruit, \"quantity\": { \"$gt\": 10 } }, { \"$inc\": { \"quantity\": random_quantity } })\n", 52 | "    time.sleep(1)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": { 58 | "slideshow": { 59 | "slide_type": "notes" 60 | } 61 | }, 62 | "source": [ 63 | "This script is going to mimic customers at the supermarket, buying random fruits in random quantities. The numbers (point) here are negative because buying an item causes the quantity of that item to decrease.\n", 64 | "\n", 65 | "I've also asked the program to sleep for a second after every purchase, so we can actually read the output we get from the change stream." 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": { 72 | "slideshow": { 73 | "slide_type": "slide" 74 | } 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "inventory.drop()\n", 79 | "fruits = [ \"strawberries\", \"bananas\", \"apples\" ]\n", 80 | "for fruit in fruits:\n", 81 | "    inventory.insert_one( { \"type\": fruit, \"quantity\": 100 } )\n", 82 | "\n", 83 | "while True:\n", 84 | "    random_fruit = random.choice(fruits)\n", 85 | "    random_quantity = random.choice(quantities)\n", 86 | "    inventory.update_one({ \"type\": random_fruit, \"quantity\": { \"$gt\": 10 } }, { \"$inc\": { \"quantity\": random_quantity } })\n", 87 | "    time.sleep(.1)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": { 93 | "slideshow": { 94 | "slide_type": "notes" 95 | } 96 | }, 97 | "source": [ 98 | "I've dropped the collection and repopulated it, so we start out with 100 of each fruit in stock.\n", 99 | "\n", 100 | "This is the same loop as before, except I'm only waiting a tenth of a second after each update. These customers are a little faster than the other ones." 101 | ] 102 | } 103 | ], 104 | "metadata": { 105 | "celltoolbar": "Slideshow", 106 | "kernelspec": { 107 | "display_name": "Python 3", 108 | "language": "python", 109 | "name": "python3" 110 | }, 111 | "language_info": { 112 | "codemirror_mode": { 113 | "name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | "mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.6.5" 122 | } 123 | }, 124 | "nbformat": 4, 125 | "nbformat_minor": 2 126 | } 127 | -------------------------------------------------------------------------------- /notebooks/write_concerns.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "
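The notes in this notebook mention reading the output from a change stream. The consumer side is covered in its own lesson, but a minimal sketch of what it could look like against the same `lessons.inventory` collection, reusing the `uri` defined at the top of this notebook, is:

```python
import pymongo

client = pymongo.MongoClient(uri)  # `uri` as defined earlier in this notebook
inventory = client.lessons.inventory

# watch() opens a change stream cursor; iterating it blocks until the
# next change arrives, so this loop prints each update as it happens.
try:
    with inventory.watch() as stream:
        for change in stream:
            print(change["operationType"], change.get("updateDescription"))
except pymongo.errors.PyMongoError:
    # The driver resumes change streams through transient errors, so
    # anything that reaches this handler is worth logging.
    print("change stream closed unexpectedly")
```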

Write Concerns

" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "slideshow": { 29 | "slide_type": "notes" 30 | } 31 | }, 32 | "source": [ 33 | "In this lesson we're going to discuss write concern, and how it can provide different levels of write durability in our application." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "slideshow": { 40 | "slide_type": "slide" 41 | } 42 | }, 43 | "source": [ 44 | "" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": { 50 | "slideshow": { 51 | "slide_type": "notes" 52 | } 53 | }, 54 | "source": [ 55 | "Write concern is a feature of the replica set, as we'll see in a minute. Anyway, this is a little supermarket (point) application, using a replica set as its data source.\n", 56 | "\n", 57 | "When the customer puts a food item into their cart, Mongo will send an insert statement, for that item, over to the current primary node in the replica set, and then wait for an acknowedgement back from the server.\n", 58 | "\n", 59 | "When the primary receives this write statement, the first thing it's gonna do is perform the write in its database." 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": { 65 | "slideshow": { 66 | "slide_type": "slide" 67 | } 68 | }, 69 | "source": [ 70 | "" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": { 77 | "slideshow": { 78 | "slide_type": "notes" 79 | } 80 | }, 81 | "source": [ 82 | "Once the write has been committed by the primary node, by default the primary will immediately send back an acknowledgement to the application that the write was committed." 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": { 88 | "slideshow": { 89 | "slide_type": "slide" 90 | } 91 | }, 92 | "source": [ 93 | "" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": { 99 | "slideshow": { 100 | "slide_type": "notes" 101 | } 102 | }, 103 | "source": [ 104 | "So at this point, the application receives the acknowledgement (point) that it was waiting for, and it considers the write to be complete.\n", 105 | "\n", 106 | "It assumes that the secondaries will replicate (point) the data soon, but it doesn't have any immediate proof that the secondaries did it." 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": { 112 | "slideshow": { 113 | "slide_type": "slide" 114 | } 115 | }, 116 | "source": [ 117 | "### writeConcern: { w: 1 }\n", 118 | "\n", 119 | "- Only requests an acknowledgement that **one** node applied the write\n", 120 | "- This is the default `writeConcern` in MongoDB" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": { 126 | "slideshow": { 127 | "slide_type": "notes" 128 | } 129 | }, 130 | "source": [ 131 | "So that was an example of a write with writeConcern `w: 1`. The number 1 (point) refers to the number of nodes that must perform the write statement before the client gets an acknowledgement back from the driver.\n", 132 | "\n", 133 | "And `w: 1` is the default writeConcern in MongoDB, so if a write is sent to MongoDB without a writeConcern specified, then Mongo will assume this `w: 1` (point) setting." 
134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": { 139 | "slideshow": { 140 | "slide_type": "slide" 141 | } 142 | }, 143 | "source": [ 144 | "" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": { 150 | "slideshow": { 151 | "slide_type": "notes" 152 | } 153 | }, 154 | "source": [ 155 | "So now let's consider a different level of write concern. Our shopping cart application sends a write statement to the primary node, and the primary applies that write just like it did before." 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": { 161 | "slideshow": { 162 | "slide_type": "slide" 163 | } 164 | }, 165 | "source": [ 166 | "" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": { 172 | "slideshow": { 173 | "slide_type": "notes" 174 | } 175 | }, 176 | "source": [ 177 | "But this time, the primary waits before sending an acknowledgement back to the client. What is it waiting for, you ask? Well, let me tell you.\n", 178 | "\n", 179 | "Before sending an acknowledgement of the write back to the client, the primary will actually wait for one of the secondary nodes to replicate the data. When the secondary applies this write, it will send an acknowledgement back to the primary, saying \"hey, I applied this write to my copy of the data!\"" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": { 185 | "slideshow": { 186 | "slide_type": "slide" 187 | } 188 | }, 189 | "source": [ 190 | "" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": { 196 | "slideshow": { 197 | "slide_type": "notes" 198 | } 199 | }, 200 | "source": [ 201 | "So now the primary knows that it (point) applied the write, and it also knows that a secondary (point) applied the write. The primary says \"ok, 2 nodes in this set have applied the write,\" and then sends an acknowledgement back to the client.\n", 202 | "\n", 203 | "As denoted here (point), this write was completed with `w: majority`. `w: majority` is another way of saying that we want a majority of nodes in the replica set to apply the write to their data before the client gets an acknowledgement back from the driver. In this case, we had a three-node replica set, and two nodes make a majority. So we sent the acknowledgement after two nodes applied this write.\n", 204 | "\n", 205 | "Think of `w: majority` as a kind of contract with the client that this write will not be lost, even in the event of hosts going down. If our application sends a write with `w: majority` and gets back an acknowledgement of the write, we know that a majority of nodes have applied the write. In the case of a 3-node replica set, it would require 2 nodes to perform the write. In the case of a 5-node replica set, it would require 3 nodes to perform the write.\n", 206 | "\n", 207 | "The current primary could actually go down immediately after we get an acknowledgement, and we could still rest assured that another node has captured the write we sent." 
208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": { 213 | "slideshow": { 214 | "slide_type": "slide" 215 | } 216 | }, 217 | "source": [ 218 | "### writeConcern: { w: 'majority' }\n", 219 | "\n", 220 | "- Requests acknowledgement that a **majority of nodes** in the replica set applied the write\n", 221 | "- Takes longer than `w: 1`\n", 222 | "- Is more durable than `w: 1`\n", 223 | "    - Useful for ensuring vital writes are majority-committed" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": { 229 | "slideshow": { 230 | "slide_type": "notes" 231 | } 232 | }, 233 | "source": [ 234 | "So with `w: majority` the connection is going to wait for a majority (point) of nodes to apply a write before sending an acknowledgement back to the client. For that reason, it takes a little longer, and is subject to replication lag. But there is no additional load on the primary, so the primary can still perform the same number of writes per second.\n", 235 | "\n", 236 | "However, `w: majority` essentially guarantees to the client that a write will not be rolled back during failover, because the write was committed to a majority of nodes.\n", 237 | "\n", 238 | "This is useful when some of our application's writes are vital to the success of the application. A common example of this is a new user on a website. These types of operations must succeed, because without an account the user cannot do anything on the site." 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": { 244 | "slideshow": { 245 | "slide_type": "slide" 246 | } 247 | }, 248 | "source": [ 249 | "### writeConcern: { w: 0 }\n", 250 | "\n", 251 | "- Does **not** request an acknowledgement that any nodes applied the write\n", 252 | "    - \"Fire-and-forget\"\n", 253 | "- Fastest writeConcern level\n", 254 | "- Least durable writeConcern" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": { 260 | "slideshow": { 261 | "slide_type": "notes" 262 | } 263 | }, 264 | "source": [ 265 | "So I just wanna discuss one more writeConcern, `w: 0`. By now you must have realized that when the value of `w` is a number, it's the number of nodes that must apply a write before the client receives an acknowledgement.\n", 266 | "\n", 267 | "We can pass any value to this `w` field, but obviously `w` can only be as high as the total number of nodes in our replica set.\n", 268 | "\n", 269 | "Following that rule, when `w` is 0, none of the nodes actually need to apply a write before the client receives an acknowledgement of that write. This means that when we're using `w: 0`, there is a chance we get an acknowledgement before any data has been written. So if the server crashes, we might lose some writes.\n", 270 | "\n", 271 | "This type of write is referred to as a \"fire-and-forget\" operation, because it sends the write and doesn't really worry about the response. But this isn't entirely true, because the acknowledgement from a `w: 0` write can also alert us to network errors and socket exceptions, so the client can implement some logic to figure out if a write was actually received by the database.\n", 272 | "\n", 273 | "Anyway, writing with `w: 0` is very fast and can be useful for less important writes that occur frequently. For example, if an Internet of Things device is sending a ping to Mongo every two minutes to report its status, it might be ok to speed up each write operation at the risk of losing a few writes." 
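As a driver-level reference for the three levels discussed in this lesson, here is a PyMongo sketch; the URI and the supermarket collection are illustrative:

```python
from pymongo import MongoClient
from pymongo.write_concern import WriteConcern

client = MongoClient("your-atlas-uri-here")  # placeholder URI
carts = client.supermarket.carts             # illustrative collection

# w: 1 (the default) -- acknowledged as soon as the primary applies it.
carts.insert_one({"item": "bread", "quantity": 1})

# w: "majority" -- acknowledged only after a majority of nodes apply it.
# Slower, but the write survives a primary failover.
majority_carts = carts.with_options(
    write_concern=WriteConcern(w="majority")
)
majority_carts.insert_one({"item": "milk", "quantity": 2})

# w: 0 -- fire-and-forget: no acknowledgement that any node applied it.
unacked_carts = carts.with_options(write_concern=WriteConcern(w=0))
result = unacked_carts.insert_one({"item": "status ping", "quantity": 1})
print(result.acknowledged)  # False
```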
274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "metadata": { 279 | "slideshow": { 280 | "slide_type": "slide" 281 | } 282 | }, 283 | "source": [ 284 | "## Summary\n", 285 | "\n", 286 | "- `w: 1` is the default writeConcern, and it makes sure writes have been committed by at least 1 node\n", 287 | "- `w: majority` ensures that writes are committed by a majority of nodes\n", 288 | "    - Slower, but very durable\n", 289 | "- `w: 0` does not ensure that a write was committed by any nodes\n", 290 | "    - Very fast, but less durable\n" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": { 296 | "slideshow": { 297 | "slide_type": "notes" 298 | } 299 | }, 300 | "source": [ 301 | "So to recap, `w: 1` is the default writeConcern, and it commits a write to 1 node before sending an acknowledgement to the client.\n", 302 | "\n", 303 | "`w: majority` commits a write to a majority of nodes in the replica set before sending an acknowledgement to the client. The application will have to wait a little longer for a response; however, it should not have a performance impact, as long as you have enough connections to the primary to handle all your requests.\n", 304 | "\n", 305 | "`w: 0` does not wait for the write to be committed by any node, but sends an acknowledgement back to the client immediately. So there's a higher chance that we lose data in the event of a primary going down." 306 | ] 307 | } 308 | ], 309 | "metadata": { 310 | "celltoolbar": "Slideshow", 311 | "kernelspec": { 312 | "display_name": "Python 3", 313 | "language": "python", 314 | "name": "python3" 315 | }, 316 | "language_info": { 317 | "codemirror_mode": { 318 | "name": "ipython", 319 | "version": 3 320 | }, 321 | "file_extension": ".py", 322 | "mimetype": "text/x-python", 323 | "name": "python", 324 | "nbconvert_exporter": "python", 325 | "pygments_lexer": "ipython3", 326 | "version": "3.6.5" 327 | }, 328 | "livereveal": { 329 | "transition": "none" 330 | } 331 | }, 332 | "nbformat": 4, 333 | "nbformat_minor": 2 334 | } 335 | -------------------------------------------------------------------------------- /notebooks/your_first_aggregation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "

Your First Aggregation

" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "slideshow": { 29 | "slide_type": "notes" 30 | } 31 | }, 32 | "source": [ 33 | "In this lesson we're going to briefly cover the MongoDB Aggregation Framework. We're going to use the Aggregation Builder in MongoDB Compass to export our aggregation to our language of choice." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "slideshow": { 40 | "slide_type": "slide" 41 | } 42 | }, 43 | "source": [ 44 | "- Aggregation is a pipeline\n", 45 | " - Pipelines are composed of stages, broad units of work.\n", 46 | " - Within stages, expressions are used to specify individual units of work.\n", 47 | "- Expressions are functions" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "slideshow": { 54 | "slide_type": "notes" 55 | } 56 | }, 57 | "source": [ 58 | "The Aggregation Framework is a pipeline, like the Unix shell or, to use an analogy, a conveyer belt in a factory." 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": { 64 | "slideshow": { 65 | "slide_type": "slide" 66 | } 67 | }, 68 | "source": [ 69 | "\n" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": { 75 | "slideshow": { 76 | "slide_type": "notes" 77 | } 78 | }, 79 | "source": [ 80 | "Documents flow through the pipeline like widgets on a factory conveyor belt. Each stage is like an an assembly station. Documents enter and some work is performed on them. In this example, we have a stage to filter out colors we don't want. Then we have a stage to transform the shape. Lastly, we have a stage that collects all input, and in this case gives us the ratio of colors.\n", 81 | "\n", 82 | "Within each of these stages, expressions are being used. Expressions are the individual tools that perform detailed work." 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": { 88 | "slideshow": { 89 | "slide_type": "slide" 90 | } 91 | }, 92 | "source": [ 93 | "Expressions are functions. Let's look at a function called **add** in Python, Java, JavaScript, and the Aggregation framework.\n" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": { 99 | "slideshow": { 100 | "slide_type": "subslide" 101 | } 102 | }, 103 | "source": [ 104 | "### Python\n", 105 | "```python\n", 106 | "def add(a, b):\n", 107 | " return a + b\n", 108 | "```" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": { 114 | "slideshow": { 115 | "slide_type": "subslide" 116 | } 117 | }, 118 | "source": [ 119 | "### Java\n", 120 | "```java\n", 121 | "static double add(T a, T b) {\n", 122 | " return a.doubleValue() + b.doubleValue();\n", 123 | "}\n", 124 | "```" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": { 130 | "slideshow": { 131 | "slide_type": "subslide" 132 | } 133 | }, 134 | "source": [ 135 | "### JavaScript\n", 136 | "```javascript\n", 137 | "function add(a, b) {\n", 138 | " return a + b\n", 139 | "}\n", 140 | "```" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": { 146 | "slideshow": { 147 | "slide_type": "subslide" 148 | } 149 | }, 150 | "source": [ 151 | "### Aggregation\n", 152 | "```\n", 153 | "{ \"$add\": [\"$a\", \"$b\"] }\n", 154 | "```" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "# Reviewer Note\n", 162 | "Read the following as notes slide as: \n", 163 | "\n", 164 | "All are equivalent. 
We have a course that dives much more deeply into aggregation, covering syntax and semantics and almost every stage. You can find more by looking in the lesson handout. Also included is a link to the Aggregation Quick Reference." 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": { 170 | "slideshow": { 171 | "slide_type": "notes" 172 | } 173 | }, 174 | "source": [ 175 | "All are equivalent, adding the values \"a\" and \"b\" together. We have a course that dives much more deeply into aggregation, covering syntax and semantics and almost every stage. You can find out more by following [this link](https://university.mongodb.com/courses/M121/about).\n", 176 | "\n", 177 | "A link to the Aggregation Quick Reference is [here](https://docs.mongodb.com/manual/meta/aggregation-quick-reference/)" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": { 183 | "slideshow": { 184 | "slide_type": "slide" 185 | } 186 | }, 187 | "source": [ 188 | "# Compass Aggregation Builder" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": { 194 | "slideshow": { 195 | "slide_type": "notes" 196 | } 197 | }, 198 | "source": [ 199 | "We have Compass open and connected to an Atlas cluster. We've selected the mflix database and the movies collection.\n", 200 | "\n", 201 | "Let's minimize this sidebar to make more room.\n", 202 | "\n", 203 | "Lastly, let's select the Aggregations tab." 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": { 209 | "slideshow": { 210 | "slide_type": "notes" 211 | } 212 | }, 213 | "source": [ 214 | "Ok, let's perform an aggregation. For this aggregation, we're going to get the average rating for all movies Sam Raimi has directed. On a scale of 1 to 10, we know Army of Darkness alone will put him at a 14.\n", 215 | "\n", 216 | "First, let's start with a ``$match`` stage, which is very similar to a standard query. The difference is really just semantics here, though. Where a find operation only returns us documents that match a predicate, the ``$match`` stage only lets documents that match continue flowing through the pipeline.\n", 217 | "\n", 218 | "```javascript\n", 219 | "{\n", 220 | "  directors: \"Sam Raimi\"\n", 221 | "}\n", 222 | "```" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": { 228 | "slideshow": { 229 | "slide_type": "notes" 230 | } 231 | }, 232 | "source": [ 233 | "And we can see a sampling of documents that meet these criteria. Let's add another stage, projecting away all information except the title and the imdb.rating.\n", 234 | "\n", 235 | "```javascript\n", 236 | "{\n", 237 | "  _id: 0,\n", 238 | "  title: 1,\n", 239 | "  \"imdb.rating\": 1\n", 240 | "}\n", 241 | "```\n", 242 | "\n", 243 | "Just like projection mechanics with find operations, if we want to remove the _id we have to explicitly do so." 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": { 249 | "slideshow": { 250 | "slide_type": "notes" 251 | } 252 | }, 253 | "source": [ 254 | "Lastly, let's add a `$group` stage. 
We'll group all incoming documents together and get an average rating.\n", 255 | "\n", 256 | "```javascript\n", 257 | "{\n", 258 | "  _id: 0,\n", 259 | "  avg_rating: {\n", 260 | "    \"$avg\": \"$imdb.rating\"\n", 261 | "  }\n", 262 | "}\n", 263 | "```\n", 264 | "\n", 265 | "It's grouped all incoming documents together because I've specified 0 as the value of _id, and using the $avg accumulator expression gathers all incoming imdb.rating values (point) and then averages them together.\n", 266 | "\n", 267 | "And as output we see the grouping criteria, which was none, and an average rating value, which is a very funny looking 10. I'll have to check the fonts on this computer." 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": { 273 | "slideshow": { 274 | "slide_type": "slide" 275 | } 276 | }, 277 | "source": [ 278 | "## Summary\n", 279 | "\n", 280 | "- Aggregation is a pipeline\n", 281 | "- Pipelines are composed of one or more stages\n", 282 | "- Stages use one or more expressions\n", 283 | "- Expressions are functions" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": { 289 | "slideshow": { 290 | "slide_type": "notes" 291 | } 292 | }, 293 | "source": [ 294 | "Again, you can find more information about our Aggregation course by following [this link](https://university.mongodb.com/courses/M121/about), and a link to the Aggregation Quick Reference is [here](https://docs.mongodb.com/manual/meta/aggregation-quick-reference/)" 295 | ] 296 | } 297 | ], 298 | "metadata": { 299 | "celltoolbar": "Slideshow", 300 | "kernelspec": { 301 | "display_name": "Python 3", 302 | "language": "python", 303 | "name": "python3" 304 | }, 305 | "language_info": { 306 | "codemirror_mode": { 307 | "name": "ipython", 308 | "version": 3 309 | }, 310 | "file_extension": ".py", 311 | "mimetype": "text/x-python", 312 | "name": "python", 313 | "nbconvert_exporter": "python", 314 | "pygments_lexer": "ipython3", 315 | "version": "3.6.4" 316 | } 317 | }, 318 | "nbformat": 4, 319 | "nbformat_minor": 2 320 | } 321 | -------------------------------------------------------------------------------- /notebooks/your_first_join.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": { 17 | "slideshow": { 18 | "slide_type": "slide" 19 | } 20 | }, 21 | "source": [ 22 | "
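Exporting the three-stage pipeline built in this aggregation lesson to Python gives something close to the sketch below; the client setup and variable names are illustrative rather than Compass' exact output:

```python
import pymongo

client = pymongo.MongoClient("your-atlas-uri-here")  # placeholder URI
movies = client.mflix.movies

pipeline = [
    # Only let Sam Raimi's movies continue through the pipeline.
    {"$match": {"directors": "Sam Raimi"}},
    # Keep just the title and imdb.rating, removing _id explicitly.
    {"$project": {"_id": 0, "title": 1, "imdb.rating": 1}},
    # Group everything into one bucket and average the ratings.
    {"$group": {"_id": 0, "avg_rating": {"$avg": "$imdb.rating"}}},
]

for doc in movies.aggregate(pipeline):
    print(doc)  # e.g. {'_id': 0, 'avg_rating': ...}
```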

Your First Join

" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "slideshow": { 29 | "slide_type": "notes" 30 | } 31 | }, 32 | "source": [ 33 | "In this lesson we're going to cover joins in MongoDB." 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "slideshow": { 40 | "slide_type": "slide" 41 | } 42 | }, 43 | "source": [ 44 | "- Join two collections of data\n", 45 | " - Movies and comments\n", 46 | "- Use new expressive `$lookup`\n", 47 | "- Build aggregation in Compass, and then export to language" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "slideshow": { 54 | "slide_type": "notes" 55 | } 56 | }, 57 | "source": [ 58 | "Joins are used to combine data from two or more different collections. This is true for all database systems, but the implementation is different in MongoDB.\n", 59 | "\n", 60 | "The join we're doing is between the `movies` and `comments` collections. Each comment posted by a user is associated with one specific movie, and we want to count how many comments are associated with each movie. Users use comments as a way to discuss movies, so we can think of this like a popularity contest - which movies are talked about the most on our site?\n", 61 | "\n", 62 | "We're going to use the new expressive `$lookup` so we can express a pipeline for the data that we're joining. This might not make sense right now, so we'll explore what that means in a minute. We're going to build this pipeline in Compass using the Mongo shell syntax, and then use Compass' Export-to-Language feature to produce code that we can use directly in our application's native language." 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "slideshow": { 70 | "slide_type": "slide" 71 | } 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "{ year: { '$gte': 1980, '$lt': 1990 } }" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": { 81 | "slideshow": { 82 | "slide_type": "notes" 83 | } 84 | }, 85 | "source": [ 86 | "So I've just put a little match stage here, because I'm personally only concerned with movies that came out in the 1980s: a decade highlighted by the rise of the personal computer, the mullet hairstyle, and Black Sabbath.\n", 87 | "\n", 88 | "I've specified a `$match` (point) stage in Compass, as the first stage in my pipeline." 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "slideshow": { 96 | "slide_type": "slide" 97 | } 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "{\n", 102 | " from: 'comments',\n", 103 | " let: { 'id': '$_id' },\n", 104 | " pipeline: [\n", 105 | " { '$match':\n", 106 | " { '$expr': { '$eq': [ '$movie_id', '$$id' ] } }\n", 107 | " }\n", 108 | " ],\n", 109 | " as: 'movie_comments'\n", 110 | "}" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": { 116 | "slideshow": { 117 | "slide_type": "notes" 118 | } 119 | }, 120 | "source": [ 121 | "Now this is where my join happens. This is a `$lookup` (point) stage, and in the expressive version, there are four fields: `from`, `let`, `pipeline`, and `as`.\n", 122 | "\n", 123 | "`from` is the collection that we're joining from. We're running this aggregation against the `movies` collection, and we want to join from the `comments` collection.\n", 124 | "\n", 125 | "`let` is where this starts to get complicated. 
The `pipeline` (point) we write inside the join has access to the fields of documents inside the `comments` collection, because that's the collection we're joining. But it doesn't have access to fields inside the `movies` documents, unless we specify them in `let`. We want to use the `_id` of our `movies` documents inside our pipeline, so we declare this variable `id`, and assign to it the `$_id`.\n", 126 | "\n", 127 | "If we look inside the pipeline, we can see that we referred to this variable using two (point) dollar signs, because the variables inside the pipeline with one (point) dollar sign refer to fields inside the `comments` documents. The variables with two dollar signs refer to variables we declared in `let`.\n", 128 | "\n", 129 | "The pipeline itself only has one match stage right now, and it's matching the `movie_id` of the comment to the `_id` of the movie. We've set `as` to `movie_comments`, so each movie document will have an array field called `movie_comments` that contains a list of all the comments associated with that movie." 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": { 135 | "slideshow": { 136 | "slide_type": "notes" 137 | } 138 | }, 139 | "source": [ 140 | "Now, I embedded all the `comment` documents inside each movie, but all I really want to figure out is how many comments are associated with each movie. I don't really care what each comment says, who wrote it, or when it was written." 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "slideshow": { 148 | "slide_type": "slide" 149 | } 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "{\n", 154 | "    from: 'comments',\n", 155 | "    let: { 'id': '$_id' },\n", 156 | "    pipeline: [\n", 157 | "        { '$match':\n", 158 | "            { '$expr': { '$eq': [ '$movie_id', '$$id' ] } }\n", 159 | "        },\n", 160 | "        {\n", 161 | "            '$count': 'count'\n", 162 | "        }\n", 163 | "    ],\n", 164 | "    as: 'movie_comments'\n", 165 | "}" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": { 171 | "slideshow": { 172 | "slide_type": "notes" 173 | } 174 | }, 175 | "source": [ 176 | "So here I've specified a new stage in my pipeline. It's a `$count` stage that counts all documents that pass into it. Since we already used a `$match` stage to ensure comments only belonged to this movie, this meets our needs perfectly. And as we can see, we've ended up with a single count of comments for each movie, instead of an array of `comment` documents.\n", 177 | "\n", 178 | "This pipeline (point) feature in expressive lookup is very useful, because it allows us to transform the comments data returned by a join, before that data gets embedded inside the movies document." 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "slideshow": { 185 | "slide_type": "notes" 186 | } 187 | }, 188 | "source": [ 189 | "Now that we've written out our pipeline, and verified that our output documents look the way we expect, we can export the pipeline to our chosen programming language. The languages currently supported by Compass are Python 3, Node.js, Java, and C#." 
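As a sketch of what that export could look like in Python (the surrounding client setup is illustrative; the pipeline itself is the one built in this lesson):

```python
import pymongo

client = pymongo.MongoClient("your-atlas-uri-here")  # placeholder URI
movies = client.mflix.movies

pipeline = [
    # Only movies released in the 1980s.
    {"$match": {"year": {"$gte": 1980, "$lt": 1990}}},
    # Expressive $lookup: join each movie to its comments, then reduce
    # the joined documents to a single count inside the sub-pipeline.
    {"$lookup": {
        "from": "comments",
        "let": {"id": "$_id"},
        "pipeline": [
            {"$match": {"$expr": {"$eq": ["$movie_id", "$$id"]}}},
            {"$count": "count"},
        ],
        "as": "movie_comments",
    }},
]

for movie in movies.aggregate(pipeline):
    # movie_comments is [] for movies with no comments, or [{"count": n}].
    print(movie["title"], movie["movie_comments"])
```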
190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": { 195 | "slideshow": { 196 | "slide_type": "slide" 197 | } 198 | }, 199 | "source": [ 200 | "## Summary\n", 201 | "\n", 202 | "- Expressive lookup allows us to apply aggregation pipelines to data - before the data is joined\n", 203 | "- `let` allows us to declare variables in our pipeline, referring to document fields in our source collection\n", 204 | "- Compass' Export-to-Language feature produces aggregations in our application's native language" 205 | ] 206 | } 207 | ], 208 | "metadata": { 209 | "celltoolbar": "Slideshow", 210 | "kernelspec": { 211 | "display_name": "Python 3", 212 | "language": "python", 213 | "name": "python3" 214 | }, 215 | "language_info": { 216 | "codemirror_mode": { 217 | "name": "ipython", 218 | "version": 3 219 | }, 220 | "file_extension": ".py", 221 | "mimetype": "text/x-python", 222 | "name": "python", 223 | "nbconvert_exporter": "python", 224 | "pygments_lexer": "ipython3", 225 | "version": "3.6.5" 226 | } 227 | }, 228 | "nbformat": 4, 229 | "nbformat_minor": 2 230 | } 231 | -------------------------------------------------------------------------------- /notebooks/your_first_read.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pymongo\n", 10 | "uri = \"mongodb+srv://m220-user:m220-pass@m220-lessons-mcxlm.mongodb.net/test\"\n", 11 | "client = pymongo.MongoClient(uri)\n", 12 | "m220 = client.m220\n", 13 | "movies = m220.movies" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "movies.find_one()" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "movies.find_one( { \"cast\": \"Salma Hayek\" } )" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "movies.find( { \"cast\": \"Salma Hayek\" } )" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "movies.find( { \"cast\": \"Salma Hayek\" } ).count()" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "cursor = movies.find( { \"cast\": \"Salma Hayek\" } )\n", 59 | "from bson.json_util import dumps\n", 60 | "print(dumps(cursor, indent=2))" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "cursor = movies.find( { \"cast\": \"Salma Hayek\" }, { \"title\": 1 } )\n", 70 | "print(dumps(cursor, indent=2))" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "cursor = movies.find( { \"cast\": \"Salma Hayek\" }, { \"title\": 1, \"_id\": 0 } )\n", 80 | "print(dumps(cursor, indent=2))" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [] 89 | } 90 | ], 91 | "metadata": { 92 | "kernelspec": { 93 | "display_name": "Python 3", 94 | "language": "python", 95 | "name": "python3" 96 | }, 97 | "language_info": { 98 | "codemirror_mode": { 99 | "name": "ipython", 100 | 
"version": 3 101 | }, 102 | "file_extension": ".py", 103 | "mimetype": "text/x-python", 104 | "name": "python", 105 | "nbconvert_exporter": "python", 106 | "pygments_lexer": "ipython3", 107 | "version": "3.6.5" 108 | } 109 | }, 110 | "nbformat": 4, 111 | "nbformat_minor": 2 112 | } 113 | -------------------------------------------------------------------------------- /notebooks/your_first_write.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pymongo\n", 10 | "uri = \"mongodb+srv://m220-user:m220-pass@m220-lessons-mcxlm.mongodb.net/test\"\n", 11 | "client = pymongo.MongoClient(uri)\n", 12 | "db = client.electronicsDB" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "db.collection_names()" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "vg = db.video_games" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "insert_result = vg.insert_one({\"title\": \"Fortnite\", \"year\": 2018})" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "insert_result.acknowledged" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "insert_result.inserted_id" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "vg.find_one( { \"_id\": insert_result.inserted_id } )" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "fortnite_doc = {\"title\": \"Fortnite\", \"year\": 2018}\n", 76 | "\n", 77 | "upsert_result = vg.update_one( { \"title\": \"Fortnite\" } , { \"$set\": fortnite_doc }, upsert=True )" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "upsert_result.raw_result" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "rocketleague_doc = {\"title\": \"Rocket League\", \"year\": 2015}\n", 96 | "\n", 97 | "upsert_result = vg.update_one( { \"title\": \"Rocket League\" }, { \"$set\": rocketleague_doc }, upsert=True )" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "upsert_result.raw_result" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [] 115 | } 116 | ], 117 | "metadata": { 118 | "kernelspec": { 119 | "display_name": "Python 3", 120 | "language": "python", 121 | "name": "python3" 122 | }, 123 | "language_info": { 124 | "codemirror_mode": { 125 | "name": "ipython", 126 | "version": 3 127 | }, 128 | "file_extension": ".py", 129 | "mimetype": "text/x-python", 130 | "name": "python", 131 | "nbconvert_exporter": "python", 132 | "pygments_lexer": "ipython3", 133 | "version": "3.6.5" 134 | } 135 | }, 136 | "nbformat": 4, 137 | 
"nbformat_minor": 2 138 | } 139 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==1.0 2 | Flask-Bcrypt==0.7.1 3 | Flask-Login==0.4.0 4 | pymongo==3.7.0 5 | dnspython==1.15.0 6 | pytest-flask==0.10.0 7 | Flask-Cors==3.0.9 8 | flask-jwt-extended==3.7.0 9 | faker==0.8.13 10 | jupyter==1.0.0 11 | jupyter-client==5.2.3 12 | jupyter-console==5.2.0 13 | jupyter-core==4.4.0 14 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | from mflix.factory import create_app 2 | 3 | import os 4 | import configparser 5 | 6 | 7 | config = configparser.ConfigParser() 8 | config.read(os.path.abspath(os.path.join(".ini"))) 9 | 10 | if __name__ == "__main__": 11 | app = create_app() 12 | app.config['DEBUG'] = True 13 | app.config['MFLIX_DB_URI'] = config['PROD']['MFLIX_DB_URI'] 14 | app.config['SECRET_KEY'] = config['PROD']['SECRET_KEY'] 15 | 16 | app.run() 17 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oleg-belov/M220P-MongoDB-for-Python-Developers/62f517e1f120a9a4d9b2255e1d2b20ad03bc2d17/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from mflix.factory import create_app 3 | 4 | import os 5 | import configparser 6 | 7 | config = configparser.ConfigParser() 8 | config.read(os.path.abspath(os.path.join(".ini"))) 9 | 10 | 11 | @pytest.fixture 12 | def app(): 13 | app = create_app() 14 | app.config['SECRET_KEY'] = config['TEST']['SECRET_KEY'] 15 | app.config['MFLIX_DB_URI'] = config['TEST']['MFLIX_DB_URI'] 16 | return app 17 | -------------------------------------------------------------------------------- /tests/test_connection_pooling.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test file for database methods written in db.py 3 | 4 | All test methods must receive client as an argument, 5 | otherwise the database variable won't be configured correctly 6 | """ 7 | from mflix.db import get_configuration 8 | import pytest 9 | 10 | 11 | @pytest.mark.connection_pooling 12 | def test_max_pool_size(client): 13 | (pool_size, _, _) = get_configuration() 14 | assert pool_size == 50 15 | -------------------------------------------------------------------------------- /tests/test_create_update_comments.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test file for database methods written in db.py 3 | 4 | All test methods must receive client as an argument, 5 | otherwise the database variable won't be configured correctly 6 | """ 7 | from mflix.db import add_comment, update_comment, delete_comment, get_movie 8 | from mflix.api.user import User 9 | from pymongo.results import InsertOneResult 10 | from datetime import datetime 11 | import pytest 12 | 13 | test_user = { 14 | "name": "foobar", 15 | "email": "foobar@baz.com", 16 | } 17 | 18 | fake_user = { 19 | "name": "barfoo", 20 | "email": "baz@foobar.com" 21 | } 22 | # The Martian 23 | movie_id = "573a13eff29313caabdd82f3" 24 | now = datetime.now() 25 | comment = { 26 | 
'text': 'fe-fi-fo-fum', 27 | 'id': '' 28 | } 29 | user = User(test_user) 30 | n_user = User(fake_user) 31 | 32 | 33 | @pytest.mark.create_update_comments 34 | def test_add_comment(client): 35 | result = add_comment(movie_id, user, comment['text'], now) 36 | assert isinstance(result, InsertOneResult) 37 | assert result.acknowledged is True 38 | assert result.inserted_id is not None 39 | 40 | comments = get_movie(movie_id).get('comments') 41 | assert comments[0].get('_id') == result.inserted_id 42 | assert comments[0].get('text') == comment['text'] 43 | comment['id'] = result.inserted_id 44 | 45 | 46 | @pytest.mark.create_update_comments 47 | def test_update_comment(client): 48 | result = update_comment(comment['id'], user.email, 'foo foo foo', now) 49 | assert result.acknowledged is True 50 | 51 | comments = get_movie(movie_id).get('comments') 52 | assert result.raw_result.get('nModified') == 1 53 | assert comments[0].get('text') == 'foo foo foo' 54 | 55 | 56 | @pytest.mark.create_update_comments 57 | def test_do_not_update_comment_if_is_not_owner(client): 58 | result = update_comment(comment['id'], n_user.email, 'blah', now) 59 | assert result.raw_result.get('nModified') == 0 60 | -------------------------------------------------------------------------------- /tests/test_db_connection.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test file for database methods written in db.py 3 | 4 | All test methods must receive client as an argument, 5 | otherwise the database variable won't be configured correctly 6 | """ 7 | from mflix.db import get_movies, get_movie 8 | import pytest 9 | from pymongo import MongoClient 10 | 11 | 12 | def get_coll_names(config): 13 | """ 14 | Method used in unit tests. Do not alter. You can cheat, but you only defeat 15 | yourself. 
16 | """ 17 | db = MongoClient(config['MFLIX_DB_URI'])["mflix"] 18 | return db.list_collection_names() 19 | 20 | 21 | @pytest.mark.connection 22 | @pytest.mark.usefixtures('config') 23 | def test_atlas_setup(client, config): 24 | result = get_coll_names(config) 25 | assert all(x in result for x in ['users', 'comments', 'movies']) 26 | assert len(result) >= 5 27 | 28 | 29 | @pytest.mark.connection 30 | def test_basic_movies(client): 31 | (actual, num_found) = get_movies({}, 0, 20) 32 | assert num_found == 46014 33 | assert(len(list(actual))) == 20 34 | 35 | 36 | @pytest.mark.connection 37 | def test_search_by_movie_id(client): 38 | actual = get_movie("573a13eff29313caabdd82f3") 39 | assert actual['title'] == 'The Martian' 40 | 41 | 42 | @pytest.mark.connection 43 | def test_simple_text_search(client): 44 | (actual, _) = get_movies({"$text": {"$search": "The Martian"}}, 0, 20) 45 | assert len(list(actual)) >= 4 46 | -------------------------------------------------------------------------------- /tests/test_delete_comments.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test file for database methods written in db.py 3 | 4 | All test methods must receive client as an argument, 5 | otherwise the database variable won't be configured correctly 6 | """ 7 | from mflix.db import add_comment, update_comment, delete_comment, get_movie 8 | from mflix.api.user import User 9 | from datetime import datetime 10 | import pytest 11 | 12 | test_user = { 13 | "name": "foobar", 14 | "email": "foobar@baz.com", 15 | } 16 | 17 | fake_user = { 18 | "name": "barfoo", 19 | "email": "baz@foobar.com" 20 | } 21 | # The Martian 22 | movie_id = "573a13eff29313caabdd82f3" 23 | now = datetime.now() 24 | comment = { 25 | 'text': 'fe-fi-fo-fum', 26 | 'id': '' 27 | } 28 | comment_update = { 29 | 'text': 'frobscottle', 30 | 'id': '' 31 | } 32 | user = User(test_user) 33 | n_user = User(fake_user) 34 | 35 | 36 | # this test is only here to insert a comment 37 | # you should have implemented this in the previous ticket 38 | @pytest.mark.delete_comments 39 | def test_add_comment_should_be_implemented(client): 40 | # we need to add a comment 41 | # this test serves to do that 42 | result = add_comment(movie_id, user, comment['text'], now) 43 | comments = get_movie(movie_id).get('comments') 44 | assert comments[0].get('_id') == result.inserted_id 45 | assert comments[0].get('text') == comment['text'] 46 | comment['id'] = result.inserted_id 47 | 48 | 49 | @pytest.mark.delete_comments 50 | def test_should_not_delete_comment_if_email_does_not_match(client): 51 | result = delete_comment(comment['id'], "fakeemail@email.com") 52 | assert result.raw_result.get('n') == 0 53 | 54 | 55 | @pytest.mark.delete_comments 56 | def test_delete_comment_should_delete_if_email_is_owner(client): 57 | result = delete_comment(comment['id'], test_user['email']) 58 | assert result.raw_result.get('n') == 1 59 | -------------------------------------------------------------------------------- /tests/test_error_handling.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test file for database methods written in db.py 3 | 4 | All test methods must receive client as an argument, 5 | otherwise the database variable won't be configured correctly 6 | """ 7 | from mflix.db import get_movie 8 | import pytest 9 | 10 | 11 | @pytest.mark.error_handling 12 | def test_no_error(client): 13 | response = get_movie("foobar") 14 | assert response is None 15 | 
-------------------------------------------------------------------------------- /tests/test_facets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test file for facet search method written in db.py 3 | 4 | All test methods must receive client as an argument, 5 | otherwise the database variable won't be configured correctly 6 | """ 7 | import pytest 8 | from mflix.db import get_movies_faceted 9 | 10 | 11 | @pytest.mark.facets 12 | def test_faceted_search_should_return_rating_and_runtime_buckets(client): 13 | filter = {'cast': ['Tom Hanks']} 14 | result = get_movies_faceted(filter, 0, 20) 15 | # expecting the first entry in the returned tuple to be a dictionary with 16 | # the key 'movies' 17 | assert len(result[0]['movies']) == 20 18 | assert len(result[0]['rating']) == 5 19 | assert len(result[0]['runtime']) == 4 20 | # expecting the second entry in the returned tuple to be the number of results 21 | assert result[1] == 51 22 | 23 | 24 | 25 | @pytest.mark.facets 26 | def test_faceted_search_should_also_support_paging(client): 27 | filter = {'cast': ['Susan Sarandon'], } 28 | result = get_movies_faceted(filter, 3, 20) 29 | assert(len(result[0]['movies'])) == 3 30 | assert len(result[0]['rating']) == 1 31 | assert len(result[0]['runtime']) == 2 32 | assert result[1] == 63 33 | -------------------------------------------------------------------------------- /tests/test_get_comments.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test file for database methods written in db.py 3 | 4 | All test methods must receive client as an argument, 5 | otherwise the database variable won't be configured correctly 6 | """ 7 | from mflix.db import get_movie 8 | from random import randint 9 | import pytest 10 | 11 | 12 | @pytest.mark.get_comments 13 | def test_fetch_comments_for_movie(client): 14 | # X-Men 15 | movie_id = "573a139af29313caabcf0f51" 16 | result = get_movie(movie_id) 17 | assert len(result.get('comments', [])) == 432 18 | 19 | 20 | @pytest.mark.get_comments 21 | def test_fetch_comments_for_another_movie(client): 22 | # 300 23 | movie_id = "573a13b1f29313caabd36321" 24 | result = get_movie(movie_id) 25 | assert len(result.get('comments', [])) == 409 26 | 27 | 28 | @pytest.mark.get_comments 29 | def test_comments_should_be_sorted_by_date(client): 30 | # in order from most to least recent 31 | movie_ids = [ 32 | "573a1391f29313caabcd8414", 33 | "573a1391f29313caabcd9058", 34 | "573a1391f29313caabcd91ed", 35 | "573a1392f29313caabcd9d4f", 36 | "573a1392f29313caabcdae3d", 37 | "573a1392f29313caabcdb40b", 38 | "573a1392f29313caabcdb585", 39 | "573a1393f29313caabcdbe7c", 40 | "573a1393f29313caabcdd6aa" 41 | ] 42 | for id in movie_ids: 43 | comments = get_movie(id).get('comments', []) 44 | test_comments = sorted( 45 | comments[:], 46 | key=lambda c: c.get('date'), 47 | reverse=True 48 | ) 49 | for _ in range(0, min(10, len(comments))): 50 | rand_int = randint(0, len(comments) - 1) 51 | assert comments[rand_int] == test_comments[rand_int] 52 | -------------------------------------------------------------------------------- /tests/test_migration.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test file for database methods written in db.py 3 | 4 | All test methods must receive client as an argument, 5 | otherwise the database variable won't be configured correctly 6 | """ 7 | from mflix.db import get_movie 8 | from datetime import datetime 9 | import pytest 10 
| 11 | 12 | @pytest.mark.migration 13 | def test_proper_type(client): 14 | result = get_movie("573a1391f29313caabcd8526") 15 | assert isinstance(result.get('lastupdated'), datetime) 16 | -------------------------------------------------------------------------------- /tests/test_paging.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test file for database methods written in db.py 3 | 4 | All test methods must receive client as an argument, 5 | otherwise the database variable won't be configured correctly 6 | """ 7 | import pytest 8 | from mflix.db import get_movies 9 | 10 | 11 | @pytest.mark.paging 12 | def test_supports_paging_by_cast(client): 13 | filter = {'cast': ['Tom Hanks']} 14 | (movies0, results0) = get_movies(filter, 0, 20) 15 | assert len(list(movies0)) == 20 16 | assert results0 == 51 17 | (movies1, results1) = get_movies(filter, 1, 20) 18 | assert results1 == 51 19 | assert len(list(movies1)) == 20 20 | (movies2, results2) = get_movies(filter, 2, 20) 21 | assert len(list(movies2)) == 11 22 | 23 | 24 | @pytest.mark.paging 25 | def test_supports_paging_by_genre(client): 26 | filter = {'genres': ['History']} 27 | (movies0, results0) = get_movies(filter, 0, 20) 28 | assert len(list(movies0)) == 20 29 | assert results0 == 1503 30 | last_page = int(1503 / 20) 31 | (movies2, results2) = get_movies(filter, last_page, 20) 32 | assert len(list(movies2)) == results2 % 20 33 | 34 | 35 | @pytest.mark.paging 36 | def test_supports_paging_by_text(client): 37 | filter = {'text': 'bank robbery'} 38 | (movies0, results0) = get_movies(filter, 0, 20) 39 | assert len(list(movies0)) == 20 40 | assert results0 == 1084 41 | last_page = int(1084 / 20) 42 | (movies2, results2) = get_movies(filter, last_page, 20) 43 | assert len(list(movies2)) == 1084 % 20 44 | -------------------------------------------------------------------------------- /tests/test_projection.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test file for database methods written in db.py 3 | 4 | All test methods must receive client as an argument, 5 | otherwise the database variable won't be configured correctly 6 | """ 7 | import pytest 8 | from mflix.db import get_movies_by_country 9 | 10 | 11 | @pytest.mark.projection 12 | def test_basic_country_search_db(client): 13 | countries = ['Kosovo'] 14 | result = get_movies_by_country(countries) 15 | assert len(result) == 2 16 | 17 | 18 | @pytest.mark.projection 19 | def test_basic_country_search_shape_db(client): 20 | countries = ['Russia', 'Japan'] 21 | result = get_movies_by_country(countries) 22 | assert len(result) == 2421 23 | # we should only be returning the _id and title fields 24 | encountered_keys = {} 25 | for movie in result: 26 | for k in movie: 27 | encountered_keys[k] = encountered_keys.get(k, 0) + 1 28 | 29 | assert len(list(encountered_keys.keys())) == 2 30 | assert encountered_keys['_id'] == 2421 31 | assert encountered_keys['title'] == 2421 32 | -------------------------------------------------------------------------------- /tests/test_text_and_subfield_search.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test file for database methods written in db.py 3 | 4 | All test methods must receive client as an argument, 5 | otherwise the database variable won't be configured correctly 6 | """ 7 | import pytest 8 | from mflix.db import get_movies 9 | 10 | 11 | @pytest.mark.text_and_subfield_search 12 | def 
test_text_search(client): 13 | # ensure you sort text results based on their metaScore!! 14 | filter = {'text': 'mongo'} 15 | (movies, results) = get_movies(filter, 0, 20) 16 | movies = list(movies) 17 | assert len(movies) == 9 18 | assert results == 9 19 | assert movies[0].get('title') == "Flash Gordon Conquers the Universe" 20 | 21 | 22 | @pytest.mark.text_and_subfield_search 23 | def test_genre_search(client): 24 | filter = {'genres': ['Action']} 25 | (movies, results) = get_movies(filter, 0, 20) 26 | assert len(list(movies)) == 20 27 | assert results == 5917 28 | 29 | 30 | @pytest.mark.text_and_subfield_search 31 | def test_multiple_genre_search(client): 32 | filter = {'genres': ['Action', 'Adventure']} 33 | (movies, results) = get_movies(filter, 0, 25) 34 | assert len(list(movies)) == 25 35 | assert results == 8385 36 | 37 | 38 | @pytest.mark.text_and_subfield_search 39 | def test_cast_search(client): 40 | filter = {'cast': ['Elon Musk']} 41 | (movies, results) = get_movies(filter, 0, 20) 42 | assert len(list(movies)) == 1 43 | assert results == 1 44 | 45 | 46 | @pytest.mark.text_and_subfield_search 47 | def test_multiple_cast_search(client): 48 | filter = {'cast': ['Elon Musk', 'Robert Redford', 'Julia Roberts']} 49 | (movies, results) = get_movies(filter, 0, 33) 50 | assert (len(list(movies))) == 33 51 | assert results == 75 52 | -------------------------------------------------------------------------------- /tests/test_timeouts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test file for database methods written in db.py 3 | 4 | All test methods must receive client as an argument, 5 | otherwise the database variable won't be configured correctly 6 | """ 7 | from mflix.db import get_configuration 8 | import pytest 9 | 10 | 11 | @pytest.mark.timeouts 12 | def test_proper_type(client): 13 | (_, w_concern, _) = get_configuration() 14 | assert w_concern._WriteConcern__document['wtimeout'] == 2500 15 | -------------------------------------------------------------------------------- /tests/test_user_management.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from mflix.db import delete_user, get_user_session, get_user, add_user, \ 3 | login_user, logout_user 4 | 5 | test_user = { 6 | 'name': 'Magical Mr. 
Mistoffelees', 7 | 'email': 'magicz@cats.com', 8 | 'password': 'somehashedpw', 9 | 'jwt': 'someneatjwt' 10 | } 11 | 12 | 13 | @pytest.mark.user_management 14 | def test_registration(client): 15 | delete_user('magicz@cats.com') 16 | # the password will be hashed at the api layer 17 | # NEVER 18 | # NEVER 19 | # NEVER store passwords in plaintext 20 | 21 | result = add_user(test_user.get('name'), test_user.get( 22 | 'email'), test_user.get('password')) 23 | 24 | assert result == {'success': True} 25 | 26 | found_user = get_user(test_user.get('email')) 27 | assert found_user.get('name') == test_user.get('name') 28 | assert found_user.get('email') == test_user.get('email') 29 | assert found_user.get('password') == test_user.get('password') 30 | 31 | 32 | @pytest.mark.user_management 33 | def test_no_duplicate_registrations(client): 34 | result = add_user(test_user.get('name'), test_user.get( 35 | 'email'), test_user.get('password')) 36 | 37 | assert result == {'error': "A user with the given email already exists."} 38 | 39 | 40 | @pytest.mark.user_management 41 | def test_login(client): 42 | result = login_user(test_user.get('email'), test_user.get('jwt')) 43 | assert result == {'success': True} 44 | session_result = get_user_session(test_user.get('email')) 45 | assert session_result.get('user_id') == test_user.get('email') 46 | assert session_result.get('jwt') == test_user.get('jwt') 47 | 48 | 49 | @pytest.mark.user_management 50 | def test_logout(client): 51 | result = logout_user(test_user.get('email')) 52 | assert result == {'success': True} 53 | session_result = get_user_session(test_user.get('email')) 54 | assert session_result is None 55 | -------------------------------------------------------------------------------- /tests/test_user_preferences.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from mflix.db import update_prefs, delete_user, get_user, add_user 3 | from pymongo.results import UpdateResult 4 | 5 | 6 | @pytest.mark.user_preferences 7 | def test_invalid_user_should_not_have_preferences(client): 8 | # delete the test user if it exists 9 | delete_user('foobaz@bar.com') 10 | preferences = { 11 | "color": "green", 12 | "favorite_letter": "q", 13 | "favorite_number": 42 14 | } 15 | 16 | result = update_prefs('foobaz@bar.com', preferences) 17 | 18 | assert result.get('error', None) is not None 19 | assert isinstance(result, UpdateResult) is False 20 | 21 | 22 | @pytest.mark.user_preferences 23 | def test_should_not_allow_None_as_prefs(client): 24 | add_user("foo", "foobaz@bar.com", "foobar") 25 | prefs = None 26 | update_prefs("foobaz@bar.com", prefs) 27 | 28 | user = get_user("foobaz@bar.com") 29 | assert user.get("preferences") == {} 30 | 31 | 32 | @pytest.mark.user_preferences 33 | def test_valid_user_preferences(client): 34 | # create the user 35 | add_user("foo", "foobaz@bar.com", "foobar") 36 | preferences = { 37 | "favorite_cast_member": "Goldie Hawn", 38 | "favorite_genre": "Comedy", 39 | "preferred_ratings": ["G", "PG", "PG-13"] 40 | } 41 | 42 | # update user preferences 43 | result = update_prefs("foobaz@bar.com", preferences) 44 | assert result.matched_count == 1 45 | assert result.modified_count == 1 46 | 47 | # get the user 48 | user = get_user("foobaz@bar.com") 49 | assert user.get('preferences') is not None 50 | new_preferences = { 51 | "favorite_cast_member": "Daniel Day-Lewis", 52 | "favorite_genre": "Drama", 53 | "preferred_ratings": ["R"] 54 | } 55 | result = update_prefs(user.get('email'), 
new_preferences) 56 | assert result.matched_count == 1 57 | assert result.modified_count == 1 58 | user = get_user("foobaz@bar.com") 59 | assert user.get("preferences") == new_preferences 60 | 61 | 62 | @pytest.mark.user_preferences 63 | def test_empty_prefs_are_valid(client): 64 | new_prefs = {} 65 | result = update_prefs("foobaz@bar.com", new_prefs) 66 | 67 | assert result.matched_count == 1 68 | assert result.modified_count == 1 69 | 70 | user = get_user("foobaz@bar.com") 71 | assert user.get("preferences") == {} 72 | -------------------------------------------------------------------------------- /tests/test_user_report.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test file for database methods written in db.py 3 | 4 | All test methods must receive client as an argument, 5 | otherwise the database variable won't be configured correctly 6 | """ 7 | from mflix.db import most_active_commenters 8 | import pytest 9 | 10 | 11 | @pytest.mark.user_report 12 | def test_cast_popularity(client): 13 | result = most_active_commenters() 14 | assert len(result) == 20 15 | 16 | 17 | @pytest.mark.user_report 18 | def test_check_report(client): 19 | result = most_active_commenters() 20 | assert {'_id': 'roger_ashton-griffiths@gameofthron.es', 'count': 909} in result 21 | --------------------------------------------------------------------------------
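A note on the user report exercised by test_user_report.py above: `most_active_commenters` boils down to a group-and-count aggregation over the `comments` collection, sorted and trimmed to the twenty busiest commenters. The real method is yours to implement in `mflix/db.py`; the following is a minimal sketch under that assumption, with a stand-in `comments` handle and localhost URI in place of the application's configured client.

from pymongo import MongoClient

# Stand-in handle: in mflix/db.py this comes from the application's configured client.
comments = MongoClient("mongodb://localhost:27017")["mflix"]["comments"]


def most_active_commenters():
    pipeline = [
        # One bucket per commenter email, counting that user's comments.
        {"$group": {"_id": "$email", "count": {"$sum": 1}}},
        # Most active first, keeping only the top twenty for the report.
        {"$sort": {"count": -1}},
        {"$limit": 20},
    ]
    return list(comments.aggregate(pipeline))

Each returned document has the shape {'_id': <email>, 'count': <n>}, which is exactly what the two assertions in test_user_report.py check for.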