├── .gitignore ├── CourseAssignment.md ├── Figs ├── 1 HgrHDlU85dKtyLonNgu2zA.png ├── Nursery.png ├── SPARQL_data_representation.png ├── SQLAlchemyORM.png ├── database-model.gif ├── db_schema.png ├── query_result.png ├── screen.png ├── sqlite-sample-database-color.jpg ├── textimage.png └── wikidata_data_model.png ├── LICENSE ├── Notebooks ├── 00_Miscellaneous_Formats.ipynb ├── 01_SQLite.ipynb ├── 02_MySQL.ipynb ├── 03_PostgreSQL.ipynb ├── 04_JupyterSQL.ipynb ├── 05_SQLAlchemy.ipynb ├── 06_Spatial_SQLite.ipynb ├── 07_MongoDB.ipynb ├── 08_ElasticSearch.ipynb ├── 09_MySQL_JSON.ipynb ├── 10_Dolthub.ipynb ├── 11_Python_ODO.ipynb ├── 12_SPARQL.ipynb ├── 13_NetworkX.ipynb ├── 13a_NetworkX_plot.ipynb ├── 14_Cytoscape_Graph_Heroku.ipynb ├── Example_SQLite_matrix.ipynb ├── Example_SQLite_search engine.ipynb └── Exercises_SQLite.ipynb ├── README.md └── SampleDBs ├── 2013_ERCOT_Hourly_Load_Data.zip ├── ACDH_CH_WG4_simplified.xls ├── EssentialSQL.sqlite ├── HousingPrices.csv ├── HousingPrices.zip ├── LearningSQLExample.sql ├── RobertFrost_Poetry.pdf ├── big_csv.tar.xz ├── chinook.sqlite ├── df_yummly.pkl.gzip ├── dvdrental.zip ├── employees_db.zip ├── euro_soccer_sqlite.zip ├── fakedata.csv ├── food.json ├── hr.sqlite ├── influences.csv ├── matrix.sql ├── mysqlsampledatabase.sql ├── nursery_create.sql ├── reuters.sql ├── searchindex.sqlite └── simple.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # 
before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /CourseAssignment.md: -------------------------------------------------------------------------------- 1 | ## Course Assignment 2 | 3 | + A disciplina terá dois momentos de avaliação, nos quais diferentes estágios de desenvolvimento de um trabalho sobre Bancos de dados serão avaliados. 4 | + O objetivo final é a construção de um grafo com informações sobre alimentos e receitas 5 | + O objetivo intermediário é a construção de um banco de dados relacional com informações sobre alimentos, receitas, ingredientes, nutritional facts, etc. 6 | 7 | + O trabalho tem como objetivo explorar as muitas características do assunto "alimentação" (aspectos geoespaciais; históricos e temporais; receitas e ingredientes; saúde; harmonização; aspectos culturais; etc.) de forma a gerar agrupamentos e grafos que auxiliem na visualização de características do domínio. 
8 | O tópico a ser escolhido deve estar em uma das seguintes famílias: 9 | 10 | + Comida e Saúde 11 | + Receitas, ingredientes, sabores, harmonização 12 | + Receitas, alimentos e ingredientes: aspectos históricos, geográficos e culturais 13 | + Produção / Desperdício de alimentos 14 | + Biodiversidade, Carbon Footprint 15 | + Aspectos Históricos e Geográficos de tecnologias utilizadas para plantio, preparação e consumo de alimentos 16 | 17 | Todos os passos para o desenvolvimento do projeto devem ser documentados, e estão listados a seguir: 18 | 19 | 1) Primeira entrega (relativa à A1) 20 | 21 | Neste estágio, as seguintes tarefas serão desenvolvidas: 22 | + Mapeamento das fontes de informação (alguns exemplos de fontes são apresentados em seguida): 23 | + https://www.kaggle.com/datasets?search=food 24 | + https://www.kaggle.com/shuyangli94/food-com-recipes-and-user-interactions 25 | + https://mmspg.epfl.ch/food-image-datasets 26 | + https://www.vision.ee.ethz.ch/datasets_extra/food-101/ 27 | + http://foodcam.mobi/dataset256.html 28 | + https://pfid.rit.albany.edu/ 29 | + Realizar a Modelagem do Domínio 30 | + Criar os modelos conceitual e lógico em um ambiente como o Vertabelo, ou as outras alternativas 31 | + Implementar o banco de dados na tecnologia MySQL 32 | + Gerar um dump em formato SQL para entrega e avaliação 33 | 34 | 2) Segunda entrega (relativa à A2) 35 | 36 | Neste estágio, as seguintes tarefas serão desenvolvidas: 37 | + Escolha de aspectos do BD MySQL que serão trabalhados em uma estrutura de Grafo 38 | + Conversão de dados do DB MySQL para uma estrutura de Grafos (usando NetworkX, ou outra biblioteca) 39 | + Criação de queries específicas para dados neste grafo (em Python, SPARQL) 40 | + Criar uma interface para consulta e visualização do Grafo [(exemplo para inspirar)](https://towardsdatascience.com/python-interactive-network-visualization-using-networkx-plotly-and-dash-e44749161ed7) 41 | 42 | 43 | Não demorem a começar e bom trabalho!! 
44 | 45 | 46 | -------------------------------------------------------------------------------- /Figs/1 HgrHDlU85dKtyLonNgu2zA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/Figs/1 HgrHDlU85dKtyLonNgu2zA.png -------------------------------------------------------------------------------- /Figs/Nursery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/Figs/Nursery.png -------------------------------------------------------------------------------- /Figs/SPARQL_data_representation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/Figs/SPARQL_data_representation.png -------------------------------------------------------------------------------- /Figs/SQLAlchemyORM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/Figs/SQLAlchemyORM.png -------------------------------------------------------------------------------- /Figs/database-model.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/Figs/database-model.gif -------------------------------------------------------------------------------- /Figs/db_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/Figs/db_schema.png -------------------------------------------------------------------------------- 
/Figs/query_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/Figs/query_result.png -------------------------------------------------------------------------------- /Figs/screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/Figs/screen.png -------------------------------------------------------------------------------- /Figs/sqlite-sample-database-color.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/Figs/sqlite-sample-database-color.jpg -------------------------------------------------------------------------------- /Figs/textimage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/Figs/textimage.png -------------------------------------------------------------------------------- /Figs/wikidata_data_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/Figs/wikidata_data_model.png -------------------------------------------------------------------------------- /Notebooks/01_SQLite.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Introduction to Databases\n", 8 | "\n", 9 | "### Accessing Relational Databases - SQLite" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | 
"import os\n", 19 | "import sys\n", 20 | "import time\n", 21 | "import datetime\n", 22 | "import numpy as np\n", 23 | "import pandas as pd\n", 24 | "\n", 25 | "import sqlite3" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "### Acessing [SQLite](https://docs.python.org/3/library/sqlite3.html) " 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "conn = sqlite3.connect(os.path.join(\"..\",\"SampleDBs\",'sqlite_example.db'))" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "cur = conn.cursor()" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "#### Querying the sqlite_master table for table names" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "[]\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "cur.execute(\"SELECT name FROM sqlite_master WHERE type='table';\")\n", 75 | "print(cur.fetchall())" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "### Creating a Table and inserting Data" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 5, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "# Create table\n", 92 | "cur.execute('''CREATE TABLE IF NOT EXISTS stocks\n", 93 | " (date text, trans text, symbol text, qty real, price real)''')\n", 94 | "\n", 95 | "# Insert a row of data\n", 96 | "cur.execute(\"INSERT INTO stocks VALUES ('2006-01-05','BUY','RHAT',100,35.14)\")\n", 97 | "\n", 98 | "# Save (commit) the changes\n", 99 | "conn.commit()\n", 100 | "\n", 101 | "# We can also close the connection if we are done with it.\n", 102 | "# Just be sure any changes have been committed or 
they will be lost.\n", 103 | "conn.close()" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 6, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "conn = sqlite3.connect(os.path.join(\"..\",\"SampleDBs\",'sqlite_example.db'))\n", 113 | "cur = conn.cursor()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 7, 119 | "metadata": {}, 120 | "outputs": [ 121 | { 122 | "name": "stdout", 123 | "output_type": "stream", 124 | "text": [ 125 | "[('stocks',)]\n" 126 | ] 127 | } 128 | ], 129 | "source": [ 130 | "cur.execute(\"SELECT name FROM sqlite_master WHERE type='table';\")\n", 131 | "print(cur.fetchall())" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 8, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "('2006-01-05', 'BUY', 'RHAT', 100.0, 35.14)\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "t = ('RHAT',) #tuple with just one element\n", 149 | "cur.execute('SELECT * FROM stocks WHERE symbol=?', t)\n", 150 | "print(cur.fetchone())" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 9, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/plain": [ 161 | "" 162 | ] 163 | }, 164 | "execution_count": 9, 165 | "metadata": {}, 166 | "output_type": "execute_result" 167 | } 168 | ], 169 | "source": [ 170 | "# Larger example that inserts many records at a time\n", 171 | "purchases = [('2006-03-28', 'BUY', 'IBM', 1000, 45.00),\n", 172 | " ('2006-04-05', 'BUY', 'MSFT', 1000, 72.00),\n", 173 | " ('2006-04-06', 'SELL', 'IBM', 500, 53.00),\n", 174 | " ]\n", 175 | "cur.executemany('INSERT INTO stocks VALUES (?,?,?,?,?)', purchases)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 10, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "name": "stdout", 185 | "output_type": "stream", 186 | "text": [ 187 | "('2006-01-05', 
'BUY', 'RHAT', 100.0, 35.14)\n", 188 | "('2006-03-28', 'BUY', 'IBM', 1000.0, 45.0)\n", 189 | "('2006-04-06', 'SELL', 'IBM', 500.0, 53.0)\n", 190 | "('2006-04-05', 'BUY', 'MSFT', 1000.0, 72.0)\n" 191 | ] 192 | } 193 | ], 194 | "source": [ 195 | "for row in cur.execute('SELECT * FROM stocks ORDER BY price'):\n", 196 | " print(row)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 11, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "conn.commit()\n", 206 | "conn.close()" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "#### Using Pandas to query" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 12, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "name": "stdout", 223 | "output_type": "stream", 224 | "text": [ 225 | " date trans symbol qty price\n", 226 | "0 2006-01-05 BUY RHAT 100.0 35.14\n", 227 | "1 2006-03-28 BUY IBM 1000.0 45.00\n", 228 | "2 2006-04-06 SELL IBM 500.0 53.00\n", 229 | "3 2006-04-05 BUY MSFT 1000.0 72.00\n" 230 | ] 231 | } 232 | ], 233 | "source": [ 234 | "conn = sqlite3.connect(os.path.join(\"..\",\"SampleDBs\",'sqlite_example.db'))\n", 235 | "df = pd.read_sql_query(\"SELECT * from stocks ORDER BY price\", conn)\n", 236 | "\n", 237 | "# verify that result of SQL query is stored in the dataframe\n", 238 | "print(df.head())\n", 239 | "\n", 240 | "conn.close()" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "Deleting the database file: " 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 13, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "os.remove(os.path.join(\"..\",\"SampleDBs\",'sqlite_example.db'))" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "### Another example" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 14, 269 | 
"metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "conn = sqlite3.connect(os.path.join(\"..\",\"SampleDBs\",\"test.db\"))\n", 273 | " \n", 274 | "#Here, you can also supply database name as the special name :memory: to create a database in RAM." 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 15, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "conn.execute('''CREATE TABLE COMPANY \n", 284 | " (ID INT PRIMARY KEY NOT NULL,\n", 285 | " NAME TEXT NOT NULL,\n", 286 | " AGE INT NOT NULL,\n", 287 | " ADDRESS CHAR(50),\n", 288 | " SALARY REAL);''')\n", 289 | "conn.close()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 16, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "conn = sqlite3.connect(os.path.join(\"..\",\"SampleDBs\",\"test.db\"))\n", 299 | "\n", 300 | "conn.execute(\"INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) VALUES (1, 'Paul', 32, 'California', 20000.00 )\");\n", 301 | "conn.execute(\"INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) VALUES (2, 'Allen', 25, 'Texas', 15000.00 )\");\n", 302 | "conn.execute(\"INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) VALUES (3, 'Teddy', 23, 'Norway', 20000.00 )\");\n", 303 | "conn.execute(\"INSERT INTO COMPANY (ID,NAME,AGE,ADDRESS,SALARY) VALUES (4, 'Mark', 25, 'Rich-Mond ', 65000.00 )\");\n", 304 | "\n", 305 | "conn.commit()\n", 306 | "conn.close()" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 17, 312 | "metadata": {}, 313 | "outputs": [ 314 | { 315 | "name": "stdout", 316 | "output_type": "stream", 317 | "text": [ 318 | "ID = 1\n", 319 | "NAME = Paul\n", 320 | "ADDRESS = California\n", 321 | "SALARY = 20000.0 \n", 322 | "\n", 323 | "ID = 2\n", 324 | "NAME = Allen\n", 325 | "ADDRESS = Texas\n", 326 | "SALARY = 15000.0 \n", 327 | "\n", 328 | "ID = 3\n", 329 | "NAME = Teddy\n", 330 | "ADDRESS = Norway\n", 331 | "SALARY = 20000.0 \n", 332 | "\n", 333 | "ID = 4\n", 334 | "NAME = 
Mark\n", 335 | "ADDRESS = Rich-Mond \n", 336 | "SALARY = 65000.0 \n", 337 | "\n" 338 | ] 339 | } 340 | ], 341 | "source": [ 342 | "conn = sqlite3.connect(os.path.join(\"..\",\"SampleDBs\",\"test.db\"))\n", 343 | "\n", 344 | "cursor = conn.execute(\"SELECT id, name, address, salary from COMPANY\")\n", 345 | "for row in cursor:\n", 346 | " print(\"ID = \", row[0])\n", 347 | " print(\"NAME = \", row[1])\n", 348 | " print(\"ADDRESS = \", row[2])\n", 349 | " print(\"SALARY = \", row[3], \"\\n\")\n", 350 | " \n", 351 | "conn.close()" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 18, 357 | "metadata": {}, 358 | "outputs": [], 359 | "source": [ 360 | "os.remove(os.path.join(\"..\",\"SampleDBs\",\"test.db\"))" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": {}, 366 | "source": [ 367 | "### Accessing existing [Database](https://www.sqlitetutorial.net/sqlite-sample-database/):\n", 368 | "\n", 369 | "![Chinook Schema](../Figs/sqlite-sample-database-color.jpg) \n", 370 | "[Source](https://www.sqlitetutorial.net/) " 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "#### Retrieving original database" 378 | ] 379 | }, 380 | { 381 | "cell_type": "markdown", 382 | "metadata": {}, 383 | "source": [ 384 | "! wget https://cdn.sqlitetutorial.net/wp-content/uploads/2018/03/chinook.zip \n", 385 | "! unzip chinook.zip ../SampleDBs/chinook.sqlite \n", 386 | "! 
rm chinook.zip " 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 19, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "conn = sqlite3.connect(os.path.join(\"..\",\"SampleDBs\",'chinook.sqlite'))\n", 396 | "cur = conn.cursor()" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 20, 402 | "metadata": {}, 403 | "outputs": [ 404 | { 405 | "name": "stdout", 406 | "output_type": "stream", 407 | "text": [ 408 | "('albums',)\n", 409 | "('artists',)\n", 410 | "('customers',)\n", 411 | "('employees',)\n", 412 | "('genres',)\n", 413 | "('invoices',)\n", 414 | "('invoice_items',)\n", 415 | "('media_types',)\n", 416 | "('playlists',)\n", 417 | "('playlist_track',)\n", 418 | "('tracks',)\n" 419 | ] 420 | } 421 | ], 422 | "source": [ 423 | "query = '''\n", 424 | "SELECT name\n", 425 | "FROM sqlite_master \n", 426 | "WHERE type ='table' AND name NOT LIKE 'sqlite_%';\n", 427 | "'''\n", 428 | "\n", 429 | "cur.execute(query)\n", 430 | "for c in cur.fetchall():\n", 431 | " print(c)" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "#### Querying the sqlite_master table to examine the table structure" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": 21, 444 | "metadata": {}, 445 | "outputs": [ 446 | { 447 | "name": "stdout", 448 | "output_type": "stream", 449 | "text": [ 450 | "CREATE TABLE \"albums\"\n", 451 | "(\n", 452 | " [AlbumId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", 453 | " [Title] NVARCHAR(160) NOT NULL,\n", 454 | " [ArtistId] INTEGER NOT NULL,\n", 455 | " FOREIGN KEY ([ArtistId]) REFERENCES \"artists\" ([ArtistId]) \n", 456 | "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n", 457 | ")\n" 458 | ] 459 | } 460 | ], 461 | "source": [ 462 | "query = '''\n", 463 | "SELECT sql \n", 464 | "FROM sqlite_master \n", 465 | "WHERE name = 'albums';\n", 466 | "'''\n", 467 | "\n", 468 | "cur.execute(query)\n", 469 | "for c in 
cur.fetchall():\n", 470 | " print(c[0])" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": 22, 476 | "metadata": {}, 477 | "outputs": [ 478 | { 479 | "name": "stdout", 480 | "output_type": "stream", 481 | "text": [ 482 | "(156, '...And Justice For All')\n", 483 | "(257, '20th Century Masters - The Millennium Collection: The Best of Scorpions')\n", 484 | "(296, 'A Copland Celebration, Vol. I')\n", 485 | "(94, 'A Matter of Life and Death')\n", 486 | "(95, 'A Real Dead One')\n", 487 | "(96, 'A Real Live One')\n", 488 | "(285, 'A Soprano Inspired')\n", 489 | "(139, 'A TempestadeTempestade Ou O Livro Dos Dias')\n", 490 | "(203, 'A-Sides')\n", 491 | "(160, 'Ace Of Spades')\n" 492 | ] 493 | } 494 | ], 495 | "source": [ 496 | "query = '''\n", 497 | "SELECT albumid, title\n", 498 | "FROM albums\n", 499 | "ORDER BY title\n", 500 | "'''\n", 501 | "\n", 502 | "cur.execute(query)\n", 503 | "for c in cur.fetchmany(10):\n", 504 | " print(c)" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": 23, 510 | "metadata": {}, 511 | "outputs": [ 512 | { 513 | "name": "stdout", 514 | "output_type": "stream", 515 | "text": [ 516 | "(11, 'C.O.D.', 'Angus Young, Malcolm Young, Brian Johnson', 0.99)\n", 517 | "(12, 'Breaking The Rules', 'Angus Young, Malcolm Young, Brian Johnson', 0.99)\n", 518 | "(13, 'Night Of The Long Knives', 'Angus Young, Malcolm Young, Brian Johnson', 0.99)\n", 519 | "(14, 'Spellbound', 'Angus Young, Malcolm Young, Brian Johnson', 0.99)\n", 520 | "(15, 'Go Down', 'AC/DC', 0.99)\n", 521 | "(16, 'Dog Eat Dog', 'AC/DC', 0.99)\n", 522 | "(17, 'Let There Be Rock', 'AC/DC', 0.99)\n", 523 | "(18, 'Bad Boy Boogie', 'AC/DC', 0.99)\n", 524 | "(19, 'Problem Child', 'AC/DC', 0.99)\n", 525 | "(20, 'Overdose', 'AC/DC', 0.99)\n" 526 | ] 527 | } 528 | ], 529 | "source": [ 530 | "query = '''\n", 531 | "SELECT trackid, name, composer, unitprice\n", 532 | "FROM tracks\n", 533 | "LIMIT 10 OFFSET 10;\n", 534 | "'''\n", 535 | "\n", 536 | 
"cur.execute(query)\n", 537 | "for c in cur.fetchall():\n", 538 | " print(c)" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": 24, 544 | "metadata": {}, 545 | "outputs": [ 546 | { 547 | "name": "stdout", 548 | "output_type": "stream", 549 | "text": [ 550 | "('Buenos Aires', 'Argentina')\n", 551 | "('Sidney', 'Australia')\n", 552 | "('Vienne', 'Austria')\n", 553 | "('Brussels', 'Belgium')\n", 554 | "('São José dos Campos', 'Brazil')\n", 555 | "('São Paulo', 'Brazil')\n", 556 | "('Rio de Janeiro', 'Brazil')\n", 557 | "('Brasília', 'Brazil')\n", 558 | "('Montréal', 'Canada')\n", 559 | "('Edmonton', 'Canada')\n" 560 | ] 561 | } 562 | ], 563 | "source": [ 564 | "query = '''\n", 565 | "SELECT DISTINCT city, country\n", 566 | "FROM customers\n", 567 | "ORDER BY country;\n", 568 | "'''\n", 569 | "\n", 570 | "cur.execute(query)\n", 571 | "for c in cur.fetchmany(10):\n", 572 | " print(c)" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": 25, 578 | "metadata": {}, 579 | "outputs": [ 580 | { 581 | "name": "stdout", 582 | "output_type": "stream", 583 | "text": [ 584 | "('Occupation / Precipice', 5286953, 227)\n", 585 | "('Through a Looking Glass', 5088838, 229)\n", 586 | "('Greetings from Earth, Pt. 1', 2960293, 253)\n", 587 | "('The Man With Nine Lives', 2956998, 253)\n", 588 | "('Battlestar Galactica, Pt. 2', 2956081, 253)\n", 589 | "('Battlestar Galactica, Pt. 1', 2952702, 253)\n", 590 | "('Murder On the Rising Star', 2935894, 253)\n", 591 | "('Battlestar Galactica, Pt. 
3', 2927802, 253)\n", 592 | "('Take the Celestra', 2927677, 253)\n", 593 | "('Fire In Space', 2926593, 253)\n" 594 | ] 595 | } 596 | ], 597 | "source": [ 598 | "query = '''\n", 599 | "SELECT name, milliseconds, albumid\n", 600 | "FROM tracks\n", 601 | "ORDER BY milliseconds DESC, albumid ASC;\n", 602 | "'''\n", 603 | "\n", 604 | "cur.execute(query)\n", 605 | "for c in cur.fetchmany(10):\n", 606 | " print(c)" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": 26, 612 | "metadata": {}, 613 | "outputs": [ 614 | { 615 | "name": "stdout", 616 | "output_type": "stream", 617 | "text": [ 618 | "('For Those About To Rock (We Salute You)', 343719, 11170334, 1)\n", 619 | "('Evil Walks', 263497, 8611245, 1)\n", 620 | "('Breaking The Rules', 263288, 8596840, 1)\n", 621 | "('Spellbound', 270863, 8817038, 1)\n" 622 | ] 623 | } 624 | ], 625 | "source": [ 626 | "query = '''\n", 627 | "SELECT name, milliseconds, bytes, albumid\n", 628 | "FROM tracks\n", 629 | "WHERE albumid = 1\n", 630 | "AND milliseconds > 250000;\n", 631 | "'''\n", 632 | "\n", 633 | "cur.execute(query)\n", 634 | "for c in cur.fetchmany(10):\n", 635 | " print(c)" 636 | ] 637 | }, 638 | { 639 | "cell_type": "code", 640 | "execution_count": 27, 641 | "metadata": {}, 642 | "outputs": [ 643 | { 644 | "name": "stdout", 645 | "output_type": "stream", 646 | "text": [ 647 | "('Restless and Wild', 3, 'F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. Dirkscneider & W. Hoffman')\n", 648 | "('Princess of the Dawn', 3, 'Deaffy & R.A. 
Smith-Diesel')\n", 649 | "('Killing Floor', 19, 'Adrian Smith')\n", 650 | "('Machine Men', 19, 'Adrian Smith')\n", 651 | "('2 Minutes To Midnight', 95, 'Adrian Smith/Bruce Dickinson')\n", 652 | "('Can I Play With Madness', 96, 'Adrian Smith/Bruce Dickinson/Steve Harris')\n", 653 | "('The Evil That Men Do', 96, 'Adrian Smith/Bruce Dickinson/Steve Harris')\n", 654 | "('The Wicker Man', 97, 'Adrian Smith/Bruce Dickinson/Steve Harris')\n", 655 | "('The Fallen Angel', 97, 'Adrian Smith/Steve Harris')\n", 656 | "('Wildest Dreams', 98, 'Adrian Smith/Steve Harris')\n" 657 | ] 658 | } 659 | ], 660 | "source": [ 661 | "query = '''\n", 662 | "SELECT name, albumid, composer\n", 663 | "FROM tracks\n", 664 | "WHERE composer LIKE '%Smith%'\n", 665 | "ORDER BY albumid;\n", 666 | "'''\n", 667 | "\n", 668 | "cur.execute(query)\n", 669 | "for c in cur.fetchmany(10):\n", 670 | " print(c)" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": 28, 676 | "metadata": {}, 677 | "outputs": [ 678 | { 679 | "name": "stdout", 680 | "output_type": "stream", 681 | "text": [ 682 | "('Balls to the Wall', 2, 2)\n", 683 | "('Fast As a Shark', 3, 2)\n", 684 | "('Restless and Wild', 3, 2)\n", 685 | "('Princess of the Dawn', 3, 2)\n", 686 | "('Welcome to the Jungle', 90, 2)\n", 687 | "(\"It's So Easy\", 90, 2)\n", 688 | "('Nightrain', 90, 2)\n", 689 | "('Out Ta Get Me', 90, 2)\n", 690 | "('Mr. 
Brownstone', 90, 2)\n", 691 | "('Paradise City', 90, 2)\n" 692 | ] 693 | } 694 | ], 695 | "source": [ 696 | "query = '''\n", 697 | "SELECT name, albumid, mediatypeid\n", 698 | "FROM tracks\n", 699 | "WHERE mediatypeid IN (2, 3);\n", 700 | "'''\n", 701 | "\n", 702 | "cur.execute(query)\n", 703 | "for c in cur.fetchmany(10):\n", 704 | " print(c)" 705 | ] 706 | }, 707 | { 708 | "cell_type": "code", 709 | "execution_count": 29, 710 | "metadata": {}, 711 | "outputs": [ 712 | { 713 | "name": "stdout", 714 | "output_type": "stream", 715 | "text": [ 716 | "(193, 'Berger Straße 10', 14.91)\n", 717 | "(103, '162 E Superior Street', 15.86)\n", 718 | "(208, 'Ullevålsveien 14', 15.86)\n", 719 | "(306, 'Klanova 9/506', 16.86)\n", 720 | "(313, '68, Rue Jouvence', 16.86)\n", 721 | "(88, 'Calle Lira, 198', 17.91)\n", 722 | "(89, 'Rotenturmstraße 4, 1010 Innere Stadt', 18.86)\n", 723 | "(201, '319 N. Frances Street', 18.86)\n" 724 | ] 725 | } 726 | ], 727 | "source": [ 728 | "query = '''\n", 729 | "SELECT InvoiceId, BillingAddress, Total\n", 730 | "FROM invoices\n", 731 | "WHERE Total BETWEEN 14.91 and 18.86 \n", 732 | "ORDER BY Total; \n", 733 | "'''\n", 734 | "\n", 735 | "cur.execute(query)\n", 736 | "for c in cur.fetchmany(10):\n", 737 | " print(c)" 738 | ] 739 | }, 740 | { 741 | "cell_type": "code", 742 | "execution_count": 30, 743 | "metadata": {}, 744 | "outputs": [ 745 | { 746 | "name": "stdout", 747 | "output_type": "stream", 748 | "text": [ 749 | "(6, 'Berger Straße 10', 0.99)\n", 750 | "(13, '1600 Amphitheatre Parkway', 0.99)\n", 751 | "(20, '110 Raeburn Pl', 0.99)\n", 752 | "(27, '5112 48 Street', 0.99)\n", 753 | "(34, 'Praça Pio X, 119', 0.99)\n", 754 | "(41, 'C/ San Bernardo 85', 0.99)\n", 755 | "(48, '796 Dundas Street West', 0.99)\n", 756 | "(55, 'Grétrystraat 63', 0.99)\n", 757 | "(62, '3 Chatham Street', 0.99)\n", 758 | "(69, '319 N. 
Frances Street', 0.99)\n" 759 | ] 760 | } 761 | ], 762 | "source": [ 763 | "query = '''\n", 764 | "SELECT InvoiceId, BillingAddress, Total\n", 765 | "FROM invoices\n", 766 | "WHERE Total NOT BETWEEN 1 and 20\n", 767 | "ORDER BY Total; \n", 768 | "'''\n", 769 | "\n", 770 | "cur.execute(query)\n", 771 | "for c in cur.fetchmany(10):\n", 772 | " print(c)" 773 | ] 774 | }, 775 | { 776 | "cell_type": "code", 777 | "execution_count": 31, 778 | "metadata": {}, 779 | "outputs": [ 780 | { 781 | "name": "stdout", 782 | "output_type": "stream", 783 | "text": [ 784 | "('For Those About To Rock We Salute You', 'AC/DC')\n", 785 | "('Balls to the Wall', 'Accept')\n", 786 | "('Restless and Wild', 'Accept')\n", 787 | "('Let There Be Rock', 'AC/DC')\n", 788 | "('Big Ones', 'Aerosmith')\n", 789 | "('Jagged Little Pill', 'Alanis Morissette')\n", 790 | "('Facelift', 'Alice In Chains')\n", 791 | "('Warner 25 Anos', 'Antônio Carlos Jobim')\n", 792 | "('Plays Metallica By Four Cellos', 'Apocalyptica')\n", 793 | "('Audioslave', 'Audioslave')\n" 794 | ] 795 | } 796 | ], 797 | "source": [ 798 | "query = '''\n", 799 | "SELECT Title, Name\n", 800 | "FROM albums\n", 801 | "INNER JOIN artists \n", 802 | "ON artists.ArtistId = albums.ArtistId;\n", 803 | "'''\n", 804 | "\n", 805 | "cur.execute(query)\n", 806 | "for c in cur.fetchmany(10):\n", 807 | " print(c)" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": 32, 813 | "metadata": {}, 814 | "outputs": [ 815 | { 816 | "name": "stdout", 817 | "output_type": "stream", 818 | "text": [ 819 | "(123, 'Quadrant', 'The Best Of Billy Cobham', 'Billy Cobham')\n", 820 | "(124, \"Snoopy's search-Red baron\", 'The Best Of Billy Cobham', 'Billy Cobham')\n", 821 | "(125, 'Spanish moss-\"A sound portrait\"-Spanish moss', 'The Best Of Billy Cobham', 'Billy Cobham')\n", 822 | "(126, 'Moon germs', 'The Best Of Billy Cobham', 'Billy Cobham')\n", 823 | "(127, 'Stratus', 'The Best Of Billy Cobham', 'Billy Cobham')\n", 824 | "(128, 'The pleasant 
pheasant', 'The Best Of Billy Cobham', 'Billy Cobham')\n", 825 | "(129, 'Solo-Panhandler', 'The Best Of Billy Cobham', 'Billy Cobham')\n", 826 | "(130, 'Do what cha wanna', 'The Best Of Billy Cobham', 'Billy Cobham')\n" 827 | ] 828 | } 829 | ], 830 | "source": [ 831 | "query = '''\n", 832 | "SELECT tracks.trackid, tracks.name AS track, albums.title AS album, artists.name AS artist\n", 833 | "FROM tracks\n", 834 | "INNER JOIN albums \n", 835 | "ON albums.albumid = tracks.albumid\n", 836 | "INNER JOIN artists \n", 837 | "ON artists.artistid = albums.artistid\n", 838 | "WHERE artists.artistid = 10;\n", 839 | "'''\n", 840 | "\n", 841 | "cur.execute(query)\n", 842 | "for c in cur.fetchmany(10):\n", 843 | " print(c)" 844 | ] 845 | }, 846 | { 847 | "cell_type": "code", 848 | "execution_count": 33, 849 | "metadata": {}, 850 | "outputs": [ 851 | { 852 | "name": "stdout", 853 | "output_type": "stream", 854 | "text": [ 855 | "('A Cor Do Som', None)\n", 856 | "('AC/DC', 'For Those About To Rock We Salute You')\n", 857 | "('AC/DC', 'Let There Be Rock')\n", 858 | "('Aaron Copland & London Symphony Orchestra', 'A Copland Celebration, Vol. I')\n", 859 | "('Aaron Goldberg', 'Worlds')\n", 860 | "('Academy of St. Martin in the Fields & Sir Neville Marriner', 'The World of Classical Favourites')\n", 861 | "('Academy of St. Martin in the Fields Chamber Ensemble & Sir Neville Marriner', 'Sir Neville Marriner: A Celebration')\n", 862 | "('Academy of St. Martin in the Fields, John Birch, Sir Neville Marriner & Sylvia McNair', 'Fauré: Requiem, Ravel: Pavane & Others')\n", 863 | "('Academy of St. Martin in the Fields, Sir Neville Marriner & Thurston Dart', 'Bach: Orchestral Suites Nos. 1 - 4')\n", 864 | "('Academy of St. 
Martin in the Fields, Sir Neville Marriner & William Bennett', None)\n" 865 | ] 866 | } 867 | ], 868 | "source": [ 869 | "query = '''\n", 870 | "SELECT Name, Title\n", 871 | "FROM artists\n", 872 | "LEFT JOIN albums \n", 873 | "ON artists.ArtistId = albums.ArtistId\n", 874 | "ORDER BY Name;\n", 875 | "'''\n", 876 | "\n", 877 | "cur.execute(query)\n", 878 | "for c in cur.fetchmany(10):\n", 879 | " print(c)" 880 | ] 881 | }, 882 | { 883 | "cell_type": "markdown", 884 | "metadata": {}, 885 | "source": [ 886 | "#### Same with Pandas" 887 | ] 888 | }, 889 | { 890 | "cell_type": "code", 891 | "execution_count": 34, 892 | "metadata": {}, 893 | "outputs": [ 894 | { 895 | "data": { 896 | "text/html": [ 897 | "
\n", 898 | "\n", 911 | "\n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | "
NameTitle
0A Cor Do SomNone
1AC/DCFor Those About To Rock We Salute You
2AC/DCLet There Be Rock
3Aaron Copland & London Symphony OrchestraA Copland Celebration, Vol. I
4Aaron GoldbergWorlds
\n", 947 | "
" 948 | ], 949 | "text/plain": [ 950 | " Name \\\n", 951 | "0 A Cor Do Som \n", 952 | "1 AC/DC \n", 953 | "2 AC/DC \n", 954 | "3 Aaron Copland & London Symphony Orchestra \n", 955 | "4 Aaron Goldberg \n", 956 | "\n", 957 | " Title \n", 958 | "0 None \n", 959 | "1 For Those About To Rock We Salute You \n", 960 | "2 Let There Be Rock \n", 961 | "3 A Copland Celebration, Vol. I \n", 962 | "4 Worlds " 963 | ] 964 | }, 965 | "execution_count": 34, 966 | "metadata": {}, 967 | "output_type": "execute_result" 968 | } 969 | ], 970 | "source": [ 971 | "df = pd.read_sql_query(query, conn)\n", 972 | "df.head()" 973 | ] 974 | } 975 | ], 976 | "metadata": { 977 | "kernelspec": { 978 | "display_name": "Python 3 (ipykernel)", 979 | "language": "python", 980 | "name": "python3" 981 | }, 982 | "language_info": { 983 | "codemirror_mode": { 984 | "name": "ipython", 985 | "version": 3 986 | }, 987 | "file_extension": ".py", 988 | "mimetype": "text/x-python", 989 | "name": "python", 990 | "nbconvert_exporter": "python", 991 | "pygments_lexer": "ipython3", 992 | "version": "3.8.10" 993 | } 994 | }, 995 | "nbformat": 4, 996 | "nbformat_minor": 4 997 | } 998 | -------------------------------------------------------------------------------- /Notebooks/03_PostgreSQL.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Introduction to Databases\n", 8 | "\n", 9 | "### Accessing Relational Databases - PostgreSQL" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import os\n", 19 | "import sys\n", 20 | "import time\n", 21 | "import datetime\n", 22 | "import numpy as np\n", 23 | "import pandas as pd\n", 24 | "import pathlib\n", 25 | "import getpass\n", 26 | "\n", 27 | "import psycopg2" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 
| "### Accessing [PostgreSQL](https://www.psycopg.org/docs/) " 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "p = getpass.getpass()\n", 44 | "try:\n", 45 | " conn = psycopg2.connect(\"dbname='testdb' user='rsouza' host='localhost' password='{}'\".format(p))\n", 46 | "except:\n", 47 | " print(\"I am unable to connect to the database\")\n", 48 | "cur = conn.cursor()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "cur.execute('SELECT * FROM postgres LIMIT 2;')\n", 58 | "for r in cur.fetchall():\n", 59 | " print(r)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "#### Using Pandas" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "p = getpass.getpass()\n", 76 | "try:\n", 77 | " conn = psycopg2.connect(\"dbname='postgres' user='rsouza' host='localhost' password='{}'\".format(p))\n", 78 | "except:\n", 79 | " print(\"I am unable to connect to the database\")" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "df_postgres = pd.read_sql('SELECT * FROM postgres LIMIT 5;', con=conn)\n", 89 | "df_postgres.head()" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## A Python Pipeline using PostgreSQL and Docker\n", 97 | "\n", 98 | "Source: [this blog post](https://globoglobito.medium.com/creating-your-first-data-pipeline-with-python-62bfb7a298fe) and [Github](https://github.com/globoglobito/WebScraperPOC/blob/main/scraper.py)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "### If Docker not installed https://docs.docker.com/get-docker/ \n", 106 | "\n", 107 | "### 
To create your docker container for the first time: \n", 108 | "> docker run -d -p 4321:5432 --name PostgresDB -e POSTGRES_PASSWORD=my_password postgres \n", 109 | "\n", 110 | "### To enter into your container: \n", 111 | "> docker exec -it PostgresDB bash \n", 112 | "\n", 113 | "### Once inside your container, to enter postgres: \n", 114 | "> psql -U postgres \n", 115 | "\n", 116 | "### Finally, to create the table: \n", 117 | ">CREATE TABLE scraped_data ( \n", 118 | "> date_of_scraping timestamp, \n", 119 | "> seller varchar(20), \n", 120 | "> name varchar(100), \n", 121 | "> price integer, \n", 122 | "> in_stock bool, \n", 123 | "> deal bool, \n", 124 | "> url varchar(100) \n", 125 | ">); " 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "### To create your Grafana container:\n", 133 | "> docker run -d -p 3000:3000 --name=grafana grafana/grafana \n", 134 | "\n", 135 | "Once the container is up and running, open your web browser and go to http://localhost:3000/. \n", 136 | "If not working, check the browser config. \n", 137 | "+ in firefox type about:config\n", 138 | "+ search localhost in it and make below flag true\n", 139 | "+ network.dns.native-is-localhost\n", 140 | "\n", 141 | "On the login page, enter \"admin\" for username and password.\n", 142 | "+ Click Log In. \n", 143 | "+ Click OK on the prompt, then change your password. 
\n", 144 | "+ Add your Postgres DB as a data source: \n", 145 | " + host: host.docker.internal:4321\n", 146 | " + Database: postgres\n", 147 | " + user: postgres\n", 148 | " + password: my_password (defined above)\n", 149 | " \n", 150 | "+ Create your own Dashboards" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "import os\n", 160 | "import sys\n", 161 | "import requests\n", 162 | "from bs4 import BeautifulSoup\n", 163 | "import re\n", 164 | "import datetime\n", 165 | "import psycopg2\n", 166 | "import smtplib\n", 167 | "import ssl\n", 168 | "import logging\n", 169 | "import argparse\n", 170 | "\n", 171 | "timestamp_of_script = '{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())\n", 172 | "\n", 173 | "# A very basic logger that dumps information into a file.\n", 174 | "log_file = os.path.join(os.getcwd(), \"WebScraper.log\")\n", 175 | "logger = logging.getLogger(\"WebScraper\")\n", 176 | "logger.setLevel(logging.INFO)\n", 177 | "file_logger = logging.FileHandler(log_file, mode='a')\n", 178 | "file_logger.setLevel(logging.INFO)\n", 179 | "logger.addHandler(file_logger)\n", 180 | "\n", 181 | "\n", 182 | "# These are the web pages I decided to scrape for information. 
The information we need to scrape the data is:\n", 183 | "# The URL of the web page, the class where the name of the GPU is stored, the class where the price is stored, and\n", 184 | "# the class where the buy button is stored (this is how we determine availability; unless there is stock this class wont appear)\n", 185 | "pages_dictionary = {\"coolmod\": [\"https://www.coolmod.com/asus-turbo-geforce-rtx-3090-24gb-gddr6x-tarjeta-grafica\",\n", 186 | " \"-precio\", \"product-first-part\", \"text-price-total\", \"button-buy\"],\n", 187 | " \"coolmod2\": [\"https://www.coolmod.com/evga-geforce-rtx-3090-xc3-black-gaming-24gb-gddr6x-tarjeta-grafica-precio\",\n", 188 | " \"product-first-part\", \"text-price-total\", \"button-buy\"],\n", 189 | " \"coolmod3\": [\"https://www.coolmod.com/evga-geforce-rtx-3090-xc3-gaming-24gb-gddr6x-tarjeta-grafica-precio\",\n", 190 | " \"product-first-part\", \"text-price-total\", \"button-buy\"],\n", 191 | " \"coolmod4\": [\"https://www.coolmod.com/evga-geforce-rtx-3090-xc3-ultra-gaming-24gb-gddr6x-tarjeta-grafica-precio\",\n", 192 | " \"product-first-part\", \"text-price-total\", \"button-buy\"],\n", 193 | " \"ibertronica\": [\"https://www.ibertronica.es/asus-rtx-3090-turbo-24gb-gddr6x\",\n", 194 | " \"mb-3 h2 product-title\", \"col-6 ng-tns-c1-1 ng-star-inserted\",\n", 195 | " \"btn btn-outline-primary btn-block m-0 mb-3\"],\n", 196 | " \"xtremmedia\": [\"https://www.xtremmedia.com/Asus_Turbo_GeForce_RTX_3090_24GB_GDDR6X.html\",\n", 197 | " \"ficha-titulo\", \"offerDetails article-list-pvp\", \"article-carrito2\", \"precio\"],\n", 198 | " \"xtremmedia2\": [\"https://www.xtremmedia.com/EVGA_GeForce_RTX_3090_XC3_Ultra_Gaming_24GB_GDDR6X.html\",\n", 199 | " \"ficha-titulo\", \"offerDetails article-list-pvp\", \"article-carrito2\", \"precio\"],\n", 200 | " \"pccomponentes\": [\"https://www.pccomponentes.com/asus-turbo-geforce-rtx-3090-24gb-gddr6x\", \"h4\",\n", 201 | " \"baseprice\", \"btn btn-primary btn-lg buy GTM-addToCart buy-button 
js-article-buy\"],\n", 202 | " \"pccomponentes2\": [\"https://www.pccomponentes.com/evga-geforce-rtx-3090-xc3-black-gaming-24gb-gdddr6x\", \"h4\",\n", 203 | " \"baseprice\", \"btn btn-primary btn-lg buy GTM-addToCart buy-button js-article-buy\"],\n", 204 | " \"pccomponentes3\": [\"https://www.pccomponentes.com/evga-geforce-rtx-3090-xc3-gaming-24gb-gddr6x\", \"h4\", \n", 205 | " \"baseprice\", \"btn btn-primary btn-lg buy GTM-addToCart buy-button js-article-buy\"],\n", 206 | " \"pccomponentes4\": [\"https://www.pccomponentes.com/evga-geforce-rtx-3090-xc3-ultra-gaming-24gb-gddr6x\", \"h4\",\n", 207 | " \"baseprice\", \"btn btn-primary btn-lg buy GTM-addToCart buy-button js-article-buy\"]}\n", 208 | "\n", 209 | "\n", 210 | "# Note for docker:\n", 211 | "# You might have an instance of Postgres running on local and it probably uses port 5432 already. We must bind another local port to port 5432 of the container.\n", 212 | "# In this case when builfing the container we used : docker run -d -p 4321:5432 ...... 
and so on.\n", 213 | "\n", 214 | "def get_product_details(urls, name_class, price_class, instock_class, alternate_price_class=None):\n", 215 | " \"\"\" Receives 4-5 inputs, and returns a dictionary with the scraped information.\n", 216 | " The function extracts the relevant information of the url provided (price, name, availability),\n", 217 | " it then cleans and formats the information so that it can be dumped into a relational DB\"\"\"\n", 218 | " headers = {\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)\",\n", 219 | " \"Chrome/88.0.4324.104 Safari/537.36\"}\n", 220 | " details = {\"date_of_scraping\": \"\", \"seller\": \"\", \"name\": \"\", \"price\": 0, \"in_stock\": False, \"deal\": False,\n", 221 | " \"url\": \"\"}\n", 222 | " if urls == \"\":\n", 223 | " logger.warning(f\"URL parameter is empty, skipping this k-v pair\")\n", 224 | " details = None\n", 225 | " else:\n", 226 | " try:\n", 227 | " page = requests.get(urls, headers=headers)\n", 228 | " page.raise_for_status() # to check if we got a correct response (200) else it raises an Exception.\n", 229 | " soup = BeautifulSoup(page.content, features=\"html.parser\")\n", 230 | " timestamp = '{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now())\n", 231 | " seller_raw = re.sub('^.*w\\.', '', urls)\n", 232 | " name = soup.find(class_=name_class)\n", 233 | " price = soup.find(class_=price_class)\n", 234 | " in_stock = soup.find(class_=instock_class)\n", 235 | " if alternate_price_class is not None and price is None:\n", 236 | " price = soup.find(class_=alternate_price_class)\n", 237 | " details[\"date_of_scraping\"] = timestamp\n", 238 | " if \"ibertronica\" in seller_raw:\n", 239 | " details[\"seller\"] = re.sub('\\.es.*', '', seller_raw)\n", 240 | " else:\n", 241 | " details[\"seller\"] = re.sub('\\.com.*', '', seller_raw)\n", 242 | " if name is not None:\n", 243 | " details[\"name\"] = name.get_text()\n", 244 | " details[\"name\"] = re.sub(\"GeForce\", 
\"\", details[\"name\"])\n", 245 | " details[\"name\"] = re.sub(\"®\", \"\", details[\"name\"])\n", 246 | " details[\"name\"] = re.sub(\" - {2}Tarjeta Gráfica\", \"\", details[\"name\"])\n", 247 | " details[\"name\"] = re.sub(\" {2}\", \" \", details[\"name\"])\n", 248 | " details[\"name\"] = re.sub(\"DDD\", \"DD\", details[\"name\"])\n", 249 | " details[\"name\"] = details[\"name\"].upper()\n", 250 | " details[\"name\"] = re.sub(\"ASUS TURBO RTX 3090\", \"ASUS RTX 3090 TURBO\", details[\"name\"])\n", 251 | " details[\"url\"] = urls\n", 252 | " else:\n", 253 | " details = None\n", 254 | " logger.warning(f\"URL: {urls} not scraped because the name of the product was not found @ {timestamp}\")\n", 255 | " return details\n", 256 | " if price is not None:\n", 257 | " details[\"price\"] = int(re.sub('[^0-9]', '', price.get_text())[0:4])\n", 258 | " if in_stock is not None:\n", 259 | " details[\"in_stock\"] = True\n", 260 | " if int(details[\"price\"]) <= 1800:\n", 261 | " details[\"deal\"] = True\n", 262 | " logger.info(f\"{urls} scraped successfully @ {timestamp}\")\n", 263 | " except Exception as ex:\n", 264 | " logger.warning(f\"Exception caught @ get_product_details :{ex}\")\n", 265 | " details = None\n", 266 | " return details\n", 267 | "\n", 268 | "\n", 269 | "def iterate_webpages(dictionary):\n", 270 | " \"\"\" Helper function to iterate over our pages directory using the get_products_details function\"\"\"\n", 271 | " if not dictionary:\n", 272 | " logger.warning(f\"Nothing to scrape, ending script\")\n", 273 | " sys.exit(1)\n", 274 | " sql_information_list = []\n", 275 | " for key in dictionary:\n", 276 | " query = get_product_details(*dictionary[key])\n", 277 | " if query is not None:\n", 278 | " sql_information_list.append(query)\n", 279 | " if not sql_information_list:\n", 280 | " logger.warning(f\"No information was scraped, terminating {timestamp_of_script}\")\n", 281 | " sys.exit(1)\n", 282 | " return sql_information_list\n", 283 | "\n", 284 | "\n", 285 | 
"def create_message(scraped_data):\n", 286 | " \"\"\" A simple function that creates the message to be sent in an email if the conditions are met.\"\"\"\n", 287 | " message = \"\"\n", 288 | " for dic in scraped_data:\n", 289 | " if dic[\"in_stock\"] and dic[\"deal\"]:\n", 290 | " line = f\"The item sold by {dic['seller']} is on sale for {dic['price']} euros @ {dic['url']}\\n\"\n", 291 | " message += line\n", 292 | " return message\n", 293 | "\n", 294 | "\n", 295 | "def send_email(message, config):\n", 296 | " \"\"\" This function sends the actual email should the conditions be met.\"\"\"\n", 297 | " try:\n", 298 | " with open(config) as reader:\n", 299 | " lines = reader.read().splitlines()\n", 300 | " port = 465 # For SSL\n", 301 | " smtp_server = lines[0]\n", 302 | " sender_email = lines[1]\n", 303 | " password = lines[2]\n", 304 | " receiver_email = lines[3]\n", 305 | " print(smtp_server, sender_email, password, receiver_email)\n", 306 | "\n", 307 | " message_to_send = f\"Subject: Price Alert \\n\\n {message}\"\n", 308 | " message_to_send = re.sub(r'[^\\x00-\\x7F]+', ' ', message_to_send) # Quick and dirty regex to remove non ascii chars.\n", 309 | "\n", 310 | " context = ssl.create_default_context()\n", 311 | " with smtplib.SMTP_SSL(smtp_server, port, context=context) as server:\n", 312 | " server.login(sender_email, password)\n", 313 | " server.sendmail(sender_email, receiver_email, message_to_send)\n", 314 | " except Exception as ex:\n", 315 | " logger.warning(f\"Exception caught when trying to send an email @ send_email():{ex}\")\n", 316 | "\n", 317 | "\n", 318 | "def do_insert(rec, config):\n", 319 | " \"\"\" This function inserts the scraped data into our Postgres DB, should an exception occur the function will\n", 320 | " rollback the transaction and continue with the rest.\"\"\"\n", 321 | " try:\n", 322 | " with open(config) as reader:\n", 323 | " lines = reader.read().splitlines()\n", 324 | " db_name = lines[0]\n", 325 | " username = lines[1]\n", 326 | 
" password = lines[2]\n", 327 | " ip_address = lines[3]\n", 328 | " port = lines[4]\n", 329 | " conn = psycopg2.connect(dbname=db_name, user=username, password=password, host=ip_address, port=port)\n", 330 | " cur = conn.cursor()\n", 331 | " except Exception as ex:\n", 332 | " logger.warning(f\"Exception caught when reading config file @ do_insert():{ex}\")\n", 333 | " sys.exit(1)\n", 334 | "\n", 335 | " for dictionary in rec:\n", 336 | " try:\n", 337 | " cols = dictionary.keys()\n", 338 | " cols_str = ','.join(cols)\n", 339 | " values_to_insert = [dictionary[k] for k in cols]\n", 340 | " values_wildcards = ','.join(['%s' for i in range(len(values_to_insert))]) # -> %s,%s,%s,%s,%s,%s,%s\n", 341 | " sql_str = f\"INSERT INTO scraped_data ({cols_str}) VALUES ({values_wildcards}) ON CONFLICT DO NOTHING\"\n", 342 | " cur.execute(sql_str, values_to_insert)\n", 343 | " conn.commit()\n", 344 | " except Exception as ex:\n", 345 | " conn.rollback()\n", 346 | " logger.warning(f\"Exception caught @ do_insert():{ex}\")\n", 347 | " continue\n", 348 | "\n", 349 | "\n", 350 | "def main():\n", 351 | " scraped_data = iterate_webpages(pages_dictionary)\n", 352 | " email = create_message(scraped_data)\n", 353 | " if email:\n", 354 | " send_email(email, config_path)\n", 355 | " do_insert(scraped_data, pg_config_path)\n", 356 | " logger.info(f\"We are done! 
@ {timestamp_of_script}\")\n", 357 | "\n", 358 | "\n", 359 | "if __name__ == \"__main__\":\n", 360 | "\n", 361 | " parser = argparse.ArgumentParser()\n", 362 | " parser.add_argument(\"email_config_file\",\n", 363 | " type=str,\n", 364 | " help=\"a text file with email_config parameters for sending the email\")\n", 365 | " parser.add_argument(\"postgres_config_file\",\n", 366 | " type=str,\n", 367 | " help=\"a text file with email_config parameters connecting to our postgres db\")\n", 368 | " args = parser.parse_args()\n", 369 | " pwd = os.getcwd()\n", 370 | " config_path = os.path.join(pwd, args.email_config_file)\n", 371 | " pg_config_path = os.path.join(pwd, args.postgres_config_file)\n", 372 | "\n", 373 | " main()" 374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "metadata": {}, 379 | "source": [ 380 | "Use Crontab in Linux to execute the script every 30 minutes" 381 | ] 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": null, 386 | "metadata": {}, 387 | "outputs": [], 388 | "source": [] 389 | } 390 | ], 391 | "metadata": { 392 | "kernelspec": { 393 | "display_name": "Python 3 (ipykernel)", 394 | "language": "python", 395 | "name": "python3" 396 | }, 397 | "language_info": { 398 | "codemirror_mode": { 399 | "name": "ipython", 400 | "version": 3 401 | }, 402 | "file_extension": ".py", 403 | "mimetype": "text/x-python", 404 | "name": "python", 405 | "nbconvert_exporter": "python", 406 | "pygments_lexer": "ipython3", 407 | "version": "3.8.10" 408 | } 409 | }, 410 | "nbformat": 4, 411 | "nbformat_minor": 4 412 | } 413 | -------------------------------------------------------------------------------- /Notebooks/06_Spatial_SQLite.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Introduction to Databases\n", 8 | "\n", 9 | "### Using Spatial Extensions - SQLite\n", 10 | "\n", 11 | "Based in 
[this](https://geoalchemy-2.readthedocs.io/en/latest/orm_tutorial.html#orm-tutorial) and [this](https://geoalchemy-2.readthedocs.io/en/latest/spatialite_tutorial.html) tutorials" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "https://ubuntu.pkgs.org/18.04/ubuntu-universe-amd64/libsqlite3-mod-spatialite_4.3.0a-5build1_amd64.deb.html \n", 19 | "https://zoomadmin.com/HowToInstall/UbuntuPackage/spatialite-bin \n", 20 | "\n", 21 | "! sudo apt-get install libsqlite3-mod-spatialite spatialite-bin \n", 22 | "! pip install -U sqlalchemy geoalchemy geoalchemy2" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "### Connect to the DB\n", 30 | "\n", 31 | "Just like when using PostGIS connecting to a SpatiaLite database requires an Engine. This is how you create one for SpatiaLite:" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 1, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import sqlalchemy\n", 41 | "from sqlalchemy import create_engine, Column, Integer, String\n", 42 | "from sqlalchemy.event import listen\n", 43 | "from sqlalchemy.sql import select, func\n", 44 | "from sqlalchemy.ext.declarative import declarative_base\n", 45 | "from sqlalchemy.orm import sessionmaker\n", 46 | "from sqlalchemy.orm import relationship, backref\n", 47 | "\n", 48 | "from geoalchemy2 import Geometry, WKTElement" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "def load_spatialite(dbapi_conn, connection_record):\n", 58 | " dbapi_conn.enable_load_extension(True)\n", 59 | " dbapi_conn.load_extension('/usr/lib/x86_64-linux-gnu/mod_spatialite.so')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "file in use or not found\n" 72 | ] 
73 | } 74 | ], 75 | "source": [ 76 | "try:\n", 77 | " os.remove('../SampleDBs/gis.sqlite')\n", 78 | " print(\"removed file\")\n", 79 | "except:\n", 80 | " print(\"file in use or not found\")" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 4, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "#engine = create_engine('postgresql://gis:gis@localhost/gis', echo=True)\n", 90 | "engine = create_engine('sqlite:///../SampleDBs/gis.sqlite', echo=True)\n", 91 | "listen(engine, 'connect', load_spatialite)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "The call to create_engine creates an engine bound to the database file gis.db. After that a connect listener is registered on the engine. The listener is responsible for loading the SpatiaLite extension, which is a necessary operation for using SpatiaLite through SQL.\n", 99 | "\n", 100 | "At this point you can test that you are able to connect to the database:" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 5, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "conn = engine.connect()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "One additional step is required for using SpatiaLite: create the geometry_columns and spatial_ref_sys metadata tables. This is done by calling SpatiaLite’s InitSpatialMetaData function: \n", 117 | "Note that this operation may take some time the first time it is executed for a database. 
When InitSpatialMetaData is executed again it will report an error (that can be ignored) " 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 6, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "2021-10-28 17:26:29,566 INFO sqlalchemy.engine.Engine SELECT InitSpatialMetaData() AS \"InitSpatialMetaData_1\"\n", 130 | "2021-10-28 17:26:29,567 INFO sqlalchemy.engine.Engine [generated in 0.00104s] ()\n" 131 | ] 132 | }, 133 | { 134 | "data": { 135 | "text/plain": [ 136 | "" 137 | ] 138 | }, 139 | "execution_count": 6, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "conn.execute(select([func.InitSpatialMetaData()]))" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "Before going further we can close the current connection:" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 7, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "conn.close()" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "### Declare a Mapping\n", 169 | "\n", 170 | "Now that we have a working connection we can go ahead and create a mapping between a Python class and a database table.\n", 171 | "When using the ORM, the configurational process starts by describing the database tables we’ll be dealing with, and then by defining our own classes which will be mapped to those tables. In modern SQLAlchemy, these two tasks are usually performed together, using a system known as Declarative, which allows us to create classes that include directives to describe the actual database table they will be mapped to." 
172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 8, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "Base = declarative_base()\n", 181 | "\n", 182 | "class Lake(Base):\n", 183 | " __tablename__ = 'lake'\n", 184 | " id = Column(Integer, primary_key=True)\n", 185 | " name = Column(String)\n", 186 | " geom = Column(Geometry(geometry_type='POLYGON', management=True))" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "The Lake class establishes details about the table being mapped, including the name of the table denoted by __tablename__, and three columns id, name, and geom. The id column will be the primary key of the table. The geom column is a geoalchemy2.types.Geometry column whose geometry_type is POLYGON.\n", 194 | "\n", 195 | "Setting management to True indicates that the AddGeometryColumn and DiscardGeometryColumn management functions will be used for the creation and removal of the geometry column. This is required with SpatiaLite.\n", 196 | "\n", 197 | "### Create the Table in the Database\n", 198 | "\n", 199 | "We can now create the lake table in the gis.sqlite database:" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 9, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "2021-10-28 18:47:32,768 INFO sqlalchemy.engine.Engine BEGIN (implicit)\n", 212 | "2021-10-28 18:47:32,771 INFO sqlalchemy.engine.Engine \n", 213 | "CREATE TABLE lake (\n", 214 | "\tid INTEGER NOT NULL, \n", 215 | "\tname VARCHAR, \n", 216 | "\tPRIMARY KEY (id)\n", 217 | ")\n", 218 | "\n", 219 | "\n", 220 | "2021-10-28 18:47:32,773 INFO sqlalchemy.engine.Engine [no key 0.00249s] ()\n", 221 | "2021-10-28 18:47:33,030 INFO sqlalchemy.engine.Engine SELECT AddGeometryColumn(?, ?, ?, ?, ?, ?) 
AS \"AddGeometryColumn_1\"\n", 222 | "2021-10-28 18:47:33,032 INFO sqlalchemy.engine.Engine [no key 0.00170s] ('lake', 'geom', -1, 'POLYGON', 2, 0)\n", 223 | "2021-10-28 18:47:35,115 INFO sqlalchemy.engine.Engine SELECT CreateSpatialIndex(?, ?) AS \"CreateSpatialIndex_1\"\n", 224 | "2021-10-28 18:47:35,117 INFO sqlalchemy.engine.Engine [generated in 0.00165s] ('lake', 'geom')\n", 225 | "2021-10-28 18:47:41,951 INFO sqlalchemy.engine.Engine COMMIT\n" 226 | ] 227 | } 228 | ], 229 | "source": [ 230 | "Lake.__table__.create(engine)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 10, 236 | "metadata": {}, 237 | "outputs": [ 238 | { 239 | "data": { 240 | "text/plain": [ 241 | "Table('lake', MetaData(), Column('id', Integer(), table=, primary_key=True, nullable=False), Column('name', String(), table=), Column('geom', Geometry(geometry_type='POLYGON', management=True, from_text='ST_GeomFromEWKT', name='geometry'), table=), schema=None)" 242 | ] 243 | }, 244 | "execution_count": 10, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "Lake.__table__" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "If we wanted to drop the table we’d use this. There’s nothing specific to SpatiaLite here." 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 11, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "#Lake.__table__.drop(engine)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "### Create a Session\n", 274 | "\n", 275 | "When using the SQLAlchemy ORM the ORM interacts with the database through a Session." 
276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 12, 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [ 284 | "Session = sessionmaker(bind=engine)\n", 285 | "session = Session()" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "### Add New Objects\n", 293 | "\n", 294 | "We can now create and insert new Lake objects into the database, the same way we’d do it using GeoAlchemy 2 with PostGIS." 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 13, 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "name": "stdout", 304 | "output_type": "stream", 305 | "text": [ 306 | "2021-10-28 18:47:43,751 INFO sqlalchemy.engine.Engine BEGIN (implicit)\n", 307 | "2021-10-28 18:47:43,758 INFO sqlalchemy.engine.Engine INSERT INTO lake (name, geom) VALUES (?, GeomFromEWKT(?))\n", 308 | "2021-10-28 18:47:43,759 INFO sqlalchemy.engine.Engine [generated in 0.00154s] ('Majeur', 'POLYGON((0 0,1 0,1 1,0 1,0 0))')\n", 309 | "2021-10-28 18:47:43,764 INFO sqlalchemy.engine.Engine COMMIT\n" 310 | ] 311 | } 312 | ], 313 | "source": [ 314 | "lake = Lake(name='Majeur', geom='POLYGON((0 0,1 0,1 1,0 1,0 0))')\n", 315 | "session.add(lake)\n", 316 | "session.commit()" 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "We can now query the database for Majeur:" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 14, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "name": "stdout", 333 | "output_type": "stream", 334 | "text": [ 335 | "2021-10-28 18:47:44,633 INFO sqlalchemy.engine.Engine BEGIN (implicit)\n", 336 | "2021-10-28 18:47:44,635 INFO sqlalchemy.engine.Engine SELECT lake.id AS lake_id, lake.name AS lake_name, AsEWKB(lake.geom) AS lake_geom \n", 337 | "FROM lake \n", 338 | "WHERE lake.name = ?\n", 339 | " LIMIT ? 
OFFSET ?\n", 340 | "2021-10-28 18:47:44,636 INFO sqlalchemy.engine.Engine [generated in 0.00047s] ('Majeur', 1, 0)\n" 341 | ] 342 | }, 343 | { 344 | "data": { 345 | "text/plain": [ 346 | "'Majeur'" 347 | ] 348 | }, 349 | "execution_count": 14, 350 | "metadata": {}, 351 | "output_type": "execute_result" 352 | } 353 | ], 354 | "source": [ 355 | "our_lake = session.query(Lake).filter_by(name='Majeur').first()\n", 356 | "our_lake.name" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 15, 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "data": { 366 | "text/plain": [ 367 | "" 368 | ] 369 | }, 370 | "execution_count": 15, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "our_lake.geom" 377 | ] 378 | }, 379 | { 380 | "cell_type": "markdown", 381 | "metadata": {}, 382 | "source": [ 383 | "our_lake.geom is a geoalchemy2.elements.WKBElement, which is a type provided by GeoAlchemy. geoalchemy2.elements. \n", 384 | "WKBElement wraps a WKB value returned by the database."
385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 16, 390 | "metadata": {}, 391 | "outputs": [ 392 | { 393 | "data": { 394 | "text/plain": [ 395 | "1" 396 | ] 397 | }, 398 | "execution_count": 16, 399 | "metadata": {}, 400 | "output_type": "execute_result" 401 | } 402 | ], 403 | "source": [ 404 | "our_lake.id" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "metadata": {}, 410 | "source": [ 411 | "Let’s add more lakes:" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": 17, 417 | "metadata": {}, 418 | "outputs": [ 419 | { 420 | "name": "stdout", 421 | "output_type": "stream", 422 | "text": [ 423 | "2021-10-28 18:47:45,550 INFO sqlalchemy.engine.Engine INSERT INTO lake (name, geom) VALUES (?, GeomFromEWKT(?))\n", 424 | "2021-10-28 18:47:45,552 INFO sqlalchemy.engine.Engine [cached since 1.794s ago] ('Garde', 'POLYGON((1 0,3 0,3 2,1 2,1 0))')\n", 425 | "2021-10-28 18:47:45,555 INFO sqlalchemy.engine.Engine INSERT INTO lake (name, geom) VALUES (?, GeomFromEWKT(?))\n", 426 | "2021-10-28 18:47:45,557 INFO sqlalchemy.engine.Engine [cached since 1.8s ago] ('Orta', 'POLYGON((3 0,6 0,6 3,3 3,3 0))')\n", 427 | "2021-10-28 18:47:45,560 INFO sqlalchemy.engine.Engine COMMIT\n" 428 | ] 429 | } 430 | ], 431 | "source": [ 432 | "session.add_all([Lake(name='Garde', geom='POLYGON((1 0,3 0,3 2,1 2,1 0))'),\n", 433 | " Lake(name='Orta', geom='POLYGON((3 0,6 0,6 3,3 3,3 0))')\n", 434 | " ])\n", 435 | "session.commit()" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": {}, 441 | "source": [ 442 | "### Query\n", 443 | "\n", 444 | "#### Let’s make a simple, non-spatial, query:" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 18, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "name": "stdout", 454 | "output_type": "stream", 455 | "text": [ 456 | "2021-10-28 18:47:46,295 INFO sqlalchemy.engine.Engine BEGIN (implicit)\n", 457 | "2021-10-28 18:47:46,297 INFO 
sqlalchemy.engine.Engine SELECT lake.id AS lake_id, lake.name AS lake_name, AsEWKB(lake.geom) AS lake_geom \n", 458 | "FROM lake ORDER BY lake.name\n", 459 | "2021-10-28 18:47:46,297 INFO sqlalchemy.engine.Engine [generated in 0.00060s] ()\n" 460 | ] 461 | }, 462 | { 463 | "data": { 464 | "text/plain": [ 465 | "['Garde', 'Majeur', 'Orta']" 466 | ] 467 | }, 468 | "execution_count": 18, 469 | "metadata": {}, 470 | "output_type": "execute_result" 471 | } 472 | ], 473 | "source": [ 474 | "query = session.query(Lake).order_by(Lake.name)\n", 475 | "\n", 476 | "#for lake in query:\n", 477 | "# print(lake.name)\n", 478 | "\n", 479 | "[l.name for l in query]" 480 | ] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "metadata": {}, 485 | "source": [ 486 | "#### Now a spatial query:" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": 19, 492 | "metadata": {}, 493 | "outputs": [ 494 | { 495 | "name": "stdout", 496 | "output_type": "stream", 497 | "text": [ 498 | "2021-10-28 18:47:46,601 INFO sqlalchemy.engine.Engine SELECT lake.id AS lake_id, lake.name AS lake_name, AsEWKB(lake.geom) AS lake_geom \n", 499 | "FROM lake \n", 500 | "WHERE ST_Contains(lake.geom, ?)\n", 501 | "2021-10-28 18:47:46,603 INFO sqlalchemy.engine.Engine [no key 0.00184s] ('POINT(4 1)',)\n" 502 | ] 503 | }, 504 | { 505 | "data": { 506 | "text/plain": [ 507 | "['Majeur', 'Garde', 'Orta']" 508 | ] 509 | }, 510 | "execution_count": 19, 511 | "metadata": {}, 512 | "output_type": "execute_result" 513 | } 514 | ], 515 | "source": [ 516 | "query = session.query(Lake).filter(func.ST_Contains(Lake.geom, 'POINT(4 1)'))\n", 517 | "\n", 518 | "[l.name for l in query]" 519 | ] 520 | }, 521 | { 522 | "cell_type": "markdown", 523 | "metadata": {}, 524 | "source": [ 525 | "Alternatively: Here the ST_Contains function is applied to the Lake.geom column property. In that case the column property is actually passed to the function, as its first argument."
526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 20, 531 | "metadata": {}, 532 | "outputs": [ 533 | { 534 | "name": "stdout", 535 | "output_type": "stream", 536 | "text": [ 537 | "2021-10-28 18:47:47,511 INFO sqlalchemy.engine.Engine SELECT lake.id AS lake_id, lake.name AS lake_name, AsEWKB(lake.geom) AS lake_geom \n", 538 | "FROM lake \n", 539 | "WHERE ST_Contains(lake.geom, ?)\n", 540 | "2021-10-28 18:47:47,512 INFO sqlalchemy.engine.Engine [no key 0.00089s] ('POINT(4 1)',)\n" 541 | ] 542 | }, 543 | { 544 | "data": { 545 | "text/plain": [ 546 | "['Majeur', 'Garde', 'Orta']" 547 | ] 548 | }, 549 | "execution_count": 20, 550 | "metadata": {}, 551 | "output_type": "execute_result" 552 | } 553 | ], 554 | "source": [ 555 | "query = session.query(Lake).filter(Lake.geom.ST_Contains('POINT(4 1)')) \n", 556 | "\n", 557 | "[l.name for l in query]" 558 | ] 559 | }, 560 | { 561 | "cell_type": "markdown", 562 | "metadata": {}, 563 | "source": [ 564 | "Here’s another spatial query, using ST_Intersects this time:" 565 | ] 566 | }, 567 | { 568 | "cell_type": "code", 569 | "execution_count": 21, 570 | "metadata": {}, 571 | "outputs": [ 572 | { 573 | "name": "stdout", 574 | "output_type": "stream", 575 | "text": [ 576 | "2021-10-28 18:47:48,737 INFO sqlalchemy.engine.Engine SELECT lake.id AS lake_id, lake.name AS lake_name, AsEWKB(lake.geom) AS lake_geom \n", 577 | "FROM lake \n", 578 | "WHERE ST_Intersects(lake.geom, ST_GeomFromText(?, ?))\n", 579 | "2021-10-28 18:47:48,738 INFO sqlalchemy.engine.Engine [no key 0.00193s] ('LINESTRING(2 1,4 1)', -1)\n" 580 | ] 581 | }, 582 | { 583 | "data": { 584 | "text/plain": [ 585 | "['Garde', 'Orta']" 586 | ] 587 | }, 588 | "execution_count": 21, 589 | "metadata": {}, 590 | "output_type": "execute_result" 591 | } 592 | ], 593 | "source": [ 594 | "query = session.query(Lake).filter(Lake.geom.ST_Intersects(WKTElement('LINESTRING(2 1,4 1)')))\n", 595 | "\n", 596 | "[l.name for l in query]" 597 | ] 598 | }, 599 | { 600 
| "cell_type": "markdown", 601 | "metadata": {}, 602 | "source": [ 603 | "We can also apply relationship functions to geoalchemy2.elements.WKBElement. For example:" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": 22, 609 | "metadata": {}, 610 | "outputs": [ 611 | { 612 | "name": "stdout", 613 | "output_type": "stream", 614 | "text": [ 615 | "2021-10-28 18:47:49,511 INFO sqlalchemy.engine.Engine SELECT lake.id AS lake_id, lake.name AS lake_name, AsEWKB(lake.geom) AS lake_geom \n", 616 | "FROM lake \n", 617 | "WHERE lake.name = ?\n", 618 | "2021-10-28 18:47:49,513 INFO sqlalchemy.engine.Engine [generated in 0.00188s] ('Garde',)\n", 619 | "2021-10-28 18:47:49,518 INFO sqlalchemy.engine.Engine SELECT ST_Intersects(GeomFromEWKB(?), ST_GeomFromText(?, ?)) AS \"ST_Intersects_1\"\n", 620 | "2021-10-28 18:47:49,519 INFO sqlalchemy.engine.Engine [no key 0.00065s] ('0103000020FFFFFFFF0100000005000000000000000000F03F00000000000000000000000000000840000000000000000000000000000008400000000000000040000000000000F03F0000000000000040000000000000F03F0000000000000000', 'LINESTRING(2 1,4 1)', -1)\n", 621 | "1\n" 622 | ] 623 | } 624 | ], 625 | "source": [ 626 | "lake = session.query(Lake).filter_by(name='Garde').one()\n", 627 | "print(session.scalar(lake.geom.ST_Intersects(WKTElement('LINESTRING(2 1,4 1)'))))" 628 | ] 629 | }, 630 | { 631 | "cell_type": "markdown", 632 | "metadata": {}, 633 | "source": [ 634 | "session.scalar allows executing a clause and returning a scalar value (an integer value in this case).\n", 635 | "\n", 636 | "The value 1 indicates that the lake “Garde” does intersect the LINESTRING(2 1,4 1) geometry \n", 637 | "\n", 638 | "The GeoAlchemy functions all start with ST_. Operators are also called as functions, but the function names don’t include the ST_ prefix. \n", 639 | "As an example let’s test whether the bounding boxes of geometries intersect.
GeoAlchemy provides the intersects function for that:" 640 | ] 641 | }, 642 | { 643 | "cell_type": "code", 644 | "execution_count": 24, 645 | "metadata": {}, 646 | "outputs": [ 647 | { 648 | "name": "stdout", 649 | "output_type": "stream", 650 | "text": [ 651 | "file in use or not found\n" 652 | ] 653 | } 654 | ], 655 | "source": [ 656 | "try:\n", 657 | " os.remove('../SampleDBs/gis.sqlite')\n", 658 | " print(\"removed file\")\n", 659 | "except:\n", 660 | " print(\"file in use or not found\")" 661 | ] 662 | }, 663 | { 664 | "cell_type": "code", 665 | "execution_count": null, 666 | "metadata": {}, 667 | "outputs": [], 668 | "source": [] 669 | } 670 | ], 671 | "metadata": { 672 | "kernelspec": { 673 | "display_name": "Python 3 (ipykernel)", 674 | "language": "python", 675 | "name": "python3" 676 | }, 677 | "language_info": { 678 | "codemirror_mode": { 679 | "name": "ipython", 680 | "version": 3 681 | }, 682 | "file_extension": ".py", 683 | "mimetype": "text/x-python", 684 | "name": "python", 685 | "nbconvert_exporter": "python", 686 | "pygments_lexer": "ipython3", 687 | "version": "3.8.10" 688 | } 689 | }, 690 | "nbformat": 4, 691 | "nbformat_minor": 4 692 | } 693 | -------------------------------------------------------------------------------- /Notebooks/09_MySQL_JSON.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Introduction to Databases\n", 8 | "\n", 9 | "### Using JSON fields in MySQL\n", 10 | "\n", 11 | "Sources: [here](https://www.sitepoint.com/use-json-data-fields-mysql-databases/) and [here](https://dev.mysql.com/doc/refman/5.7/en/json-search-functions.html) \n", 12 | "For another example using PostgreSQL, see [here](https://www.postgresqltutorial.com/postgresql-json/) \n", 13 | "JSONPATH syntax reference [here](https://support.smartbear.com/alertsite/docs/monitors/api/endpoint/jsonpath.html) " 14 | ] 15 | }, 16 | { 17
| "cell_type": "markdown", 18 | "metadata": {}, 19 | "source": [ 20 | "The line between SQL and NoSQL databases has become increasingly blurred, with each camp adopting features from the other. MySQL 5.7+ InnoDB databases and PostgreSQL 9.2+ both directly support JSON document types in a single field. In this article, we’ll examine the MySQL 8.0 JSON implementation in more detail.\n", 21 | "\n", 22 | "Just Because You Can Store JSON it doesn’t follow you should. But there are some use cases, especially those tackled by NoSQL databases\n", 23 | "\n", 24 | "Normalization is a technique used to optimize the database structure. The First Normal Form (1NF) rule governs that every column should hold a single value — which is clearly broken by storing multi-value JSON documents." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# load the ipython-sql extension\n", 34 | "%load_ext sql" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdin", 44 | "output_type": "stream", 45 | "text": [ 46 | " ·········\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "import getpass\n", 52 | "\n", 53 | "password = getpass.getpass()\n", 54 | "user = 'root'\n", 55 | "database = 'employees'\n", 56 | "\n", 57 | "connection_string = f\"mysql+pymysql://{user}:{password}@localhost:3306/{database}\"\n", 58 | " \n", 59 | "%sql $connection_string" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "#### Create a Table With a JSON Field\n", 67 | "\n", 68 | "Consider a shop selling books. All books have an ID, ISBN, title, publisher, number of pages and other clear relational data. Presume you want to add any number of category tags to each book. 
You could achieve this in SQL using: \n", 69 | "\n", 70 | "+ a tag table which stored each tag name with a unique ID, and\n", 71 | "+ a tagmap table with many-to-many records mapping book IDs to tag IDs\n", 72 | "\n", 73 | "It’ll work, but it’s cumbersome and considerable effort for a minor feature. Therefore, you can define a tags JSON field in your MySQL database’s book table:" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 3, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 86 | "0 rows affected.\n" 87 | ] 88 | }, 89 | { 90 | "data": { 91 | "text/plain": [ 92 | "[]" 93 | ] 94 | }, 95 | "execution_count": 3, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "%%sql\n", 102 | "\n", 103 | "CREATE TABLE `book` (\n", 104 | " `id` INT(8) UNSIGNED NOT NULL AUTO_INCREMENT,\n", 105 | " `title` VARCHAR(200) NOT NULL,\n", 106 | " `tags` JSON DEFAULT NULL,\n", 107 | " PRIMARY KEY (`id`)\n", 108 | ") ENGINE=INNODB;" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "Note that JSON columns can’t have a default value, be used as a primary key, be used as a foreign key, or have an index. You can create secondary indexes on generated virtual columns, but it’s easier and more practical to retain a value in a separate field if indexes are required.\n", 116 | "\n", 117 | "### Adding JSON Data\n", 118 | "\n", 119 | "Whole JSON documents can be passed in INSERT or UPDATE statements. 
For example, our book tags can be passed as an array (inside a string):" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 4, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 132 | "1 rows affected.\n" 133 | ] 134 | }, 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "[]" 139 | ] 140 | }, 141 | "execution_count": 4, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "%%sql\n", 148 | "\n", 149 | "INSERT INTO `book` (`title`, `tags`)\n", 150 | "VALUES (\n", 151 | " 'ECMAScript 2015: A SitePoint Anthology',\n", 152 | " '[\"JavaScript\", \"ES2015\", \"JSON\"]'\n", 153 | ");" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "#### JSON can also be created with these:\n", 161 | "\n", 162 | "+ JSON_ARRAY() function, which creates arrays. \n", 163 | "+ JSON_OBJECT() function, which creates objects.\n", 164 | "+ JSON_QUOTE() function, which quotes a string as a JSON value.\n", 165 | "+ or you can (CAST anyValue AS JSON). \n", 166 | "\n", 167 | "For example:" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 5, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 180 | "1 rows affected.\n" 181 | ] 182 | }, 183 | { 184 | "data": { 185 | "text/html": [ 186 | "\n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | "
JSON_ARRAY(1, 2, 'abc' )
[1, 2, "abc"]
" 194 | ], 195 | "text/plain": [ 196 | "[('[1, 2, \"abc\"]',)]" 197 | ] 198 | }, 199 | "execution_count": 5, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "## returns [1, 2, \"abc\"]:\n", 206 | "\n", 207 | "%sql SELECT JSON_ARRAY(1, 2, 'abc');" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 6, 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "name": "stdout", 217 | "output_type": "stream", 218 | "text": [ 219 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 220 | "1 rows affected.\n" 221 | ] 222 | }, 223 | { 224 | "data": { 225 | "text/html": [ 226 | "\n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | "
JSON_OBJECT('a', 1, 'b' , 2)
{"a": 1, "b": 2}
" 234 | ], 235 | "text/plain": [ 236 | "[('{\"a\": 1, \"b\": 2}',)]" 237 | ] 238 | }, 239 | "execution_count": 6, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "## returns {\"a\": 1, \"b\": 2}:\n", 246 | "\n", 247 | "%sql SELECT JSON_OBJECT('a', 1, 'b', 2);" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 7, 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "name": "stdout", 257 | "output_type": "stream", 258 | "text": [ 259 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 260 | "1 rows affected.\n" 261 | ] 262 | }, 263 | { 264 | "data": { 265 | "text/html": [ 266 | "\n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | "
JSON_QUOTE('[1, 2, "abc" ]')
"[1, 2, \\"abc\\" ]"
" 274 | ], 275 | "text/plain": [ 276 | "[('\"[1, 2, \\\\\"abc\\\\\" ]\"',)]" 277 | ] 278 | }, 279 | "execution_count": 7, 280 | "metadata": {}, 281 | "output_type": "execute_result" 282 | } 283 | ], 284 | "source": [ 285 | "## returns \"[1, 2, \\\"abc\\\"]\":\n", 286 | "\n", 287 | "%sql SELECT JSON_QUOTE('[1, 2, \"abc\"]');" 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": {}, 293 | "source": [ 294 | "#### The JSON_TYPE() function allows you to check JSON value types. It should return OBJECT, ARRAY, a scalar type (INTEGER, BOOLEAN, etc), NULL, or an error. \n", 295 | "\n", 296 | "For example:" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 8, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "name": "stdout", 306 | "output_type": "stream", 307 | "text": [ 308 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 309 | "1 rows affected.\n" 310 | ] 311 | }, 312 | { 313 | "data": { 314 | "text/html": [ 315 | "\n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | "
JSON_TYPE('[1, 2, "abc" ]')
ARRAY
" 323 | ], 324 | "text/plain": [ 325 | "[('ARRAY',)]" 326 | ] 327 | }, 328 | "execution_count": 8, 329 | "metadata": {}, 330 | "output_type": "execute_result" 331 | } 332 | ], 333 | "source": [ 334 | "## returns ARRAY:\n", 335 | "\n", 336 | "%sql SELECT JSON_TYPE('[1, 2, \"abc\"]');" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 9, 342 | "metadata": {}, 343 | "outputs": [ 344 | { 345 | "name": "stdout", 346 | "output_type": "stream", 347 | "text": [ 348 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 349 | "1 rows affected.\n" 350 | ] 351 | }, 352 | { 353 | "data": { 354 | "text/html": [ 355 | "\n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | "
JSON_TYPE('{"a": 1, "b" : 2}')
OBJECT
" 363 | ], 364 | "text/plain": [ 365 | "[('OBJECT',)]" 366 | ] 367 | }, 368 | "execution_count": 9, 369 | "metadata": {}, 370 | "output_type": "execute_result" 371 | } 372 | ], 373 | "source": [ 374 | "## returns OBJECT:\n", 375 | "\n", 376 | "%sql SELECT JSON_TYPE('{\"a\": 1, \"b\": 2}');" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 10, 382 | "metadata": {}, 383 | "outputs": [ 384 | { 385 | "name": "stdout", 386 | "output_type": "stream", 387 | "text": [ 388 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 389 | "(pymysql.err.OperationalError) (3141, 'Invalid JSON text in argument 1 to function json_type: \"Missing a comma or \\'}\\' after an object member.\" at position 16.')\n", 390 | "[SQL: SELECT JSON_TYPE('{\"a\": 1, \"b\" : 2');]\n", 391 | "(Background on this error at: https://sqlalche.me/e/14/e3q8)\n" 392 | ] 393 | } 394 | ], 395 | "source": [ 396 | "## returns an error:\n", 397 | "\n", 398 | "%sql SELECT JSON_TYPE('{\"a\": 1, \"b\": 2');" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": {}, 404 | "source": [ 405 | "#### The JSON_VALID() function returns 1 if the JSON is valid or 0 otherwise:" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 11, 411 | "metadata": {}, 412 | "outputs": [ 413 | { 414 | "name": "stdout", 415 | "output_type": "stream", 416 | "text": [ 417 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 418 | "1 rows affected.\n" 419 | ] 420 | }, 421 | { 422 | "data": { 423 | "text/html": [ 424 | "\n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | "
JSON_VALID('[1, 2, "abc" ]')
1
" 432 | ], 433 | "text/plain": [ 434 | "[(1,)]" 435 | ] 436 | }, 437 | "execution_count": 11, 438 | "metadata": {}, 439 | "output_type": "execute_result" 440 | } 441 | ], 442 | "source": [ 443 | "## returns 1:\n", 444 | "%sql SELECT JSON_VALID('[1, 2, \"abc\"]');" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 12, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "name": "stdout", 454 | "output_type": "stream", 455 | "text": [ 456 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 457 | "1 rows affected.\n" 458 | ] 459 | }, 460 | { 461 | "data": { 462 | "text/html": [ 463 | "\n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | "
JSON_VALID('{"a": 1, "b" : 2}')
1
" 471 | ], 472 | "text/plain": [ 473 | "[(1,)]" 474 | ] 475 | }, 476 | "execution_count": 12, 477 | "metadata": {}, 478 | "output_type": "execute_result" 479 | } 480 | ], 481 | "source": [ 482 | "## returns 1:\n", 483 | "%sql SELECT JSON_VALID('{\"a\": 1, \"b\": 2}');" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 13, 489 | "metadata": {}, 490 | "outputs": [ 491 | { 492 | "name": "stdout", 493 | "output_type": "stream", 494 | "text": [ 495 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 496 | "1 rows affected.\n" 497 | ] 498 | }, 499 | { 500 | "data": { 501 | "text/html": [ 502 | "\n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | "
JSON_VALID('{"a": 1, "b" : 2')
0
" 510 | ], 511 | "text/plain": [ 512 | "[(0,)]" 513 | ] 514 | }, 515 | "execution_count": 13, 516 | "metadata": {}, 517 | "output_type": "execute_result" 518 | } 519 | ], 520 | "source": [ 521 | "## returns 0:\n", 522 | "%sql SELECT JSON_VALID('{\"a\": 1, \"b\": 2');" 523 | ] 524 | }, 525 | { 526 | "cell_type": "markdown", 527 | "metadata": {}, 528 | "source": [ 529 | "Attempting to insert an invalid JSON document will raise an error and the whole record will not be inserted/updated. \n", 530 | "\n", 531 | "### Searching JSON Data\n", 532 | "\n", 533 | "#### The JSON_CONTAINS() function accepts the JSON document being searched and another to compare against. It returns 1 when a match is found. \n", 534 | "\n", 535 | "For example:" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": 14, 541 | "metadata": {}, 542 | "outputs": [ 543 | { 544 | "name": "stdout", 545 | "output_type": "stream", 546 | "text": [ 547 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 548 | "1 rows affected.\n", 549 | "1 rows affected.\n" 550 | ] 551 | }, 552 | { 553 | "data": { 554 | "text/html": [ 555 | "\n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | "
idtitletags
1ECMAScript 2015: A SitePoint Anthology["JavaScript", "ES2015", "JSON"]
" 567 | ], 568 | "text/plain": [ 569 | "[(1, 'ECMAScript 2015: A SitePoint Anthology', '[\"JavaScript\", \"ES2015\", \"JSON\"]')]" 570 | ] 571 | }, 572 | "execution_count": 14, 573 | "metadata": {}, 574 | "output_type": "execute_result" 575 | } 576 | ], 577 | "source": [ 578 | "## all books with the 'JavaScript' tag:\n", 579 | "\n", 580 | "%sql SELECT JSON_VALID('{\"a\": 1, \"b\": 2');SELECT * FROM `book` WHERE JSON_CONTAINS(tags, '[\"JavaScript\"]');" 581 | ] 582 | }, 583 | { 584 | "cell_type": "markdown", 585 | "metadata": {}, 586 | "source": [ 587 | "The similar JSON_SEARCH() function returns the path to the given match or NULL when there’s no match. \n", 588 | "\n", 589 | "It’s passed the JSON document being searched, **'one'** to find the first match, or **'all'** to find all matches, and a search string (where % matches any number of characters and _ matches one character in an identical way to LIKE). \n", 590 | "\n", 591 | "For example:" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": 15, 597 | "metadata": {}, 598 | "outputs": [ 599 | { 600 | "name": "stdout", 601 | "output_type": "stream", 602 | "text": [ 603 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 604 | "1 rows affected.\n" 605 | ] 606 | }, 607 | { 608 | "data": { 609 | "text/html": [ 610 | "\n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | "
idtitletags
1ECMAScript 2015: A SitePoint Anthology["JavaScript", "ES2015", "JSON"]
" 622 | ], 623 | "text/plain": [ 624 | "[(1, 'ECMAScript 2015: A SitePoint Anthology', '[\"JavaScript\", \"ES2015\", \"JSON\"]')]" 625 | ] 626 | }, 627 | "execution_count": 15, 628 | "metadata": {}, 629 | "output_type": "execute_result" 630 | } 631 | ], 632 | "source": [ 633 | "## all books with tags starting 'Java':\n", 634 | "\n", 635 | "%sql SELECT * FROM `book` WHERE JSON_SEARCH(tags, 'one', 'Java%') IS NOT NULL;" 636 | ] 637 | }, 638 | { 639 | "cell_type": "markdown", 640 | "metadata": {}, 641 | "source": [ 642 | "### [JSON Paths](https://jsonpath.com/) \n", 643 | "\n", 644 | "A JSON path targets values and can be used to extract or modify parts of a JSON document. The JSON_EXTRACT() function demonstrates this by extracting one or more values:" 645 | ] 646 | }, 647 | { 648 | "cell_type": "code", 649 | "execution_count": 16, 650 | "metadata": {}, 651 | "outputs": [ 652 | { 653 | "name": "stdout", 654 | "output_type": "stream", 655 | "text": [ 656 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 657 | "1 rows affected.\n" 658 | ] 659 | }, 660 | { 661 | "data": { 662 | "text/html": [ 663 | "\n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | "
JSON_EXTRACT('{"id": 1, "website" : "SitePoint" }', '$.website' )
"SitePoint"
" 671 | ], 672 | "text/plain": [ 673 | "[('\"SitePoint\"',)]" 674 | ] 675 | }, 676 | "execution_count": 16, 677 | "metadata": {}, 678 | "output_type": "execute_result" 679 | } 680 | ], 681 | "source": [ 682 | "## returns \"SitePoint\":\n", 683 | "\n", 684 | "%sql SELECT JSON_EXTRACT('{\"id\": 1, \"website\": \"SitePoint\"}', '$.website');" 685 | ] 686 | }, 687 | { 688 | "cell_type": "markdown", 689 | "metadata": {}, 690 | "source": [ 691 | "All path definitions start with a $ followed by other selectors:\n", 692 | "\n", 693 | "+ a period followed by a name, such as $.website\n", 694 | "+ [N] where N is the position in a zero-indexed array\n", 695 | "+ the .[*] wildcard evaluates all members of an object\n", 696 | "+ the [*] wildcard evaluates all members of an array\n", 697 | "+ the prefix**suffix wildcard evaluates to all paths that begin with the named prefix and end with the named suffix\n", 698 | "\n", 699 | "The following examples refer to the following JSON document:" 700 | ] 701 | }, 702 | { 703 | "cell_type": "code", 704 | "execution_count": 17, 705 | "metadata": {}, 706 | "outputs": [ 707 | { 708 | "data": { 709 | "text/plain": [ 710 | "{'a': 1, 'b': 2, 'c': [3, 4], 'd': {'e': 5, 'f': 6}}" 711 | ] 712 | }, 713 | "execution_count": 17, 714 | "metadata": {}, 715 | "output_type": "execute_result" 716 | } 717 | ], 718 | "source": [ 719 | "{\n", 720 | " \"a\": 1,\n", 721 | " \"b\": 2,\n", 722 | " \"c\": [3, 4],\n", 723 | " \"d\": {\n", 724 | " \"e\": 5,\n", 725 | " \"f\": 6\n", 726 | " }\n", 727 | "}" 728 | ] 729 | }, 730 | { 731 | "cell_type": "markdown", 732 | "metadata": {}, 733 | "source": [ 734 | "Example paths:\n", 735 | "```\n", 736 | "> $.a returns 1 \n", 737 | "> $.c returns [3, 4] \n", 738 | "> $.c[1] returns 4 \n", 739 | "> $.d.e returns 5 \n", 740 | "> $**.e returns [5] \n", 741 | "```" 742 | ] 743 | }, 744 | { 745 | "cell_type": "markdown", 746 | "metadata": {}, 747 | "source": [ 748 | "### Extracting JSON Paths in Queries\n", 749 | "\n", 750 | 
"You could extract the name and first tag of your book table using the query:" 751 | ] 752 | }, 753 | { 754 | "cell_type": "code", 755 | "execution_count": 18, 756 | "metadata": {}, 757 | "outputs": [ 758 | { 759 | "name": "stdout", 760 | "output_type": "stream", 761 | "text": [ 762 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 763 | "1 rows affected.\n" 764 | ] 765 | }, 766 | { 767 | "data": { 768 | "text/html": [ 769 | "\n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | "
titletag1
ECMAScript 2015: A SitePoint Anthology"JavaScript"
" 779 | ], 780 | "text/plain": [ 781 | "[('ECMAScript 2015: A SitePoint Anthology', '\"JavaScript\"')]" 782 | ] 783 | }, 784 | "execution_count": 18, 785 | "metadata": {}, 786 | "output_type": "execute_result" 787 | } 788 | ], 789 | "source": [ 790 | "%%sql\n", 791 | "\n", 792 | "SELECT\n", 793 | " title, tags->\"$[0]\" AS `tag1`\n", 794 | "FROM `book`;" 795 | ] 796 | }, 797 | { 798 | "cell_type": "markdown", 799 | "metadata": {}, 800 | "source": [ 801 | "#### For a more complex example, presume you have a user table with JSON profile data.\n", 802 | "\n", 803 | "For example:" 804 | ] 805 | }, 806 | { 807 | "cell_type": "code", 808 | "execution_count": 19, 809 | "metadata": {}, 810 | "outputs": [ 811 | { 812 | "name": "stdout", 813 | "output_type": "stream", 814 | "text": [ 815 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 816 | "0 rows affected.\n" 817 | ] 818 | }, 819 | { 820 | "data": { 821 | "text/plain": [ 822 | "[]" 823 | ] 824 | }, 825 | "execution_count": 19, 826 | "metadata": {}, 827 | "output_type": "execute_result" 828 | } 829 | ], 830 | "source": [ 831 | "%%sql\n", 832 | "\n", 833 | "CREATE TABLE `user` (\n", 834 | " `id` INT(8) UNSIGNED NOT NULL AUTO_INCREMENT,\n", 835 | " `name` VARCHAR(200) NOT NULL,\n", 836 | " `profile` JSON DEFAULT NULL,\n", 837 | " PRIMARY KEY (`id`)\n", 838 | ") ENGINE=INNODB;" 839 | ] 840 | }, 841 | { 842 | "cell_type": "code", 843 | "execution_count": 20, 844 | "metadata": {}, 845 | "outputs": [ 846 | { 847 | "name": "stdout", 848 | "output_type": "stream", 849 | "text": [ 850 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 851 | "2 rows affected.\n" 852 | ] 853 | }, 854 | { 855 | "data": { 856 | "text/plain": [ 857 | "[]" 858 | ] 859 | }, 860 | "execution_count": 20, 861 | "metadata": {}, 862 | "output_type": "execute_result" 863 | } 864 | ], 865 | "source": [ 866 | "%sql INSERT INTO employees.user (`name`, `profile`) VALUES \\\n", 867 | "('Craig', '{\"email\": [\"craig@email1.com\", 
\"craig@email2.com\"], \"twitter\": \"@craigbuckler\"}'), \\\n", 868 | "('SitePoint', '{\"email\": [], \"twitter\": \"@sitepointdotcom\"}');" 869 | ] 870 | }, 871 | { 872 | "cell_type": "markdown", 873 | "metadata": {}, 874 | "source": [ 875 | "You can extract the Twitter name using a JSON path. For example:" 876 | ] 877 | }, 878 | { 879 | "cell_type": "code", 880 | "execution_count": 21, 881 | "metadata": {}, 882 | "outputs": [ 883 | { 884 | "name": "stdout", 885 | "output_type": "stream", 886 | "text": [ 887 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 888 | "2 rows affected.\n" 889 | ] 890 | }, 891 | { 892 | "data": { 893 | "text/html": [ 894 | "\n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | "
nametwitter
Craig"@craigbuckler"
SitePoint"@sitepointdotcom"
" 908 | ], 909 | "text/plain": [ 910 | "[('Craig', '\"@craigbuckler\"'), ('SitePoint', '\"@sitepointdotcom\"')]" 911 | ] 912 | }, 913 | "execution_count": 21, 914 | "metadata": {}, 915 | "output_type": "execute_result" 916 | } 917 | ], 918 | "source": [ 919 | "%%sql\n", 920 | "\n", 921 | "SELECT\n", 922 | " name, profile->\"$.twitter\" AS `twitter`\n", 923 | "FROM `user`;" 924 | ] 925 | }, 926 | { 927 | "cell_type": "markdown", 928 | "metadata": {}, 929 | "source": [ 930 | "You could use a JSON path in the WHERE clause to only return users with a Twitter account:" 931 | ] 932 | }, 933 | { 934 | "cell_type": "code", 935 | "execution_count": 22, 936 | "metadata": {}, 937 | "outputs": [ 938 | { 939 | "name": "stdout", 940 | "output_type": "stream", 941 | "text": [ 942 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 943 | "2 rows affected.\n" 944 | ] 945 | }, 946 | { 947 | "data": { 948 | "text/html": [ 949 | "\n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | "
nametwitter
Craig"@craigbuckler"
SitePoint"@sitepointdotcom"
" 963 | ], 964 | "text/plain": [ 965 | "[('Craig', '\"@craigbuckler\"'), ('SitePoint', '\"@sitepointdotcom\"')]" 966 | ] 967 | }, 968 | "execution_count": 22, 969 | "metadata": {}, 970 | "output_type": "execute_result" 971 | } 972 | ], 973 | "source": [ 974 | "%%sql\n", 975 | "\n", 976 | "SELECT\n", 977 | " name, profile->\"$.twitter\" AS `twitter`\n", 978 | "FROM `user`\n", 979 | "WHERE\n", 980 | " profile->\"$.twitter\" IS NOT NULL;\n" 981 | ] 982 | }, 983 | { 984 | "cell_type": "markdown", 985 | "metadata": {}, 986 | "source": [ 987 | "### Modifying Part of a JSON Document\n", 988 | "\n", 989 | "#### There are several MySQL functions to modify parts of a JSON document using path notation. These include:\n", 990 | "\n", 991 | "+ JSON_SET(doc, path, val[, path, val]...): inserts or updates data in the document\n", 992 | "+ JSON_INSERT(doc, path, val[, path, val]...): inserts data into the document\n", 993 | "+ JSON_REPLACE(doc, path, val[, path, val]...): replaces data in the document\n", 994 | "+ JSON_MERGE(doc, doc[, doc]...): merges two or more document\n", 995 | "+ JSON_ARRAY_APPEND(doc, path, val[, path, val]...): appends values to the end of an array\n", 996 | "+ JSON_ARRAY_INSERT(doc, path, val[, path, val]...): inserts an array within the document\n", 997 | "+ JSON_REMOVE(doc, path[, path]...): removes data from the document\n", 998 | "\n", 999 | "You can therefore add a “technical” tag to any book which already has a “JavaScript” tag:" 1000 | ] 1001 | }, 1002 | { 1003 | "cell_type": "code", 1004 | "execution_count": 23, 1005 | "metadata": {}, 1006 | "outputs": [ 1007 | { 1008 | "name": "stdout", 1009 | "output_type": "stream", 1010 | "text": [ 1011 | " * mysql+pymysql://root:***@localhost:3306/employees\n", 1012 | "1 rows affected.\n" 1013 | ] 1014 | }, 1015 | { 1016 | "data": { 1017 | "text/plain": [ 1018 | "[]" 1019 | ] 1020 | }, 1021 | "execution_count": 23, 1022 | "metadata": {}, 1023 | "output_type": "execute_result" 1024 | } 1025 | ], 1026 | 
"source": [ 1027 | "%%sql\n", 1028 | "\n", 1029 | "UPDATE `book`\n", 1030 | " SET tags = JSON_MERGE(tags, '[\"technical\"]')\n", 1031 | "WHERE\n", 1032 | " JSON_SEARCH(tags, 'one', 'JavaScript') IS NOT NULL;" 1033 | ] 1034 | }, 1035 | { 1036 | "cell_type": "markdown", 1037 | "metadata": {}, 1038 | "source": [ 1039 | "#### The MySQL manual provides further information about the [JSON data type](https://dev.mysql.com/doc/refman/en/json.html) and the associated [JSON functions](https://dev.mysql.com/doc/refman//en/json-functions.html).\n", 1040 | "\n", 1041 | "Again, I urge you not to use JSON unless it’s absolutely necessary. You could emulate an entire document-oriented NoSQL database in MySQL, but it would negate many benefits of SQL, and you may as well switch to a real NoSQL system! That said, JSON data types might save effort for more obscure data requirements within an SQL application." 1042 | ] 1043 | } 1044 | ], 1045 | "metadata": { 1046 | "kernelspec": { 1047 | "display_name": "Python 3 (ipykernel)", 1048 | "language": "python", 1049 | "name": "python3" 1050 | }, 1051 | "language_info": { 1052 | "codemirror_mode": { 1053 | "name": "ipython", 1054 | "version": 3 1055 | }, 1056 | "file_extension": ".py", 1057 | "mimetype": "text/x-python", 1058 | "name": "python", 1059 | "nbconvert_exporter": "python", 1060 | "pygments_lexer": "ipython3", 1061 | "version": "3.8.10" 1062 | } 1063 | }, 1064 | "nbformat": 4, 1065 | "nbformat_minor": 4 1066 | } 1067 | -------------------------------------------------------------------------------- /Notebooks/11_Python_ODO.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Introduction to Databases\n", 8 | "\n", 9 | "### [Python ODO](http://odo.pydata.org/en/latest/) \n", 10 | "\n", 11 | "\n", 12 | "![ODO image](http://odo.pydata.org/en/latest/_images/conversions.png)" 13 | ] 14 | }, 15 | { 16 | 
"cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "!pip install -q git+git://github.com/blaze/odo" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import pandas as pd\n", 31 | "from odo import odo" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "#### load source into target\n", 39 | " odo(source, target) \n", 40 | " \n", 41 | " \n", 42 | "+ odo(df, list) # create new list from Pandas DataFrame\n", 43 | "+ odo(df, []) # append onto existing list\n", 44 | "+ odo(df, 'myfile.json') # Dump dataframe to line-delimited JSON\n", 45 | "+ odo('myfiles.*.csv', Iterator) # Stream through many CSV files\n", 46 | "+ odo(df, 'postgresql://hostname::tablename') # Migrate dataframe to Postgres\n", 47 | "+ odo('myfile.*.csv', 'postgresql://hostname::tablename') # Load CSVs to Postgres\n", 48 | "+ odo('postgresql://hostname::tablename', 'myfile.json') # Dump Postgres to JSON\n", 49 | "+ odo('mongodb://hostname/db::collection', pd.DataFrame) # Dump Mongo to DataFrame\n", 50 | "\n", 51 | "Pay attention to [Data Shapes](https://odo.readthedocs.io/en/latest/datashape.html)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "ename": "ValueError", 61 | "evalue": "Function has keyword-only parameters or annotations, use inspect.signature() API which can support them", 62 | "output_type": "error", 63 | "traceback": [ 64 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 65 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 66 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_fake\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0modo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"../SampleDBs/fakedata.csv\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'|'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 67 | "\u001b[0;32m~/environments/default_env/lib/python3.8/site-packages/odo/odo.py\u001b[0m in \u001b[0;36modo\u001b[0;34m(source, target, **kwargs)\u001b[0m\n\u001b[1;32m 89\u001b[0m \u001b[0modo\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mAdd\u001b[0m \u001b[0mthings\u001b[0m \u001b[0monto\u001b[0m \u001b[0mexisting\u001b[0m \u001b[0mthings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 90\u001b[0m \"\"\"\n\u001b[0;32m---> 91\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0minto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 68 | "\u001b[0;32m~/environments/default_env/lib/python3.8/site-packages/multipledispatch/dispatcher.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cache\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtypes\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 277\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 278\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 279\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mMDNotImplementedError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 69 | "\u001b[0;32m~/environments/default_env/lib/python3.8/site-packages/odo/into.py\u001b[0m in \u001b[0;36mwrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'dshape argument is not an instance of DataShape'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'dshape'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 43\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 44\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapped\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 70 | "\u001b[0;32m~/environments/default_env/lib/python3.8/site-packages/odo/into.py\u001b[0m in \u001b[0;36minto_string_string\u001b[0;34m(a, b, **kwargs)\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mvalidate\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 148\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0minto_string_string\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 149\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0minto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresource\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 71 | "\u001b[0;32m~/environments/default_env/lib/python3.8/site-packages/multipledispatch/dispatcher.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cache\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtypes\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 277\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 278\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 279\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mMDNotImplementedError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 72 | "\u001b[0;32m~/environments/default_env/lib/python3.8/site-packages/odo/into.py\u001b[0m in \u001b[0;36mwrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 
41\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'dshape argument is not an instance of DataShape'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'dshape'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 43\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 44\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapped\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 73 | "\u001b[0;32m~/environments/default_env/lib/python3.8/site-packages/odo/into.py\u001b[0m in \u001b[0;36minto_type\u001b[0;34m(a, b, dshape, **kwargs)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mignoring\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mNotImplementedError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdshape\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 52\u001b[0;31m \u001b[0mdshape\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdiscover\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 53\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mdshape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 74 | "\u001b[0;32m~/environments/default_env/lib/python3.8/site-packages/multipledispatch/dispatcher.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cache\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtypes\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 277\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 278\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 279\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mMDNotImplementedError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 75 | "\u001b[0;32m~/environments/default_env/lib/python3.8/site-packages/odo/backends/csv.py\u001b[0m in \u001b[0;36mdiscover_csv\u001b[0;34m(c, nrows, **kwargs)\u001b[0m\n\u001b[1;32m 375\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mdiscover\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mregister\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCSV\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 376\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mdiscover_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mnrows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 377\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcsv_to_dataframe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnrows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 378\u001b[0m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcoerce_datetimes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 379\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 76 | "\u001b[0;32m~/environments/default_env/lib/python3.8/site-packages/odo/backends/csv.py\u001b[0m in \u001b[0;36mcsv_to_dataframe\u001b[0;34m(c, dshape, chunksize, nrows, **kwargs)\u001b[0m\n\u001b[1;32m 293\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcsv_to_dataframe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdshape\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunksize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnrows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 294\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 295\u001b[0;31m return _csv_to_dataframe(c, dshape=dshape, chunksize=chunksize,\n\u001b[0m\u001b[1;32m 296\u001b[0m nrows=nrows, **kwargs)\n\u001b[1;32m 297\u001b[0m 
\u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 77 | "\u001b[0;32m~/environments/default_env/lib/python3.8/site-packages/odo/backends/csv.py\u001b[0m in \u001b[0;36m_csv_to_dataframe\u001b[0;34m(c, dshape, chunksize, **kwargs)\u001b[0m\n\u001b[1;32m 344\u001b[0m \u001b[0mheader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 345\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 346\u001b[0;31m \u001b[0mkwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkeyfilter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkeywords\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__contains__\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 347\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 348\u001b[0m return pd.read_csv(f,\n", 78 | "\u001b[0;32m~/environments/default_env/lib/python3.8/site-packages/odo/utils.py\u001b[0m in \u001b[0;36mkeywords\u001b[0;34m(func)\u001b[0m\n\u001b[1;32m 128\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mkeywords\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 
130\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0minspect\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetargspec\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 131\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 79 | "\u001b[0;32m/usr/lib/python3.8/inspect.py\u001b[0m in \u001b[0;36mgetargspec\u001b[0;34m(func)\u001b[0m\n\u001b[1;32m 1081\u001b[0m \u001b[0mgetfullargspec\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1082\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkwonlyargs\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mann\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1083\u001b[0;31m raise ValueError(\"Function has keyword-only parameters or annotations\"\n\u001b[0m\u001b[1;32m 1084\u001b[0m \", use inspect.signature() API which can support them\")\n\u001b[1;32m 1085\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mArgSpec\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvarargs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvarkw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefaults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 80 | "\u001b[0;31mValueError\u001b[0m: Function has keyword-only parameters or annotations, use inspect.signature() API which can support them" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "df_fake = odo(\"../SampleDBs/fakedata.csv\", pd.DataFrame, sep='|')" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [] 94 | } 95 | ], 96 | "metadata": { 97 | "kernelspec": { 98 | "display_name": "Python 3", 99 | "language": "python", 100 | "name": "python3" 101 | }, 102 | 
"language_info": { 103 | "codemirror_mode": { 104 | "name": "ipython", 105 | "version": 3 106 | }, 107 | "file_extension": ".py", 108 | "mimetype": "text/x-python", 109 | "name": "python", 110 | "nbconvert_exporter": "python", 111 | "pygments_lexer": "ipython3", 112 | "version": "3.8.5" 113 | } 114 | }, 115 | "nbformat": 4, 116 | "nbformat_minor": 4 117 | } 118 | -------------------------------------------------------------------------------- /Notebooks/Example_SQLite_matrix.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Introduction to Databases\n", 8 | "\n", 9 | "### Exercises" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import os\n", 19 | "import sys\n", 20 | "import time\n", 21 | "import datetime\n", 22 | "import numpy as np\n", 23 | "import pandas as pd\n", 24 | "\n", 25 | "import sqlite3" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "### 1st exercise - Matrix\n", 33 | "#### Acessing [SQLite](https://docs.python.org/3/library/sqlite3.html) and creating database" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 2, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "conn = sqlite3.connect(os.path.join(\"..\",\"SampleDBs\",'matrix.db'))" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "cur = conn.cursor()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 4, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "" 63 | ] 64 | }, 65 | "execution_count": 4, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "query = '''\n", 72 | "CREATE TABLE matrixA (\n", 73 | " 
row_num TINYINT,\n", 74 | " col_num TINYINT,\n", 75 | " value TINYINT\n", 76 | ");\n", 77 | "'''\n", 78 | "\n", 79 | "cur.execute(query)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 5, 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/plain": [ 90 | "" 91 | ] 92 | }, 93 | "execution_count": 5, 94 | "metadata": {}, 95 | "output_type": "execute_result" 96 | } 97 | ], 98 | "source": [ 99 | "query = '''\n", 100 | "CREATE TABLE matrixB (\n", 101 | " row_num TINYINT,\n", 102 | " col_num TINYINT,\n", 103 | " value TINYINT\n", 104 | ");\n", 105 | "'''\n", 106 | "\n", 107 | "cur.execute(query)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 6, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "" 119 | ] 120 | }, 121 | "execution_count": 6, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "query = '''\n", 128 | "INSERT INTO matrixA (row_num, col_num, value)\n", 129 | "VALUES\n", 130 | "(1, 1, 4),\n", 131 | "(1, 2, 2),\n", 132 | "(1, 3, 6),\n", 133 | "(2, 1, 2),\n", 134 | "(2, 2, 5),\n", 135 | "(2, 3, 7);\n", 136 | "'''\n", 137 | "\n", 138 | "cur.execute(query)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 7, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "name": "stdout", 148 | "output_type": "stream", 149 | "text": [ 150 | "(1, 1, 4)\n", 151 | "(1, 2, 2)\n", 152 | "(1, 3, 6)\n", 153 | "(2, 1, 2)\n", 154 | "(2, 2, 5)\n", 155 | "(2, 3, 7)\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "query = '''\n", 161 | "SELECT * from matrixA;\n", 162 | "'''\n", 163 | "\n", 164 | "cur.execute(query)\n", 165 | "for row in cur.fetchall():\n", 166 | " print(row)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 8, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "data": { 176 | "text/plain": [ 177 | "" 178 | ] 179 | }, 180 | 
"execution_count": 8, 181 | "metadata": {}, 182 | "output_type": "execute_result" 183 | } 184 | ], 185 | "source": [ 186 | "query = '''\n", 187 | "INSERT INTO matrixB (row_num, col_num, value)\n", 188 | "VALUES\n", 189 | "(1, 1, 2),\n", 190 | "(1, 2, 4),\n", 191 | "(1, 3, 8),\n", 192 | "(2, 1, 1),\n", 193 | "(2, 2, 5),\n", 194 | "(2, 3, 10),\n", 195 | "(3, 1, 3),\n", 196 | "(3, 2, 6),\n", 197 | "(3, 3, 9);\n", 198 | "'''\n", 199 | "\n", 200 | "cur.execute(query)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 9, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "(1, 1, 28)\n", 213 | "(1, 2, 62)\n", 214 | "(1, 3, 106)\n", 215 | "(2, 1, 30)\n", 216 | "(2, 2, 75)\n", 217 | "(2, 3, 129)\n" 218 | ] 219 | } 220 | ], 221 | "source": [ 222 | "query = '''\n", 223 | "SELECT A.row_num, B.col_num, SUM(A.value * B.value) AS result\n", 224 | "FROM matrixA AS A\n", 225 | "INNER JOIN matrixB AS B\n", 226 | "ON A.col_num = B.row_num\n", 227 | "GROUP BY A.row_num, B.col_num;\n", 228 | "'''\n", 229 | "\n", 230 | "cur.execute(query)\n", 231 | "for line in cur.fetchall():\n", 232 | " print(line)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 10, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "conn.close()\n", 242 | "os.remove(os.path.join(\"..\",\"SampleDBs\",'matrix.db'))" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [] 251 | } 252 | ], 253 | "metadata": { 254 | "kernelspec": { 255 | "display_name": "Python 3", 256 | "language": "python", 257 | "name": "python3" 258 | }, 259 | "language_info": { 260 | "codemirror_mode": { 261 | "name": "ipython", 262 | "version": 3 263 | }, 264 | "file_extension": ".py", 265 | "mimetype": "text/x-python", 266 | "name": "python", 267 | "nbconvert_exporter": "python", 268 | "pygments_lexer": "ipython3", 
269 | "version": "3.7.6" 270 | } 271 | }, 272 | "nbformat": 4, 273 | "nbformat_minor": 4 274 | } 275 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Introduction to Databases** 2 | 3 | + References: 4 | + [SQL Tutorial](https://www.w3schools.com/sql/default.asp) 5 | 6 | + Modeling Environments: 7 | + [Vertabelo](https://www.vertabelo.com/) 8 | + [SQLDBM](https://sqldbm.com/Home/) 9 | + [Lucid Charts](https://www.lucidchart.com/pages/) 10 | + [Diagrams](https://app.diagrams.net/) 11 | + [DB Diagram](https://dbdiagram.io/home) 12 | + [Quick DB Diagrams](https://www.quickdatabasediagrams.com/) 13 | + [Visual Paradigm](https://www.visual-paradigm.com/) 14 | + [ERD Plus](https://erdplus.com/) 15 | 16 | + RDBMSs: 17 | + [MySQL](https://dev.mysql.com/downloads/installer/) (Local installation) 18 | + [MariaDB](https://mariadb.org/) 19 | + [PostegreSQL](https://www.postgresql.org/) 20 | + [PostGIS](https://postgis.net/) 21 | + [SQLite](https://www.sqlite.org/index.html) 22 | + [SpatialLite](https://www.gaia-gis.it/fossil/libspatialite/home) 23 | 24 | + Managing Environments: 25 | + [MySQL Workbench](https://www.mysql.com/products/workbench/) 26 | + [PHPMyAdmin](https://www.phpmyadmin.net/) 27 | + [PGAdmin](https://www.pgadmin.org/) 28 | + [DBeaver](https://dbeaver.io/download/) 29 | 30 | + NoSQL Databases 31 | + [MongoDB](https://www.mongodb.com/) 32 | + [ElasticSearch](https://www.elastic.co/elasticsearch/) 33 | + [TimeScale](https://www.timescale.com/) 34 | 35 | + Cloud and DS Databases: 36 | + [DoltHub](https://www.dolthub.com/) 37 | + [MLDB](https://mldb.ai/) 38 | + [DVC](https://dvc.org/) 39 | + [Graviti](https://www.graviti.com/) 40 | 41 | + [Anaconda - Jupyter - Python](https://www.anaconda.com/products/individual)(Python Environment) 42 | 43 | + [Fake Data Generation](https://www.generatedata.com/) 44 | 45 | **** 46 | 47 | 
**Syllabus** 48 | 49 | Entity-Relationship Models and Diagrams. Entity Types and Attributes. Relationships. Relational Databases. Keys, Integrity. Relational Algebra. Formal Query Languages. SQL. Programmatic Databases Access. NoSQL Databases. RDF Triplestores. API Access 50 | 51 | #### Detailed Program: 52 | 53 | |Day|Topic|Activities| 54 | |---|---|---| 55 | |Class 1|Introduction, Syllabus, Evaluation, Integrated Development Environment|Users creation| 56 | |Class 2|Basic Concepts, Representation. Abstraction. Structured and Unstructured Data| 57 | |Class 3|Types and Architecture of Databases. Relational and non-Relational Databases| 58 | |Class 4|Data Modeling: Diagrams, Relations, Schemas, Restrictions. Data Modeling Tools|Data Modeling Exercises| 59 | |Class 5|Data Modeling: [Table Normalization](https://www.guru99.com/database-normalization.html)|Data Modeling Exercises| 60 | |Class 6|[Data Modeling: Conceptual, Logical, Physical](https://online.visual-paradigm.com/knowledge/visual-modeling/conceptual-vs-logical-vs-physical-data-model/)|Data Modeling Exercises| 61 | |Class 7|SQL language: Standards, implementations, Relational Algebra| 62 | |Class 8|Introduction to RDBMS MySQL|Hands On MySQL CLI| 63 | |Class 9|SQL language|SQL practice| 64 | |Class 10|SQL language|SQL practice| 65 | |Class 11|SQL language|SQL practice| 66 | |Class 12|SQL language|[Regular Expressions](https://regex101.com/)| 67 | |Class 13|SQL language|SQL practice| 68 | |Class 14|SQL language|SQL practice| 69 | |---|Evaluation|---| 70 | |Class 15|Programmatic access to Databases|Python programming| 71 | |Class 16|Programmatic access to Databases|Python programming| 72 | |Class 17|Programmatic access to Databases|Python programming| 73 | |Class 18|Programmatic access to Databases|Python programming - SQLite| 74 | |Class 19|Programmatic access to Databases|Python programming - SQLite| 75 | |Class 20|Introduction to RDBMS Postgres|Hands On Postgres CLI| 76 | |Class 21|Programmatic access to 
Databases|Python programming - Postgres| 77 | |Class 22|Programmatic access to Databases|Python programming - Interface with Pandas| 78 | |Class 23|Programmatic access to Databases|Python programming - Interface with Pandas| 79 | |Class 24|Programmatic access to Databases|Python programming - Interface with Pandas| 80 | |Class 25|Programmatic access to Databases|Python programming - Interface with Pandas| 81 | |Class 26|Cloud Databases - DoltHub| 82 | |Class 27|Cloud Databases - DoltHub| 83 | |Class 28|NoSQL Databases| 84 | |Class 29|NoSQL Databases|MongoDB practice| 85 | |Class 30|NoSQL Databases|MongoDB practice| 86 | |Class 31|NoSQL Databases|Elastic Search| 87 | |---|Evaluation|---| 88 | |---|---|---| 89 | 90 | -------------------------------------------------------------------------------- /SampleDBs/2013_ERCOT_Hourly_Load_Data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/SampleDBs/2013_ERCOT_Hourly_Load_Data.zip -------------------------------------------------------------------------------- /SampleDBs/ACDH_CH_WG4_simplified.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/SampleDBs/ACDH_CH_WG4_simplified.xls -------------------------------------------------------------------------------- /SampleDBs/EssentialSQL.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/SampleDBs/EssentialSQL.sqlite -------------------------------------------------------------------------------- /SampleDBs/HousingPrices.csv: -------------------------------------------------------------------------------- 1 | 
TransactionDate,HouseAge,DistanceToMRT,NumStores,Latitude,Longitude,UnitPrice 2 | 2012,32.0,84.87881999999999,10,24.982979999999998,121.54024,37.9 3 | 2012,19.5,306.5947,9,24.980339999999998,121.53951,42.2 4 | 2013,13.3,561.9845,5,24.987460000000002,121.54391000000001,47.3 5 | 2013,13.3,561.9845,5,24.987460000000002,121.54391000000001,54.8 6 | 2012,5.0,390.5684,5,24.97937,121.54245,43.1 7 | 2012,7.1,2175.03,3,24.96305,121.51253999999999,32.1 8 | 2012,34.5,623.4731,7,24.979329999999997,121.53641999999999,40.3 9 | 2013,20.3,287.6025,6,24.980420000000002,121.54228,46.7 10 | 2013,31.7,5512.0380000000005,1,24.95095,121.48458000000001,18.8 11 | 2013,17.9,1783.18,3,24.96731,121.51486000000001,22.1 12 | 2013,34.8,405.2134,1,24.973489999999998,121.53372,41.4 13 | 2013,6.3,90.45606,9,24.97433,121.5431,58.1 14 | 2012,13.0,492.2313,5,24.96515,121.53737,39.3 15 | 2012,20.4,2469.645,4,24.96108,121.51046000000001,23.8 16 | 2013,13.2,1164.838,4,24.99156,121.53406000000001,34.3 17 | 2013,35.7,579.2083,2,24.9824,121.54619,50.5 18 | 2013,0.0,292.9978,6,24.977439999999998,121.54458000000001,70.1 19 | 2012,17.7,350.8515,1,24.97544,121.53119,37.4 20 | 2013,16.9,368.1363,8,24.9675,121.54451,42.3 21 | 2012,1.5,23.382839999999998,7,24.96772,121.54101999999999,47.7 22 | 2013,4.5,2275.877,3,24.96314,121.51151000000002,29.3 23 | 2013,10.5,279.1726,7,24.975279999999998,121.54541,51.6 24 | 2012,14.7,1360.139,1,24.95204,121.54842,24.6 25 | 2013,10.1,279.1726,7,24.975279999999998,121.54541,47.9 26 | 2013,39.6,480.6977,4,24.97353,121.53885,38.8 27 | 2013,29.3,1487.868,2,24.97542,121.51726000000001,27.0 28 | 2012,3.1,383.8624,5,24.98085,121.54391000000001,56.2 29 | 2013,10.4,276.449,5,24.95593,121.53913,33.6 30 | 2013,19.2,557.4780000000001,4,24.97419,121.53797,47.0 31 | 2013,7.1,451.2438,5,24.97563,121.54693999999999,57.1 32 | 2013,25.9,4519.69,0,24.94826,121.49587,22.1 33 | 2012,29.6,769.4034,7,24.98281,121.53408,25.0 34 | 2012,37.9,488.5727,1,24.973489999999998,121.53451000000001,34.2 35 | 
2013,16.5,323.655,6,24.97841,121.54281,49.3 36 | 2012,15.4,205.36700000000002,7,24.984189999999998,121.54243000000001,55.1 37 | 2013,13.9,4079.4179999999997,0,25.01459,121.51816000000001,27.3 38 | 2012,14.7,1935.009,2,24.96386,121.51458000000001,22.9 39 | 2013,12.0,1360.139,1,24.95204,121.54842,25.3 40 | 2012,3.1,577.9615,6,24.97201,121.54722,47.7 41 | 2013,16.2,289.3248,5,24.982029999999998,121.54348,46.2 42 | 2013,13.6,4082.015,0,24.94155,121.50381000000002,15.9 43 | 2013,16.8,4066.587,0,24.942970000000003,121.50341999999999,18.2 44 | 2013,36.1,519.4617,5,24.96305,121.53758,34.7 45 | 2012,34.4,512.7871,6,24.987479999999998,121.54301000000001,34.1 46 | 2013,2.7,533.4762,4,24.97445,121.54765,53.9 47 | 2013,36.6,488.8193,8,24.97015,121.54494,38.3 48 | 2013,21.7,463.9623,9,24.9703,121.54458000000001,42.0 49 | 2013,35.9,640.7391,3,24.97563,121.53715,61.5 50 | 2013,24.2,4605.749,0,24.946839999999998,121.49578000000001,13.4 51 | 2012,29.4,4510.359,1,24.94925,121.49542,13.2 52 | 2013,21.7,512.5487,4,24.974,121.53842,44.2 53 | 2013,31.3,1758.406,1,24.95402,121.55282,20.7 54 | 2013,32.1,1438.579,3,24.97419,121.5175,27.0 55 | 2013,13.3,492.2313,5,24.96515,121.53737,38.9 56 | 2013,16.1,289.3248,5,24.982029999999998,121.54348,51.7 57 | 2012,31.7,1160.632,0,24.949679999999997,121.53008999999999,13.7 58 | 2013,33.6,371.2495,8,24.97254,121.54059,41.9 59 | 2012,3.5,56.47425,7,24.95744,121.53711000000001,53.5 60 | 2013,30.3,4510.359,1,24.94925,121.49542,22.6 61 | 2013,13.3,336.0532,5,24.95776,121.53438,42.4 62 | 2013,11.0,1931.207,2,24.96365,121.51471000000001,21.3 63 | 2013,5.3,259.6607,6,24.97585,121.54516000000001,63.2 64 | 2012,17.2,2175.877,3,24.96303,121.51253999999999,27.7 65 | 2013,2.6,533.4762,4,24.97445,121.54765,55.0 66 | 2013,17.5,995.7554,0,24.96305,121.54915,25.3 67 | 2013,40.1,123.7429,8,24.97635,121.54328999999998,44.3 68 | 2013,1.0,193.5845,6,24.96571,121.54088999999999,50.7 69 | 2013,8.5,104.8101,5,24.966739999999998,121.54066999999999,56.8 70 | 
2013,30.4,464.223,6,24.97964,121.53805,36.2 71 | 2012,12.5,561.9845,5,24.987460000000002,121.54391000000001,42.0 72 | 2013,6.6,90.45606,9,24.97433,121.5431,59.0 73 | 2013,35.5,640.7391,3,24.97563,121.53715,40.8 74 | 2013,32.5,424.5442,8,24.97587,121.53913,36.3 75 | 2013,13.8,4082.015,0,24.94155,121.50381000000002,20.0 76 | 2012,6.8,379.5575,10,24.98343,121.53761999999999,54.4 77 | 2013,12.3,1360.139,1,24.95204,121.54842,29.5 78 | 2013,35.9,616.4004,3,24.97723,121.53766999999999,36.8 79 | 2012,20.5,2185.1279999999997,3,24.96322,121.51236999999999,25.6 80 | 2012,38.2,552.4371,2,24.97598,121.53381,29.8 81 | 2013,18.0,1414.8370000000002,1,24.95182,121.54887,26.5 82 | 2013,11.8,533.4762,4,24.97445,121.54765,40.3 83 | 2013,30.8,377.7956,6,24.964270000000003,121.53963999999999,36.8 84 | 2013,13.2,150.9347,7,24.96725,121.54252,48.1 85 | 2012,25.3,2707.3920000000003,3,24.96056,121.50831000000001,17.7 86 | 2013,15.1,383.2805,7,24.96735,121.54463999999999,43.7 87 | 2012,0.0,338.9679,9,24.968529999999998,121.54413000000001,50.8 88 | 2012,1.8,1455.7979999999998,1,24.9512,121.54899999999999,27.0 89 | 2013,16.9,4066.587,0,24.942970000000003,121.50341999999999,18.3 90 | 2012,8.9,1406.43,0,24.98573,121.52758,48.0 91 | 2013,23.0,3947.945,0,24.94783,121.50243,25.3 92 | 2012,0.0,274.0144,1,24.9748,121.53058999999999,45.4 93 | 2013,9.1,1402.016,0,24.985689999999998,121.5276,43.2 94 | 2012,20.6,2469.645,4,24.96108,121.51046000000001,21.8 95 | 2012,31.9,1146.329,0,24.9492,121.53076000000001,16.1 96 | 2012,40.9,167.5989,5,24.9663,121.54026,41.0 97 | 2012,8.0,104.8101,5,24.966739999999998,121.54066999999999,51.8 98 | 2013,6.4,90.45606,9,24.97433,121.5431,59.5 99 | 2013,28.4,617.4424,3,24.97746,121.53298999999998,34.6 100 | 2013,16.4,289.3248,5,24.982029999999998,121.54348,51.0 101 | 2013,6.4,90.45606,9,24.97433,121.5431,62.2 102 | 2013,17.5,964.7496,4,24.98872,121.53411000000001,38.2 103 | 2012,12.7,170.1289,1,24.97371,121.52984,32.9 104 | 
2013,1.1,193.5845,6,24.96571,121.54088999999999,54.4 105 | 2012,0.0,208.3905,6,24.95618,121.53844,45.7 106 | 2012,32.7,392.4459,6,24.96398,121.5425,30.5 107 | 2012,0.0,292.9978,6,24.977439999999998,121.54458000000001,71.0 108 | 2013,17.2,189.5181,8,24.97707,121.54308,47.1 109 | 2013,12.2,1360.139,1,24.95204,121.54842,26.6 110 | 2013,31.4,592.5006,2,24.9726,121.53561,34.1 111 | 2013,4.0,2147.376,3,24.962989999999998,121.51284,28.4 112 | 2013,8.1,104.8101,5,24.966739999999998,121.54066999999999,51.6 113 | 2013,33.3,196.6172,7,24.97701,121.54223999999999,39.4 114 | 2013,9.9,2102.427,3,24.96044,121.51462,23.1 115 | 2013,14.8,393.2606,6,24.96172,121.53811999999999,7.6 116 | 2012,30.6,143.8383,8,24.98155,121.54142,53.3 117 | 2013,20.6,737.9161,2,24.98092,121.54739,46.4 118 | 2013,30.9,6396.283,1,24.94375,121.47883,12.2 119 | 2013,13.6,4197.349,0,24.93885,121.50383000000001,13.0 120 | 2013,25.3,1583.7220000000002,3,24.96622,121.51709,30.6 121 | 2013,16.6,289.3248,5,24.982029999999998,121.54348,59.6 122 | 2013,13.3,492.2313,5,24.96515,121.53737,31.3 123 | 2013,13.6,492.2313,5,24.96515,121.53737,48.0 124 | 2013,31.5,414.9476,4,24.98199,121.54463999999999,32.5 125 | 2013,0.0,185.4296,0,24.9711,121.5317,45.5 126 | 2012,9.9,279.1726,7,24.975279999999998,121.54541,57.4 127 | 2013,1.1,193.5845,6,24.96571,121.54088999999999,48.6 128 | 2013,38.6,804.6897,4,24.978379999999998,121.53477,62.9 129 | 2013,3.8,383.8624,5,24.98085,121.54391000000001,55.0 130 | 2013,41.3,124.9912,6,24.966739999999998,121.54038999999999,60.7 131 | 2013,38.5,216.8329,7,24.98086,121.54162,41.0 132 | 2013,29.6,535.5269999999999,8,24.98092,121.53653,37.5 133 | 2013,4.0,2147.376,3,24.962989999999998,121.51284,30.7 134 | 2013,26.6,482.7581,5,24.97433,121.53863,37.5 135 | 2012,18.0,373.3937,8,24.9866,121.54082,39.5 136 | 2012,33.4,186.9686,6,24.96604,121.54211000000001,42.2 137 | 2012,18.9,1009.235,0,24.96357,121.54951000000001,20.8 138 | 2012,11.4,390.5684,5,24.97937,121.54245,46.8 139 | 
2013,13.6,319.0708,6,24.96495,121.54276999999999,47.4 140 | 2013,10.0,942.4664,0,24.97843,121.52406,43.5 141 | 2012,12.9,492.2313,5,24.96515,121.53737,42.5 142 | 2013,16.2,289.3248,5,24.982029999999998,121.54348,51.4 143 | 2013,5.1,1559.8270000000002,3,24.97213,121.51626999999999,28.9 144 | 2013,19.8,640.6071,5,24.97017,121.54647,37.5 145 | 2013,13.6,492.2313,5,24.96515,121.53737,40.1 146 | 2013,11.9,1360.139,1,24.95204,121.54842,28.4 147 | 2012,2.1,451.2438,5,24.97563,121.54693999999999,45.5 148 | 2012,0.0,185.4296,0,24.9711,121.5317,52.2 149 | 2012,3.2,489.8821,8,24.97017,121.54494,43.2 150 | 2013,16.4,3780.59,0,24.93293,121.51203000000001,45.1 151 | 2012,34.9,179.4538,8,24.973489999999998,121.54245,39.7 152 | 2013,35.8,170.7311,7,24.96719,121.54269,48.5 153 | 2013,4.9,387.7721,9,24.98118,121.53788,44.7 154 | 2013,12.0,1360.139,1,24.95204,121.54842,28.9 155 | 2013,6.5,376.1709,6,24.954179999999997,121.53713,40.9 156 | 2013,16.9,4066.587,0,24.942970000000003,121.50341999999999,20.7 157 | 2013,13.8,4082.015,0,24.94155,121.50381000000002,15.6 158 | 2013,30.7,1264.73,0,24.948829999999997,121.52954,18.3 159 | 2013,16.1,815.9314,4,24.97886,121.53464,35.6 160 | 2013,11.6,390.5684,5,24.97937,121.54245,39.4 161 | 2012,15.5,815.9314,4,24.97886,121.53464,37.4 162 | 2012,3.5,49.66105,8,24.95836,121.53756000000001,57.8 163 | 2013,19.2,616.4004,3,24.97723,121.53766999999999,39.6 164 | 2012,16.0,4066.587,0,24.942970000000003,121.50341999999999,11.6 165 | 2013,8.5,104.8101,5,24.966739999999998,121.54066999999999,55.5 166 | 2012,0.0,185.4296,0,24.9711,121.5317,55.2 167 | 2012,13.7,1236.5639999999999,1,24.97694,121.55391000000002,30.6 168 | 2013,0.0,292.9978,6,24.977439999999998,121.54458000000001,73.6 169 | 2013,28.2,330.0854,8,24.974079999999997,121.54011000000001,43.4 170 | 2013,27.6,515.1122,5,24.962989999999998,121.5432,37.4 171 | 2013,8.4,1962.628,1,24.95468,121.55481,23.5 172 | 2013,24.0,4527.687,0,24.94741,121.49628,14.4 173 | 
2013,3.6,383.8624,5,24.98085,121.54391000000001,58.8 174 | 2013,6.6,90.45606,9,24.97433,121.5431,58.1 175 | 2013,41.3,401.8807,4,24.98326,121.5446,35.1 176 | 2013,4.3,432.0385,7,24.9805,121.53778,45.2 177 | 2013,30.2,472.1745,3,24.97005,121.53758,36.5 178 | 2012,13.9,4573.779,0,24.94867,121.49507,19.2 179 | 2013,33.0,181.0766,9,24.97697,121.54262,42.0 180 | 2013,13.1,1144.4360000000001,4,24.99176,121.53456000000001,36.7 181 | 2013,14.0,438.8513,1,24.97493,121.5273,42.6 182 | 2012,26.9,4449.27,0,24.94898,121.49621,15.5 183 | 2013,11.6,201.8939,8,24.98489,121.54121,55.9 184 | 2013,13.5,2147.376,3,24.962989999999998,121.51284,23.6 185 | 2013,17.0,4082.015,0,24.94155,121.50381000000002,18.8 186 | 2012,14.1,2615.465,0,24.95495,121.56173999999999,21.8 187 | 2012,31.4,1447.286,3,24.97285,121.5173,21.5 188 | 2013,20.9,2185.1279999999997,3,24.96322,121.51236999999999,25.7 189 | 2013,8.9,3078.176,0,24.954639999999998,121.56626999999999,22.0 190 | 2012,34.8,190.0392,8,24.97707,121.54312,44.3 191 | 2012,16.3,4066.587,0,24.942970000000003,121.50341999999999,20.5 192 | 2013,35.3,616.5735,8,24.97945,121.53641999999999,42.3 193 | 2013,13.2,750.0704,2,24.97371,121.54951000000001,37.8 194 | 2013,43.8,57.58945,7,24.9675,121.54068999999998,42.7 195 | 2013,9.7,421.47900000000004,5,24.98246,121.54477,49.3 196 | 2013,15.2,3771.895,0,24.933629999999997,121.51158000000001,29.3 197 | 2013,15.2,461.1016,5,24.95425,121.5399,34.6 198 | 2013,22.8,707.9067,2,24.980999999999998,121.54713000000001,36.6 199 | 2013,34.4,126.7286,8,24.96881,121.54088999999999,48.2 200 | 2013,34.0,157.6052,7,24.966279999999998,121.54196,39.1 201 | 2013,18.2,451.6419,8,24.96945,121.5449,31.6 202 | 2013,17.4,995.7554,0,24.96305,121.54915,25.5 203 | 2013,13.1,561.9845,5,24.987460000000002,121.54391000000001,45.9 204 | 2012,38.3,642.6985,3,24.97559,121.53713,31.5 205 | 2012,15.6,289.3248,5,24.982029999999998,121.54348,46.1 206 | 2013,18.0,1414.8370000000002,1,24.95182,121.54887,26.6 207 | 
2013,12.8,1449.7220000000002,3,24.97289,121.51728,21.4 208 | 2013,22.2,379.5575,10,24.98343,121.53761999999999,44.0 209 | 2013,38.5,665.0636,3,24.97503,121.53692,34.2 210 | 2012,11.5,1360.139,1,24.95204,121.54842,26.2 211 | 2012,34.8,175.6294,8,24.973470000000002,121.54271000000001,40.9 212 | 2013,5.2,390.5684,5,24.97937,121.54245,52.2 213 | 2013,0.0,274.0144,1,24.9748,121.53058999999999,43.5 214 | 2013,17.6,1805.665,2,24.986720000000002,121.52091000000001,31.1 215 | 2013,6.2,90.45606,9,24.97433,121.5431,58.0 216 | 2013,18.1,1783.18,3,24.96731,121.51486000000001,20.9 217 | 2013,19.2,383.7129,8,24.971999999999998,121.54477,48.1 218 | 2013,37.8,590.9292,1,24.971529999999998,121.53558999999998,39.7 219 | 2012,28.0,372.6242,6,24.978379999999998,121.54118999999999,40.8 220 | 2013,13.6,492.2313,5,24.96515,121.53737,43.8 221 | 2012,29.3,529.7771,8,24.98102,121.53655,40.2 222 | 2013,37.2,186.5101,9,24.97703,121.54265,78.3 223 | 2013,9.0,1402.016,0,24.985689999999998,121.5276,38.5 224 | 2013,30.6,431.1114,10,24.98123,121.53743,48.5 225 | 2013,9.1,1402.016,0,24.985689999999998,121.5276,42.3 226 | 2013,34.5,324.9419,6,24.97814,121.5417,46.0 227 | 2013,1.1,193.5845,6,24.96571,121.54088999999999,49.0 228 | 2013,16.5,4082.015,0,24.94155,121.50381000000002,12.8 229 | 2012,32.4,265.0609,8,24.98059,121.53986,40.2 230 | 2013,11.9,3171.329,0,25.00115,121.51776000000001,46.6 231 | 2013,31.0,1156.412,0,24.9489,121.53095,19.0 232 | 2013,4.0,2147.376,3,24.962989999999998,121.51284,33.4 233 | 2012,16.2,4074.736,0,24.94235,121.50357,14.7 234 | 2012,27.1,4412.765,1,24.95032,121.49587,17.4 235 | 2013,39.7,333.3679,9,24.98016,121.53931999999999,32.4 236 | 2013,8.0,2216.612,4,24.96007,121.51361000000001,23.9 237 | 2012,12.9,250.63099999999997,7,24.96606,121.54297,39.3 238 | 2013,3.6,373.8389,10,24.983220000000003,121.53765,61.9 239 | 2013,13.0,732.8528,0,24.976679999999998,121.52518,39.0 240 | 2013,12.8,732.8528,0,24.976679999999998,121.52518,40.6 241 | 
2013,18.1,837.7233,0,24.96334,121.54767,29.7 242 | 2013,11.0,1712.632,2,24.96412,121.5167,28.8 243 | 2013,13.7,250.63099999999997,7,24.96606,121.54297,41.4 244 | 2012,2.0,2077.39,3,24.96357,121.51328999999998,33.4 245 | 2013,32.8,204.1705,8,24.98236,121.53923,48.2 246 | 2013,4.8,1559.8270000000002,3,24.97213,121.51626999999999,21.7 247 | 2013,7.5,639.6198,5,24.972579999999997,121.54813999999999,40.8 248 | 2013,16.4,389.8219,6,24.96412,121.54273,40.6 249 | 2013,21.7,1055.067,0,24.96211,121.54928000000001,23.1 250 | 2013,19.0,1009.235,0,24.96357,121.54951000000001,22.3 251 | 2012,18.0,6306.153,1,24.95743,121.47516,15.0 252 | 2013,39.2,424.7132,7,24.97429,121.53917,30.0 253 | 2012,31.7,1159.454,0,24.9496,121.53018,13.8 254 | 2012,5.9,90.45606,9,24.97433,121.5431,52.7 255 | 2012,30.4,1735.595,2,24.96464,121.51623000000001,25.9 256 | 2012,1.1,329.9747,5,24.98254,121.54395,51.8 257 | 2013,31.5,5512.0380000000005,1,24.95095,121.48458000000001,17.4 258 | 2012,14.6,339.2289,1,24.975189999999998,121.53151000000001,26.5 259 | 2013,17.3,444.1334,1,24.97501,121.5273,43.9 260 | 2013,0.0,292.9978,6,24.977439999999998,121.54458000000001,63.3 261 | 2013,17.7,837.7233,0,24.96334,121.54767,28.8 262 | 2013,17.0,1485.0970000000002,4,24.97073,121.51700000000001,30.7 263 | 2013,16.2,2288.011,3,24.95885,121.51359,24.4 264 | 2012,15.9,289.3248,5,24.982029999999998,121.54348,53.0 265 | 2013,3.9,2147.376,3,24.962989999999998,121.51284,31.7 266 | 2013,32.6,493.657,7,24.96968,121.54522,40.6 267 | 2012,15.7,815.9314,4,24.97886,121.53464,38.1 268 | 2013,17.8,1783.18,3,24.96731,121.51486000000001,23.7 269 | 2012,34.7,482.7581,5,24.97433,121.53863,41.1 270 | 2013,17.2,390.5684,5,24.97937,121.54245,40.1 271 | 2013,17.6,837.7233,0,24.96334,121.54767,23.0 272 | 2013,10.8,252.5822,1,24.9746,121.53046,117.5 273 | 2012,17.7,451.6419,8,24.96945,121.5449,26.5 274 | 2012,13.0,492.2313,5,24.96515,121.53737,40.5 275 | 2013,13.2,170.1289,1,24.97371,121.52984,29.3 276 | 
2013,27.5,394.0173,7,24.97305,121.53993999999999,41.0 277 | 2012,1.5,23.382839999999998,7,24.96772,121.54101999999999,49.7 278 | 2013,19.1,461.1016,5,24.95425,121.5399,34.0 279 | 2013,21.2,2185.1279999999997,3,24.96322,121.51236999999999,27.7 280 | 2012,0.0,208.3905,6,24.95618,121.53844,44.0 281 | 2013,2.6,1554.25,3,24.97026,121.51642,31.1 282 | 2013,2.3,184.3302,6,24.96581,121.54086000000001,45.4 283 | 2013,4.7,387.7721,9,24.98118,121.53788,44.8 284 | 2012,2.0,1455.7979999999998,1,24.9512,121.54899999999999,25.6 285 | 2013,33.5,1978.671,2,24.98674,121.51843999999998,23.5 286 | 2012,15.0,383.2805,7,24.96735,121.54463999999999,34.4 287 | 2013,30.1,718.2937,3,24.975089999999998,121.53643999999998,55.3 288 | 2012,5.9,90.45606,9,24.97433,121.5431,56.3 289 | 2013,19.2,461.1016,5,24.95425,121.5399,32.9 290 | 2013,16.6,323.6912,6,24.97841,121.5428,51.0 291 | 2013,13.9,289.3248,5,24.982029999999998,121.54348,44.5 292 | 2013,37.7,490.3446,0,24.972170000000002,121.53471,37.0 293 | 2012,3.4,56.47425,7,24.95744,121.53711000000001,54.4 294 | 2013,17.5,395.6747,5,24.95674,121.53399999999999,24.5 295 | 2012,12.6,383.2805,7,24.96735,121.54463999999999,42.5 296 | 2013,26.4,335.5273,6,24.9796,121.5414,38.1 297 | 2013,18.2,2179.59,3,24.962989999999998,121.51252,21.8 298 | 2012,12.5,1144.4360000000001,4,24.99176,121.53456000000001,34.1 299 | 2012,34.9,567.0349,4,24.970029999999998,121.5458,28.5 300 | 2013,16.7,4082.015,0,24.94155,121.50381000000002,16.7 301 | 2013,33.2,121.7262,10,24.98178,121.54059,46.1 302 | 2013,2.5,156.2442,4,24.96696,121.53992,36.9 303 | 2012,38.0,461.7848,0,24.97229,121.53445,35.7 304 | 2013,16.5,2288.011,3,24.95885,121.51359,23.2 305 | 2013,38.3,439.7105,0,24.971610000000002,121.53423000000001,38.4 306 | 2013,20.0,1626.0829999999999,3,24.96622,121.51668000000001,29.4 307 | 2013,16.2,289.3248,5,24.982029999999998,121.54348,55.0 308 | 2013,14.4,169.9803,1,24.973689999999998,121.52978999999999,50.2 309 | 2012,10.3,3079.89,0,24.9546,121.56626999999999,24.7 310 | 
2013,16.4,289.3248,5,24.982029999999998,121.54348,53.0 311 | 2013,30.3,1264.73,0,24.948829999999997,121.52954,19.1 312 | 2013,16.4,1643.499,2,24.95394,121.55174,24.7 313 | 2013,21.3,537.7971,4,24.97425,121.53813999999998,42.2 314 | 2013,35.4,318.5292,9,24.97071,121.54068999999998,78.0 315 | 2013,8.3,104.8101,5,24.966739999999998,121.54066999999999,42.8 316 | 2013,3.7,577.9615,6,24.97201,121.54722,41.6 317 | 2013,15.6,1756.411,2,24.9832,121.51812,27.3 318 | 2013,13.3,250.63099999999997,7,24.96606,121.54297,42.0 319 | 2012,15.6,752.7669,2,24.97795,121.53451000000001,37.5 320 | 2013,7.1,379.5575,10,24.98343,121.53761999999999,49.8 321 | 2013,34.6,272.6783,5,24.95562,121.53872,26.9 322 | 2012,13.5,4197.349,0,24.93885,121.50383000000001,18.6 323 | 2012,16.9,964.7496,4,24.98872,121.53411000000001,37.7 324 | 2013,12.9,187.4823,1,24.973879999999998,121.52981000000001,33.1 325 | 2013,28.6,197.1338,6,24.97631,121.54436000000001,42.5 326 | 2012,12.4,1712.632,2,24.96412,121.5167,31.3 327 | 2013,36.6,488.8193,8,24.97015,121.54494,38.1 328 | 2013,4.1,56.47425,7,24.95744,121.53711000000001,62.1 329 | 2013,3.5,757.3377,3,24.975379999999998,121.54971,36.7 330 | 2012,15.9,1497.713,3,24.970029999999998,121.51696000000001,23.6 331 | 2013,13.6,4197.349,0,24.93885,121.50383000000001,19.2 332 | 2013,32.0,1156.777,0,24.94935,121.53046,12.8 333 | 2013,25.6,4519.69,0,24.94826,121.49587,15.6 334 | 2013,39.8,617.7134,2,24.97577,121.53475,39.6 335 | 2012,7.8,104.8101,5,24.966739999999998,121.54066999999999,38.4 336 | 2012,30.0,1013.341,5,24.99006,121.5346,22.8 337 | 2013,27.3,337.6016,6,24.96431,121.54063000000001,36.5 338 | 2012,5.1,1867.233,2,24.984070000000003,121.51748,35.6 339 | 2012,31.3,600.8604,5,24.96871,121.54651000000001,30.9 340 | 2012,31.5,258.186,9,24.96867,121.54331,36.3 341 | 2013,1.7,329.9747,5,24.98254,121.54395,50.4 342 | 2013,33.6,270.8895,0,24.97281,121.53265,42.9 343 | 2013,13.0,750.0704,2,24.97371,121.54951000000001,37.0 344 | 2012,5.7,90.45606,9,24.97433,121.5431,53.5 
345 | 2013,33.5,563.2854,8,24.982229999999998,121.53596999999999,46.6 346 | 2013,34.6,3085.17,0,24.998,121.5155,41.2 347 | 2012,0.0,185.4296,0,24.9711,121.5317,37.9 348 | 2013,13.2,1712.632,2,24.96412,121.5167,30.8 349 | 2013,17.4,6488.021,1,24.95719,121.47353000000001,11.2 350 | 2012,4.6,259.6607,6,24.97585,121.54516000000001,53.7 351 | 2012,7.8,104.8101,5,24.966739999999998,121.54066999999999,47.0 352 | 2013,13.2,492.2313,5,24.96515,121.53737,42.3 353 | 2012,4.0,2180.245,3,24.96324,121.51241,28.6 354 | 2012,18.4,2674.961,3,24.96143,121.50827,25.7 355 | 2013,4.1,2147.376,3,24.962989999999998,121.51284,31.3 356 | 2013,12.2,1360.139,1,24.95204,121.54842,30.1 357 | 2013,3.8,383.8624,5,24.98085,121.54391000000001,60.7 358 | 2012,10.3,211.4473,1,24.97417,121.52998999999998,45.3 359 | 2013,0.0,338.9679,9,24.968529999999998,121.54413000000001,44.9 360 | 2013,1.1,193.5845,6,24.96571,121.54088999999999,45.1 361 | 2013,5.6,2408.993,0,24.95505,121.55963999999999,24.7 362 | 2012,32.9,87.30221999999999,10,24.983,121.54021999999999,47.1 363 | 2013,41.4,281.205,8,24.97345,121.54093,63.3 364 | 2013,17.1,967.4,4,24.98872,121.53408,40.0 365 | 2013,32.3,109.9455,10,24.981820000000003,121.54086000000001,48.0 366 | 2013,35.3,614.1394,7,24.979129999999998,121.53666000000001,33.1 367 | 2012,17.3,2261.4320000000002,4,24.96182,121.51222,29.5 368 | 2012,14.2,1801.5439999999999,1,24.951529999999998,121.55254,24.8 369 | 2012,15.0,1828.319,2,24.96464,121.51531000000001,20.9 370 | 2013,18.2,350.8515,1,24.97544,121.53119,43.1 371 | 2012,20.2,2185.1279999999997,3,24.96322,121.51236999999999,22.8 372 | 2012,15.9,289.3248,5,24.982029999999998,121.54348,42.1 373 | 2013,4.1,312.8963,5,24.95591,121.53956000000001,51.7 374 | 2013,33.9,157.6052,7,24.966279999999998,121.54196,41.5 375 | 2013,0.0,274.0144,1,24.9748,121.53058999999999,52.2 376 | 2013,5.4,390.5684,5,24.97937,121.54245,49.5 377 | 2013,21.7,1157.988,0,24.96165,121.55011,23.8 378 | 
2013,14.7,1717.193,2,24.964470000000002,121.51648999999999,30.5 379 | 2013,3.9,49.66105,8,24.95836,121.53756000000001,56.8 380 | 2013,37.3,587.8877,8,24.97077,121.54633999999999,37.4 381 | 2013,0.0,292.9978,6,24.977439999999998,121.54458000000001,69.7 382 | 2013,14.1,289.3248,5,24.982029999999998,121.54348,53.3 383 | 2013,8.0,132.5469,9,24.982979999999998,121.53981,47.3 384 | 2013,16.3,3529.5640000000003,0,24.93207,121.51597,29.3 385 | 2012,29.1,506.1144,4,24.97845,121.53889,40.3 386 | 2012,16.1,4066.587,0,24.942970000000003,121.50341999999999,12.9 387 | 2013,18.3,82.88643,10,24.983,121.54026,46.6 388 | 2012,0.0,185.4296,0,24.9711,121.5317,55.3 389 | 2013,16.2,2103.555,3,24.960420000000003,121.51462,25.6 390 | 2013,10.4,2251.938,4,24.959570000000003,121.51353,27.3 391 | 2013,40.9,122.3619,8,24.96756,121.5423,67.7 392 | 2013,32.8,377.8302,9,24.971510000000002,121.5435,38.6 393 | 2013,6.2,1939.749,1,24.95155,121.55386999999999,31.3 394 | 2013,42.7,443.80199999999996,6,24.97927,121.53873999999999,35.3 395 | 2013,16.9,967.4,4,24.98872,121.53408,40.3 396 | 2013,32.6,4136.271,1,24.95544,121.4963,24.7 397 | 2012,21.2,512.5487,4,24.974,121.53842,42.5 398 | 2012,37.1,918.6357,1,24.97198,121.55063,31.9 399 | 2013,13.1,1164.838,4,24.99156,121.53406000000001,32.2 400 | 2013,14.7,1717.193,2,24.964470000000002,121.51648999999999,23.0 401 | 2012,12.7,170.1289,1,24.97371,121.52984,37.3 402 | 2013,26.8,482.7581,5,24.97433,121.53863,35.5 403 | 2013,7.6,2175.03,3,24.96305,121.51253999999999,27.7 404 | 2012,12.7,187.4823,1,24.973879999999998,121.52981000000001,28.5 405 | 2012,30.9,161.942,9,24.98353,121.53966000000001,39.7 406 | 2013,16.4,289.3248,5,24.982029999999998,121.54348,41.2 407 | 2012,23.0,130.9945,6,24.95663,121.53765,37.2 408 | 2013,1.9,372.1386,7,24.972929999999998,121.54026,40.5 409 | 2013,5.2,2408.993,0,24.95505,121.55963999999999,22.3 410 | 2013,18.5,2175.744,3,24.9633,121.51243000000001,28.1 411 | 2013,13.7,4082.015,0,24.94155,121.50381000000002,15.4 412 | 
2012,5.6,90.45606,9,24.97433,121.5431,50.0 413 | 2013,18.8,390.9696,7,24.979229999999998,121.53986,40.6 414 | 2013,8.1,104.8101,5,24.966739999999998,121.54066999999999,52.5 415 | 2013,6.5,90.45606,9,24.97433,121.5431,63.9 416 | -------------------------------------------------------------------------------- /SampleDBs/HousingPrices.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/SampleDBs/HousingPrices.zip -------------------------------------------------------------------------------- /SampleDBs/LearningSQLExample.sql: -------------------------------------------------------------------------------- 1 | /* begin table creation */ 2 | 3 | create table department 4 | (dept_id smallint unsigned not null auto_increment, 5 | name varchar(20) not null, 6 | constraint pk_department primary key (dept_id) 7 | ); 8 | 9 | create table branch 10 | (branch_id smallint unsigned not null auto_increment, 11 | name varchar(20) not null, 12 | address varchar(30), 13 | city varchar(20), 14 | state varchar(2), 15 | zip varchar(12), 16 | constraint pk_branch primary key (branch_id) 17 | ); 18 | 19 | create table employee 20 | (emp_id smallint unsigned not null auto_increment, 21 | fname varchar(20) not null, 22 | lname varchar(20) not null, 23 | start_date date not null, 24 | end_date date, 25 | superior_emp_id smallint unsigned, 26 | dept_id smallint unsigned, 27 | title varchar(20), 28 | assigned_branch_id smallint unsigned, 29 | constraint fk_e_emp_id 30 | foreign key (superior_emp_id) references employee (emp_id), 31 | constraint fk_dept_id 32 | foreign key (dept_id) references department (dept_id), 33 | constraint fk_e_branch_id 34 | foreign key (assigned_branch_id) references branch (branch_id), 35 | constraint pk_employee primary key (emp_id) 36 | ); 37 | 38 | create table product_type 39 | (product_type_cd varchar(10) not null, 40 | name 
varchar(50) not null, 41 | constraint pk_product_type primary key (product_type_cd) 42 | ); 43 | 44 | create table product 45 | (product_cd varchar(10) not null, 46 | name varchar(50) not null, 47 | product_type_cd varchar(10) not null, 48 | date_offered date, 49 | date_retired date, 50 | constraint fk_product_type_cd foreign key (product_type_cd) 51 | references product_type (product_type_cd), 52 | constraint pk_product primary key (product_cd) 53 | ); 54 | 55 | create table customer 56 | (cust_id integer unsigned not null auto_increment, 57 | fed_id varchar(12) not null, 58 | cust_type_cd enum('I','B') not null, 59 | address varchar(30), 60 | city varchar(20), 61 | state varchar(20), 62 | postal_code varchar(10), 63 | constraint pk_customer primary key (cust_id) 64 | ); 65 | 66 | create table individual 67 | (cust_id integer unsigned not null, 68 | fname varchar(30) not null, 69 | lname varchar(30) not null, 70 | birth_date date, 71 | constraint fk_i_cust_id foreign key (cust_id) 72 | references customer (cust_id), 73 | constraint pk_individual primary key (cust_id) 74 | ); 75 | 76 | create table business 77 | (cust_id integer unsigned not null, 78 | name varchar(40) not null, 79 | state_id varchar(10) not null, 80 | incorp_date date, 81 | constraint fk_b_cust_id foreign key (cust_id) 82 | references customer (cust_id), 83 | constraint pk_business primary key (cust_id) 84 | ); 85 | 86 | create table officer 87 | (officer_id smallint unsigned not null auto_increment, 88 | cust_id integer unsigned not null, 89 | fname varchar(30) not null, 90 | lname varchar(30) not null, 91 | title varchar(20), 92 | start_date date not null, 93 | end_date date, 94 | constraint fk_o_cust_id foreign key (cust_id) 95 | references business (cust_id), 96 | constraint pk_officer primary key (officer_id) 97 | ); 98 | 99 | create table account 100 | (account_id integer unsigned not null auto_increment, 101 | product_cd varchar(10) not null, 102 | cust_id integer unsigned not null, 103 | 
open_date date not null, 104 | close_date date, 105 | last_activity_date date, 106 | status enum('ACTIVE','CLOSED','FROZEN'), 107 | open_branch_id smallint unsigned, 108 | open_emp_id smallint unsigned, 109 | avail_balance float(10,2), 110 | pending_balance float(10,2), 111 | constraint fk_product_cd foreign key (product_cd) 112 | references product (product_cd), 113 | constraint fk_a_cust_id foreign key (cust_id) 114 | references customer (cust_id), 115 | constraint fk_a_branch_id foreign key (open_branch_id) 116 | references branch (branch_id), 117 | constraint fk_a_emp_id foreign key (open_emp_id) 118 | references employee (emp_id), 119 | constraint pk_account primary key (account_id) 120 | ); 121 | 122 | create table transaction 123 | (txn_id integer unsigned not null auto_increment, 124 | txn_date datetime not null, 125 | account_id integer unsigned not null, 126 | txn_type_cd enum('DBT','CDT'), 127 | amount double(10,2) not null, 128 | teller_emp_id smallint unsigned, 129 | execution_branch_id smallint unsigned, 130 | funds_avail_date datetime, 131 | constraint fk_t_account_id foreign key (account_id) 132 | references account (account_id), 133 | constraint fk_teller_emp_id foreign key (teller_emp_id) 134 | references employee (emp_id), 135 | constraint fk_exec_branch_id foreign key (execution_branch_id) 136 | references branch (branch_id), 137 | constraint pk_transaction primary key (txn_id) 138 | ); 139 | 140 | /* end table creation */ 141 | 142 | /* begin data population */ 143 | 144 | /* department data */ 145 | insert into department (dept_id, name) 146 | values (null, 'Operations'); 147 | insert into department (dept_id, name) 148 | values (null, 'Loans'); 149 | insert into department (dept_id, name) 150 | values (null, 'Administration'); 151 | 152 | /* branch data */ 153 | insert into branch (branch_id, name, address, city, state, zip) 154 | values (null, 'Headquarters', '3882 Main St.', 'Waltham', 'MA', '02451'); 155 | insert into branch (branch_id, 
name, address, city, state, zip) 156 | values (null, 'Woburn Branch', '422 Maple St.', 'Woburn', 'MA', '01801'); 157 | insert into branch (branch_id, name, address, city, state, zip) 158 | values (null, 'Quincy Branch', '125 Presidential Way', 'Quincy', 'MA', '02169'); 159 | insert into branch (branch_id, name, address, city, state, zip) 160 | values (null, 'So. NH Branch', '378 Maynard Ln.', 'Salem', 'NH', '03079'); 161 | 162 | /* employee data */ 163 | insert into employee (emp_id, fname, lname, start_date, 164 | dept_id, title, assigned_branch_id) 165 | values (null, 'Michael', 'Smith', '2001-06-22', 166 | (select dept_id from department where name = 'Administration'), 167 | 'President', 168 | (select branch_id from branch where name = 'Headquarters')); 169 | insert into employee (emp_id, fname, lname, start_date, 170 | dept_id, title, assigned_branch_id) 171 | values (null, 'Susan', 'Barker', '2002-09-12', 172 | (select dept_id from department where name = 'Administration'), 173 | 'Vice President', 174 | (select branch_id from branch where name = 'Headquarters')); 175 | insert into employee (emp_id, fname, lname, start_date, 176 | dept_id, title, assigned_branch_id) 177 | values (null, 'Robert', 'Tyler', '2000-02-09', 178 | (select dept_id from department where name = 'Administration'), 179 | 'Treasurer', 180 | (select branch_id from branch where name = 'Headquarters')); 181 | insert into employee (emp_id, fname, lname, start_date, 182 | dept_id, title, assigned_branch_id) 183 | values (null, 'Susan', 'Hawthorne', '2002-04-24', 184 | (select dept_id from department where name = 'Operations'), 185 | 'Operations Manager', 186 | (select branch_id from branch where name = 'Headquarters')); 187 | insert into employee (emp_id, fname, lname, start_date, 188 | dept_id, title, assigned_branch_id) 189 | values (null, 'John', 'Gooding', '2003-11-14', 190 | (select dept_id from department where name = 'Loans'), 191 | 'Loan Manager', 192 | (select branch_id from branch 
where name = 'Headquarters')); 193 | insert into employee (emp_id, fname, lname, start_date, 194 | dept_id, title, assigned_branch_id) 195 | values (null, 'Helen', 'Fleming', '2004-03-17', 196 | (select dept_id from department where name = 'Operations'), 197 | 'Head Teller', 198 | (select branch_id from branch where name = 'Headquarters')); 199 | insert into employee (emp_id, fname, lname, start_date, 200 | dept_id, title, assigned_branch_id) 201 | values (null, 'Chris', 'Tucker', '2004-09-15', 202 | (select dept_id from department where name = 'Operations'), 203 | 'Teller', 204 | (select branch_id from branch where name = 'Headquarters')); 205 | insert into employee (emp_id, fname, lname, start_date, 206 | dept_id, title, assigned_branch_id) 207 | values (null, 'Sarah', 'Parker', '2002-12-02', 208 | (select dept_id from department where name = 'Operations'), 209 | 'Teller', 210 | (select branch_id from branch where name = 'Headquarters')); 211 | insert into employee (emp_id, fname, lname, start_date, 212 | dept_id, title, assigned_branch_id) 213 | values (null, 'Jane', 'Grossman', '2002-05-03', 214 | (select dept_id from department where name = 'Operations'), 215 | 'Teller', 216 | (select branch_id from branch where name = 'Headquarters')); 217 | insert into employee (emp_id, fname, lname, start_date, 218 | dept_id, title, assigned_branch_id) 219 | values (null, 'Paula', 'Roberts', '2002-07-27', 220 | (select dept_id from department where name = 'Operations'), 221 | 'Head Teller', 222 | (select branch_id from branch where name = 'Woburn Branch')); 223 | insert into employee (emp_id, fname, lname, start_date, 224 | dept_id, title, assigned_branch_id) 225 | values (null, 'Thomas', 'Ziegler', '2000-10-23', 226 | (select dept_id from department where name = 'Operations'), 227 | 'Teller', 228 | (select branch_id from branch where name = 'Woburn Branch')); 229 | insert into employee (emp_id, fname, lname, start_date, 230 | dept_id, title, assigned_branch_id) 231 | 
values (null, 'Samantha', 'Jameson', '2003-01-08', 232 | (select dept_id from department where name = 'Operations'), 233 | 'Teller', 234 | (select branch_id from branch where name = 'Woburn Branch')); 235 | insert into employee (emp_id, fname, lname, start_date, 236 | dept_id, title, assigned_branch_id) 237 | values (null, 'John', 'Blake', '2000-05-11', 238 | (select dept_id from department where name = 'Operations'), 239 | 'Head Teller', 240 | (select branch_id from branch where name = 'Quincy Branch')); 241 | insert into employee (emp_id, fname, lname, start_date, 242 | dept_id, title, assigned_branch_id) 243 | values (null, 'Cindy', 'Mason', '2002-08-09', 244 | (select dept_id from department where name = 'Operations'), 245 | 'Teller', 246 | (select branch_id from branch where name = 'Quincy Branch')); 247 | insert into employee (emp_id, fname, lname, start_date, 248 | dept_id, title, assigned_branch_id) 249 | values (null, 'Frank', 'Portman', '2003-04-01', 250 | (select dept_id from department where name = 'Operations'), 251 | 'Teller', 252 | (select branch_id from branch where name = 'Quincy Branch')); 253 | insert into employee (emp_id, fname, lname, start_date, 254 | dept_id, title, assigned_branch_id) 255 | values (null, 'Theresa', 'Markham', '2001-03-15', 256 | (select dept_id from department where name = 'Operations'), 257 | 'Head Teller', 258 | (select branch_id from branch where name = 'So. NH Branch')); 259 | insert into employee (emp_id, fname, lname, start_date, 260 | dept_id, title, assigned_branch_id) 261 | values (null, 'Beth', 'Fowler', '2002-06-29', 262 | (select dept_id from department where name = 'Operations'), 263 | 'Teller', 264 | (select branch_id from branch where name = 'So. 
NH Branch')); 265 | insert into employee (emp_id, fname, lname, start_date, 266 | dept_id, title, assigned_branch_id) 267 | values (null, 'Rick', 'Tulman', '2002-12-12', 268 | (select dept_id from department where name = 'Operations'), 269 | 'Teller', 270 | (select branch_id from branch where name = 'So. NH Branch')); 271 | 272 | /* create data for self-referencing foreign key 'superior_emp_id' */ 273 | create temporary table emp_tmp as 274 | select emp_id, fname, lname from employee; 275 | 276 | update employee set superior_emp_id = 277 | (select emp_id from emp_tmp where lname = 'Smith' and fname = 'Michael') 278 | where ((lname = 'Barker' and fname = 'Susan') 279 | or (lname = 'Tyler' and fname = 'Robert')); 280 | update employee set superior_emp_id = 281 | (select emp_id from emp_tmp where lname = 'Tyler' and fname = 'Robert') 282 | where lname = 'Hawthorne' and fname = 'Susan'; 283 | update employee set superior_emp_id = 284 | (select emp_id from emp_tmp where lname = 'Hawthorne' and fname = 'Susan') 285 | where ((lname = 'Gooding' and fname = 'John') 286 | or (lname = 'Fleming' and fname = 'Helen') 287 | or (lname = 'Roberts' and fname = 'Paula') 288 | or (lname = 'Blake' and fname = 'John') 289 | or (lname = 'Markham' and fname = 'Theresa')); 290 | update employee set superior_emp_id = 291 | (select emp_id from emp_tmp where lname = 'Fleming' and fname = 'Helen') 292 | where ((lname = 'Tucker' and fname = 'Chris') 293 | or (lname = 'Parker' and fname = 'Sarah') 294 | or (lname = 'Grossman' and fname = 'Jane')); 295 | update employee set superior_emp_id = 296 | (select emp_id from emp_tmp where lname = 'Roberts' and fname = 'Paula') 297 | where ((lname = 'Ziegler' and fname = 'Thomas') 298 | or (lname = 'Jameson' and fname = 'Samantha')); 299 | update employee set superior_emp_id = 300 | (select emp_id from emp_tmp where lname = 'Blake' and fname = 'John') 301 | where ((lname = 'Mason' and fname = 'Cindy') 302 | or (lname = 'Portman' and fname = 'Frank')); 
303 | update employee set superior_emp_id = 304 | (select emp_id from emp_tmp where lname = 'Markham' and fname = 'Theresa') 305 | where ((lname = 'Fowler' and fname = 'Beth') 306 | or (lname = 'Tulman' and fname = 'Rick')); 307 | 308 | drop table emp_tmp; 309 | 310 | /* product type data */ 311 | insert into product_type (product_type_cd, name) 312 | values ('ACCOUNT','Customer Accounts'); 313 | insert into product_type (product_type_cd, name) 314 | values ('LOAN','Individual and Business Loans'); 315 | insert into product_type (product_type_cd, name) 316 | values ('INSURANCE','Insurance Offerings'); 317 | 318 | /* product data */ 319 | insert into product (product_cd, name, product_type_cd, date_offered) 320 | values ('CHK','checking account','ACCOUNT','2000-01-01'); 321 | insert into product (product_cd, name, product_type_cd, date_offered) 322 | values ('SAV','savings account','ACCOUNT','2000-01-01'); 323 | insert into product (product_cd, name, product_type_cd, date_offered) 324 | values ('MM','money market account','ACCOUNT','2000-01-01'); 325 | insert into product (product_cd, name, product_type_cd, date_offered) 326 | values ('CD','certificate of deposit','ACCOUNT','2000-01-01'); 327 | insert into product (product_cd, name, product_type_cd, date_offered) 328 | values ('MRT','home mortgage','LOAN','2000-01-01'); 329 | insert into product (product_cd, name, product_type_cd, date_offered) 330 | values ('AUT','auto loan','LOAN','2000-01-01'); 331 | insert into product (product_cd, name, product_type_cd, date_offered) 332 | values ('BUS','business line of credit','LOAN','2000-01-01'); 333 | insert into product (product_cd, name, product_type_cd, date_offered) 334 | values ('SBL','small business loan','LOAN','2000-01-01'); 335 | 336 | /* residential customer data */ 337 | insert into customer (cust_id, fed_id, cust_type_cd, 338 | address, city, state, postal_code) 339 | values (null, '111-11-1111', 'I', '47 Mockingbird Ln', 'Lynnfield', 'MA', '01940'); 340 | 
insert into individual (cust_id, fname, lname, birth_date) 341 | select cust_id, 'James', 'Hadley', '1972-04-22' from customer 342 | where fed_id = '111-11-1111'; 343 | insert into customer (cust_id, fed_id, cust_type_cd, 344 | address, city, state, postal_code) 345 | values (null, '222-22-2222', 'I', '372 Clearwater Blvd', 'Woburn', 'MA', '01801'); 346 | insert into individual (cust_id, fname, lname, birth_date) 347 | select cust_id, 'Susan', 'Tingley', '1968-08-15' from customer 348 | where fed_id = '222-22-2222'; 349 | insert into customer (cust_id, fed_id, cust_type_cd, 350 | address, city, state, postal_code) 351 | values (null, '333-33-3333', 'I', '18 Jessup Rd', 'Quincy', 'MA', '02169'); 352 | insert into individual (cust_id, fname, lname, birth_date) 353 | select cust_id, 'Frank', 'Tucker', '1958-02-06' from customer 354 | where fed_id = '333-33-3333'; 355 | insert into customer (cust_id, fed_id, cust_type_cd, 356 | address, city, state, postal_code) 357 | values (null, '444-44-4444', 'I', '12 Buchanan Ln', 'Waltham', 'MA', '02451'); 358 | insert into individual (cust_id, fname, lname, birth_date) 359 | select cust_id, 'John', 'Hayward', '1966-12-22' from customer 360 | where fed_id = '444-44-4444'; 361 | insert into customer (cust_id, fed_id, cust_type_cd, 362 | address, city, state, postal_code) 363 | values (null, '555-55-5555', 'I', '2341 Main St', 'Salem', 'NH', '03079'); 364 | insert into individual (cust_id, fname, lname, birth_date) 365 | select cust_id, 'Charles', 'Frasier', '1971-08-25' from customer 366 | where fed_id = '555-55-5555'; 367 | insert into customer (cust_id, fed_id, cust_type_cd, 368 | address, city, state, postal_code) 369 | values (null, '666-66-6666', 'I', '12 Blaylock Ln', 'Waltham', 'MA', '02451'); 370 | insert into individual (cust_id, fname, lname, birth_date) 371 | select cust_id, 'John', 'Spencer', '1962-09-14' from customer 372 | where fed_id = '666-66-6666'; 373 | insert into customer (cust_id, fed_id, cust_type_cd, 374 | 
address, city, state, postal_code) 375 | values (null, '777-77-7777', 'I', '29 Admiral Ln', 'Wilmington', 'MA', '01887'); 376 | insert into individual (cust_id, fname, lname, birth_date) 377 | select cust_id, 'Margaret', 'Young', '1947-03-19' from customer 378 | where fed_id = '777-77-7777'; 379 | insert into customer (cust_id, fed_id, cust_type_cd, 380 | address, city, state, postal_code) 381 | values (null, '888-88-8888', 'I', '472 Freedom Rd', 'Salem', 'NH', '03079'); 382 | insert into individual (cust_id, fname, lname, birth_date) 383 | select cust_id, 'Louis', 'Blake', '1977-07-01' from customer 384 | where fed_id = '888-88-8888'; 385 | insert into customer (cust_id, fed_id, cust_type_cd, 386 | address, city, state, postal_code) 387 | values (null, '999-99-9999', 'I', '29 Maple St', 'Newton', 'MA', '02458'); 388 | insert into individual (cust_id, fname, lname, birth_date) 389 | select cust_id, 'Richard', 'Farley', '1968-06-16' from customer 390 | where fed_id = '999-99-9999'; 391 | 392 | /* corporate customer data */ 393 | insert into customer (cust_id, fed_id, cust_type_cd, 394 | address, city, state, postal_code) 395 | values (null, '04-1111111', 'B', '7 Industrial Way', 'Salem', 'NH', '03079'); 396 | insert into business (cust_id, name, state_id, incorp_date) 397 | select cust_id, 'Chilton Engineering', '12-345-678', '1995-05-01' from customer 398 | where fed_id = '04-1111111'; 399 | insert into officer (officer_id, cust_id, fname, lname, 400 | title, start_date) 401 | select null, cust_id, 'John', 'Chilton', 'President', '1995-05-01' 402 | from customer 403 | where fed_id = '04-1111111'; 404 | insert into customer (cust_id, fed_id, cust_type_cd, 405 | address, city, state, postal_code) 406 | values (null, '04-2222222', 'B', '287A Corporate Ave', 'Wilmington', 'MA', '01887'); 407 | insert into business (cust_id, name, state_id, incorp_date) 408 | select cust_id, 'Northeast Cooling Inc.', '23-456-789', '2001-01-01' from customer 409 | where fed_id = 
'04-2222222'; 410 | insert into officer (officer_id, cust_id, fname, lname, 411 | title, start_date) 412 | select null, cust_id, 'Paul', 'Hardy', 'President', '2001-01-01' 413 | from customer 414 | where fed_id = '04-2222222'; 415 | insert into customer (cust_id, fed_id, cust_type_cd, 416 | address, city, state, postal_code) 417 | values (null, '04-3333333', 'B', '789 Main St', 'Salem', 'NH', '03079'); 418 | insert into business (cust_id, name, state_id, incorp_date) 419 | select cust_id, 'Superior Auto Body', '34-567-890', '2002-06-30' from customer 420 | where fed_id = '04-3333333'; 421 | insert into officer (officer_id, cust_id, fname, lname, 422 | title, start_date) 423 | select null, cust_id, 'Carl', 'Lutz', 'President', '2002-06-30' 424 | from customer 425 | where fed_id = '04-3333333'; 426 | insert into customer (cust_id, fed_id, cust_type_cd, 427 | address, city, state, postal_code) 428 | values (null, '04-4444444', 'B', '4772 Presidential Way', 'Quincy', 'MA', '02169'); 429 | insert into business (cust_id, name, state_id, incorp_date) 430 | select cust_id, 'AAA Insurance Inc.', '45-678-901', '1999-05-01' from customer 431 | where fed_id = '04-4444444'; 432 | insert into officer (officer_id, cust_id, fname, lname, 433 | title, start_date) 434 | select null, cust_id, 'Stanley', 'Cheswick', 'President', '1999-05-01' 435 | from customer 436 | where fed_id = '04-4444444'; 437 | 438 | /* residential account data */ 439 | insert into account (account_id, product_cd, cust_id, open_date, 440 | last_activity_date, status, open_branch_id, 441 | open_emp_id, avail_balance, pending_balance) 442 | select null, a.prod_cd, c.cust_id, a.open_date, a.last_date, 'ACTIVE', 443 | e.branch_id, e.emp_id, a.avail, a.pend 444 | from customer c cross join 445 | (select b.branch_id, e.emp_id 446 | from branch b inner join employee e on e.assigned_branch_id = b.branch_id 447 | where b.city = 'Woburn' limit 1) e 448 | cross join 449 | (select 'CHK' prod_cd, '2000-01-15' open_date, 
'2005-01-04' last_date, 450 | 1057.75 avail, 1057.75 pend union all 451 | select 'SAV' prod_cd, '2000-01-15' open_date, '2004-12-19' last_date, 452 | 500.00 avail, 500.00 pend union all 453 | select 'CD' prod_cd, '2004-06-30' open_date, '2004-06-30' last_date, 454 | 3000.00 avail, 3000.00 pend) a 455 | where c.fed_id = '111-11-1111'; 456 | insert into account (account_id, product_cd, cust_id, open_date, 457 | last_activity_date, status, open_branch_id, 458 | open_emp_id, avail_balance, pending_balance) 459 | select null, a.prod_cd, c.cust_id, a.open_date, a.last_date, 'ACTIVE', 460 | e.branch_id, e.emp_id, a.avail, a.pend 461 | from customer c cross join 462 | (select b.branch_id, e.emp_id 463 | from branch b inner join employee e on e.assigned_branch_id = b.branch_id 464 | where b.city = 'Woburn' limit 1) e 465 | cross join 466 | (select 'CHK' prod_cd, '2001-03-12' open_date, '2004-12-27' last_date, 467 | 2258.02 avail, 2258.02 pend union all 468 | select 'SAV' prod_cd, '2001-03-12' open_date, '2004-12-11' last_date, 469 | 200.00 avail, 200.00 pend) a 470 | where c.fed_id = '222-22-2222'; 471 | insert into account (account_id, product_cd, cust_id, open_date, 472 | last_activity_date, status, open_branch_id, 473 | open_emp_id, avail_balance, pending_balance) 474 | select null, a.prod_cd, c.cust_id, a.open_date, a.last_date, 'ACTIVE', 475 | e.branch_id, e.emp_id, a.avail, a.pend 476 | from customer c cross join 477 | (select b.branch_id, e.emp_id 478 | from branch b inner join employee e on e.assigned_branch_id = b.branch_id 479 | where b.city = 'Quincy' limit 1) e 480 | cross join 481 | (select 'CHK' prod_cd, '2002-11-23' open_date, '2004-11-30' last_date, 482 | 1057.75 avail, 1057.75 pend union all 483 | select 'MM' prod_cd, '2002-12-15' open_date, '2004-12-05' last_date, 484 | 2212.50 avail, 2212.50 pend) a 485 | where c.fed_id = '333-33-3333'; 486 | insert into account (account_id, product_cd, cust_id, open_date, 487 | last_activity_date, status, open_branch_id, 
488 | open_emp_id, avail_balance, pending_balance) 489 | select null, a.prod_cd, c.cust_id, a.open_date, a.last_date, 'ACTIVE', 490 | e.branch_id, e.emp_id, a.avail, a.pend 491 | from customer c cross join 492 | (select b.branch_id, e.emp_id 493 | from branch b inner join employee e on e.assigned_branch_id = b.branch_id 494 | where b.city = 'Waltham' limit 1) e 495 | cross join 496 | (select 'CHK' prod_cd, '2003-09-12' open_date, '2005-01-03' last_date, 497 | 534.12 avail, 534.12 pend union all 498 | select 'SAV' prod_cd, '2000-01-15' open_date, '2004-10-24' last_date, 499 | 767.77 avail, 767.77 pend union all 500 | select 'MM' prod_cd, '2004-09-30' open_date, '2004-11-11' last_date, 501 | 5487.09 avail, 5487.09 pend) a 502 | where c.fed_id = '444-44-4444'; 503 | insert into account (account_id, product_cd, cust_id, open_date, 504 | last_activity_date, status, open_branch_id, 505 | open_emp_id, avail_balance, pending_balance) 506 | select null, a.prod_cd, c.cust_id, a.open_date, a.last_date, 'ACTIVE', 507 | e.branch_id, e.emp_id, a.avail, a.pend 508 | from customer c cross join 509 | (select b.branch_id, e.emp_id 510 | from branch b inner join employee e on e.assigned_branch_id = b.branch_id 511 | where b.city = 'Salem' limit 1) e 512 | cross join 513 | (select 'CHK' prod_cd, '2004-01-27' open_date, '2005-01-05' last_date, 514 | 2237.97 avail, 2897.97 pend) a 515 | where c.fed_id = '555-55-5555'; 516 | insert into account (account_id, product_cd, cust_id, open_date, 517 | last_activity_date, status, open_branch_id, 518 | open_emp_id, avail_balance, pending_balance) 519 | select null, a.prod_cd, c.cust_id, a.open_date, a.last_date, 'ACTIVE', 520 | e.branch_id, e.emp_id, a.avail, a.pend 521 | from customer c cross join 522 | (select b.branch_id, e.emp_id 523 | from branch b inner join employee e on e.assigned_branch_id = b.branch_id 524 | where b.city = 'Waltham' limit 1) e 525 | cross join 526 | (select 'CHK' prod_cd, '2002-08-24' open_date, '2004-11-29' last_date, 
527 | 122.37 avail, 122.37 pend union all 528 | select 'CD' prod_cd, '2004-12-28' open_date, '2004-12-28' last_date, 529 | 10000.00 avail, 10000.00 pend) a 530 | where c.fed_id = '666-66-6666'; 531 | insert into account (account_id, product_cd, cust_id, open_date, 532 | last_activity_date, status, open_branch_id, 533 | open_emp_id, avail_balance, pending_balance) 534 | select null, a.prod_cd, c.cust_id, a.open_date, a.last_date, 'ACTIVE', 535 | e.branch_id, e.emp_id, a.avail, a.pend 536 | from customer c cross join 537 | (select b.branch_id, e.emp_id 538 | from branch b inner join employee e on e.assigned_branch_id = b.branch_id 539 | where b.city = 'Woburn' limit 1) e 540 | cross join 541 | (select 'CD' prod_cd, '2004-01-12' open_date, '2004-01-12' last_date, 542 | 5000.00 avail, 5000.00 pend) a 543 | where c.fed_id = '777-77-7777'; 544 | insert into account (account_id, product_cd, cust_id, open_date, 545 | last_activity_date, status, open_branch_id, 546 | open_emp_id, avail_balance, pending_balance) 547 | select null, a.prod_cd, c.cust_id, a.open_date, a.last_date, 'ACTIVE', 548 | e.branch_id, e.emp_id, a.avail, a.pend 549 | from customer c cross join 550 | (select b.branch_id, e.emp_id 551 | from branch b inner join employee e on e.assigned_branch_id = b.branch_id 552 | where b.city = 'Salem' limit 1) e 553 | cross join 554 | (select 'CHK' prod_cd, '2001-05-23' open_date, '2005-01-03' last_date, 555 | 3487.19 avail, 3487.19 pend union all 556 | select 'SAV' prod_cd, '2001-05-23' open_date, '2004-10-12' last_date, 557 | 387.99 avail, 387.99 pend) a 558 | where c.fed_id = '888-88-8888'; 559 | insert into account (account_id, product_cd, cust_id, open_date, 560 | last_activity_date, status, open_branch_id, 561 | open_emp_id, avail_balance, pending_balance) 562 | select null, a.prod_cd, c.cust_id, a.open_date, a.last_date, 'ACTIVE', 563 | e.branch_id, e.emp_id, a.avail, a.pend 564 | from customer c cross join 565 | (select b.branch_id, e.emp_id 566 | from branch b 
inner join employee e on e.assigned_branch_id = b.branch_id 567 | where b.city = 'Waltham' limit 1) e 568 | cross join 569 | (select 'CHK' prod_cd, '2003-07-30' open_date, '2004-12-15' last_date, 570 | 125.67 avail, 125.67 pend union all 571 | select 'MM' prod_cd, '2004-10-28' open_date, '2004-10-28' last_date, 572 | 9345.55 avail, 9845.55 pend union all 573 | select 'CD' prod_cd, '2004-06-30' open_date, '2004-06-30' last_date, 574 | 1500.00 avail, 1500.00 pend) a 575 | where c.fed_id = '999-99-9999'; 576 | 577 | /* corporate account data */ 578 | insert into account (account_id, product_cd, cust_id, open_date, 579 | last_activity_date, status, open_branch_id, 580 | open_emp_id, avail_balance, pending_balance) 581 | select null, a.prod_cd, c.cust_id, a.open_date, a.last_date, 'ACTIVE', 582 | e.branch_id, e.emp_id, a.avail, a.pend 583 | from customer c cross join 584 | (select b.branch_id, e.emp_id 585 | from branch b inner join employee e on e.assigned_branch_id = b.branch_id 586 | where b.city = 'Salem' limit 1) e 587 | cross join 588 | (select 'CHK' prod_cd, '2002-09-30' open_date, '2004-12-15' last_date, 589 | 23575.12 avail, 23575.12 pend union all 590 | select 'BUS' prod_cd, '2002-10-01' open_date, '2004-08-28' last_date, 591 | 0 avail, 0 pend) a 592 | where c.fed_id = '04-1111111'; 593 | insert into account (account_id, product_cd, cust_id, open_date, 594 | last_activity_date, status, open_branch_id, 595 | open_emp_id, avail_balance, pending_balance) 596 | select null, a.prod_cd, c.cust_id, a.open_date, a.last_date, 'ACTIVE', 597 | e.branch_id, e.emp_id, a.avail, a.pend 598 | from customer c cross join 599 | (select b.branch_id, e.emp_id 600 | from branch b inner join employee e on e.assigned_branch_id = b.branch_id 601 | where b.city = 'Woburn' limit 1) e 602 | cross join 603 | (select 'BUS' prod_cd, '2004-03-22' open_date, '2004-11-14' last_date, 604 | 9345.55 avail, 9345.55 pend) a 605 | where c.fed_id = '04-2222222'; 606 | insert into account (account_id, 
product_cd, cust_id, open_date, 607 | last_activity_date, status, open_branch_id, 608 | open_emp_id, avail_balance, pending_balance) 609 | select null, a.prod_cd, c.cust_id, a.open_date, a.last_date, 'ACTIVE', 610 | e.branch_id, e.emp_id, a.avail, a.pend 611 | from customer c cross join 612 | (select b.branch_id, e.emp_id 613 | from branch b inner join employee e on e.assigned_branch_id = b.branch_id 614 | where b.city = 'Salem' limit 1) e 615 | cross join 616 | (select 'CHK' prod_cd, '2003-07-30' open_date, '2004-12-15' last_date, 617 | 38552.05 avail, 38552.05 pend) a 618 | where c.fed_id = '04-3333333'; 619 | insert into account (account_id, product_cd, cust_id, open_date, 620 | last_activity_date, status, open_branch_id, 621 | open_emp_id, avail_balance, pending_balance) 622 | select null, a.prod_cd, c.cust_id, a.open_date, a.last_date, 'ACTIVE', 623 | e.branch_id, e.emp_id, a.avail, a.pend 624 | from customer c cross join 625 | (select b.branch_id, e.emp_id 626 | from branch b inner join employee e on e.assigned_branch_id = b.branch_id 627 | where b.city = 'Quincy' limit 1) e 628 | cross join 629 | (select 'SBL' prod_cd, '2004-02-22' open_date, '2004-12-17' last_date, 630 | 50000.00 avail, 50000.00 pend) a 631 | where c.fed_id = '04-4444444'; 632 | 633 | /* put $100 in all checking/savings accounts on date account opened */ 634 | insert into transaction (txn_id, txn_date, account_id, txn_type_cd, 635 | amount, funds_avail_date) 636 | select null, a.open_date, a.account_id, 'CDT', 100, a.open_date 637 | from account a 638 | where a.product_cd IN ('CHK','SAV','CD','MM'); 639 | 640 | /* end data population */ 641 | -------------------------------------------------------------------------------- /SampleDBs/RobertFrost_Poetry.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/SampleDBs/RobertFrost_Poetry.pdf 
-------------------------------------------------------------------------------- /SampleDBs/big_csv.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/SampleDBs/big_csv.tar.xz -------------------------------------------------------------------------------- /SampleDBs/chinook.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/SampleDBs/chinook.sqlite -------------------------------------------------------------------------------- /SampleDBs/df_yummly.pkl.gzip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/SampleDBs/df_yummly.pkl.gzip -------------------------------------------------------------------------------- /SampleDBs/dvdrental.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/SampleDBs/dvdrental.zip -------------------------------------------------------------------------------- /SampleDBs/employees_db.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/SampleDBs/employees_db.zip -------------------------------------------------------------------------------- /SampleDBs/euro_soccer_sqlite.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/SampleDBs/euro_soccer_sqlite.zip -------------------------------------------------------------------------------- 
/SampleDBs/fakedata.csv: -------------------------------------------------------------------------------- 1 | name|email|birthdate 2 | Fatima|Quisque.varius@Integervitae.org|11-25-01 3 | Katelyn|mi.pede.nonummy@Sedid.ca|11-27-03 4 | Gillian|odio.semper@sodalesMaurisblandit.org|02-19-14 5 | Preston|faucibus.orci@lacusQuisque.edu|08-07-09 6 | Priscilla|semper.auctor@cursusvestibulum.co.uk|08-12-01 7 | Zena|ante@magnaPraesent.com|11-24-11 8 | Oren|rutrum.eu.ultrices@nec.org|01-12-07 9 | Jamalia|Phasellus.vitae.mauris@vel.org|05-17-17 10 | Libby|velit.eu@Maecenasmi.edu|07-29-17 11 | Finn|natoque.penatibus@lectusa.net|06-23-10 12 | Graiden|neque.Nullam@hendreritidante.com|09-18-10 13 | Kitra|quis.tristique@estmollis.com|11-18-02 14 | Baxter|vitae.mauris@sed.org|04-12-17 15 | Margaret|ullamcorper@nec.ca|08-16-04 16 | Sopoline|vitae.diam@velitQuisquevarius.co.uk|09-03-06 17 | Flynn|eget.varius@magnased.org|08-28-14 18 | Beck|arcu.et.pede@idblandit.co.uk|01-14-06 19 | Sheila|natoque.penatibus.et@mattis.com|09-01-20 20 | Francis|ligula.Aliquam@nullaatsem.ca|08-25-14 21 | Harrison|nec.urna@loremauctorquis.com|02-10-16 22 | Imogene|malesuada.id@nibh.com|04-19-03 23 | Moana|sit@Morbinequetellus.net|07-05-03 24 | Kamal|pede.Nunc.sed@Proindolor.ca|01-14-01 25 | Fiona|nulla.at@Sedmalesuadaaugue.net|02-07-01 26 | Yuri|pharetra@cursuspurusNullam.edu|12-03-19 27 | Nichole|aliquet@euenim.org|04-06-03 28 | Irma|elit@pedeCras.net|04-29-18 29 | Leilani|et.ultrices@semegestasblandit.edu|10-25-14 30 | Rashad|imperdiet.nec@convallisligulaDonec.co.uk|02-09-11 31 | Jerry|augue@imperdiet.edu|08-28-02 32 | Thomas|mauris@utodio.com|09-30-10 33 | Yvette|ligula@Aliquam.edu|03-03-04 34 | Hashim|Integer.sem@pede.co.uk|07-29-07 35 | Theodore|ut.pharetra@egetipsumDonec.org|08-11-05 36 | Basil|facilisis.vitae@Curae.ca|08-31-15 37 | Blythe|lacinia@vehiculaPellentesquetincidunt.co.uk|10-05-02 38 | Suki|Suspendisse.commodo.tincidunt@velit.co.uk|04-17-07 39 | Mia|vulputate.velit@etmagnisdis.org|08-03-07 
40 | Chastity|venenatis.lacus.Etiam@quisurna.edu|12-07-05 41 | Natalie|at.libero@feugiatplacerat.org|06-25-14 42 | Scarlet|ac.fermentum@ullamcorpereu.edu|09-13-00 43 | Gage|dolor.Fusce@ametlorem.edu|03-25-20 44 | Miranda|auctor.nunc@magnisdis.ca|09-20-02 45 | Nicole|elit@egestasSed.co.uk|09-08-15 46 | Tatyana|nec.urna@sagittis.co.uk|09-24-12 47 | Marcia|consectetuer@fringilla.co.uk|05-12-19 48 | Germaine|eget.ipsum@dui.ca|09-08-04 49 | Keith|Sed.et@nisimagna.net|12-20-05 50 | Zephr|Praesent.interdum.ligula@eu.edu|06-17-17 51 | Preston|accumsan.convallis.ante@sociisnatoquepenatibus.ca|08-30-08 52 | Stone|elit.sed@leoinlobortis.ca|09-30-02 53 | Phelan|Nam.ligula.elit@Sedpharetra.ca|03-29-02 54 | Colton|risus.Donec@blanditcongueIn.co.uk|04-28-11 55 | Stuart|senectus.et.netus@sit.com|03-24-09 56 | Brendan|enim.mi.tempor@nectempusscelerisque.edu|09-16-20 57 | Eden|velit.egestas.lacinia@non.com|07-13-04 58 | Geoffrey|Donec.egestas@ullamcorperDuis.co.uk|11-02-11 59 | Xantha|libero.at.auctor@mauris.ca|09-17-00 60 | Dylan|primis.in.faucibus@tortorIntegeraliquam.net|08-25-20 61 | Claire|pede@sagittissemperNam.ca|05-23-13 62 | Hayden|sed@Nam.org|02-24-09 63 | Russell|laoreet@infaucibus.net|03-06-04 64 | Ingrid|tincidunt.nibh@disparturient.co.uk|12-23-05 65 | Michael|orci.consectetuer@porttitorscelerisqueneque.com|06-30-18 66 | Elton|sem.molestie.sodales@vitaesemperegestas.edu|03-07-01 67 | Zena|sed@malesuadamalesuadaInteger.net|12-23-14 68 | Emi|dictum.eu@lorem.org|06-15-02 69 | Walter|non@loremacrisus.edu|08-27-08 70 | Jason|scelerisque.dui@maurissitamet.net|08-30-19 71 | Shannon|elit@sempercursus.net|10-31-13 72 | Seth|Aliquam.nec@eu.ca|10-30-09 73 | Magee|dui@odiosagittis.com|11-01-02 74 | Kasper|Praesent@Proinvel.edu|08-02-18 75 | Chester|et@orciPhasellus.org|11-04-07 76 | Cadman|a.sollicitudin@enimconsequat.com|12-03-15 77 | Erich|ligula.eu@sociosquadlitora.org|10-07-06 78 | Allegra|lorem@Suspendisse.edu|02-05-08 79 | 
Quemby|Curae.Donec@imperdietnonvestibulum.co.uk|09-28-06 80 | Beverly|ullamcorper@temporeratneque.net|11-17-08 81 | Kylee|luctus.felis.purus@variusultrices.net|01-16-04 82 | Aidan|Curae@sempereratin.net|10-15-08 83 | India|luctus@et.com|08-03-02 84 | Asher|dictum.Proin@in.edu|07-02-07 85 | Illana|interdum.libero@sitamet.com|08-05-21 86 | Maxine|enim.commodo.hendrerit@consequat.com|08-04-02 87 | Rashad|convallis.in@dapibusquam.org|07-19-20 88 | Pearl|facilisis.Suspendisse@arcuMorbi.co.uk|05-13-10 89 | Yael|facilisis.eget.ipsum@placerat.edu|09-19-17 90 | Michael|facilisi.Sed@consequat.co.uk|07-08-21 91 | Lawrence|luctus.Curabitur@lectusantedictum.ca|06-02-09 92 | Raja|et.rutrum@orciinconsequat.edu|02-13-11 93 | Irene|fermentum@maurisut.com|09-03-02 94 | Perry|velit@Pellentesqueut.co.uk|09-30-08 95 | Kellie|elit.Curabitur@erat.net|07-08-02 96 | Quemby|sed@sedest.ca|07-09-15 97 | Adrienne|Sed@faucibus.edu|03-06-01 98 | Quintessa|Aliquam.adipiscing@urna.org|08-21-07 99 | Baxter|blandit.at.nisi@Donec.edu|01-15-01 100 | Rylee|at@cursus.net|07-12-16 101 | Keegan|eu.lacus.Quisque@Inscelerisquescelerisque.net|12-23-05 -------------------------------------------------------------------------------- /SampleDBs/hr.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/SampleDBs/hr.sqlite -------------------------------------------------------------------------------- /SampleDBs/matrix.sql: -------------------------------------------------------------------------------- 1 | -- MySQL dump 10.13 Distrib 5.7.4-m14, for Linux (x86_64) 2 | -- 3 | -- Host: localhost Database: matrix 4 | -- ------------------------------------------------------ 5 | -- Server version 5.7.4-m14 6 | 7 | /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; 8 | /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; 9 | /*!40101 SET 
@OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; 10 | /*!40101 SET NAMES utf8 */; 11 | /*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; 12 | /*!40103 SET TIME_ZONE='+00:00' */; 13 | /*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; 14 | /*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; 15 | /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; 16 | /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; 17 | 18 | -- 19 | -- Table structure for table `a` 20 | -- 21 | 22 | DROP TABLE IF EXISTS `a`; 23 | /*!40101 SET @saved_cs_client = @@character_set_client */; 24 | /*!40101 SET character_set_client = utf8 */; 25 | CREATE TABLE `a` ( 26 | `row_num` int(11) NOT NULL, 27 | `col_num` int(11) NOT NULL, 28 | `value` int(11) DEFAULT NULL, 29 | PRIMARY KEY (`row_num`,`col_num`) 30 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 31 | /*!40101 SET character_set_client = @saved_cs_client */; 32 | 33 | -- 34 | -- Dumping data for table `a` 35 | -- 36 | 37 | LOCK TABLES `a` WRITE; 38 | /*!40000 ALTER TABLE `a` DISABLE KEYS */; 39 | INSERT INTO `a` VALUES (0,3,55),(0,4,78),(1,0,19),(1,2,21),(1,3,3),(1,4,81),(2,1,48),(2,2,50),(2,3,1),(3,2,33),(3,4,67),(4,0,95),(4,4,31); 40 | /*!40000 ALTER TABLE `a` ENABLE KEYS */; 41 | UNLOCK TABLES; 42 | 43 | -- 44 | -- Table structure for table `b` 45 | -- 46 | 47 | DROP TABLE IF EXISTS `b`; 48 | /*!40101 SET @saved_cs_client = @@character_set_client */; 49 | /*!40101 SET character_set_client = utf8 */; 50 | CREATE TABLE `b` ( 51 | `row_num` int(11) NOT NULL, 52 | `col_num` int(11) NOT NULL, 53 | `value` int(11) DEFAULT NULL, 54 | PRIMARY KEY (`row_num`,`col_num`) 55 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 56 | /*!40101 SET character_set_client = @saved_cs_client */; 57 | 58 | -- 59 | -- Dumping data for table `b` 60 | -- 61 | 62 | LOCK TABLES `b` WRITE; 63 | /*!40000 ALTER TABLE `b` DISABLE KEYS */; 64 | INSERT INTO `b` VALUES 
(0,1,73),(0,4,42),(1,2,82),(2,0,83),(2,1,13),(2,3,57),(3,0,48),(3,1,85),(3,2,18),(3,3,24),(4,0,98),(4,1,7),(4,4,3); 65 | /*!40000 ALTER TABLE `b` ENABLE KEYS */; 66 | UNLOCK TABLES; 67 | /*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; 68 | 69 | /*!40101 SET SQL_MODE=@OLD_SQL_MODE */; 70 | /*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; 71 | /*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; 72 | /*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; 73 | /*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; 74 | /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; 75 | /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; 76 | 77 | -- Dump completed on 2014-08-29 22:16:09 78 | -------------------------------------------------------------------------------- /SampleDBs/nursery_create.sql: -------------------------------------------------------------------------------- 1 | DROP DATABASE IF EXISTS Nursery; 2 | CREATE DATABASE IF NOT EXISTS Nursery; 3 | USE Nursery; 4 | 5 | CREATE TABLE Nursery.baby ( 6 | id int AUTO_INCREMENT NOT NULL, 7 | id_mother int NOT NULL, 8 | id_doctor int NOT NULL, 9 | name_baby varchar(60) NOT NULL, 10 | date_baby date NOT NULL, 11 | height_baby int NULL, 12 | weight_baby int NULL, 13 | CONSTRAINT baby_pk PRIMARY KEY (id) 14 | ); 15 | 16 | CREATE TABLE Nursery.specializations ( 17 | id int AUTO_INCREMENT NOT NULL, 18 | name_specialization varchar(60) NOT NULL, 19 | CONSTRAINT specializations_pk PRIMARY KEY (id) 20 | ); 21 | 22 | CREATE TABLE Nursery.mother ( 23 | id int AUTO_INCREMENT NOT NULL, 24 | name_mother varchar(1500) NOT NULL, 25 | street varchar(60) NULL, 26 | streetnumber varchar(20) NULL, 27 | zip char(8) NULL, 28 | date_mother date NULL, 29 | CONSTRAINT mother_pk PRIMARY KEY (id) 30 | ); 31 | 32 | CREATE TABLE Nursery.doctor ( 33 | id int AUTO_INCREMENT NOT NULL, 34 | register char(10) NOT NULL, 35 | name varchar(60) NOT NULL, 36 | id_specialization int NOT NULL, 37 | CONSTRAINT doctor_pk PRIMARY KEY (id) 
38 | ); 39 | 40 | CREATE TABLE Nursery.phone_mother ( 41 | id int AUTO_INCREMENT NOT NULL, 42 | id_mother int NOT NULL, 43 | phone varchar(13) NOT NULL, 44 | CONSTRAINT phone_mother_pk PRIMARY KEY (id) 45 | ); 46 | 47 | CREATE TABLE Nursery.phone_doctor ( 48 | id int AUTO_INCREMENT NOT NULL, 49 | id_doctor int NOT NULL, 50 | phone varchar(13) NOT NULL, 51 | CONSTRAINT phone_doctor_pk PRIMARY KEY (id) 52 | ); 53 | 54 | ALTER TABLE baby ADD CONSTRAINT baby_mother FOREIGN KEY baby_mother (id_mother) 55 | REFERENCES Nursery.mother (id); 56 | 57 | ALTER TABLE baby ADD CONSTRAINT baby_doctor FOREIGN KEY baby_doctor (id_doctor) 58 | REFERENCES Nursery.doctor (id); 59 | 60 | ALTER TABLE doctor ADD CONSTRAINT doctor_specializations FOREIGN KEY doctor_specializations (id_specialization) 61 | REFERENCES Nursery.specializations (id); 62 | 63 | ALTER TABLE phone_mother ADD CONSTRAINT phone_mother FOREIGN KEY phone_mother (id_mother) 64 | REFERENCES Nursery.mother (id); 65 | 66 | ALTER TABLE phone_doctor ADD CONSTRAINT phone_doctor FOREIGN KEY phone_doctor (id_doctor) 67 | REFERENCES Nursery.doctor (id); 68 | -------------------------------------------------------------------------------- /SampleDBs/searchindex.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsouza/Databases_Course/c800a02ad2772c6d3156f650e2a45e51f68533d0/SampleDBs/searchindex.sqlite -------------------------------------------------------------------------------- /SampleDBs/simple.yaml: -------------------------------------------------------------------------------- 1 | # Employee records 2 | - martin: 3 | name: Martin D'vloper 4 | job: Developer 5 | skills: 6 | - python 7 | - perl 8 | - pascal 9 | - tabitha: 10 | name: Tabitha Bitumen 11 | job: Developer 12 | skills: 13 | - lisp 14 | - fortran 15 | - erlang 16 | --------------------------------------------------------------------------------