├── README.md
├── notebooks
├── README.md
├── EX-1.1-Something-You-Should-Not-Do.ipynb
├── iris.csv
├── iris_with_names.csv
├── EX-4-Pivot-Using-SQL-And-Pandas.ipynb
├── EX-1.0-Getting-to-Know-the-Clickhouse-driver-Client.ipynb
├── EX-3-Sql-Magic-Functions.ipynb
├── EX-5-Airline-OnTime-Data.ipynb
└── EX-2-ClickHouse-SQL-Alchemy.ipynb
├── LICENSE
└── .gitignore
/README.md:
--------------------------------------------------------------------------------
1 | # ClickHouse Python Samples
2 |
3 | This project contains Python samples for ClickHouse. For now the samples
4 | consist of Jupyter Notebook files in the notebooks directory.
5 |
--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
1 | # Jupyter Notebooks for ClickHouse
2 |
3 | This directory contains sample notebooks showing how to connect Python
4 | to ClickHouse.
5 |
6 | The notebooks have been tested on Ubuntu using Anaconda and Python 3.7.
7 | They should work from Python 3.5 onwards.
8 |
9 | To get started you'll need to add the following packages using conda:
10 | ```
11 | conda install -c conda-forge clickhouse-driver
12 | conda install -c conda-forge clickhouse-sqlalchemy
13 | ```
14 |
15 | Examples use sample data from the iris dataset.
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Altinity
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
--------------------------------------------------------------------------------
/notebooks/EX-1.1-Something-You-Should-Not-Do.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Something You Should Not Do\n",
8 | "\n",
9 | "This notebook has samples that were included in the [Altinity blog article that introduces the clickhouse-driver client library](https://www.altinity.com/blog/clickhouse-and-python-getting-to-know-the-clickhouse-driver-client).\n",
10 | "\n",
11 | "_WARNING_: The final example hangs. It must be cancelled manually. "
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "Load the clickhouse driver and connect to a local server. "
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": null,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "from clickhouse_driver import Client\n",
28 | "client = Client('localhost')"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "Create the iris table, dropping any previously existing table of the same name. "
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": null,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "client.execute('DROP TABLE IF EXISTS iris')\n",
45 | "client.execute('CREATE TABLE iris ('\n",
46 | " 'sepal_length Float64, sepal_width Float64, '\n",
47 | " 'petal_length Float64, petal_width Float64, '\n",
48 | " 'species String) ENGINE = MergeTree '\n",
49 | " ' PARTITION BY species ORDER BY (species)')"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {},
55 | "source": [
56 | "If you try to insert values using a single string INSERT command that works with clickhouse-client the results will be disappointing. This command will hang. Once you see it hang, press the STOP button in your Jupyter environment. "
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "execution_count": null,
62 | "metadata": {},
63 | "outputs": [],
64 | "source": [
65 | "# Don’t do this.\n",
66 | "try:\n",
67 | " client.execute(\n",
68 | " \"INSERT INTO iris (sepal_length, sepal_width, petal_length, petal_width, species) \" \n",
69 | " \"VALUES (5.1, 3.7, 1.5, 0.4, 'Iris-setosa'), (4.6, 3.6, 1.0, 0.2, 'Iris-setosa')\"\n",
70 | " )\n",
71 | "except:\n",
72 | " print(\"The command failed as expected\")"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "You have to put insert data in a separate array. Python cannot translate this the way clickhouse-client does. (It creates the array transparently.)"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": null,
85 | "metadata": {},
86 | "outputs": [],
87 | "source": []
88 | }
89 | ],
90 | "metadata": {
91 | "kernelspec": {
92 | "display_name": "Python 3",
93 | "language": "python",
94 | "name": "python3"
95 | },
96 | "language_info": {
97 | "codemirror_mode": {
98 | "name": "ipython",
99 | "version": 3
100 | },
101 | "file_extension": ".py",
102 | "mimetype": "text/x-python",
103 | "name": "python",
104 | "nbconvert_exporter": "python",
105 | "pygments_lexer": "ipython3",
106 | "version": "3.7.1"
107 | }
108 | },
109 | "nbformat": 4,
110 | "nbformat_minor": 2
111 | }
112 |
--------------------------------------------------------------------------------
/notebooks/iris.csv:
--------------------------------------------------------------------------------
1 | 5.1,3.5,1.4,0.2,Iris-setosa
2 | 4.9,3.0,1.4,0.2,Iris-setosa
3 | 4.7,3.2,1.3,0.2,Iris-setosa
4 | 4.6,3.1,1.5,0.2,Iris-setosa
5 | 5.0,3.6,1.4,0.2,Iris-setosa
6 | 5.4,3.9,1.7,0.4,Iris-setosa
7 | 4.6,3.4,1.4,0.3,Iris-setosa
8 | 5.0,3.4,1.5,0.2,Iris-setosa
9 | 4.4,2.9,1.4,0.2,Iris-setosa
10 | 4.9,3.1,1.5,0.1,Iris-setosa
11 | 5.4,3.7,1.5,0.2,Iris-setosa
12 | 4.8,3.4,1.6,0.2,Iris-setosa
13 | 4.8,3.0,1.4,0.1,Iris-setosa
14 | 4.3,3.0,1.1,0.1,Iris-setosa
15 | 5.8,4.0,1.2,0.2,Iris-setosa
16 | 5.7,4.4,1.5,0.4,Iris-setosa
17 | 5.4,3.9,1.3,0.4,Iris-setosa
18 | 5.1,3.5,1.4,0.3,Iris-setosa
19 | 5.7,3.8,1.7,0.3,Iris-setosa
20 | 5.1,3.8,1.5,0.3,Iris-setosa
21 | 5.4,3.4,1.7,0.2,Iris-setosa
22 | 5.1,3.7,1.5,0.4,Iris-setosa
23 | 4.6,3.6,1.0,0.2,Iris-setosa
24 | 5.1,3.3,1.7,0.5,Iris-setosa
25 | 4.8,3.4,1.9,0.2,Iris-setosa
26 | 5.0,3.0,1.6,0.2,Iris-setosa
27 | 5.0,3.4,1.6,0.4,Iris-setosa
28 | 5.2,3.5,1.5,0.2,Iris-setosa
29 | 5.2,3.4,1.4,0.2,Iris-setosa
30 | 4.7,3.2,1.6,0.2,Iris-setosa
31 | 4.8,3.1,1.6,0.2,Iris-setosa
32 | 5.4,3.4,1.5,0.4,Iris-setosa
33 | 5.2,4.1,1.5,0.1,Iris-setosa
34 | 5.5,4.2,1.4,0.2,Iris-setosa
35 | 4.9,3.1,1.5,0.2,Iris-setosa
36 | 5.0,3.2,1.2,0.2,Iris-setosa
37 | 5.5,3.5,1.3,0.2,Iris-setosa
38 | 4.9,3.6,1.4,0.1,Iris-setosa
39 | 4.4,3.0,1.3,0.2,Iris-setosa
40 | 5.1,3.4,1.5,0.2,Iris-setosa
41 | 5.0,3.5,1.3,0.3,Iris-setosa
42 | 4.5,2.3,1.3,0.3,Iris-setosa
43 | 4.4,3.2,1.3,0.2,Iris-setosa
44 | 5.0,3.5,1.6,0.6,Iris-setosa
45 | 5.1,3.8,1.9,0.4,Iris-setosa
46 | 4.8,3.0,1.4,0.3,Iris-setosa
47 | 5.1,3.8,1.6,0.2,Iris-setosa
48 | 4.6,3.2,1.4,0.2,Iris-setosa
49 | 5.3,3.7,1.5,0.2,Iris-setosa
50 | 5.0,3.3,1.4,0.2,Iris-setosa
51 | 7.0,3.2,4.7,1.4,Iris-versicolor
52 | 6.4,3.2,4.5,1.5,Iris-versicolor
53 | 6.9,3.1,4.9,1.5,Iris-versicolor
54 | 5.5,2.3,4.0,1.3,Iris-versicolor
55 | 6.5,2.8,4.6,1.5,Iris-versicolor
56 | 5.7,2.8,4.5,1.3,Iris-versicolor
57 | 6.3,3.3,4.7,1.6,Iris-versicolor
58 | 4.9,2.4,3.3,1.0,Iris-versicolor
59 | 6.6,2.9,4.6,1.3,Iris-versicolor
60 | 5.2,2.7,3.9,1.4,Iris-versicolor
61 | 5.0,2.0,3.5,1.0,Iris-versicolor
62 | 5.9,3.0,4.2,1.5,Iris-versicolor
63 | 6.0,2.2,4.0,1.0,Iris-versicolor
64 | 6.1,2.9,4.7,1.4,Iris-versicolor
65 | 5.6,2.9,3.6,1.3,Iris-versicolor
66 | 6.7,3.1,4.4,1.4,Iris-versicolor
67 | 5.6,3.0,4.5,1.5,Iris-versicolor
68 | 5.8,2.7,4.1,1.0,Iris-versicolor
69 | 6.2,2.2,4.5,1.5,Iris-versicolor
70 | 5.6,2.5,3.9,1.1,Iris-versicolor
71 | 5.9,3.2,4.8,1.8,Iris-versicolor
72 | 6.1,2.8,4.0,1.3,Iris-versicolor
73 | 6.3,2.5,4.9,1.5,Iris-versicolor
74 | 6.1,2.8,4.7,1.2,Iris-versicolor
75 | 6.4,2.9,4.3,1.3,Iris-versicolor
76 | 6.6,3.0,4.4,1.4,Iris-versicolor
77 | 6.8,2.8,4.8,1.4,Iris-versicolor
78 | 6.7,3.0,5.0,1.7,Iris-versicolor
79 | 6.0,2.9,4.5,1.5,Iris-versicolor
80 | 5.7,2.6,3.5,1.0,Iris-versicolor
81 | 5.5,2.4,3.8,1.1,Iris-versicolor
82 | 5.5,2.4,3.7,1.0,Iris-versicolor
83 | 5.8,2.7,3.9,1.2,Iris-versicolor
84 | 6.0,2.7,5.1,1.6,Iris-versicolor
85 | 5.4,3.0,4.5,1.5,Iris-versicolor
86 | 6.0,3.4,4.5,1.6,Iris-versicolor
87 | 6.7,3.1,4.7,1.5,Iris-versicolor
88 | 6.3,2.3,4.4,1.3,Iris-versicolor
89 | 5.6,3.0,4.1,1.3,Iris-versicolor
90 | 5.5,2.5,4.0,1.3,Iris-versicolor
91 | 5.5,2.6,4.4,1.2,Iris-versicolor
92 | 6.1,3.0,4.6,1.4,Iris-versicolor
93 | 5.8,2.6,4.0,1.2,Iris-versicolor
94 | 5.0,2.3,3.3,1.0,Iris-versicolor
95 | 5.6,2.7,4.2,1.3,Iris-versicolor
96 | 5.7,3.0,4.2,1.2,Iris-versicolor
97 | 5.7,2.9,4.2,1.3,Iris-versicolor
98 | 6.2,2.9,4.3,1.3,Iris-versicolor
99 | 5.1,2.5,3.0,1.1,Iris-versicolor
100 | 5.7,2.8,4.1,1.3,Iris-versicolor
101 | 6.3,3.3,6.0,2.5,Iris-virginica
102 | 5.8,2.7,5.1,1.9,Iris-virginica
103 | 7.1,3.0,5.9,2.1,Iris-virginica
104 | 6.3,2.9,5.6,1.8,Iris-virginica
105 | 6.5,3.0,5.8,2.2,Iris-virginica
106 | 7.6,3.0,6.6,2.1,Iris-virginica
107 | 4.9,2.5,4.5,1.7,Iris-virginica
108 | 7.3,2.9,6.3,1.8,Iris-virginica
109 | 6.7,2.5,5.8,1.8,Iris-virginica
110 | 7.2,3.6,6.1,2.5,Iris-virginica
111 | 6.5,3.2,5.1,2.0,Iris-virginica
112 | 6.4,2.7,5.3,1.9,Iris-virginica
113 | 6.8,3.0,5.5,2.1,Iris-virginica
114 | 5.7,2.5,5.0,2.0,Iris-virginica
115 | 5.8,2.8,5.1,2.4,Iris-virginica
116 | 6.4,3.2,5.3,2.3,Iris-virginica
117 | 6.5,3.0,5.5,1.8,Iris-virginica
118 | 7.7,3.8,6.7,2.2,Iris-virginica
119 | 7.7,2.6,6.9,2.3,Iris-virginica
120 | 6.0,2.2,5.0,1.5,Iris-virginica
121 | 6.9,3.2,5.7,2.3,Iris-virginica
122 | 5.6,2.8,4.9,2.0,Iris-virginica
123 | 7.7,2.8,6.7,2.0,Iris-virginica
124 | 6.3,2.7,4.9,1.8,Iris-virginica
125 | 6.7,3.3,5.7,2.1,Iris-virginica
126 | 7.2,3.2,6.0,1.8,Iris-virginica
127 | 6.2,2.8,4.8,1.8,Iris-virginica
128 | 6.1,3.0,4.9,1.8,Iris-virginica
129 | 6.4,2.8,5.6,2.1,Iris-virginica
130 | 7.2,3.0,5.8,1.6,Iris-virginica
131 | 7.4,2.8,6.1,1.9,Iris-virginica
132 | 7.9,3.8,6.4,2.0,Iris-virginica
133 | 6.4,2.8,5.6,2.2,Iris-virginica
134 | 6.3,2.8,5.1,1.5,Iris-virginica
135 | 6.1,2.6,5.6,1.4,Iris-virginica
136 | 7.7,3.0,6.1,2.3,Iris-virginica
137 | 6.3,3.4,5.6,2.4,Iris-virginica
138 | 6.4,3.1,5.5,1.8,Iris-virginica
139 | 6.0,3.0,4.8,1.8,Iris-virginica
140 | 6.9,3.1,5.4,2.1,Iris-virginica
141 | 6.7,3.1,5.6,2.4,Iris-virginica
142 | 6.9,3.1,5.1,2.3,Iris-virginica
143 | 5.8,2.7,5.1,1.9,Iris-virginica
144 | 6.8,3.2,5.9,2.3,Iris-virginica
145 | 6.7,3.3,5.7,2.5,Iris-virginica
146 | 6.7,3.0,5.2,2.3,Iris-virginica
147 | 6.3,2.5,5.0,1.9,Iris-virginica
148 | 6.5,3.0,5.2,2.0,Iris-virginica
149 | 6.2,3.4,5.4,2.3,Iris-virginica
150 | 5.9,3.0,5.1,1.8,Iris-virginica
151 |
--------------------------------------------------------------------------------
/notebooks/iris_with_names.csv:
--------------------------------------------------------------------------------
1 | sepal_length,sepal_width,petal_length,petal_width,species
2 | 5.1,3.5,1.4,0.2,Iris-setosa
3 | 4.9,3.0,1.4,0.2,Iris-setosa
4 | 4.7,3.2,1.3,0.2,Iris-setosa
5 | 4.6,3.1,1.5,0.2,Iris-setosa
6 | 5.0,3.6,1.4,0.2,Iris-setosa
7 | 5.4,3.9,1.7,0.4,Iris-setosa
8 | 4.6,3.4,1.4,0.3,Iris-setosa
9 | 5.0,3.4,1.5,0.2,Iris-setosa
10 | 4.4,2.9,1.4,0.2,Iris-setosa
11 | 4.9,3.1,1.5,0.1,Iris-setosa
12 | 5.4,3.7,1.5,0.2,Iris-setosa
13 | 4.8,3.4,1.6,0.2,Iris-setosa
14 | 4.8,3.0,1.4,0.1,Iris-setosa
15 | 4.3,3.0,1.1,0.1,Iris-setosa
16 | 5.8,4.0,1.2,0.2,Iris-setosa
17 | 5.7,4.4,1.5,0.4,Iris-setosa
18 | 5.4,3.9,1.3,0.4,Iris-setosa
19 | 5.1,3.5,1.4,0.3,Iris-setosa
20 | 5.7,3.8,1.7,0.3,Iris-setosa
21 | 5.1,3.8,1.5,0.3,Iris-setosa
22 | 5.4,3.4,1.7,0.2,Iris-setosa
23 | 5.1,3.7,1.5,0.4,Iris-setosa
24 | 4.6,3.6,1.0,0.2,Iris-setosa
25 | 5.1,3.3,1.7,0.5,Iris-setosa
26 | 4.8,3.4,1.9,0.2,Iris-setosa
27 | 5.0,3.0,1.6,0.2,Iris-setosa
28 | 5.0,3.4,1.6,0.4,Iris-setosa
29 | 5.2,3.5,1.5,0.2,Iris-setosa
30 | 5.2,3.4,1.4,0.2,Iris-setosa
31 | 4.7,3.2,1.6,0.2,Iris-setosa
32 | 4.8,3.1,1.6,0.2,Iris-setosa
33 | 5.4,3.4,1.5,0.4,Iris-setosa
34 | 5.2,4.1,1.5,0.1,Iris-setosa
35 | 5.5,4.2,1.4,0.2,Iris-setosa
36 | 4.9,3.1,1.5,0.2,Iris-setosa
37 | 5.0,3.2,1.2,0.2,Iris-setosa
38 | 5.5,3.5,1.3,0.2,Iris-setosa
39 | 4.9,3.6,1.4,0.1,Iris-setosa
40 | 4.4,3.0,1.3,0.2,Iris-setosa
41 | 5.1,3.4,1.5,0.2,Iris-setosa
42 | 5.0,3.5,1.3,0.3,Iris-setosa
43 | 4.5,2.3,1.3,0.3,Iris-setosa
44 | 4.4,3.2,1.3,0.2,Iris-setosa
45 | 5.0,3.5,1.6,0.6,Iris-setosa
46 | 5.1,3.8,1.9,0.4,Iris-setosa
47 | 4.8,3.0,1.4,0.3,Iris-setosa
48 | 5.1,3.8,1.6,0.2,Iris-setosa
49 | 4.6,3.2,1.4,0.2,Iris-setosa
50 | 5.3,3.7,1.5,0.2,Iris-setosa
51 | 5.0,3.3,1.4,0.2,Iris-setosa
52 | 7.0,3.2,4.7,1.4,Iris-versicolor
53 | 6.4,3.2,4.5,1.5,Iris-versicolor
54 | 6.9,3.1,4.9,1.5,Iris-versicolor
55 | 5.5,2.3,4.0,1.3,Iris-versicolor
56 | 6.5,2.8,4.6,1.5,Iris-versicolor
57 | 5.7,2.8,4.5,1.3,Iris-versicolor
58 | 6.3,3.3,4.7,1.6,Iris-versicolor
59 | 4.9,2.4,3.3,1.0,Iris-versicolor
60 | 6.6,2.9,4.6,1.3,Iris-versicolor
61 | 5.2,2.7,3.9,1.4,Iris-versicolor
62 | 5.0,2.0,3.5,1.0,Iris-versicolor
63 | 5.9,3.0,4.2,1.5,Iris-versicolor
64 | 6.0,2.2,4.0,1.0,Iris-versicolor
65 | 6.1,2.9,4.7,1.4,Iris-versicolor
66 | 5.6,2.9,3.6,1.3,Iris-versicolor
67 | 6.7,3.1,4.4,1.4,Iris-versicolor
68 | 5.6,3.0,4.5,1.5,Iris-versicolor
69 | 5.8,2.7,4.1,1.0,Iris-versicolor
70 | 6.2,2.2,4.5,1.5,Iris-versicolor
71 | 5.6,2.5,3.9,1.1,Iris-versicolor
72 | 5.9,3.2,4.8,1.8,Iris-versicolor
73 | 6.1,2.8,4.0,1.3,Iris-versicolor
74 | 6.3,2.5,4.9,1.5,Iris-versicolor
75 | 6.1,2.8,4.7,1.2,Iris-versicolor
76 | 6.4,2.9,4.3,1.3,Iris-versicolor
77 | 6.6,3.0,4.4,1.4,Iris-versicolor
78 | 6.8,2.8,4.8,1.4,Iris-versicolor
79 | 6.7,3.0,5.0,1.7,Iris-versicolor
80 | 6.0,2.9,4.5,1.5,Iris-versicolor
81 | 5.7,2.6,3.5,1.0,Iris-versicolor
82 | 5.5,2.4,3.8,1.1,Iris-versicolor
83 | 5.5,2.4,3.7,1.0,Iris-versicolor
84 | 5.8,2.7,3.9,1.2,Iris-versicolor
85 | 6.0,2.7,5.1,1.6,Iris-versicolor
86 | 5.4,3.0,4.5,1.5,Iris-versicolor
87 | 6.0,3.4,4.5,1.6,Iris-versicolor
88 | 6.7,3.1,4.7,1.5,Iris-versicolor
89 | 6.3,2.3,4.4,1.3,Iris-versicolor
90 | 5.6,3.0,4.1,1.3,Iris-versicolor
91 | 5.5,2.5,4.0,1.3,Iris-versicolor
92 | 5.5,2.6,4.4,1.2,Iris-versicolor
93 | 6.1,3.0,4.6,1.4,Iris-versicolor
94 | 5.8,2.6,4.0,1.2,Iris-versicolor
95 | 5.0,2.3,3.3,1.0,Iris-versicolor
96 | 5.6,2.7,4.2,1.3,Iris-versicolor
97 | 5.7,3.0,4.2,1.2,Iris-versicolor
98 | 5.7,2.9,4.2,1.3,Iris-versicolor
99 | 6.2,2.9,4.3,1.3,Iris-versicolor
100 | 5.1,2.5,3.0,1.1,Iris-versicolor
101 | 5.7,2.8,4.1,1.3,Iris-versicolor
102 | 6.3,3.3,6.0,2.5,Iris-virginica
103 | 5.8,2.7,5.1,1.9,Iris-virginica
104 | 7.1,3.0,5.9,2.1,Iris-virginica
105 | 6.3,2.9,5.6,1.8,Iris-virginica
106 | 6.5,3.0,5.8,2.2,Iris-virginica
107 | 7.6,3.0,6.6,2.1,Iris-virginica
108 | 4.9,2.5,4.5,1.7,Iris-virginica
109 | 7.3,2.9,6.3,1.8,Iris-virginica
110 | 6.7,2.5,5.8,1.8,Iris-virginica
111 | 7.2,3.6,6.1,2.5,Iris-virginica
112 | 6.5,3.2,5.1,2.0,Iris-virginica
113 | 6.4,2.7,5.3,1.9,Iris-virginica
114 | 6.8,3.0,5.5,2.1,Iris-virginica
115 | 5.7,2.5,5.0,2.0,Iris-virginica
116 | 5.8,2.8,5.1,2.4,Iris-virginica
117 | 6.4,3.2,5.3,2.3,Iris-virginica
118 | 6.5,3.0,5.5,1.8,Iris-virginica
119 | 7.7,3.8,6.7,2.2,Iris-virginica
120 | 7.7,2.6,6.9,2.3,Iris-virginica
121 | 6.0,2.2,5.0,1.5,Iris-virginica
122 | 6.9,3.2,5.7,2.3,Iris-virginica
123 | 5.6,2.8,4.9,2.0,Iris-virginica
124 | 7.7,2.8,6.7,2.0,Iris-virginica
125 | 6.3,2.7,4.9,1.8,Iris-virginica
126 | 6.7,3.3,5.7,2.1,Iris-virginica
127 | 7.2,3.2,6.0,1.8,Iris-virginica
128 | 6.2,2.8,4.8,1.8,Iris-virginica
129 | 6.1,3.0,4.9,1.8,Iris-virginica
130 | 6.4,2.8,5.6,2.1,Iris-virginica
131 | 7.2,3.0,5.8,1.6,Iris-virginica
132 | 7.4,2.8,6.1,1.9,Iris-virginica
133 | 7.9,3.8,6.4,2.0,Iris-virginica
134 | 6.4,2.8,5.6,2.2,Iris-virginica
135 | 6.3,2.8,5.1,1.5,Iris-virginica
136 | 6.1,2.6,5.6,1.4,Iris-virginica
137 | 7.7,3.0,6.1,2.3,Iris-virginica
138 | 6.3,3.4,5.6,2.4,Iris-virginica
139 | 6.4,3.1,5.5,1.8,Iris-virginica
140 | 6.0,3.0,4.8,1.8,Iris-virginica
141 | 6.9,3.1,5.4,2.1,Iris-virginica
142 | 6.7,3.1,5.6,2.4,Iris-virginica
143 | 6.9,3.1,5.1,2.3,Iris-virginica
144 | 5.8,2.7,5.1,1.9,Iris-virginica
145 | 6.8,3.2,5.9,2.3,Iris-virginica
146 | 6.7,3.3,5.7,2.5,Iris-virginica
147 | 6.7,3.0,5.2,2.3,Iris-virginica
148 | 6.3,2.5,5.0,1.9,Iris-virginica
149 | 6.5,3.0,5.2,2.0,Iris-virginica
150 | 6.2,3.4,5.4,2.3,Iris-virginica
151 | 5.9,3.0,5.1,1.8,Iris-virginica
152 |
--------------------------------------------------------------------------------
/notebooks/EX-4-Pivot-Using-SQL-And-Pandas.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Pivoting SQL results in Pandas\n",
8 | "\n",
9 | "This notebook shows how to pivot array data using SQL ARRAY JOIN and pandas DataFrame.pivot_table(). This problem appeared as a [question on Stack Overflow](https://stackoverflow.com/questions/54811905/return-clickhouse-array-as-column). "
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "First create some test data. We'll use clickhouse-driver for this so we can see the SQL. "
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "from clickhouse_driver import Client\n",
26 | "client = Client('localhost')\n",
27 | "client.execute('CREATE TABLE IF NOT EXISTS f '\n",
28 | " '(f1 String, f2 Array(Int32), f3 Array(String)) '\n",
29 | " 'ENGINE = Memory')\n",
30 | "client.execute('TRUNCATE TABLE f')\n",
31 | "client.execute(\n",
32 | " 'INSERT INTO f (f1, f2, f3) VALUES', [\n",
33 | " ('a', [1,2,3], ['x', 'y', 'z']),\n",
34 | " ('b', [4,5,6], ['x', 'y', 'z']),\n",
35 | " ]\n",
36 | ")"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "Now load SQLAlchemy. "
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 2,
49 | "metadata": {
50 | "scrolled": true
51 | },
52 | "outputs": [],
53 | "source": [
54 | "from sqlalchemy import create_engine\n",
55 | "%load_ext sql"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "Connect to ClickHouse, which is assumed to be on localhost with default user. "
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 3,
68 | "metadata": {},
69 | "outputs": [
70 | {
71 | "data": {
72 | "text/plain": [
73 | "'Connected: default@default'"
74 | ]
75 | },
76 | "execution_count": 3,
77 | "metadata": {},
78 | "output_type": "execute_result"
79 | }
80 | ],
81 | "source": [
82 | "%sql clickhouse://default:@localhost/default"
83 | ]
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "metadata": {},
88 | "source": [
89 | "Use SQL query with ARRAY JOIN to flip matching array indexes in f2, f3 to row values with f1. "
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": 13,
95 | "metadata": {},
96 | "outputs": [
97 | {
98 | "name": "stdout",
99 | "output_type": "stream",
100 | "text": [
101 | " * clickhouse://default:***@localhost/default\n",
102 | "Done.\n"
103 | ]
104 | },
105 | {
106 | "data": {
107 | "text/html": [
108 | "
\n",
109 | "\n",
122 | "
\n",
123 | " \n",
124 | " \n",
125 | " | \n",
126 | " f1 | \n",
127 | " f2 | \n",
128 | " f3 | \n",
129 | "
\n",
130 | " \n",
131 | " \n",
132 | " \n",
133 | " | 0 | \n",
134 | " a | \n",
135 | " 1 | \n",
136 | " x | \n",
137 | "
\n",
138 | " \n",
139 | " | 1 | \n",
140 | " a | \n",
141 | " 2 | \n",
142 | " y | \n",
143 | "
\n",
144 | " \n",
145 | " | 2 | \n",
146 | " a | \n",
147 | " 3 | \n",
148 | " z | \n",
149 | "
\n",
150 | " \n",
151 | " | 3 | \n",
152 | " b | \n",
153 | " 4 | \n",
154 | " x | \n",
155 | "
\n",
156 | " \n",
157 | " | 4 | \n",
158 | " b | \n",
159 | " 5 | \n",
160 | " y | \n",
161 | "
\n",
162 | " \n",
163 | " | 5 | \n",
164 | " b | \n",
165 | " 6 | \n",
166 | " z | \n",
167 | "
\n",
168 | " \n",
169 | " | 6 | \n",
170 | " c | \n",
171 | " 7 | \n",
172 | " y | \n",
173 | "
\n",
174 | " \n",
175 | " | 7 | \n",
176 | " c | \n",
177 | " 8 | \n",
178 | " z | \n",
179 | "
\n",
180 | " \n",
181 | " | 8 | \n",
182 | " c | \n",
183 | " 9 | \n",
184 | " aa | \n",
185 | "
\n",
186 | " \n",
187 | " | 9 | \n",
188 | " c | \n",
189 | " 10 | \n",
190 | " bb | \n",
191 | "
\n",
192 | " \n",
193 | " | 10 | \n",
194 | " c | \n",
195 | " 7 | \n",
196 | " y | \n",
197 | "
\n",
198 | " \n",
199 | " | 11 | \n",
200 | " c | \n",
201 | " 8 | \n",
202 | " z | \n",
203 | "
\n",
204 | " \n",
205 | " | 12 | \n",
206 | " c | \n",
207 | " 9 | \n",
208 | " aa | \n",
209 | "
\n",
210 | " \n",
211 | " | 13 | \n",
212 | " c | \n",
213 | " 10 | \n",
214 | " bb | \n",
215 | "
\n",
216 | " \n",
217 | "
\n",
218 | "
"
219 | ],
220 | "text/plain": [
221 | " f1 f2 f3\n",
222 | "0 a 1 x\n",
223 | "1 a 2 y\n",
224 | "2 a 3 z\n",
225 | "3 b 4 x\n",
226 | "4 b 5 y\n",
227 | "5 b 6 z\n",
228 | "6 c 7 y\n",
229 | "7 c 8 z\n",
230 | "8 c 9 aa\n",
231 | "9 c 10 bb\n",
232 | "10 c 7 y\n",
233 | "11 c 8 z\n",
234 | "12 c 9 aa\n",
235 | "13 c 10 bb"
236 | ]
237 | },
238 | "execution_count": 13,
239 | "metadata": {},
240 | "output_type": "execute_result"
241 | }
242 | ],
243 | "source": [
244 | "result = %sql SELECT * FROM f ARRAY JOIN f2, f3\n",
245 | "df = result.DataFrame()\n",
246 | "df"
247 | ]
248 | },
249 | {
250 | "cell_type": "markdown",
251 | "metadata": {},
252 | "source": [
253 | "Now we can pivot f2 and f3 into a new data frame that has the f3 array entries as columns. "
254 | ]
255 | },
256 | {
257 | "cell_type": "code",
258 | "execution_count": 15,
259 | "metadata": {},
260 | "outputs": [
261 | {
262 | "name": "stdout",
263 | "output_type": "stream",
264 | "text": [
265 | "f3 aa bb x y z\n",
266 | "f1 \n",
267 | "a NaN NaN 1.0 2.0 3.0\n",
268 | "b NaN NaN 4.0 5.0 6.0\n",
269 | "c 9.0 10.0 NaN 7.0 8.0\n"
270 | ]
271 | }
272 | ],
273 | "source": [
274 | "dfp = df.pivot_table(columns='f3', values='f2', index='f1')\n",
275 | "print(dfp)"
276 | ]
277 | },
278 | {
279 | "cell_type": "markdown",
280 | "metadata": {},
281 | "source": [
282 | "This approach works if we add additional data with new property names in the f3 array. Try adding a row to the table and then rerun the cells that select and pivot data. "
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": 12,
288 | "metadata": {
289 | "scrolled": false
290 | },
291 | "outputs": [],
292 | "source": [
293 | "client.execute(\n",
294 | " 'INSERT INTO f (f1, f2, f3) VALUES', [\n",
295 | " ('c', [7,8,9,10], ['y', 'z', 'aa', 'bb']),\n",
296 | " ]\n",
297 | ")"
298 | ]
299 | },
300 | {
301 | "cell_type": "markdown",
302 | "metadata": {},
303 | "source": [
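304 | "If you run the insert again, the duplicate rows show up in the ARRAY JOIN query result, but the pivoted output does not change because pivot_table() aggregates duplicate entries (using the mean by default). "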
305 | ]
306 | }
307 | ],
308 | "metadata": {
309 | "kernelspec": {
310 | "display_name": "Python 3",
311 | "language": "python",
312 | "name": "python3"
313 | },
314 | "language_info": {
315 | "codemirror_mode": {
316 | "name": "ipython",
317 | "version": 3
318 | },
319 | "file_extension": ".py",
320 | "mimetype": "text/x-python",
321 | "name": "python",
322 | "nbconvert_exporter": "python",
323 | "pygments_lexer": "ipython3",
324 | "version": "3.7.1"
325 | },
326 | "widgets": {
327 | "application/vnd.jupyter.widget-state+json": {
328 | "state": {},
329 | "version_major": 1,
330 | "version_minor": 0
331 | }
332 | }
333 | },
334 | "nbformat": 4,
335 | "nbformat_minor": 2
336 | }
337 |
--------------------------------------------------------------------------------
/notebooks/EX-1.0-Getting-to-Know-the-Clickhouse-driver-Client.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Getting to know the ClickHouse-driver Client\n",
8 | "\n",
9 | "This notebook has samples that were included in the [Altinity blog article that introduces the clickhouse-driver client library](https://www.altinity.com/blog/clickhouse-and-python-getting-to-know-the-clickhouse-driver-client).\n",
10 | "\n",
11 | "_WARNING_: If you run the whole notebook it will hang. You must run the samples one by one as one of them is designed to hang and must be cancelled manually. "
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "It's easy to load the clickhouse driver. The `Client` class is the main client interface. "
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": null,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "from clickhouse_driver import Client"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "If you are running against an unencrypted local server setting up a connection is as simple as the following. Instantiating a client does not actually connect to ClickHouse. It just sets up the data structure used to connect later on when your code does something. "
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": null,
40 | "metadata": {},
41 | "outputs": [],
42 | "source": [
43 | "client = Client('localhost')"
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {},
49 | "source": [
50 | "Servers with sensitive data should be protected with a user/password and encrypted communications. The following command shows how to connect to a server with a self-signed certificate using an explicit database name. "
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "client = Client('localhost', \n",
60 | " user='python', \n",
61 | " password='secret', \n",
62 | " secure=True, \n",
63 | " verify=False, \n",
64 | " database='default',\n",
65 | " compression=True)"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "The Python driver uses the Client.execute() method to issue select commands. Results are returned as a list of tuples. Let's send a very simple query and take apart the results to see values and types. \n",
73 | "\n",
74 | "*NOTE*: If you get an error about an unknown timezone, ensure your server has the timezone set properly. "
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": null,
80 | "metadata": {
81 | "scrolled": false
82 | },
83 | "outputs": [],
84 | "source": [
85 | "result = client.execute('SELECT now(), version()')\n",
86 | "print(\"RESULT: {0}: {1}\".format(type(result), result))\n",
87 | "for t in result:\n",
88 | " print(\" ROW: {0}: {1}\".format(type(t), t))\n",
89 | " for v in t:\n",
90 | " print(\" COLUMN: {0}: {1}\".format(type(v), v))"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | "Create the iris table, dropping any previously existing table of the same name. The print statements show that result sets from DDL are empty. "
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {},
104 | "outputs": [],
105 | "source": [
106 | "r1 = client.execute('DROP TABLE IF EXISTS iris')\n",
107 | "print(r1)\n",
108 | "r2 = client.execute('CREATE TABLE iris ('\n",
109 | " 'sepal_length Float64, sepal_width Float64, '\n",
110 | " 'petal_length Float64, petal_width Float64, '\n",
111 | " 'species String) ENGINE = MergeTree '\n",
112 | " ' PARTITION BY species ORDER BY (species)')\n",
113 | "print(r2)"
114 | ]
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {},
119 | "source": [
120 | "Add some data to the table. Note that the values are given in a separate array of tuples. "
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": null,
126 | "metadata": {},
127 | "outputs": [],
128 | "source": [
129 | "client.execute(\n",
130 | " 'INSERT INTO iris (sepal_length, sepal_width, petal_length, petal_width, species) VALUES',\n",
131 | " [(5.1, 3.7, 1.5, 0.4, 'Iris-setosa'), (4.6, 3.6, 1.0, 0.2, 'Iris-setosa')]\n",
132 | ")\n",
133 | "print(client.execute(\"SELECT * FROM iris\"))"
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {},
139 | "source": [
140 | "Here is an example of how to insert CSV data. We read the values line by line using csv.DictReader() running inside the generator function row_reader(). This results in a dictionary for each line. Note that *you must* assign types or your values will not convert. The csv module reads everything as a string. "
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": null,
146 | "metadata": {
147 | "scrolled": true
148 | },
149 | "outputs": [],
150 | "source": [
151 | "client.execute(\"TRUNCATE TABLE iris\")\n",
152 | "\n",
153 | "import datetime\n",
154 | "import csv\n",
155 | "\n",
156 | "# Create a generator to fetch parsed rows. CSV must have variable names in header row.\n",
157 | "def row_reader():\n",
158 | " with open('iris_with_names.csv') as iris_csv:\n",
159 | " # Use DictReader to get values as a dictionary with column names.\n",
160 | " for line in csv.DictReader(iris_csv):\n",
161 | " yield {\n",
162 | " 'sepal_length': float(line['sepal_length']), \n",
163 | " 'sepal_width': float(line['sepal_width']), \n",
164 | " 'petal_length': float(line['petal_length']), \n",
165 | " 'petal_width': float(line['petal_width']), \n",
166 | " 'species': str(line['species']), \n",
167 | " }\n",
168 | "\n",
169 | "# Use a generator expression to stream the rows to the server as dictionaries. \n",
170 | "client.execute(\"INSERT INTO iris VALUES\", (line for line in row_reader()))\n",
171 | "client.execute(\"SELECT count(*) FROM iris\")"
172 | ]
173 | },
174 | {
175 | "cell_type": "markdown",
176 | "metadata": {},
177 | "source": [
178 | "That was painful. We dislike pain. A better approach to non-toy CSV files is to use Pandas, which has a very good method for reading CSV that automatically coerces types. This is much simpler! "
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": null,
184 | "metadata": {},
185 | "outputs": [],
186 | "source": [
187 | "client.execute(\"TRUNCATE TABLE iris\")\n",
188 | "\n",
189 | "import pandas as pd\n",
190 | "df = pd.read_csv('iris_with_names.csv')\n",
191 | "\n",
192 | "client.execute(\"INSERT INTO iris VALUES\", [tuple(x) for x in df.values])\n",
193 | "client.execute(\"SELECT count(*) FROM iris\")"
194 | ]
195 | },
196 | {
197 | "cell_type": "markdown",
198 | "metadata": {},
199 | "source": [
200 | "The next few queries show examples of select statements. "
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": null,
206 | "metadata": {},
207 | "outputs": [],
208 | "source": [
209 | "result = client.execute('SELECT COUNT(*), species FROM iris '\n",
210 | " 'WHERE petal_length > 3.4 '\n",
211 | " 'GROUP BY species ORDER BY species')\n",
212 | "print(result)"
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": null,
218 | "metadata": {},
219 | "outputs": [],
220 | "source": [
221 | "result = client.execute('SELECT COUNT(*), species FROM iris '\n",
222 | " 'WHERE petal_length > %(max_len)s '\n",
223 | " 'GROUP BY species ORDER BY species', \n",
224 | " {'max_len': 3.4})\n",
225 | "print(result)"
226 | ]
227 | },
228 | {
229 | "cell_type": "markdown",
230 | "metadata": {},
231 | "source": [
232 | "Show how to get the column names for results. Note that we also get the column types, which is convenient for conversions. "
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": null,
238 | "metadata": {},
239 | "outputs": [],
240 | "source": [
241 | "result, columns = client.execute('SELECT COUNT(*), species FROM iris '\n",
242 | " 'WHERE petal_length > %(max_len)s '\n",
243 | " 'GROUP BY species ORDER BY species', \n",
244 | " {'max_len': 3.4},\n",
245 | " with_column_types=True)\n",
246 | "print(result)\n",
247 | "print(columns)"
248 | ]
249 | },
250 | {
251 | "cell_type": "markdown",
252 | "metadata": {},
253 | "source": [
254 | "This final example shows how to put a result set into a pandas data frame. We'll use the column names so that the DataFrame has correct labels."
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": null,
260 | "metadata": {
261 | "scrolled": true
262 | },
263 | "outputs": [],
264 | "source": [
265 | "import pandas\n",
266 | "result, columns = client.execute('SELECT * FROM iris WHERE species = %(species)s LIMIT 5', \n",
267 | " {'species': \"Iris-setosa\"}, with_column_types=True)\n",
268 | "df = pandas.DataFrame(result, columns=[tuple[0] for tuple in columns])\n",
269 | "df.tail()"
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {},
275 | "source": [
276 | "Since we're using pandas and may like to put this data into graphs, etc., we need to ensure the data types are correct. Let's describe the data set to ensure that the columns with numbers really appear as numbers. The following should show metrics for length and width values but nothing for species, which is a string. "
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": null,
282 | "metadata": {},
283 | "outputs": [],
284 | "source": [
285 | "df.describe()"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": null,
291 | "metadata": {},
292 | "outputs": [],
293 | "source": []
294 | }
295 | ],
296 | "metadata": {
297 | "kernelspec": {
298 | "display_name": "Python 3",
299 | "language": "python",
300 | "name": "python3"
301 | },
302 | "language_info": {
303 | "codemirror_mode": {
304 | "name": "ipython",
305 | "version": 3
306 | },
307 | "file_extension": ".py",
308 | "mimetype": "text/x-python",
309 | "name": "python",
310 | "nbconvert_exporter": "python",
311 | "pygments_lexer": "ipython3",
312 | "version": "3.7.1"
313 | }
314 | },
315 | "nbformat": 4,
316 | "nbformat_minor": 2
317 | }
318 |
--------------------------------------------------------------------------------
/notebooks/EX-3-Sql-Magic-Functions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Using Sql Magic Function with ClickHouse\n",
8 | "\n",
9 | "This notebook goes from nothing to a nice graph in three steps. We'll extract data using the %sql magic function. You'll need to have created the iris table for this to work. Run the EX-1.0-Getting-to-Know-the-Clickhouse-driver-Client.ipynb notebook to load data into the iris table if you have not already done so. "
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "First import SQLAlchemy and activate the %sql function. This just needs to be done once. "
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {
23 | "scrolled": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "from sqlalchemy import create_engine\n",
28 | "%load_ext sql"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
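35 | "Connect to a local server, select data from the iris table, and pop it into a data frame. Note that a plain clickhouse:// URI may use HTTP rather than the native protocol, depending on the driver version; use clickhouse+native:// to select the native protocol explicitly. You should see 150 rows in the data frame. "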
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 2,
41 | "metadata": {
42 | "scrolled": true
43 | },
44 | "outputs": [
45 | {
46 | "name": "stdout",
47 | "output_type": "stream",
48 | "text": [
49 | " * clickhouse://default:***@localhost/default\n",
50 | "Done.\n"
51 | ]
52 | },
53 | {
54 | "data": {
55 | "text/html": [
56 | "\n",
57 | "\n",
70 | "
\n",
71 | " \n",
72 | " \n",
73 | " | \n",
74 | " sepal_length | \n",
75 | " sepal_width | \n",
76 | " petal_length | \n",
77 | " petal_width | \n",
78 | " species | \n",
79 | "
\n",
80 | " \n",
81 | " \n",
82 | " \n",
83 | " | 145 | \n",
84 | " 5.7 | \n",
85 | " 3.0 | \n",
86 | " 4.2 | \n",
87 | " 1.2 | \n",
88 | " Iris-versicolor | \n",
89 | "
\n",
90 | " \n",
91 | " | 146 | \n",
92 | " 5.7 | \n",
93 | " 2.9 | \n",
94 | " 4.2 | \n",
95 | " 1.3 | \n",
96 | " Iris-versicolor | \n",
97 | "
\n",
98 | " \n",
99 | " | 147 | \n",
100 | " 6.2 | \n",
101 | " 2.9 | \n",
102 | " 4.3 | \n",
103 | " 1.3 | \n",
104 | " Iris-versicolor | \n",
105 | "
\n",
106 | " \n",
107 | " | 148 | \n",
108 | " 5.1 | \n",
109 | " 2.5 | \n",
110 | " 3.0 | \n",
111 | " 1.1 | \n",
112 | " Iris-versicolor | \n",
113 | "
\n",
114 | " \n",
115 | " | 149 | \n",
116 | " 5.7 | \n",
117 | " 2.8 | \n",
118 | " 4.1 | \n",
119 | " 1.3 | \n",
120 | " Iris-versicolor | \n",
121 | "
\n",
122 | " \n",
123 | "
\n",
124 | "
"
125 | ],
126 | "text/plain": [
127 | " sepal_length sepal_width petal_length petal_width species\n",
128 | "145 5.7 3.0 4.2 1.2 Iris-versicolor\n",
129 | "146 5.7 2.9 4.2 1.3 Iris-versicolor\n",
130 | "147 6.2 2.9 4.3 1.3 Iris-versicolor\n",
131 | "148 5.1 2.5 3.0 1.1 Iris-versicolor\n",
132 | "149 5.7 2.8 4.1 1.3 Iris-versicolor"
133 | ]
134 | },
135 | "execution_count": 2,
136 | "metadata": {},
137 | "output_type": "execute_result"
138 | }
139 | ],
140 | "source": [
141 | "%sql clickhouse://default:@localhost/default\n",
142 | "result = %sql SELECT * FROM iris\n",
143 | "df = result.DataFrame()\n",
144 | "df.tail()"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "Everybody likes a nice picture. Let's make a scatter graph that shows data with different markers for each species of iris. We'll use standard pandas tricks to find the species names and query for each one of them. \n",
152 | "\n",
153 | "Pro tip: For large datasets you could get unique names using SQL. We'll just cheat and use the pandas unique() method on the data frame."
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 5,
159 | "metadata": {},
160 | "outputs": [
161 | {
162 | "data": {
163 | "image/png": "\n",
164 | "text/plain": [
165 | ""
166 | ]
167 | },
168 | "metadata": {
169 | "needs_background": "light"
170 | },
171 | "output_type": "display_data"
172 | }
173 | ],
174 | "source": [
175 | "import matplotlib.pyplot as plt\n",
176 | "%matplotlib inline\n",
177 | "\n",
178 | "# Break up the data frame and graph each species separately. \n",
179 | "species = df.species.unique()\n",
180 | "markers = ['o', 'x', '^']\n",
181 | "for i in range(3):\n",
182 | " df_segment = df[df['species'] == species[i]]\n",
183 | " plt.scatter('sepal_length', \n",
184 | " 'petal_length', \n",
185 | " data=df_segment, \n",
186 | " marker=markers[i],\n",
187 | " label=species[i])\n",
188 | "# Add proper labels and show the result. \n",
189 | "plt.xlabel('sepal length [cm]')\n",
190 | "plt.ylabel('petal length [cm]')\n",
191 | "plt.legend(loc='upper left')\n",
192 | "plt.show()"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "From this we can see that iris-setosa is linearly separable from the other two species. This is significant for running machine learning. \n",
200 | "\n",
201 | "One final note is that when you don't know what's coming back it's good to describe the data set to ensure the values you think are numeric actually are. For example ClickHouse Decimal values don't automatically coerce to floats which means data science stack operations won't work as expected. "
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": 4,
207 | "metadata": {},
208 | "outputs": [
209 | {
210 | "data": {
211 | "text/html": [
212 | "\n",
213 | "\n",
226 | "
\n",
227 | " \n",
228 | " \n",
229 | " | \n",
230 | " sepal_length | \n",
231 | " sepal_width | \n",
232 | " petal_length | \n",
233 | " petal_width | \n",
234 | "
\n",
235 | " \n",
236 | " \n",
237 | " \n",
238 | " | count | \n",
239 | " 150.000000 | \n",
240 | " 150.000000 | \n",
241 | " 150.000000 | \n",
242 | " 150.000000 | \n",
243 | "
\n",
244 | " \n",
245 | " | mean | \n",
246 | " 5.843333 | \n",
247 | " 3.057333 | \n",
248 | " 3.758000 | \n",
249 | " 1.199333 | \n",
250 | "
\n",
251 | " \n",
252 | " | std | \n",
253 | " 0.828066 | \n",
254 | " 0.435866 | \n",
255 | " 1.765298 | \n",
256 | " 0.762238 | \n",
257 | "
\n",
258 | " \n",
259 | " | min | \n",
260 | " 4.300000 | \n",
261 | " 2.000000 | \n",
262 | " 1.000000 | \n",
263 | " 0.100000 | \n",
264 | "
\n",
265 | " \n",
266 | " | 25% | \n",
267 | " 5.100000 | \n",
268 | " 2.800000 | \n",
269 | " 1.600000 | \n",
270 | " 0.300000 | \n",
271 | "
\n",
272 | " \n",
273 | " | 50% | \n",
274 | " 5.800000 | \n",
275 | " 3.000000 | \n",
276 | " 4.350000 | \n",
277 | " 1.300000 | \n",
278 | "
\n",
279 | " \n",
280 | " | 75% | \n",
281 | " 6.400000 | \n",
282 | " 3.300000 | \n",
283 | " 5.100000 | \n",
284 | " 1.800000 | \n",
285 | "
\n",
286 | " \n",
287 | " | max | \n",
288 | " 7.900000 | \n",
289 | " 4.400000 | \n",
290 | " 6.900000 | \n",
291 | " 2.500000 | \n",
292 | "
\n",
293 | " \n",
294 | "
\n",
295 | "
"
296 | ],
297 | "text/plain": [
298 | " sepal_length sepal_width petal_length petal_width\n",
299 | "count 150.000000 150.000000 150.000000 150.000000\n",
300 | "mean 5.843333 3.057333 3.758000 1.199333\n",
301 | "std 0.828066 0.435866 1.765298 0.762238\n",
302 | "min 4.300000 2.000000 1.000000 0.100000\n",
303 | "25% 5.100000 2.800000 1.600000 0.300000\n",
304 | "50% 5.800000 3.000000 4.350000 1.300000\n",
305 | "75% 6.400000 3.300000 5.100000 1.800000\n",
306 | "max 7.900000 4.400000 6.900000 2.500000"
307 | ]
308 | },
309 | "execution_count": 4,
310 | "metadata": {},
311 | "output_type": "execute_result"
312 | }
313 | ],
314 | "source": [
315 | "import pandas\n",
316 | "df.describe()"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": null,
322 | "metadata": {},
323 | "outputs": [],
324 | "source": []
325 | }
326 | ],
327 | "metadata": {
328 | "kernelspec": {
329 | "display_name": "Python 3",
330 | "language": "python",
331 | "name": "python3"
332 | },
333 | "language_info": {
334 | "codemirror_mode": {
335 | "name": "ipython",
336 | "version": 3
337 | },
338 | "file_extension": ".py",
339 | "mimetype": "text/x-python",
340 | "name": "python",
341 | "nbconvert_exporter": "python",
342 | "pygments_lexer": "ipython3",
343 | "version": "3.7.1"
344 | }
345 | },
346 | "nbformat": 4,
347 | "nbformat_minor": 2
348 | }
349 |
--------------------------------------------------------------------------------
/notebooks/EX-5-Airline-OnTime-Data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Graphing Airline Ontime Data\n",
8 | "\n",
9 | "This notebook shows how to create a time-series graph from airline ontime data, which can be downloaded from the [US Bureau of Transportation Statistics](https://www.transtats.bts.gov/tables.asp?DB_ID=120). See https://github.com/Altinity/altinity-datasets for tools to help with loading. "
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "First import SQLAlchemy and activate the %sql function. This just needs to be done once. "
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {
23 | "scrolled": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "from sqlalchemy import create_engine\n",
28 | "%load_ext sql"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "Run a query using %%sql. This needs to go in a separate cell. "
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 2,
41 | "metadata": {},
42 | "outputs": [
43 | {
44 | "name": "stdout",
45 | "output_type": "stream",
46 | "text": [
47 | "Done.\n"
48 | ]
49 | },
50 | {
51 | "data": {
52 | "text/html": [
53 | "\n",
54 | " \n",
55 | " | t | \n",
56 | " cancelled | \n",
57 | " delayed | \n",
58 | "
\n",
59 | " \n",
60 | " | 1987 | \n",
61 | " 0.015005801074227831 | \n",
62 | " 0.15847681018671683 | \n",
63 | "
\n",
64 | " \n",
65 | " | 1988 | \n",
66 | " 0.009642843961357115 | \n",
67 | " 0.13082207633230913 | \n",
68 | "
\n",
69 | " \n",
70 | " | 1989 | \n",
71 | " 0.014711774974212489 | \n",
72 | " 0.16047806077917956 | \n",
73 | "
\n",
74 | " \n",
75 | " | 1990 | \n",
76 | " 0.009952393266188481 | \n",
77 | " 0.1329419891468106 | \n",
78 | "
\n",
79 | " \n",
80 | " | 1991 | \n",
81 | " 0.008569163420771431 | \n",
82 | " 0.11384922960256455 | \n",
83 | "
\n",
84 | " \n",
85 | " | 1992 | \n",
86 | " 0.010375956593639985 | \n",
87 | " 0.11081983528787506 | \n",
88 | "
\n",
89 | " \n",
90 | " | 1993 | \n",
91 | " 0.011802581243944139 | \n",
92 | " 0.118570334568517 | \n",
93 | "
\n",
94 | " \n",
95 | " | 1994 | \n",
96 | " 0.012884050495284986 | \n",
97 | " 0.12803356262335794 | \n",
98 | "
\n",
99 | " \n",
100 | " | 1995 | \n",
101 | " 0.01725126632234837 | \n",
102 | " 0.1554094981919066 | \n",
103 | "
\n",
104 | " \n",
105 | " | 1996 | \n",
106 | " 0.024016518737073715 | \n",
107 | " 0.18197890389412671 | \n",
108 | "
\n",
109 | " \n",
110 | " | 1997 | \n",
111 | " 0.018064640825685447 | \n",
112 | " 0.15648458390237854 | \n",
113 | "
\n",
114 | " \n",
115 | " | 1998 | \n",
116 | " 0.02683685932845917 | \n",
117 | " 0.16164161522946127 | \n",
118 | "
\n",
119 | " \n",
120 | " | 1999 | \n",
121 | " 0.027915021371649622 | \n",
122 | " 0.1695536664662283 | \n",
123 | "
\n",
124 | " \n",
125 | " | 2000 | \n",
126 | " 0.03299110494775074 | \n",
127 | " 0.19912962183842575 | \n",
128 | "
\n",
129 | " \n",
130 | " | 2001 | \n",
131 | " 0.03874103938147854 | \n",
132 | " 0.1598262670540804 | \n",
133 | "
\n",
134 | " \n",
135 | " | 2002 | \n",
136 | " 0.012357913775176383 | \n",
137 | " 0.1360878665255013 | \n",
138 | "
\n",
139 | " \n",
140 | " | 2003 | \n",
141 | " 0.01595893603961728 | \n",
142 | " 0.13144318872073027 | \n",
143 | "
\n",
144 | " \n",
145 | " | 2004 | \n",
146 | " 0.01792006755249836 | \n",
147 | " 0.1665800285302703 | \n",
148 | "
\n",
149 | " \n",
150 | " | 2005 | \n",
151 | " 0.019069045573072995 | \n",
152 | " 0.18150869494248564 | \n",
153 | "
\n",
154 | " \n",
155 | " | 2006 | \n",
156 | " 0.01707299519653113 | \n",
157 | " 0.1994948978720294 | \n",
158 | "
\n",
159 | " \n",
160 | " | 2007 | \n",
161 | " 0.021569298626590076 | \n",
162 | " 0.210983416444704 | \n",
163 | "
\n",
164 | " \n",
165 | " | 2008 | \n",
166 | " 0.01960590185693421 | \n",
167 | " 0.18941510695282526 | \n",
168 | "
\n",
169 | " \n",
170 | " | 2009 | \n",
171 | " 0.013856286970265655 | \n",
172 | " 0.16836124295283075 | \n",
173 | "
\n",
174 | " \n",
175 | " | 2010 | \n",
176 | " 0.017558596223913456 | \n",
177 | " 0.1726871000944634 | \n",
178 | "
\n",
179 | " \n",
180 | " | 2011 | \n",
181 | " 0.019058774771452625 | \n",
182 | " 0.17160785179846255 | \n",
183 | "
\n",
184 | " \n",
185 | " | 2012 | \n",
186 | " 0.01293506290716285 | \n",
187 | " 0.16289138398382616 | \n",
188 | "
\n",
189 | " \n",
190 | " | 2013 | \n",
191 | " 0.015073753250264308 | \n",
192 | " 0.19332121513177994 | \n",
193 | "
\n",
194 | " \n",
195 | " | 2014 | \n",
196 | " 0.021819265264799836 | \n",
197 | " 0.2053970481171983 | \n",
198 | "
\n",
199 | " \n",
200 | " | 2015 | \n",
201 | " 0.015446430612129514 | \n",
202 | " 0.18173906901762288 | \n",
203 | "
\n",
204 | " \n",
205 | " | 2016 | \n",
206 | " 0.011723924809947491 | \n",
207 | " 0.16974030814976632 | \n",
208 | "
\n",
209 | " \n",
210 | " | 2017 | \n",
211 | " 0.01399881896870509 | \n",
212 | " 0.19478701373546048 | \n",
213 | "
\n",
214 | " \n",
215 | " | 2018 | \n",
216 | " 0.010669114159862703 | \n",
217 | " 0.17753310427890504 | \n",
218 | "
\n",
219 | "
"
220 | ],
221 | "text/plain": [
222 | "[(1987, 0.015005801074227831, 0.15847681018671683),\n",
223 | " (1988, 0.009642843961357115, 0.13082207633230913),\n",
224 | " (1989, 0.014711774974212489, 0.16047806077917956),\n",
225 | " (1990, 0.009952393266188481, 0.1329419891468106),\n",
226 | " (1991, 0.008569163420771431, 0.11384922960256455),\n",
227 | " (1992, 0.010375956593639985, 0.11081983528787506),\n",
228 | " (1993, 0.011802581243944139, 0.118570334568517),\n",
229 | " (1994, 0.012884050495284986, 0.12803356262335794),\n",
230 | " (1995, 0.01725126632234837, 0.1554094981919066),\n",
231 | " (1996, 0.024016518737073715, 0.18197890389412671),\n",
232 | " (1997, 0.018064640825685447, 0.15648458390237854),\n",
233 | " (1998, 0.02683685932845917, 0.16164161522946127),\n",
234 | " (1999, 0.027915021371649622, 0.1695536664662283),\n",
235 | " (2000, 0.03299110494775074, 0.19912962183842575),\n",
236 | " (2001, 0.03874103938147854, 0.1598262670540804),\n",
237 | " (2002, 0.012357913775176383, 0.1360878665255013),\n",
238 | " (2003, 0.01595893603961728, 0.13144318872073027),\n",
239 | " (2004, 0.01792006755249836, 0.1665800285302703),\n",
240 | " (2005, 0.019069045573072995, 0.18150869494248564),\n",
241 | " (2006, 0.01707299519653113, 0.1994948978720294),\n",
242 | " (2007, 0.021569298626590076, 0.210983416444704),\n",
243 | " (2008, 0.01960590185693421, 0.18941510695282526),\n",
244 | " (2009, 0.013856286970265655, 0.16836124295283075),\n",
245 | " (2010, 0.017558596223913456, 0.1726871000944634),\n",
246 | " (2011, 0.019058774771452625, 0.17160785179846255),\n",
247 | " (2012, 0.01293506290716285, 0.16289138398382616),\n",
248 | " (2013, 0.015073753250264308, 0.19332121513177994),\n",
249 | " (2014, 0.021819265264799836, 0.2053970481171983),\n",
250 | " (2015, 0.015446430612129514, 0.18173906901762288),\n",
251 | " (2016, 0.011723924809947491, 0.16974030814976632),\n",
252 | " (2017, 0.01399881896870509, 0.19478701373546048),\n",
253 | " (2018, 0.010669114159862703, 0.17753310427890504)]"
254 | ]
255 | },
256 | "execution_count": 2,
257 | "metadata": {},
258 | "output_type": "execute_result"
259 | }
260 | ],
261 | "source": [
262 | "%%sql clickhouse://default:@localhost/airline\n",
263 | "SELECT toYear(FlightDate) t, \n",
264 | " sum(Cancelled)/count(*) cancelled, \n",
265 | " sum(DepDel15)/count(*) delayed\n",
266 | "FROM airline.ontime GROUP BY t ORDER BY t"
267 | ]
268 | },
269 | {
270 | "cell_type": "markdown",
271 | "metadata": {},
272 | "source": [
273 | "Turn the result into a nice dataframe and prove it has some data in it. "
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": 3,
279 | "metadata": {},
280 | "outputs": [
281 | {
282 | "data": {
283 | "text/html": [
284 | "\n",
285 | "\n",
298 | "
\n",
299 | " \n",
300 | " \n",
301 | " | \n",
302 | " t | \n",
303 | " cancelled | \n",
304 | " delayed | \n",
305 | "
\n",
306 | " \n",
307 | " \n",
308 | " \n",
309 | " | 27 | \n",
310 | " 2014 | \n",
311 | " 0.021819 | \n",
312 | " 0.205397 | \n",
313 | "
\n",
314 | " \n",
315 | " | 28 | \n",
316 | " 2015 | \n",
317 | " 0.015446 | \n",
318 | " 0.181739 | \n",
319 | "
\n",
320 | " \n",
321 | " | 29 | \n",
322 | " 2016 | \n",
323 | " 0.011724 | \n",
324 | " 0.169740 | \n",
325 | "
\n",
326 | " \n",
327 | " | 30 | \n",
328 | " 2017 | \n",
329 | " 0.013999 | \n",
330 | " 0.194787 | \n",
331 | "
\n",
332 | " \n",
333 | " | 31 | \n",
334 | " 2018 | \n",
335 | " 0.010669 | \n",
336 | " 0.177533 | \n",
337 | "
\n",
338 | " \n",
339 | "
\n",
340 | "
"
341 | ],
342 | "text/plain": [
343 | " t cancelled delayed\n",
344 | "27 2014 0.021819 0.205397\n",
345 | "28 2015 0.015446 0.181739\n",
346 | "29 2016 0.011724 0.169740\n",
347 | "30 2017 0.013999 0.194787\n",
348 | "31 2018 0.010669 0.177533"
349 | ]
350 | },
351 | "execution_count": 3,
352 | "metadata": {},
353 | "output_type": "execute_result"
354 | }
355 | ],
356 | "source": [
357 | "result = _\n",
358 | "df = result.DataFrame()\n",
359 | "df.tail()"
360 | ]
361 | },
362 | {
363 | "cell_type": "markdown",
364 | "metadata": {},
365 | "source": [
366 | "Time to make a quick graph using matplotlib. I'm not the greatest at this but once you have a data frame everything is possible. "
367 | ]
368 | },
369 | {
370 | "cell_type": "code",
371 | "execution_count": 4,
372 | "metadata": {},
373 | "outputs": [
374 | {
375 | "data": {
376 | "image/png": "\n",
377 | "text/plain": [
378 | ""
379 | ]
380 | },
381 | "metadata": {
382 | "needs_background": "light"
383 | },
384 | "output_type": "display_data"
385 | }
386 | ],
387 | "source": [
388 | "import matplotlib.pyplot as plt\n",
389 | "%matplotlib inline\n",
390 | "\n",
391 | "plt.plot('t', 'cancelled', \n",
392 | " data=df, linestyle='--', \n",
393 | " marker='o', label='Cancelled')\n",
394 | "plt.plot('t', 'delayed', \n",
395 | " data=df, linestyle='--', \n",
396 | " marker='o', label='Delayed')\n",
397 | "plt.xlabel('Year')\n",
398 | "plt.ylabel('Percentage')\n",
399 | "plt.legend(loc='upper left')\n",
400 | "plt.title('Fetch data the easy way')\n",
401 | "plt.show()"
402 | ]
403 | },
404 | {
405 | "cell_type": "markdown",
406 | "metadata": {},
407 | "source": [
408 | "The %sql magic function is great but we can also do the same thing using the clickhouse-driver client library and direct API calls. "
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": 5,
414 | "metadata": {},
415 | "outputs": [
416 | {
417 | "data": {
418 | "text/html": [
419 | "\n",
420 | "\n",
433 | "
\n",
434 | " \n",
435 | " \n",
436 | " | \n",
437 | " t | \n",
438 | " cancelled | \n",
439 | " delayed | \n",
440 | "
\n",
441 | " \n",
442 | " \n",
443 | " \n",
444 | " | 27 | \n",
445 | " 2014 | \n",
446 | " 0.021819 | \n",
447 | " 0.205397 | \n",
448 | "
\n",
449 | " \n",
450 | " | 28 | \n",
451 | " 2015 | \n",
452 | " 0.015446 | \n",
453 | " 0.181739 | \n",
454 | "
\n",
455 | " \n",
456 | " | 29 | \n",
457 | " 2016 | \n",
458 | " 0.011724 | \n",
459 | " 0.169740 | \n",
460 | "
\n",
461 | " \n",
462 | " | 30 | \n",
463 | " 2017 | \n",
464 | " 0.013999 | \n",
465 | " 0.194787 | \n",
466 | "
\n",
467 | " \n",
468 | " | 31 | \n",
469 | " 2018 | \n",
470 | " 0.010669 | \n",
471 | " 0.177533 | \n",
472 | "
\n",
473 | " \n",
474 | "
\n",
475 | "
"
476 | ],
477 | "text/plain": [
478 | " t cancelled delayed\n",
479 | "27 2014 0.021819 0.205397\n",
480 | "28 2015 0.015446 0.181739\n",
481 | "29 2016 0.011724 0.169740\n",
482 | "30 2017 0.013999 0.194787\n",
483 | "31 2018 0.010669 0.177533"
484 | ]
485 | },
486 | "execution_count": 5,
487 | "metadata": {},
488 | "output_type": "execute_result"
489 | }
490 | ],
491 | "source": [
492 | "import pandas\n",
493 | "from clickhouse_driver import Client\n",
494 | "\n",
495 | "client = Client('localhost', database='airline')\n",
496 | "result, columns = client.execute(\n",
497 | " 'SELECT toYear(FlightDate) t,'\n",
498 | " 'sum(Cancelled)/count(*) cancelled,'\n",
499 | " 'sum(DepDel15)/count(*) delayed '\n",
500 | " 'FROM airline.ontime GROUP BY t ORDER BY t', \n",
501 | " with_column_types=True)\n",
502 | "\n",
503 | "df2 = pandas.DataFrame(result, columns=[tuple[0] for tuple in columns])\n",
504 | "df2.tail()"
505 | ]
506 | },
507 | {
508 | "cell_type": "markdown",
509 | "metadata": {},
510 | "source": [
511 | "The graph looks just the same, so we change the title to tell them apart. "
512 | ]
513 | },
514 | {
515 | "cell_type": "code",
516 | "execution_count": 6,
517 | "metadata": {},
518 | "outputs": [
519 | {
520 | "data": {
521 | "image/png": "\n",
522 | "text/plain": [
523 | ""
524 | ]
525 | },
526 | "metadata": {
527 | "needs_background": "light"
528 | },
529 | "output_type": "display_data"
530 | }
531 | ],
532 | "source": [
533 | "import matplotlib.pyplot as plt\n",
534 | "%matplotlib inline\n",
535 | "\n",
536 | "plt.plot('t', 'cancelled', data=df2, \n",
537 | " linestyle='--', marker='o', label='Cancelled')\n",
538 | "plt.plot('t', 'delayed', data=df2, \n",
539 | " linestyle='--', marker='o', label='Delayed')\n",
540 | "plt.xlabel('Year')\n",
541 | "plt.ylabel('Percentage')\n",
542 | "plt.legend(loc='upper left')\n",
543 | "plt.title('Fetch data the hard way')\n",
544 | "plt.show()"
545 | ]
546 | },
547 | {
548 | "cell_type": "markdown",
549 | "metadata": {},
550 | "source": [
551 | "That's all folks!"
552 | ]
553 | },
554 | {
555 | "cell_type": "code",
556 | "execution_count": null,
557 | "metadata": {},
558 | "outputs": [],
559 | "source": []
560 | }
561 | ],
562 | "metadata": {
563 | "kernelspec": {
564 | "display_name": "Python 3",
565 | "language": "python",
566 | "name": "python3"
567 | },
568 | "language_info": {
569 | "codemirror_mode": {
570 | "name": "ipython",
571 | "version": 3
572 | },
573 | "file_extension": ".py",
574 | "mimetype": "text/x-python",
575 | "name": "python",
576 | "nbconvert_exporter": "python",
577 | "pygments_lexer": "ipython3",
578 | "version": "3.7.1"
579 | }
580 | },
581 | "nbformat": 4,
582 | "nbformat_minor": 2
583 | }
584 |
--------------------------------------------------------------------------------
/notebooks/EX-2-ClickHouse-SQL-Alchemy.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# ClickHouse SQLAlchemy\n",
8 | "\n",
9 | "This notebook provides simple examples from the clickhouse-sqlalchemy driver including a demonstration of integration with pandas and matplotlib. "
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "Import SQLAlchemy + clickhouse-sqlalchemy entities."
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 1,
22 | "metadata": {
23 | "scrolled": true
24 | },
25 | "outputs": [],
26 | "source": [
27 | "from sqlalchemy import create_engine, Column, MetaData, literal\n",
28 | "from clickhouse_sqlalchemy import Table, make_session, get_declarative_base, types, engines"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {},
34 | "source": [
35 | "Initialize SQLAlchemy to use local server with native connectivity. If you leave off '+native' the driver will use http[s]. "
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "execution_count": 2,
41 | "metadata": {},
42 | "outputs": [],
43 | "source": [
44 | "uri = 'clickhouse+native://default:@localhost/default'\n",
45 | "\n",
46 | "engine = create_engine(uri)\n",
47 | "session = make_session(engine)\n",
48 | "metadata = MetaData(bind=engine)\n",
49 | "\n",
50 | "Base = get_declarative_base(metadata=metadata)"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "Define a class to represent sensor data from devices. "
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 3,
63 | "metadata": {},
64 | "outputs": [],
65 | "source": [
66 | "class SensorData(Base):\n",
67 | " dev_id = Column(types.Int32, primary_key=True)\n",
68 | " type = Column(types.String)\n",
69 | " mdate = Column(types.Date)\n",
70 | " mdatetime = Column(types.DateTime, primary_key=True)\n",
71 | " value = Column(types.Float64)\n",
72 | "\n",
73 | " __table_args__ = (\n",
74 | " engines.MergeTree('mdate', ('dev_id', 'mdate')),\n",
75 | " )"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {},
81 | "source": [
82 | "Drop and then recreate the SQL table. Ignore errors if the table does not already exist. "
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 4,
88 | "metadata": {},
89 | "outputs": [],
90 | "source": [
91 | "table = SensorData.__table__\n",
92 | "try:\n",
93 | " table.drop()\n",
94 | "except:\n",
95 | " # Exceptions are ignored\n",
96 | " pass\n",
97 | "table.create()"
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | "Create sensor data for 5 mythical devices. Readings increase linearly from a base that is randomly selected for each device. "
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 5,
110 | "metadata": {},
111 | "outputs": [
112 | {
113 | "data": {
114 | "text/plain": [
115 | ""
116 | ]
117 | },
118 | "execution_count": 5,
119 | "metadata": {},
120 | "output_type": "execute_result"
121 | }
122 | ],
123 | "source": [
124 | "from datetime import date, datetime, timedelta\n",
125 | "from random import random\n",
126 | "today = date.today()\n",
127 | "this_instant = datetime.today()\n",
128 | "data = []\n",
129 | "for i in range(5):\n",
130 | " base = random()\n",
131 | " for j in range(10): \n",
132 | " data.append({'dev_id': i, \n",
133 | " 'type': 'widget-a', \n",
134 | " 'mdate': today, \n",
135 | " 'mdatetime': this_instant + timedelta(minutes=j), \n",
136 | " 'value': base + j * 0.1})\n",
137 | "session.execute(table.insert(), data)"
138 | ]
139 | },
140 | {
141 | "cell_type": "markdown",
142 | "metadata": {},
143 | "source": [
144 | "Enable %sql magic function. "
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 6,
150 | "metadata": {},
151 | "outputs": [
152 | {
153 | "data": {
154 | "text/plain": [
155 | "'Connected: default@default'"
156 | ]
157 | },
158 | "execution_count": 6,
159 | "metadata": {},
160 | "output_type": "execute_result"
161 | }
162 | ],
163 | "source": [
164 | "%load_ext sql\n",
165 | "%sql clickhouse://default:@localhost/default"
166 | ]
167 | },
168 | {
169 | "cell_type": "markdown",
170 | "metadata": {},
171 | "source": [
172 | "Prove that the magic function works by showing tables. %sql can handle any query. "
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": 7,
178 | "metadata": {},
179 | "outputs": [
180 | {
181 | "name": "stdout",
182 | "output_type": "stream",
183 | "text": [
184 | " * clickhouse://default:***@localhost/default\n",
185 | "Done.\n"
186 | ]
187 | },
188 | {
189 | "data": {
190 | "text/html": [
191 | "\n",
192 | " \n",
193 | " | name | \n",
194 | "
\n",
195 | " \n",
196 | " | airports | \n",
197 | "
\n",
198 | " \n",
199 | " | f | \n",
200 | "
\n",
201 | " \n",
202 | " | f_1 | \n",
203 | "
\n",
204 | " \n",
205 | " | f_long | \n",
206 | "
\n",
207 | " \n",
208 | " | iris | \n",
209 | "
\n",
210 | " \n",
211 | " | iris_from_csv | \n",
212 | "
\n",
213 | " \n",
214 | " | iris_remote | \n",
215 | "
\n",
216 | " \n",
217 | " | iris_url_local | \n",
218 | "
\n",
219 | " \n",
220 | " | iris_web | \n",
221 | "
\n",
222 | " \n",
223 | " | ontime | \n",
224 | "
\n",
225 | " \n",
226 | " | sdata | \n",
227 | "
\n",
228 | " \n",
229 | " | sensor_data | \n",
230 | "
\n",
231 | "
"
232 | ],
233 | "text/plain": [
234 | "[('airports',),\n",
235 | " ('f',),\n",
236 | " ('f_1',),\n",
237 | " ('f_long',),\n",
238 | " ('iris',),\n",
239 | " ('iris_from_csv',),\n",
240 | " ('iris_remote',),\n",
241 | " ('iris_url_local',),\n",
242 | " ('iris_web',),\n",
243 | " ('ontime',),\n",
244 | " ('sdata',),\n",
245 | " ('sensor_data',)]"
246 | ]
247 | },
248 | "execution_count": 7,
249 | "metadata": {},
250 | "output_type": "execute_result"
251 | }
252 | ],
253 | "source": [
254 | "%sql show tables"
255 | ]
256 | },
257 | {
258 | "cell_type": "markdown",
259 | "metadata": {},
260 | "source": [
261 | "Select all rows back and convert to a data frame. "
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": 8,
267 | "metadata": {},
268 | "outputs": [
269 | {
270 | "name": "stdout",
271 | "output_type": "stream",
272 | "text": [
273 | " * clickhouse://default:***@localhost/default\n",
274 | "Done.\n"
275 | ]
276 | },
277 | {
278 | "data": {
279 | "text/html": [
280 | "\n",
281 | "\n",
294 | "
\n",
295 | " \n",
296 | " \n",
297 | " | \n",
298 | " dev_id | \n",
299 | " type | \n",
300 | " mdate | \n",
301 | " mdatetime | \n",
302 | " value | \n",
303 | "
\n",
304 | " \n",
305 | " \n",
306 | " \n",
307 | " | 0 | \n",
308 | " 0 | \n",
309 | " widget-a | \n",
310 | " 2019-02-24 | \n",
311 | " 2019-02-24 00:03:59 | \n",
312 | " 0.470658 | \n",
313 | "
\n",
314 | " \n",
315 | " | 1 | \n",
316 | " 0 | \n",
317 | " widget-a | \n",
318 | " 2019-02-24 | \n",
319 | " 2019-02-24 00:04:59 | \n",
320 | " 0.570658 | \n",
321 | "
\n",
322 | " \n",
323 | " | 2 | \n",
324 | " 0 | \n",
325 | " widget-a | \n",
326 | " 2019-02-24 | \n",
327 | " 2019-02-24 00:05:59 | \n",
328 | " 0.670658 | \n",
329 | "
\n",
330 | " \n",
331 | " | 3 | \n",
332 | " 0 | \n",
333 | " widget-a | \n",
334 | " 2019-02-24 | \n",
335 | " 2019-02-24 00:06:59 | \n",
336 | " 0.770658 | \n",
337 | "
\n",
338 | " \n",
339 | " | 4 | \n",
340 | " 0 | \n",
341 | " widget-a | \n",
342 | " 2019-02-24 | \n",
343 | " 2019-02-24 00:07:59 | \n",
344 | " 0.870658 | \n",
345 | "
\n",
346 | " \n",
347 | " | 5 | \n",
348 | " 0 | \n",
349 | " widget-a | \n",
350 | " 2019-02-24 | \n",
351 | " 2019-02-24 00:08:59 | \n",
352 | " 0.970658 | \n",
353 | "
\n",
354 | " \n",
355 | " | 6 | \n",
356 | " 0 | \n",
357 | " widget-a | \n",
358 | " 2019-02-24 | \n",
359 | " 2019-02-24 00:09:59 | \n",
360 | " 1.070658 | \n",
361 | "
\n",
362 | " \n",
363 | " | 7 | \n",
364 | " 0 | \n",
365 | " widget-a | \n",
366 | " 2019-02-24 | \n",
367 | " 2019-02-24 00:10:59 | \n",
368 | " 1.170658 | \n",
369 | "
\n",
370 | " \n",
371 | " | 8 | \n",
372 | " 0 | \n",
373 | " widget-a | \n",
374 | " 2019-02-24 | \n",
375 | " 2019-02-24 00:11:59 | \n",
376 | " 1.270658 | \n",
377 | "
\n",
378 | " \n",
379 | " | 9 | \n",
380 | " 0 | \n",
381 | " widget-a | \n",
382 | " 2019-02-24 | \n",
383 | " 2019-02-24 00:12:59 | \n",
384 | " 1.370658 | \n",
385 | "
\n",
386 | " \n",
387 | " | 10 | \n",
388 | " 1 | \n",
389 | " widget-a | \n",
390 | " 2019-02-24 | \n",
391 | " 2019-02-24 00:03:59 | \n",
392 | " 0.308329 | \n",
393 | "
\n",
394 | " \n",
395 | " | 11 | \n",
396 | " 1 | \n",
397 | " widget-a | \n",
398 | " 2019-02-24 | \n",
399 | " 2019-02-24 00:04:59 | \n",
400 | " 0.408329 | \n",
401 | "
\n",
402 | " \n",
403 | " | 12 | \n",
404 | " 1 | \n",
405 | " widget-a | \n",
406 | " 2019-02-24 | \n",
407 | " 2019-02-24 00:05:59 | \n",
408 | " 0.508329 | \n",
409 | "
\n",
410 | " \n",
411 | " | 13 | \n",
412 | " 1 | \n",
413 | " widget-a | \n",
414 | " 2019-02-24 | \n",
415 | " 2019-02-24 00:06:59 | \n",
416 | " 0.608329 | \n",
417 | "
\n",
418 | " \n",
419 | " | 14 | \n",
420 | " 1 | \n",
421 | " widget-a | \n",
422 | " 2019-02-24 | \n",
423 | " 2019-02-24 00:07:59 | \n",
424 | " 0.708329 | \n",
425 | "
\n",
426 | " \n",
427 | " | 15 | \n",
428 | " 1 | \n",
429 | " widget-a | \n",
430 | " 2019-02-24 | \n",
431 | " 2019-02-24 00:08:59 | \n",
432 | " 0.808329 | \n",
433 | "
\n",
434 | " \n",
435 | " | 16 | \n",
436 | " 1 | \n",
437 | " widget-a | \n",
438 | " 2019-02-24 | \n",
439 | " 2019-02-24 00:09:59 | \n",
440 | " 0.908329 | \n",
441 | "
\n",
442 | " \n",
443 | " | 17 | \n",
444 | " 1 | \n",
445 | " widget-a | \n",
446 | " 2019-02-24 | \n",
447 | " 2019-02-24 00:10:59 | \n",
448 | " 1.008329 | \n",
449 | "
\n",
450 | " \n",
451 | " | 18 | \n",
452 | " 1 | \n",
453 | " widget-a | \n",
454 | " 2019-02-24 | \n",
455 | " 2019-02-24 00:11:59 | \n",
456 | " 1.108329 | \n",
457 | "
\n",
458 | " \n",
459 | " | 19 | \n",
460 | " 1 | \n",
461 | " widget-a | \n",
462 | " 2019-02-24 | \n",
463 | " 2019-02-24 00:12:59 | \n",
464 | " 1.208329 | \n",
465 | "
\n",
466 | " \n",
467 | " | 20 | \n",
468 | " 2 | \n",
469 | " widget-a | \n",
470 | " 2019-02-24 | \n",
471 | " 2019-02-24 00:03:59 | \n",
472 | " 0.766993 | \n",
473 | "
\n",
474 | " \n",
475 | " | 21 | \n",
476 | " 2 | \n",
477 | " widget-a | \n",
478 | " 2019-02-24 | \n",
479 | " 2019-02-24 00:04:59 | \n",
480 | " 0.866993 | \n",
481 | "
\n",
482 | " \n",
483 | " | 22 | \n",
484 | " 2 | \n",
485 | " widget-a | \n",
486 | " 2019-02-24 | \n",
487 | " 2019-02-24 00:05:59 | \n",
488 | " 0.966993 | \n",
489 | "
\n",
490 | " \n",
491 | " | 23 | \n",
492 | " 2 | \n",
493 | " widget-a | \n",
494 | " 2019-02-24 | \n",
495 | " 2019-02-24 00:06:59 | \n",
496 | " 1.066993 | \n",
497 | "
\n",
498 | " \n",
499 | " | 24 | \n",
500 | " 2 | \n",
501 | " widget-a | \n",
502 | " 2019-02-24 | \n",
503 | " 2019-02-24 00:07:59 | \n",
504 | " 1.166993 | \n",
505 | "
\n",
506 | " \n",
507 | " | 25 | \n",
508 | " 2 | \n",
509 | " widget-a | \n",
510 | " 2019-02-24 | \n",
511 | " 2019-02-24 00:08:59 | \n",
512 | " 1.266993 | \n",
513 | "
\n",
514 | " \n",
515 | " | 26 | \n",
516 | " 2 | \n",
517 | " widget-a | \n",
518 | " 2019-02-24 | \n",
519 | " 2019-02-24 00:09:59 | \n",
520 | " 1.366993 | \n",
521 | "
\n",
522 | " \n",
523 | " | 27 | \n",
524 | " 2 | \n",
525 | " widget-a | \n",
526 | " 2019-02-24 | \n",
527 | " 2019-02-24 00:10:59 | \n",
528 | " 1.466993 | \n",
529 | "
\n",
530 | " \n",
531 | " | 28 | \n",
532 | " 2 | \n",
533 | " widget-a | \n",
534 | " 2019-02-24 | \n",
535 | " 2019-02-24 00:11:59 | \n",
536 | " 1.566993 | \n",
537 | "
\n",
538 | " \n",
539 | " | 29 | \n",
540 | " 2 | \n",
541 | " widget-a | \n",
542 | " 2019-02-24 | \n",
543 | " 2019-02-24 00:12:59 | \n",
544 | " 1.666993 | \n",
545 | "
\n",
546 | " \n",
547 | " | 30 | \n",
548 | " 3 | \n",
549 | " widget-a | \n",
550 | " 2019-02-24 | \n",
551 | " 2019-02-24 00:03:59 | \n",
552 | " 0.880996 | \n",
553 | "
\n",
554 | " \n",
555 | " | 31 | \n",
556 | " 3 | \n",
557 | " widget-a | \n",
558 | " 2019-02-24 | \n",
559 | " 2019-02-24 00:04:59 | \n",
560 | " 0.980996 | \n",
561 | "
\n",
562 | " \n",
563 | " | 32 | \n",
564 | " 3 | \n",
565 | " widget-a | \n",
566 | " 2019-02-24 | \n",
567 | " 2019-02-24 00:05:59 | \n",
568 | " 1.080996 | \n",
569 | "
\n",
570 | " \n",
571 | " | 33 | \n",
572 | " 3 | \n",
573 | " widget-a | \n",
574 | " 2019-02-24 | \n",
575 | " 2019-02-24 00:06:59 | \n",
576 | " 1.180996 | \n",
577 | "
\n",
578 | " \n",
579 | " | 34 | \n",
580 | " 3 | \n",
581 | " widget-a | \n",
582 | " 2019-02-24 | \n",
583 | " 2019-02-24 00:07:59 | \n",
584 | " 1.280996 | \n",
585 | "
\n",
586 | " \n",
587 | " | 35 | \n",
588 | " 3 | \n",
589 | " widget-a | \n",
590 | " 2019-02-24 | \n",
591 | " 2019-02-24 00:08:59 | \n",
592 | " 1.380996 | \n",
593 | "
\n",
594 | " \n",
595 | " | 36 | \n",
596 | " 3 | \n",
597 | " widget-a | \n",
598 | " 2019-02-24 | \n",
599 | " 2019-02-24 00:09:59 | \n",
600 | " 1.480996 | \n",
601 | "
\n",
602 | " \n",
603 | " | 37 | \n",
604 | " 3 | \n",
605 | " widget-a | \n",
606 | " 2019-02-24 | \n",
607 | " 2019-02-24 00:10:59 | \n",
608 | " 1.580996 | \n",
609 | "
\n",
610 | " \n",
611 | " | 38 | \n",
612 | " 3 | \n",
613 | " widget-a | \n",
614 | " 2019-02-24 | \n",
615 | " 2019-02-24 00:11:59 | \n",
616 | " 1.680996 | \n",
617 | "
\n",
618 | " \n",
619 | " | 39 | \n",
620 | " 3 | \n",
621 | " widget-a | \n",
622 | " 2019-02-24 | \n",
623 | " 2019-02-24 00:12:59 | \n",
624 | " 1.780996 | \n",
625 | "
\n",
626 | " \n",
627 | " | 40 | \n",
628 | " 4 | \n",
629 | " widget-a | \n",
630 | " 2019-02-24 | \n",
631 | " 2019-02-24 00:03:59 | \n",
632 | " 0.673713 | \n",
633 | "
\n",
634 | " \n",
635 | " | 41 | \n",
636 | " 4 | \n",
637 | " widget-a | \n",
638 | " 2019-02-24 | \n",
639 | " 2019-02-24 00:04:59 | \n",
640 | " 0.773713 | \n",
641 | "
\n",
642 | " \n",
643 | " | 42 | \n",
644 | " 4 | \n",
645 | " widget-a | \n",
646 | " 2019-02-24 | \n",
647 | " 2019-02-24 00:05:59 | \n",
648 | " 0.873713 | \n",
649 | "
\n",
650 | " \n",
651 | " | 43 | \n",
652 | " 4 | \n",
653 | " widget-a | \n",
654 | " 2019-02-24 | \n",
655 | " 2019-02-24 00:06:59 | \n",
656 | " 0.973713 | \n",
657 | "
\n",
658 | " \n",
659 | " | 44 | \n",
660 | " 4 | \n",
661 | " widget-a | \n",
662 | " 2019-02-24 | \n",
663 | " 2019-02-24 00:07:59 | \n",
664 | " 1.073713 | \n",
665 | "
\n",
666 | " \n",
667 | " | 45 | \n",
668 | " 4 | \n",
669 | " widget-a | \n",
670 | " 2019-02-24 | \n",
671 | " 2019-02-24 00:08:59 | \n",
672 | " 1.173713 | \n",
673 | "
\n",
674 | " \n",
675 | " | 46 | \n",
676 | " 4 | \n",
677 | " widget-a | \n",
678 | " 2019-02-24 | \n",
679 | " 2019-02-24 00:09:59 | \n",
680 | " 1.273713 | \n",
681 | "
\n",
682 | " \n",
683 | " | 47 | \n",
684 | " 4 | \n",
685 | " widget-a | \n",
686 | " 2019-02-24 | \n",
687 | " 2019-02-24 00:10:59 | \n",
688 | " 1.373713 | \n",
689 | "
\n",
690 | " \n",
691 | " | 48 | \n",
692 | " 4 | \n",
693 | " widget-a | \n",
694 | " 2019-02-24 | \n",
695 | " 2019-02-24 00:11:59 | \n",
696 | " 1.473713 | \n",
697 | "
\n",
698 | " \n",
699 | " | 49 | \n",
700 | " 4 | \n",
701 | " widget-a | \n",
702 | " 2019-02-24 | \n",
703 | " 2019-02-24 00:12:59 | \n",
704 | " 1.573713 | \n",
705 | "
\n",
706 | " \n",
707 | "
\n",
708 | "
"
709 | ],
710 | "text/plain": [
711 | " dev_id type mdate mdatetime value\n",
712 | "0 0 widget-a 2019-02-24 2019-02-24 00:03:59 0.470658\n",
713 | "1 0 widget-a 2019-02-24 2019-02-24 00:04:59 0.570658\n",
714 | "2 0 widget-a 2019-02-24 2019-02-24 00:05:59 0.670658\n",
715 | "3 0 widget-a 2019-02-24 2019-02-24 00:06:59 0.770658\n",
716 | "4 0 widget-a 2019-02-24 2019-02-24 00:07:59 0.870658\n",
717 | "5 0 widget-a 2019-02-24 2019-02-24 00:08:59 0.970658\n",
718 | "6 0 widget-a 2019-02-24 2019-02-24 00:09:59 1.070658\n",
719 | "7 0 widget-a 2019-02-24 2019-02-24 00:10:59 1.170658\n",
720 | "8 0 widget-a 2019-02-24 2019-02-24 00:11:59 1.270658\n",
721 | "9 0 widget-a 2019-02-24 2019-02-24 00:12:59 1.370658\n",
722 | "10 1 widget-a 2019-02-24 2019-02-24 00:03:59 0.308329\n",
723 | "11 1 widget-a 2019-02-24 2019-02-24 00:04:59 0.408329\n",
724 | "12 1 widget-a 2019-02-24 2019-02-24 00:05:59 0.508329\n",
725 | "13 1 widget-a 2019-02-24 2019-02-24 00:06:59 0.608329\n",
726 | "14 1 widget-a 2019-02-24 2019-02-24 00:07:59 0.708329\n",
727 | "15 1 widget-a 2019-02-24 2019-02-24 00:08:59 0.808329\n",
728 | "16 1 widget-a 2019-02-24 2019-02-24 00:09:59 0.908329\n",
729 | "17 1 widget-a 2019-02-24 2019-02-24 00:10:59 1.008329\n",
730 | "18 1 widget-a 2019-02-24 2019-02-24 00:11:59 1.108329\n",
731 | "19 1 widget-a 2019-02-24 2019-02-24 00:12:59 1.208329\n",
732 | "20 2 widget-a 2019-02-24 2019-02-24 00:03:59 0.766993\n",
733 | "21 2 widget-a 2019-02-24 2019-02-24 00:04:59 0.866993\n",
734 | "22 2 widget-a 2019-02-24 2019-02-24 00:05:59 0.966993\n",
735 | "23 2 widget-a 2019-02-24 2019-02-24 00:06:59 1.066993\n",
736 | "24 2 widget-a 2019-02-24 2019-02-24 00:07:59 1.166993\n",
737 | "25 2 widget-a 2019-02-24 2019-02-24 00:08:59 1.266993\n",
738 | "26 2 widget-a 2019-02-24 2019-02-24 00:09:59 1.366993\n",
739 | "27 2 widget-a 2019-02-24 2019-02-24 00:10:59 1.466993\n",
740 | "28 2 widget-a 2019-02-24 2019-02-24 00:11:59 1.566993\n",
741 | "29 2 widget-a 2019-02-24 2019-02-24 00:12:59 1.666993\n",
742 | "30 3 widget-a 2019-02-24 2019-02-24 00:03:59 0.880996\n",
743 | "31 3 widget-a 2019-02-24 2019-02-24 00:04:59 0.980996\n",
744 | "32 3 widget-a 2019-02-24 2019-02-24 00:05:59 1.080996\n",
745 | "33 3 widget-a 2019-02-24 2019-02-24 00:06:59 1.180996\n",
746 | "34 3 widget-a 2019-02-24 2019-02-24 00:07:59 1.280996\n",
747 | "35 3 widget-a 2019-02-24 2019-02-24 00:08:59 1.380996\n",
748 | "36 3 widget-a 2019-02-24 2019-02-24 00:09:59 1.480996\n",
749 | "37 3 widget-a 2019-02-24 2019-02-24 00:10:59 1.580996\n",
750 | "38 3 widget-a 2019-02-24 2019-02-24 00:11:59 1.680996\n",
751 | "39 3 widget-a 2019-02-24 2019-02-24 00:12:59 1.780996\n",
752 | "40 4 widget-a 2019-02-24 2019-02-24 00:03:59 0.673713\n",
753 | "41 4 widget-a 2019-02-24 2019-02-24 00:04:59 0.773713\n",
754 | "42 4 widget-a 2019-02-24 2019-02-24 00:05:59 0.873713\n",
755 | "43 4 widget-a 2019-02-24 2019-02-24 00:06:59 0.973713\n",
756 | "44 4 widget-a 2019-02-24 2019-02-24 00:07:59 1.073713\n",
757 | "45 4 widget-a 2019-02-24 2019-02-24 00:08:59 1.173713\n",
758 | "46 4 widget-a 2019-02-24 2019-02-24 00:09:59 1.273713\n",
759 | "47 4 widget-a 2019-02-24 2019-02-24 00:10:59 1.373713\n",
760 | "48 4 widget-a 2019-02-24 2019-02-24 00:11:59 1.473713\n",
761 | "49 4 widget-a 2019-02-24 2019-02-24 00:12:59 1.573713"
762 | ]
763 | },
764 | "execution_count": 8,
765 | "metadata": {},
766 | "output_type": "execute_result"
767 | }
768 | ],
769 | "source": [
770 | "result = %sql select * from sensor_data\n",
771 | "df = result.DataFrame()\n",
772 | "df"
773 | ]
774 | },
775 | {
776 | "cell_type": "code",
777 | "execution_count": 12,
778 | "metadata": {},
779 | "outputs": [
780 | {
781 | "data": {
782 | "text/html": [
783 | "\n",
784 | "\n",
797 | "
\n",
798 | " \n",
799 | " \n",
800 | " | \n",
801 | " dev_id | \n",
802 | " value | \n",
803 | "
\n",
804 | " \n",
805 | " \n",
806 | " \n",
807 | " | count | \n",
808 | " 50.000000 | \n",
809 | " 50.000000 | \n",
810 | "
\n",
811 | " \n",
812 | " | mean | \n",
813 | " 2.000000 | \n",
814 | " 1.070138 | \n",
815 | "
\n",
816 | " \n",
817 | " | std | \n",
818 | " 1.428571 | \n",
819 | " 0.357015 | \n",
820 | "
\n",
821 | " \n",
822 | " | min | \n",
823 | " 0.000000 | \n",
824 | " 0.308329 | \n",
825 | "
\n",
826 | " \n",
827 | " | 25% | \n",
828 | " 1.000000 | \n",
829 | " 0.822995 | \n",
830 | "
\n",
831 | " \n",
832 | " | 50% | \n",
833 | " 2.000000 | \n",
834 | " 1.072185 | \n",
835 | "
\n",
836 | " \n",
837 | " | 75% | \n",
838 | " 3.000000 | \n",
839 | " 1.345493 | \n",
840 | "
\n",
841 | " \n",
842 | " | max | \n",
843 | " 4.000000 | \n",
844 | " 1.780996 | \n",
845 | "
\n",
846 | " \n",
847 | "
\n",
848 | "
"
849 | ],
850 | "text/plain": [
851 | " dev_id value\n",
852 | "count 50.000000 50.000000\n",
853 | "mean 2.000000 1.070138\n",
854 | "std 1.428571 0.357015\n",
855 | "min 0.000000 0.308329\n",
856 | "25% 1.000000 0.822995\n",
857 | "50% 2.000000 1.072185\n",
858 | "75% 3.000000 1.345493\n",
859 | "max 4.000000 1.780996"
860 | ]
861 | },
862 | "execution_count": 12,
863 | "metadata": {},
864 | "output_type": "execute_result"
865 | }
866 | ],
867 | "source": [
868 | "df.describe()"
869 | ]
870 | },
871 | {
872 | "cell_type": "markdown",
873 | "metadata": {},
874 | "source": [
875 | "Data frames integrate nicely with graphics. Use selection on the data frame to pull out rows for each device in succession and plot them as separate lines. "
876 | ]
877 | },
878 | {
879 | "cell_type": "code",
880 | "execution_count": 9,
881 | "metadata": {},
882 | "outputs": [
883 | {
884 | "data": {
885 | "image/png": "\n",
886 | "text/plain": [
887 | ""
888 | ]
889 | },
890 | "metadata": {
891 | "needs_background": "light"
892 | },
893 | "output_type": "display_data"
894 | }
895 | ],
896 | "source": [
897 | "import matplotlib.pyplot as plt\n",
898 | "%matplotlib inline\n",
899 | "\n",
900 | "# Break up the data frame and graph each device separately. \n",
901 | "markers = ['o', 'x', '^', '+', '*']\n",
902 | "for i in range(5):\n",
903 | " df_segment = df[df['dev_id'] == i]\n",
904 | " plt.plot('mdatetime', 'value', data=df_segment, linestyle='--', marker=markers[i])\n",
905 | " \n",
906 | "plt.xticks(rotation=90)\n",
907 | "plt.show()"
908 | ]
909 | },
910 | {
911 | "cell_type": "markdown",
912 | "metadata": {},
913 | "source": [
914 | "It's more common to use ClickHouse to compute aggregates. Find the min, average, and max values for each device and likewise convert them to a data frame."
915 | ]
916 | },
917 | {
918 | "cell_type": "code",
919 | "execution_count": 10,
920 | "metadata": {},
921 | "outputs": [
922 | {
923 | "name": "stdout",
924 | "output_type": "stream",
925 | "text": [
926 | " * clickhouse://default:***@localhost/default\n",
927 | "Done.\n"
928 | ]
929 | },
930 | {
931 | "data": {
932 | "text/html": [
933 | "\n",
934 | "\n",
947 | "
\n",
948 | " \n",
949 | " \n",
950 | " | \n",
951 | " dev_id | \n",
952 | " min(value) | \n",
953 | " avg(value) | \n",
954 | " max(value) | \n",
955 | "
\n",
956 | " \n",
957 | " \n",
958 | " \n",
959 | " | 0 | \n",
960 | " 0 | \n",
961 | " 0.470658 | \n",
962 | " 0.920658 | \n",
963 | " 1.370658 | \n",
964 | "
\n",
965 | " \n",
966 | " | 1 | \n",
967 | " 1 | \n",
968 | " 0.308329 | \n",
969 | " 0.758329 | \n",
970 | " 1.208329 | \n",
971 | "
\n",
972 | " \n",
973 | " | 2 | \n",
974 | " 2 | \n",
975 | " 0.766993 | \n",
976 | " 1.216993 | \n",
977 | " 1.666993 | \n",
978 | "
\n",
979 | " \n",
980 | " | 3 | \n",
981 | " 3 | \n",
982 | " 0.880996 | \n",
983 | " 1.330996 | \n",
984 | " 1.780996 | \n",
985 | "
\n",
986 | " \n",
987 | " | 4 | \n",
988 | " 4 | \n",
989 | " 0.673713 | \n",
990 | " 1.123713 | \n",
991 | " 1.573713 | \n",
992 | "
\n",
993 | " \n",
994 | "
\n",
995 | "
"
996 | ],
997 | "text/plain": [
998 | " dev_id min(value) avg(value) max(value)\n",
999 | "0 0 0.470658 0.920658 1.370658\n",
1000 | "1 1 0.308329 0.758329 1.208329\n",
1001 | "2 2 0.766993 1.216993 1.666993\n",
1002 | "3 3 0.880996 1.330996 1.780996\n",
1003 | "4 4 0.673713 1.123713 1.573713"
1004 | ]
1005 | },
1006 | "execution_count": 10,
1007 | "metadata": {},
1008 | "output_type": "execute_result"
1009 | }
1010 | ],
1011 | "source": [
1012 | "result = %sql select dev_id, min(value), avg(value), max(value) from sensor_data group by dev_id order by dev_id\n",
1013 | "df2 = result.DataFrame()\n",
1014 | "df2"
1015 | ]
1016 | },
1017 | {
1018 | "cell_type": "markdown",
1019 | "metadata": {},
1020 | "source": [
1021 | "Let's put the average values per device into a nice bar chart. It's easy to add additional sets of bars or create subplots, but this will do for today. "
1022 | ]
1023 | },
1024 | {
1025 | "cell_type": "code",
1026 | "execution_count": 11,
1027 | "metadata": {},
1028 | "outputs": [
1029 | {
1030 | "data": {
1031 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAGSVJREFUeJzt3Xm0ZWV95vHvQxUgIIJa5UQxqYWABNEUSBaJooAWJIBJjAEnNEil7aAR7LRoIipqEqMGZUEaqxUJKCA4VmxsIM3khFIiEAEhJYhVAUKBgIhjkV//sffdHC93Krj7Hrj3+1nrrrWH97znt+/07PE9qSokSQLYYNgFSJIeOQwFSVLHUJAkdQwFSVLHUJAkdQwFSVLHUNCjWpJ3J/nUNPRzcpJ3TkdN0qOZoTDLJbk4yV1JNh52LY9kVfXfquq9w65DD2h/d98w7DrmGkNhFkuyHfB7QAEH9fQe8/voVzPPn6XAUJjtXgtcBpwKHDayMMmeSW5LMm9g2R8mubqd3iDJMUl+kOTOJGcneUK7brskleTwJD8CLmyXn9P2eU+SS5M8e6DvJyb5lyQ/SXJ5kvcl+drA+h2TXJDkx0muT/KK8TYoyfZJLklyb5ILgAWj1u+Z5BtJ7k5yVZK92+WHJFk5qu1RSVa006cmed/AuoOTXNnW/IMkS9vlWyT5RJJbk/xHuy3zGEN7auucJJ9q6/23JDskeXuS25OsTvKSgfbj9p3kGUkubH8edyT5dJItB177tvY197bfw33G2a69k6wZmP9h+9qrgfuSzE/ytCSfS7I2yU1J3tzTNr0uydeSfKg9mr0pyf7tuvfT7NCcmOSnSU5M4/j2fe5JcnWSXcb7XdFDVFV+zdIvYBXw34HfBn4NPHlg3Q+A/QbmzwGOaaffQhMmi4CNgY8BZ7brtqM58jgN2AzYpF3+Z8DmbfuPAFcO9H1W+7UpsDOwGvhau26zdv71wHzgecAdwLPH2aZvAv/Yvs8LgHuBT7XrtgLuBA6g2eHZr51f2L73vcDigb4uBw5pp08F3tdO7wHc075+g7bfHdt1X2y/H5sBTwK+Dfz5OLW+G/gF8NJ2204DbgL+GtgQOAK4aaD9uH0Dz2zr2bjdnkuBj7TrntV+D5828DN6xujtauf3BtYMzP8QuBLYGtik3d7vAMcCGwFPB24EXtrDNr2O5vfyCGAe8EbgFiDt+ouBNwz09dK2ti2BADsBTx3239ls+xp6AX719IOF323/4Ba0898HjhpY/z7glHZ6c+A+YNt2/jpgn4G2T237ms8DofD0Cd57y7bNFu0f+6+BZ41675FQ+FPgq6Ne/zHgXWP0uw2wDthsYNkZPBAKbwNOH/Wa84DD2ulPAce204tpQmLTdr7759m+//FjvP+TgV/SBmG77FDgonG+D+8GLhiYPxD4KTBv4Pte7fdrfft+GfDddvqZwO3AvsCGo9p129XO782DQ+HPBuafD/xoVB9vBz453dtEEwqrBtZt2r72Ke38xfxmKLwYuAHYE9hg2H9js/XLc4iz12HA+VV1Rzt/Rrvs+IH5byR5I/BHwBVVdXO7blvgC0n+a6C/+2n+yEesHploTwe8H/gTmr3YkdctoNn7nD/YftT0tsDzk9w9sGw+cPoY2/Q04K6qum9g2c00e7kjff1JkgMH1m8IXDSwzR8GjgNeCXyxqn42xvtsDZw7xvJt2/5uTTKybINR2zPafw5M/xy4o6ruH5gHeGy7beP2neRJwAk0p1Q2b9fdBVBVq5K8heYf9rOTnAccXVW3TFDXoNE/j6eN+nnMA7463dvUum1koqp+1rZ77FhFVtWFSU4ETgK2SfIF4H9U1U8m3UJNmaEwCyXZBHgFMC/JyB/dxsCWSZ5TVVdV1bVJbgb2p/kHecZAF6tp9h6/Pkbf27WTg8PrvhI4mGZP9Yc0Rwh30Rzir6XZu19Es5cHD/wTH3m
vS6pqvyls2q3A45NsNhAM2wzUsprmSOGIcV5/PrAgyW40e6xHjdNuNfCMcZb/kuboa90U6l0fk/X9dzTbuWtV3ZnkZcCJIyur6gzgjCSPoznS+QDwGpojwE0H+nnKGH0P/ixX05z+WfxwNmagr4fz/XrQEM5VdQJwQhuSZwN/BXgr8TTyQvPs9DKaPfudgd3ar51o9vZeO9DuDODNNOfmzxlYfjLw/iTbAiRZmOTgCd5vc5o//jtp/gH97ciKdg/y88C7k2yaZMdRNXwZ2CHJa5Js2H7tnmSn0W/SHsmsBN6TZKMkv0tz+mLEp4ADk7w0ybwkj2kvrC5qX78O+CzwQeAJwAXjbM8ngNcn2SfNRfetkuxYVbfSBMuHkzyuXfeMJC+c4HszJVPoe3Oa0zR3J9mK5p8hAEmeleTFaW47/gXN3vrInvuVwAFJnpDkKTTXiybybeAn7cXnTdrv4y5Jdu9hmybznzTXNABofy+en2RDmrD7BQ9sp6aJoTA7HUZzDvhHVXXbyBfNnuWr8sCth2fSnGO+cOA0E8BHgRXA+Unupbno/PwJ3u80mtM4/wFc27YfdCTN0cNtNKeFzqQJEarqXuAlwCE0Fxlvo9nLHe+5ile2tfwYeFf73rR9raY5YnkHzRHKapp/noO/52fQHNGcM97ea1V9m+bC9/E0F5wvoTmtAk2gbdRu5100IfPUcWpdXxP1/R6ai/D3AP+HJmhHbAz8Pc0F+ttoLui+o113OnAVzRHc+cBnJiqgDfEDaXYkbmr7/DjNz2+6t2kyHwVe3t6ZdALwOOB/t/3cTLMT8qGHWJfGMXKVX5oxST5AczHxsEkbS5pRHimod2meQ9i1vc98D+Bw4AvDrkvSg/UWCklOaR8y+d4k7XZPcn+Sl/dVi4Zuc5rTHffRXBz8MPCloVYkaUy9nT5K8gKaC2OnVdWYTx22tzJeQHPB6JSq+mwvxUiSpqS3I4WqupTmYuBE3gR8jubBG0nSkA3tOYX2tro/pHlKccLb3ZIsA5YBbLbZZr+944479l+gJM0i3/nOd+6oqoWTtRvmw2sfAd5WVfcPPO04pqpaDiwHWLJkSa1cuXLC9pKk39Q+rDqpYYbCEuCsNhAW0Dxgs66qvjjEmiRpThtaKFTV9iPTSU4FvmwgSNJw9RYKSUaell2QZvz2d9EMjkVVndzX+0qSHrreQqGqDl2Ptq/rqw5J0tT5RLMkqWMoSJI6hoIkqWMoSJI6hoIkqePHcUpzwPEX3DB5o0eBo/bbYdglzHoeKUiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKnT24fsJDkF+APg9qraZYz1rwLe1s7+FHhjVV3VVz2a22bLh8yAHzSjfvV5pHAqsHSC9TcBL6yqXYH3Ast7rEWSNAW9HSlU1aVJtptg/TcGZi8DFvVViyRpah4p1xQOB74y3soky5KsTLJy7dq1M1iWJM0tQw+FJC+iCYW3jdemqpZX1ZKqWrJw4cKZK06S5pjeTh9NRZJdgY8D+1fVncOsRZI0xCOFJNsAnwdeU1Wz59YQSXoU6/OW1DOBvYEFSdYA7wI2BKiqk4FjgScC/5QEYF1VLemrHknS5Pq8++jQSda/AXhDX+8vSVp/Q7/QLEl65DAUJEkdQ0GS1DEUJEkdQ0GS1DEUJEmdoT7RLEl9c9j09eORgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSp01soJDklye1JvjfO+iQ5IcmqJFcneV5ftUiSpqbPI4VTgaUTrN8fWNx+LQP+V4+1SJKmoLdQqKpLgR9P0ORg4LRqXAZsmeSpfdUjSZrcMK8pbAWsHphf0y57kCTLkqxMsnLt2rUzUpwkzUXDDIWMsazGalhVy6tqSVUtWbhwYc9
lSdLcNcxQWANsPTC/CLhlSLVIkhhuKKwAXtvehbQncE9V3TrEeiRpzpvfV8dJzgT2BhYkWQO8C9gQoKpOBs4FDgBWAT8DXt9XLZKkqektFKrq0EnWF/AXfb2/JGn9+USzJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKnT23MKj0THX3DDsEuYNkftt8OwS5A0C3mkIEnqGAqSpI6hIEnqGAqSpI6hIEnqGAqSpI6hIEnqGAqSpI6hIEnqGAqSpI6hIEnqGAqSpI6hIEnqGAqSpE6voZBkaZLrk6xKcswY67dJclGS7ya5OskBfdYjSZpYb6GQZB5wErA/sDNwaJKdRzX7G+DsqnoucAjwT33VI0maXJ9HCnsAq6rqxqr6FXAWcPCoNgU8rp3eArilx3okSZPoMxS2AlYPzK9plw16N/DqJGuAc4E3jdVRkmVJViZZuXbt2j5qlSTRbyhkjGU1av5Q4NSqWgQcAJye5EE1VdXyqlpSVUsWLlzYQ6mSJOg3FNYAWw/ML+LBp4cOB84GqKpvAo8BFvRYkyRpAn2GwuXA4iTbJ9mI5kLyilFtfgTsA5BkJ5pQ8PyQJA1Jb6FQVeuAI4HzgOto7jK6JslxSQ5qm70VOCLJVcCZwOuqavQpJknSDJk/WYMkTwb+FnhaVe3f3lb6O1X1icleW1Xn0lxAHlx27MD0tcBe6121JKkXUzlSOJVmb/9p7fwNwFv6KkiSNDyTHikAC6rq7CRvh+a0UJL7e65LPTj+ghuGXcK0OWq/HYZdgjQrTeVI4b4kT6S9nTTJnsA9vVYlSRqKqRwpHE1z19AzknwdWAi8vNeqJElDMWkoVNUVSV4IPIvmgbTrq+rXvVcmSZpxU7n76LWjFj0vCVV1Wk81SZKGZCqnj3YfmH4MzcNmVwCGgiTNMlM5ffQbg9Ql2QI4vbeKJElD81CeaP4ZsHi6C5EkDd9Urin8Cw+MbroBzQfmnN1nUZKk4ZjKNYUPDUyvA26uqjU91SNJGqKpXFO4ZCYKkSQN37ihkOReHvyhONA8q1BV9bgx1kmSHsXGDYWq2nwmC5EkDd9UrikAkORJNM8pAFBVP+qlIknS0Ex6S2qSg5L8O3ATcAnwQ+ArPdclSRqCqTyn8F5gT+CGqtqe5onmr/dalSRpKKYSCr+uqjuBDZJsUFUXAbv1XJckaQimck3h7iSPBb4KfDrJ7TTPK0iSZplxjxSSnJhkL+BgmqEt3gL8X+AHwIEzU54kaSZNdKTw7zRPMz8V+AxwZlX984xUJUkainGPFKrqo1X1O8ALgR8Dn0xyXZJ3JvEDciVpFpr0QnNV3VxVH6iq5wKvBP4IuG4qnSdZmuT6JKuSHDNOm1ckuTbJNUnOWK/qJUnTaiqjpG4ILAUOobkd9RLgPVN43TzgJGA/YA1weZIVVXXtQJvFwNuBvarqrvYBOUnSkEw09tF+wKHA7wPfBs4CllXVfVPsew9gVVXd2PZ3Fs1F62sH2hwBnFRVdwFU1e3rvQWSpGkz0emjdwDfBHaqqgOr6tPrEQgAWwGrB+bXtMsG7QDskOTrSS5LsnSsjpIsS7Iyycq1a9euRwmSpPUx0YB4L3qYfWesbsd4/8XA3sAi4KtJdqmqu0fVshxYDrBkyZKxRm6VJE2Dh/JxnFO1Bth6YH4RcMsYbb5UVb+uqpuA6/GjPiVpaPoMhcuBxUm2T7IRzYXqFaPafBF4EUCSBTSnk27ssSZJ0gR6C4WqWgccCZxHcwvr2VV1TZLjkhzUNjsPuDPJtcBFwF+14yxJkoZgyp+n8FBU1bnAuaOWHTswXcDR7Zckacj6PH0kSXqUMRQkSR1DQZLUMRQkSR1DQZLUMRQkSR1DQZLUMRQkSR1DQZLUMRQkSR1DQZLUMRQkSR1DQZLUMRQkSR1DQZLUMRQkSR1DQZLUMRQkSR1DQZLUMRQkSR1DQZLUMRQkSZ1eQyHJ0iTXJ1mV5JgJ2r08SSVZ0mc9kqSJ9RYKSeY
BJwH7AzsDhybZeYx2mwNvBr7VVy2SpKnp80hhD2BVVd1YVb8CzgIOHqPde4F/AH7RYy2SpCnoMxS2AlYPzK9pl3WSPBfYuqq+3GMdkqQp6jMUMsay6lYmGwDHA2+dtKNkWZKVSVauXbt2GkuUJA3qMxTWAFsPzC8CbhmY3xzYBbg4yQ+BPYEVY11srqrlVbWkqpYsXLiwx5IlaW7rMxQuBxYn2T7JRsAhwIqRlVV1T1UtqKrtqmo74DLgoKpa2WNNkqQJ9BYKVbUOOBI4D7gOOLuqrklyXJKD+npfSdJDN7/PzqvqXODcUcuOHaft3n3WIkmanE80S5I6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6vYZCkqVJrk+yKskxY6w/Osm1Sa5O8v+SbNtnPZKkifUWCknmAScB+wM7A4cm2XlUs+8CS6pqV+CzwD/0VY8kaXJ9HinsAayqqhur6lfAWcDBgw2q6qKq+lk7exmwqMd6JEmT6DMUtgJWD8yvaZeN53DgK2OtSLIsycokK9euXTuNJUqSBvUZChljWY3ZMHk1sAT44Fjrq2p5VS2pqiULFy6cxhIlSYPm99j3GmDrgflFwC2jGyXZF/hr4IVV9cse65EkTaLPI4XLgcVJtk+yEXAIsGKwQZLnAh8DDqqq23usRZI0Bb2FQlWtA44EzgOuA86uqmuSHJfkoLbZB4HHAuckuTLJinG6kyTNgD5PH1FV5wLnjlp27MD0vn2+vyRp/fhEsySpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjq9hkKSpUmuT7IqyTFjrN84yWfa9d9Ksl2f9UiSJtZbKCSZB5wE7A/sDByaZOdRzQ4H7qqqZwLHAx/oqx5J0uT6PFLYA1hVVTdW1a+As4CDR7U5GPjndvqzwD5J0mNNkqQJzO+x762A1QPza4Dnj9emqtYluQd4InDHYKMky4Bl7exPk1zfS8XTZwGjtmG6Hd1n5w9P79sOc3v73fZHpEfD7/22U2nUZyiMtcdfD6ENVbUcWD4dRc2EJCurasmw6xiGubztMLe3322fHdve5+mjNcDWA/OLgFvGa5NkPrAF8OMea5IkTaDPULgcWJxk+yQbAYcAK0a1WQEc1k6/HLiwqh50pCBJmhm9nT5qrxEcCZwHzANOqaprkhwHrKyqFcAngNOTrKI5Qjikr3pm2KPmVFcP5vK2w9zefrd9Fog75pKkET7RLEnqGAqSpI6hMI0mG9ZjNktySpLbk3xv2LXMtCRbJ7koyXVJrknyl8OuaSYleUySbye5qt3+9wy7ppmWZF6S7yb58rBrebgMhWkyxWE9ZrNTgaXDLmJI1gFvraqdgD2Bv5hjP/tfAi+uqucAuwFLk+w55Jpm2l8C1w27iOlgKEyfqQzrMWtV1aXM0WdMqurWqrqinb6X5p/DVsOtauZU46ft7Ibt15y5gyXJIuD3gY8Pu5bpYChMn7GG9Zgz/xjUaEf6fS7wreFWMrPa0ydXArcDF1TVXNr+jwD/E/ivYRcyHQyF6TOlITs0eyV5LPA54C1V9ZNh1zOTqur+qtqNZuSCPZLsMuyaZkKSPwBur6rvDLuW6WIoTJ+pDOuhWSrJhjSB8Omq+vyw6xmWqrobuJi5c31pL+CgJD+kOWX84iSfGm5JD4+hMH2mMqyHZqF2uPdPANdV1T8Ou56ZlmRhki3b6U2AfYHvD7eqmVFVb6+qRVW1Hc3f/IVV9eohl/WwGArTpKrWASPDelwHnF1V1wy3qpmT5Ezgm8CzkqxJcviwa5pBewGvodlLvLL9OmDYRc2gpwI
XJbmaZufogqp61N+aOVc5zIUkqeORgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhozklyf3vb6DXtyJ5HJ3lIfwtJliQ5YRpqOjXJy9vpi9vRdq9O8v0kJ448ByD1zVDQXPTzqtqtqp4N7AccALzroXRUVSur6s3TWl3jVVW1K7ArzSikX+rhPaQHMRQ0p1XV7cAy4Mg05iX5YJLL2z31PwdI8pnBB9LaPfs/TrL3yBj6SR6b5JNJ/q197R+3y1+S5JtJrkhyTjtG0lTr+xXNYGvbJHnOdG67NBZDQXNeVd1I87fwJOBw4J6q2h3YHTgiyfY049r8KUA7jMk+wLmjunpn+9rfavfyL0yyAPgbYN+qeh6wEjh6Peu7H7gK2PEhbqI0ZfOHXYD0CDEyyu1LgF1Hzu8DWwCLga8AJyTZmGawt0ur6ufNsEedfWnGvwGgqu5qR9HcGfh623YjmuFAHmp9Uq8MBc15SZ4O3E/zWQAB3lRV543R7mLgpTRHDGeO1RUPHi49NGMBHfow6psH/Baz5JO99Mjm6SPNaUkWAicDJ1YzENh5wBvbobBJskOSzdrmZwGvB36vbTfa+TSDIo70/XjgMmCvJM9sl22aZIf1qG9D4O+A1VV19fpun7S+DAXNRZuM3JIK/CvNP/ORD5v/OHAtcEWS7wEf44Ej6vOBFwD/2l4AHu19wOOTfC/JVcCLqmot8DrgzHYU0cuY2rWBT7ftvwdsxhz6aFcNl6OkSpI6HilIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjr/H6366ib3DbP3AAAAAElFTkSuQmCC\n",
1032 | "text/plain": [
1033 | ""
1034 | ]
1035 | },
1036 | "metadata": {
1037 | "needs_background": "light"
1038 | },
1039 | "output_type": "display_data"
1040 | }
1041 | ],
1042 | "source": [
1043 | "plt.bar('dev_id', 'avg(value)', data=df2, align='center', alpha=0.5)\n",
1044 | "plt.title('Average device measurements')\n",
1045 | "plt.xlabel('Device ID')\n",
1046 | "plt.ylabel('Value')\n",
1047 | "plt.show()"
1048 | ]
1049 | },
1050 | {
1051 | "cell_type": "markdown",
1052 | "metadata": {},
1053 | "source": []
1054 | }
1055 | ],
1056 | "metadata": {
1057 | "kernelspec": {
1058 | "display_name": "Python 3",
1059 | "language": "python",
1060 | "name": "python3"
1061 | },
1062 | "language_info": {
1063 | "codemirror_mode": {
1064 | "name": "ipython",
1065 | "version": 3
1066 | },
1067 | "file_extension": ".py",
1068 | "mimetype": "text/x-python",
1069 | "name": "python",
1070 | "nbconvert_exporter": "python",
1071 | "pygments_lexer": "ipython3",
1072 | "version": "3.7.1"
1073 | }
1074 | },
1075 | "nbformat": 4,
1076 | "nbformat_minor": 2
1077 | }
1078 |
--------------------------------------------------------------------------------